diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fb4735fd73b0945fa5cd4d4cb972068892352173..99ca040e3f900e399b59e2b5233fa9169ac61ccb 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -486,8 +486,8 @@ replaced by copy-on-write) part of the underlying shmem object out on swap. "SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this does not take into account swapped out page of underlying shmem objects. "Locked" indicates whether the mapping is locked in memory or not. -"THPeligible" indicates whether the mapping is eligible for THP pages - 1 if -true, 0 otherwise. +"THPeligible" indicates whether the mapping is eligible for allocating THP +pages - 1 if true, 0 otherwise. It just shows the current status. "VmFlags" field deserves a separate description. This member represents the kernel flags associated with the particular virtual memory area in two letter encoded diff --git a/Documentation/vm/memory-model.rst b/Documentation/vm/memory-model.rst index 382f72ace1fcab0d1826b3cdc25f093772899d64..58a12376b7df71dd1d8c534f6b468792a94e6b79 100644 --- a/Documentation/vm/memory-model.rst +++ b/Documentation/vm/memory-model.rst @@ -181,3 +181,43 @@ that is eventually passed to vmemmap_populate() through a long chain of function calls. The vmemmap_populate() implementation may use the `vmem_altmap` along with :c:func:`altmap_alloc_block_buf` helper to allocate memory map on the persistent memory device. + +ZONE_DEVICE +=========== +The `ZONE_DEVICE` facility builds upon `SPARSEMEM_VMEMMAP` to offer +`struct page` `mem_map` services for device driver identified physical +address ranges. The "device" aspect of `ZONE_DEVICE` relates to the fact +that the page objects for these address ranges are never marked online, +and that a reference must be taken against the device, not just the page +to keep the memory pinned for active use. `ZONE_DEVICE`, via +:c:func:`devm_memremap_pages`, performs just enough memory hotplug to +turn on :c:func:`pfn_to_page`, :c:func:`page_to_pfn`, and +:c:func:`get_user_pages` service for the given range of pfns. Since the +page reference count never drops below 1 the page is never tracked as +free memory and the page's `struct list_head lru` space is repurposed +for back referencing to the host device / driver that mapped the memory. + +While `SPARSEMEM` presents memory as a collection of sections, +optionally collected into memory blocks, `ZONE_DEVICE` users have a need +for smaller granularity of populating the `mem_map`. Given that +`ZONE_DEVICE` memory is never marked online it is subsequently never +subject to its memory ranges being exposed through the sysfs memory +hotplug api on memory block boundaries. The implementation relies on +this lack of user-api constraint to allow sub-section sized memory +ranges to be specified to :c:func:`arch_add_memory`, the top-half of +memory hotplug. Sub-section support allows for 2MB as the cross-arch +common alignment granularity for :c:func:`devm_memremap_pages`. + +The users of `ZONE_DEVICE` are: + +* pmem: Map platform persistent memory to be used as a direct-I/O target + via DAX mappings. + +* hmm: Extend `ZONE_DEVICE` with `->page_fault()` and `->page_free()` + event callbacks to allow a device-driver to coordinate memory management + events related to device-memory, typically GPU memory. See + Documentation/vm/hmm.rst. + +* p2pdma: Create `struct page` objects to allow peer devices in a + PCI/-E topology to coordinate direct-DMA operations between themselves, + i.e. bypass host memory. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e661469cabdd4e5e1235bf2e817198ebab25c244..750a69dde39bdd76a425b4f7e4d59a093936c5b6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1074,4 +1074,21 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, restrictions); } +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + /* + * FIXME: Cleanup page tables (also in arch_add_memory() in case + * adding fails). Until then, this function should only be used + * during memory hotplug (adding memory), not for memory + * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be + * unlocked yet. + */ + zone = page_zone(pfn_to_page(start_pfn)); + __remove_pages(zone, start_pfn, nr_pages, altmap); +} #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d28e29103bdbab519293b97a455286edac33d6e7..aae75fd7b810309edf5f3960b0dd0d163e39b9c6 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -681,7 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -693,4 +692,3 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 26a8da3723bb07aff23aae446ddabf17946841b1..9259337d737466b247d7c5241fff78f2a7856047 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -125,7 +125,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, restrictions); } -#ifdef CONFIG_MEMORY_HOTREMOVE void __ref arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -151,7 +150,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, pr_warn("Hash collision while resizing HPT\n"); } #endif -#endif /* CONFIG_MEMORY_HOTPLUG */ #ifndef CONFIG_NEED_MULTIPLE_NODES void __init mem_topology_setup(void) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 5e53c1392d3b03ebc91bfc291532073ccb96b348..eb2e75dac369a4fc6e658b5088d408a375df1428 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -70,23 +70,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { - u64 end_pfn = start_pfn + nr_pages - 1; + const unsigned long start = PFN_PHYS(start_pfn); + const unsigned long size = PFN_PHYS(nr_pages); - if (walk_memory_range(start_pfn, end_pfn, NULL, - check_memblock_online)) + if (walk_memory_blocks(start, size, NULL, check_memblock_online)) return false; - walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, + change_memblock_state); if (offline_pages(start_pfn, nr_pages)) { - walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_ONLINE, + change_memblock_state); return false; } - walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_OFFLINE, + change_memblock_state); return true; @@ -242,9 +242,8 @@ static int memtrace_online(void) */ if (!memhp_auto_online) { lock_device_hotplug(); - walk_memory_range(PFN_DOWN(ent->start), - PFN_UP(ent->start + ent->size - 1), - NULL, online_mem_block); + walk_memory_blocks(ent->start, ent->size, NULL, + online_mem_block); unlock_device_hotplug(); } diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index e4b58240ec5370791d1174132fbd164cff63c26b..aa738cad1338095a804f6f564c706cf23eb0905a 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -220,15 +220,13 @@ appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int timer_active = appldata_timer_active; - int zero = 0; - int one = 1; int rc; struct ctl_table ctl_entry = { .procname = ctl->procname, .data = &timer_active, .maxlen = sizeof(int), - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }; rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); @@ -255,13 +253,12 @@ appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int interval = appldata_interval; - int one = 1; int rc; struct ctl_table ctl_entry = { .procname = ctl->procname, .data = &interval, .maxlen = sizeof(int), - .extra1 = &one, + .extra1 = SYSCTL_ONE, }; rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos); @@ -289,13 +286,11 @@ appldata_generic_handler(struct ctl_table *ctl, int write, struct list_head *lh; int rc, found; int active; - int zero = 0; - int one = 1; struct ctl_table ctl_entry = { .data = &active, .maxlen = sizeof(int), - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }; found = 0; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 8964a3f60aadbb1fdc5d7c884acd44d565c72265..2db6fb405a9a508bade68470b52b2ccd0ed58236 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -587,15 +587,13 @@ static int topology_ctl_handler(struct ctl_table *ctl, int write, { int enabled = topology_is_enabled(); int new_mode; - int zero = 0; - int one = 1; int rc; struct ctl_table ctl_entry = { .procname = ctl->procname, .data = &enabled, .maxlen = sizeof(int), - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }; rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f0bee6af39601717f9fce160d800643ddb4cf941..4e5bbe328594dcfaa5beecffea1a308fd48d1baa 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -273,6 +273,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long size_pages = PFN_DOWN(size); int rc; + if (WARN_ON_ONCE(restrictions->altmap)) + return -EINVAL; + rc = vmem_add_mapping(start, size); if (rc) return rc; @@ -283,16 +286,15 @@ int arch_add_memory(int nid, u64 start, u64 size, return rc; } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { - /* - * There is no hardware or firmware interface which could trigger a - * hot memory remove on s390. So there is nothing that needs to be - * implemented. - */ - BUG(); + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + __remove_pages(zone, start_pfn, nr_pages, altmap); + vmem_remove_mapping(start, size); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 13c6a6bb5fd9b7250a932ddf71ac1929b9791ce0..dfdbaa50946ebbdfb2c1853b738eaf72ae0d734e 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -429,7 +429,6 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -440,5 +439,4 @@ void arch_remove_memory(int nid, u64 start, u64 size, zone = page_zone(pfn_to_page(start_pfn)); __remove_pages(zone, start_pfn, nr_pages, altmap); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990ed1d752b24c641329671587313ba0..240626e7f55aa1e6ac2cd0b74138a4a97296cb7d 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -65,9 +65,6 @@ subsys_initcall(sysenter_setup); /* Register vsyscall32 into the ABI table */ #include <linux/sysctl.h> -static const int zero; -static const int one = 1; - static struct ctl_table abi_table2[] = { { .procname = "vsyscall32", @@ -75,8 +72,8 @@ static struct ctl_table abi_table2[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (int *)&zero, - .extra2 = (int *)&one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, {} }; diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 838cf8a32c49728f12e1a06d248909e23f82703a..1cb3ca9bba49b4fbff33f9f1926e572b9d69f991 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -65,8 +65,6 @@ static int sched_itmt_update_handler(struct ctl_table *table, int write, return ret; } -static unsigned int zero; -static unsigned int one = 1; static struct ctl_table itmt_kern_table[] = { { .procname = "sched_itmt_enabled", @@ -74,8 +72,8 @@ static struct ctl_table itmt_kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_itmt_update_handler, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, {} }; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f265a431617956010ebf566e5228199b67fd0a5b..4068abb9427f9e9731316484c62a11fa4c72af63 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -860,7 +860,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, restrictions); } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -872,7 +871,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif int kernel_set_to_readonly __read_mostly; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 08bbf648827b24e211f2a7a3e08ebe29fdccbb5a..a6b5c653727badfd0823a6035eb5c1f9e0f5eb3c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1198,7 +1198,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, remove_pagetable(start, end, false, altmap); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void __meminit kernel_physical_mapping_remove(unsigned long start, unsigned long end) { @@ -1219,7 +1218,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_vsyscall; @@ -1520,7 +1518,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, { int err; - if (boot_cpu_has(X86_FEATURE_PSE)) + if (end - start < PAGES_PER_SECTION * sizeof(struct page)) + err = vmemmap_populate_basepages(start, end, node); + else if (boot_cpu_has(X86_FEATURE_PSE)) err = vmemmap_populate_hugepages(start, end, node, altmap); else if (altmap) { pr_err_once("%s: no cpu support for altmap allocations\n", diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index db013dc21c0240b897f7214dbb0a0e8390285118..e294f44a785046656379b574893061c6377b5beb 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -155,16 +155,6 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device) return 0; } -static unsigned long acpi_meminfo_start_pfn(struct acpi_memory_info *info) -{ - return PFN_DOWN(info->start_addr); -} - -static unsigned long acpi_meminfo_end_pfn(struct acpi_memory_info *info) -{ - return PFN_UP(info->start_addr + info->length-1); -} - static int acpi_bind_memblk(struct memory_block *mem, void *arg) { return acpi_bind_one(&mem->dev, arg); @@ -173,9 +163,8 @@ static int acpi_bind_memblk(struct memory_block *mem, void *arg) static int acpi_bind_memory_blocks(struct acpi_memory_info *info, struct acpi_device *adev) { - return walk_memory_range(acpi_meminfo_start_pfn(info), - acpi_meminfo_end_pfn(info), adev, - acpi_bind_memblk); + return walk_memory_blocks(info->start_addr, info->length, adev, + acpi_bind_memblk); } static int acpi_unbind_memblk(struct memory_block *mem, void *arg) @@ -186,8 +175,8 @@ static int acpi_unbind_memblk(struct memory_block *mem, void *arg) static void acpi_unbind_memory_blocks(struct acpi_memory_info *info) { - walk_memory_range(acpi_meminfo_start_pfn(info), - acpi_meminfo_end_pfn(info), NULL, acpi_unbind_memblk); + walk_memory_blocks(info->start_addr, info->length, NULL, + acpi_unbind_memblk); } static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c index 776dd69cf5bead65906c90f878ebbabbeae58cad..ba9d30b28edc5a9d8a439f0a6a07610a5590cbb4 100644 --- a/drivers/base/firmware_loader/fallback_table.c +++ b/drivers/base/firmware_loader/fallback_table.c @@ -16,9 +16,6 @@ * firmware fallback configuration table */ -static unsigned int zero; -static unsigned int one = 1; - struct firmware_fallback_config fw_fallback_config = { .force_sysfs_fallback = IS_ENABLED(CONFIG_FW_LOADER_USER_HELPER_FALLBACK), .loading_timeout = 60, @@ -26,6 +23,7 @@ struct firmware_fallback_config fw_fallback_config = { }; EXPORT_SYMBOL_GPL(fw_fallback_config); +#ifdef CONFIG_SYSCTL struct ctl_table firmware_config_table[] = { { .procname = "force_sysfs_fallback", @@ -33,8 +31,8 @@ struct ctl_table firmware_config_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ignore_sysfs_fallback", @@ -42,9 +40,10 @@ struct ctl_table firmware_config_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; EXPORT_SYMBOL_GPL(firmware_config_table); +#endif diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f180427e48f4e59341795c3090d51d84a5f83dc8..20c39d1bcef8ae52ae187c98fef208247af35f6f 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -34,11 +34,21 @@ static DEFINE_MUTEX(mem_sysfs_mutex); static int sections_per_block; -static inline int base_memory_block_id(int section_nr) +static inline unsigned long base_memory_block_id(unsigned long section_nr) { return section_nr / sections_per_block; } +static inline unsigned long pfn_to_block_id(unsigned long pfn) +{ + return base_memory_block_id(pfn_to_section_nr(pfn)); +} + +static inline unsigned long phys_to_block_id(unsigned long phys) +{ + return pfn_to_block_id(PFN_DOWN(phys)); +} + static int memory_subsys_online(struct device *dev); static int memory_subsys_offline(struct device *dev); @@ -126,9 +136,9 @@ static ssize_t phys_index_show(struct device *dev, static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long i, pfn; - int ret = 1; struct memory_block *mem = to_memory_block(dev); + unsigned long pfn; + int ret = 1, i; if (mem->state != MEM_ONLINE) goto out; @@ -578,23 +588,13 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) return 0; } -/* - * A reference for the returned object is held and the reference for the - * hinted object is released. - */ -struct memory_block *find_memory_block_hinted(struct mem_section *section, - struct memory_block *hint) +/* A reference for the returned memory block device is acquired. */ +static struct memory_block *find_memory_block_by_id(unsigned long block_id) { - int block_id = base_memory_block_id(__section_nr(section)); - struct device *hintdev = hint ? &hint->dev : NULL; struct device *dev; - dev = subsys_find_device_by_id(&memory_subsys, block_id, hintdev); - if (hint) - put_device(&hint->dev); - if (!dev) - return NULL; - return to_memory_block(dev); + dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); + return dev ? to_memory_block(dev) : NULL; } /* @@ -607,7 +607,9 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section, */ struct memory_block *find_memory_block(struct mem_section *section) { - return find_memory_block_hinted(section, NULL); + unsigned long block_id = base_memory_block_id(__section_nr(section)); + + return find_memory_block_by_id(block_id); } static struct attribute *memory_memblk_attrs[] = { @@ -652,20 +654,22 @@ int register_memory(struct memory_block *memory) } static int init_memory_block(struct memory_block **memory, - struct mem_section *section, unsigned long state) + unsigned long block_id, unsigned long state) { struct memory_block *mem; unsigned long start_pfn; - int scn_nr; int ret = 0; + mem = find_memory_block_by_id(block_id); + if (mem) { + put_device(&mem->dev); + return -EEXIST; + } mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; - scn_nr = __section_nr(section); - mem->start_section_nr = - base_memory_block_id(scn_nr) * sections_per_block; + mem->start_section_nr = block_id * sections_per_block; mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; mem->state = state; start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -677,97 +681,101 @@ static int init_memory_block(struct memory_block **memory, return ret; } -static int add_memory_block(int base_section_nr) +static int add_memory_block(unsigned long base_section_nr) { + int ret, section_count = 0; struct memory_block *mem; - int i, ret, section_count = 0, section_nr; + unsigned long nr; - for (i = base_section_nr; - i < base_section_nr + sections_per_block; - i++) { - if (!present_section_nr(i)) - continue; - if (section_count == 0) - section_nr = i; - section_count++; - } + for (nr = base_section_nr; nr < base_section_nr + sections_per_block; + nr++) + if (present_section_nr(nr)) + section_count++; if (section_count == 0) return 0; - ret = init_memory_block(&mem, __nr_to_section(section_nr), MEM_ONLINE); + ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), + MEM_ONLINE); if (ret) return ret; mem->section_count = section_count; return 0; } +static void unregister_memory(struct memory_block *memory) +{ + if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) + return; + + /* drop the ref. we got via find_memory_block() */ + put_device(&memory->dev); + device_unregister(&memory->dev); +} + /* - * need an interface for the VM to add new memory regions, - * but without onlining it. + * Create memory block devices for the given memory area. Start and size + * have to be aligned to memory block granularity. Memory block devices + * will be initialized as offline. */ -int hotplug_memory_register(int nid, struct mem_section *section) +int create_memory_block_devices(unsigned long start, unsigned long size) { - int ret = 0; + const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); + unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; + unsigned long block_id; + int ret = 0; - mutex_lock(&mem_sysfs_mutex); + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes()))) + return -EINVAL; - mem = find_memory_block(section); - if (mem) { - mem->section_count++; - put_device(&mem->dev); - } else { - ret = init_memory_block(&mem, section, MEM_OFFLINE); + mutex_lock(&mem_sysfs_mutex); + for (block_id = start_block_id; block_id != end_block_id; block_id++) { + ret = init_memory_block(&mem, block_id, MEM_OFFLINE); if (ret) - goto out; - mem->section_count++; + break; + mem->section_count = sections_per_block; + } + if (ret) { + end_block_id = block_id; + for (block_id = start_block_id; block_id != end_block_id; + block_id++) { + mem = find_memory_block_by_id(block_id); + mem->section_count = 0; + unregister_memory(mem); + } } - -out: mutex_unlock(&mem_sysfs_mutex); return ret; } -#ifdef CONFIG_MEMORY_HOTREMOVE -static void -unregister_memory(struct memory_block *memory) -{ - BUG_ON(memory->dev.bus != &memory_subsys); - - /* drop the ref. we got via find_memory_block() */ - put_device(&memory->dev); - device_unregister(&memory->dev); -} - -void unregister_memory_section(struct mem_section *section) +/* + * Remove memory block devices for the given memory area. Start and size + * have to be aligned to memory block granularity. Memory block devices + * have to be offline. + */ +void remove_memory_block_devices(unsigned long start, unsigned long size) { + const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); + const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; + unsigned long block_id; - if (WARN_ON_ONCE(!present_section(section))) + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes()))) return; mutex_lock(&mem_sysfs_mutex); - - /* - * Some users of the memory hotplug do not want/need memblock to - * track all sections. Skip over those. - */ - mem = find_memory_block(section); - if (!mem) - goto out_unlock; - - unregister_mem_sect_under_nodes(mem, __section_nr(section)); - - mem->section_count--; - if (mem->section_count == 0) + for (block_id = start_block_id; block_id != end_block_id; block_id++) { + mem = find_memory_block_by_id(block_id); + if (WARN_ON_ONCE(!mem)) + continue; + mem->section_count = 0; + unregister_memory_block_under_nodes(mem); unregister_memory(mem); - else - put_device(&mem->dev); - -out_unlock: + } mutex_unlock(&mem_sysfs_mutex); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ /* return true if the memory block is offlined, otherwise, return false */ bool is_memblock_offlined(struct memory_block *mem) @@ -804,10 +812,9 @@ static const struct attribute_group *memory_root_attr_groups[] = { */ int __init memory_dev_init(void) { - unsigned int i; int ret; int err; - unsigned long block_sz; + unsigned long block_sz, nr; ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); if (ret) @@ -821,9 +828,9 @@ int __init memory_dev_init(void) * during boot and have been initialized */ mutex_lock(&mem_sysfs_mutex); - for (i = 0; i <= __highest_present_section_nr; - i += sections_per_block) { - err = add_memory_block(i); + for (nr = 0; nr <= __highest_present_section_nr; + nr += sections_per_block) { + err = add_memory_block(nr); if (!ret) ret = err; } @@ -834,3 +841,43 @@ int __init memory_dev_init(void) printk(KERN_ERR "%s() failed: %d\n", __func__, ret); return ret; } + +/** + * walk_memory_blocks - walk through all present memory blocks overlapped + * by the range [start, start + size) + * + * @start: start address of the memory range + * @size: size of the memory range + * @arg: argument passed to func + * @func: callback for each memory section walked + * + * This function walks through all present memory blocks overlapped by the + * range [start, start + size), calling func on each memory block. + * + * In case func() returns an error, walking is aborted and the error is + * returned. + */ +int walk_memory_blocks(unsigned long start, unsigned long size, + void *arg, walk_memory_blocks_func_t func) +{ + const unsigned long start_block_id = phys_to_block_id(start); + const unsigned long end_block_id = phys_to_block_id(start + size - 1); + struct memory_block *mem; + unsigned long block_id; + int ret = 0; + + if (!size) + return 0; + + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { + mem = find_memory_block_by_id(block_id); + if (!mem) + continue; + + ret = func(mem, arg); + put_device(&mem->dev); + if (ret) + break; + } + return ret; +} diff --git a/drivers/base/node.c b/drivers/base/node.c index aa878fbcf7051452c1f12cffdca121abe741c70c..75b7e6f6535b5ac54351a1fd7a4c772ccada5e25 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -753,7 +753,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn) } /* register memory section under specified node if it spans that node */ -int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) +static int register_mem_sect_under_node(struct memory_block *mem_blk, + void *arg) { int ret, nid = *(int *)arg; unsigned long pfn, sect_start_pfn, sect_end_pfn; @@ -802,23 +803,18 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) return 0; } -/* unregister memory section under all nodes that it spans */ -int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) +/* + * Unregister memory block device under all nodes that it spans. + * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes). + */ +void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; + static nodemask_t unlinked_nodes; - if (!mem_blk) { - NODEMASK_FREE(unlinked_nodes); - return -EFAULT; - } - if (!unlinked_nodes) - return -ENOMEM; - nodes_clear(*unlinked_nodes); - - sect_start_pfn = section_nr_to_pfn(phys_index); - sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + nodes_clear(unlinked_nodes); + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int nid; @@ -827,21 +823,20 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, continue; if (!node_online(nid)) continue; - if (node_test_and_set(nid, *unlinked_nodes)) + if (node_test_and_set(nid, unlinked_nodes)) continue; sysfs_remove_link(&node_devices[nid]->dev.kobj, kobject_name(&mem_blk->dev.kobj)); sysfs_remove_link(&mem_blk->dev.kobj, kobject_name(&node_devices[nid]->dev.kobj)); } - NODEMASK_FREE(unlinked_nodes); - return 0; } int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) { - return walk_memory_range(start_pfn, end_pfn, (void *)&nid, - register_mem_sect_under_node); + return walk_memory_blocks(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), (void *)&nid, + register_mem_sect_under_node); } #ifdef CONFIG_HUGETLBFS diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 3d8162d28730d1173b36662b26b90b17c8c66e8c..a700c5c3d1673516355bdbac0468adca9cfc664e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -274,8 +274,6 @@ #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */ -static int zero; -static int one = 1; static u32 i915_perf_stream_paranoid = true; /* The maximum exponent the hardware accepts is 63 (essentially it selects one @@ -3366,8 +3364,8 @@ static struct ctl_table oa_table[] = { .maxlen = sizeof(i915_perf_stream_paranoid), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "oa_max_sample_rate", @@ -3375,7 +3373,7 @@ static struct ctl_table oa_table[] = { .maxlen = sizeof(i915_oa_max_sample_rate), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &oa_sample_rate_hard_limit, }, {} diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 894da5abdc550461fe0f6d9031ac1def01c81731..ebd35fc3529064e7ee0522273b5d4a1302561926 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1197,8 +1197,6 @@ static struct kmsg_dumper hv_kmsg_dumper = { }; static struct ctl_table_header *hv_ctl_table_hdr; -static int zero; -static int one = 1; /* * sysctl option to allow the user to control whether kmsg data should be @@ -1211,8 +1209,8 @@ static struct ctl_table hv_ctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, {} }; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 49fc18ee0565ec6cc4e72f343fcfa58ee1cdaca4..6d22b0f83b3b083901f58fddcfad41f7a604ba7c 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -118,7 +118,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) nvdimm_bus_unlock(&ndns->dev); if (!dax_dev) return -ENOMEM; - pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); + pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn->pfn_sb = pfn_sb; rc = nd_pfn_validate(nd_pfn, DAX_SIG); dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>"); diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index f58b849e455b1ad0bee4f08c8459049644aeab51..7381673b7b70b9804b00ed982ddac2153d44f305 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -28,22 +28,9 @@ struct nd_pfn_sb { __le32 end_trunc; /* minor-version-2 record the base alignment of the mapping */ __le32 align; + /* minor-version-3 guarantee the padding and flags are zero */ u8 padding[4000]; __le64 checksum; }; -#ifdef CONFIG_SPARSEMEM -#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x) -#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x) -#else -/* - * In this case ZONE_DEVICE=n and we will disable 'pfn' device support, - * but we still want pmem to compile. - */ -#define PFN_SECTION_ALIGN_DOWN(x) (x) -#define PFN_SECTION_ALIGN_UP(x) (x) -#endif - -#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x))) -#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x))) #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 55fb6b7433ed2e7360c0f15502feab885c48fae1..df2bdbd22450488af1c9c4d68a4a2569c64f143e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -412,6 +412,15 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) return 0; } +/** + * nd_pfn_validate - read and validate info-block + * @nd_pfn: fsdax namespace runtime state / properties + * @sig: 'devdax' or 'fsdax' signature + * + * Upon return the info-block buffer contents (->pfn_sb) are + * indeterminate when validation fails, and a coherent info-block + * otherwise. + */ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; @@ -557,7 +566,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return -ENOMEM; - pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); + pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; rc = nd_pfn_validate(nd_pfn, PFN_SIG); @@ -578,14 +587,14 @@ static u32 info_block_reserve(void) } /* - * We hotplug memory at section granularity, pad the reserved area from - * the previous section base to the namespace base address. + * We hotplug memory at sub-section granularity, pad the reserved area + * from the previous section base to the namespace base address. */ static unsigned long init_altmap_base(resource_size_t base) { unsigned long base_pfn = PHYS_PFN(base); - return PFN_SECTION_ALIGN_DOWN(base_pfn); + return SUBSECTION_ALIGN_DOWN(base_pfn); } static unsigned long init_altmap_reserve(resource_size_t base) @@ -593,7 +602,7 @@ static unsigned long init_altmap_reserve(resource_size_t base) unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; unsigned long base_pfn = PHYS_PFN(base); - reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); + reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn); return reserve; } @@ -623,8 +632,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) - - offset) / PAGE_SIZE); + nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset)); if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", @@ -640,60 +648,20 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) return 0; } -static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) -{ - return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), - ALIGN_DOWN(phys, nd_pfn->align)); -} - -/* - * Check if pmem collides with 'System RAM', or other regions when - * section aligned. Trim it accordingly. - */ -static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc) -{ - struct nd_namespace_common *ndns = nd_pfn->ndns; - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); - const resource_size_t start = nsio->res.start; - const resource_size_t end = start + resource_size(&nsio->res); - resource_size_t adjust, size; - - *start_pad = 0; - *end_trunc = 0; - - adjust = start - PHYS_SECTION_ALIGN_DOWN(start); - size = resource_size(&nsio->res) + adjust; - if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED - || nd_region_conflict(nd_region, start - adjust, size)) - *start_pad = PHYS_SECTION_ALIGN_UP(start) - start; - - /* Now check that end of the range does not collide. */ - adjust = PHYS_SECTION_ALIGN_UP(end) - end; - size = resource_size(&nsio->res) + adjust; - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED - || !IS_ALIGNED(end, nd_pfn->align) - || nd_region_conflict(nd_region, start, size)) - *end_trunc = end - phys_pmem_align_down(nd_pfn, end); -} - static int nd_pfn_init(struct nd_pfn *nd_pfn) { struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - u32 start_pad, end_trunc, reserve = info_block_reserve(); resource_size_t start, size; struct nd_region *nd_region; + unsigned long npfns, align; struct nd_pfn_sb *pfn_sb; - unsigned long npfns; phys_addr_t offset; const char *sig; u64 checksum; int rc; - pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); + pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); if (!pfn_sb) return -ENOMEM; @@ -702,11 +670,14 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) sig = DAX_SIG; else sig = PFN_SIG; + rc = nd_pfn_validate(nd_pfn, sig); if (rc != -ENODEV) return rc; /* no info block, do init */; + memset(pfn_sb, 0, sizeof(*pfn_sb)); + nd_region = to_nd_region(nd_pfn->dev.parent); if (nd_region->ro) { dev_info(&nd_pfn->dev, @@ -715,43 +686,35 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) return -ENXIO; } - memset(pfn_sb, 0, sizeof(*pfn_sb)); - - trim_pfn_device(nd_pfn, &start_pad, &end_trunc); - if (start_pad + end_trunc) - dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", - dev_name(&ndns->dev), start_pad + end_trunc); - /* * Note, we use 64 here for the standard size of struct page, * debugging options may cause it to be larger in which case the * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ - start = nsio->res.start + start_pad; + start = nsio->res.start; size = resource_size(&nsio->res); - npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve) - / PAGE_SIZE); + npfns = PHYS_PFN(size - SZ_8K); + align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT)); if (nd_pfn->mode == PFN_MODE_PMEM) { /* * The altmap should be padded out to the block size used * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. */ - offset = ALIGN(start + reserve + 64 * npfns, - max(nd_pfn->align, PMD_SIZE)) - start; + offset = ALIGN(start + SZ_8K + 64 * npfns, align) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(start + reserve, nd_pfn->align) - start; + offset = ALIGN(start + SZ_8K, align) - start; else return -ENXIO; - if (offset + start_pad + end_trunc >= size) { + if (offset >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (size - offset - start_pad - end_trunc) / SZ_4K; + npfns = PHYS_PFN(size - offset); pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -759,9 +722,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); - pfn_sb->version_minor = cpu_to_le16(2); - pfn_sb->start_pad = cpu_to_le32(start_pad); - pfn_sb->end_trunc = cpu_to_le32(end_trunc); + pfn_sb->version_minor = cpu_to_le16(3); pfn_sb->align = cpu_to_le32(nd_pfn->align); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index fde8d4073e74103acd33a5c84db79fa36857fa3b..4c49f53afa3ed3ca66c04ecd3f60a07d1dde0274 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -855,8 +855,6 @@ void tty_ldisc_deinit(struct tty_struct *tty) tty->ldisc = NULL; } -static int zero; -static int one = 1; static struct ctl_table tty_table[] = { { .procname = "ldisc_autoload", @@ -864,8 +862,8 @@ static struct ctl_table tty_table[] = { .maxlen = sizeof(tty_ldisc_autoload), .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d37dd5bb7a8fb73fb014466ff8a647ddf17fd8c0..37a36c6b9f937399dadd98f948d1b3c22e7cebfd 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -77,9 +77,6 @@ static int xen_hotplug_unpopulated; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -static int zero; -static int one = 1; - static struct ctl_table balloon_table[] = { { .procname = "hotplug_unpopulated", @@ -87,8 +84,8 @@ static struct ctl_table balloon_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/fs/aio.c b/fs/aio.c index 8327db0c8e08392bef1a602b648f5d40c8572a83..8b3aa2739906589c8e4c5a8627378b883ac5f430 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -425,7 +425,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, BUG_ON(PageWriteback(old)); get_page(new); - rc = migrate_page_move_mapping(mapping, new, old, mode, 1); + rc = migrate_page_move_mapping(mapping, new, old, 1); if (rc != MIGRATEPAGE_SUCCESS) { put_page(new); goto out_unlock; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0f9c073d78d5ed2fababaeea22054f6de2c2b833..d7f1f5011fac3997f55a36dd32409c5e8326a38e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -291,7 +291,7 @@ static LIST_HEAD(tfile_check_list); #include <linux/sysctl.h> -static long zero; +static long long_zero; static long long_max = LONG_MAX; struct ctl_table epoll_table[] = { @@ -301,7 +301,7 @@ struct ctl_table epoll_table[] = { .maxlen = sizeof(max_user_watches), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &long_zero, .extra2 = &long_max, }, { } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4eb2f39201401e228a5698329200cf008cd8c485..abbf14e9bd725fd5ee975a9b59b680f31a08df34 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2919,7 +2919,7 @@ int f2fs_migrate_page(struct address_space *mapping, /* one extra reference was held for atomic_write page */ extra_count = atomic_written ? 1 : 0; rc = migrate_page_move_mapping(mapping, newpage, - page, mode, extra_count); + page, extra_count); if (rc != MIGRATEPAGE_SUCCESS) { if (atomic_written) mutex_unlock(&fi->inmem_lock); diff --git a/fs/iomap.c b/fs/iomap.c index 217c3e5a13d6c435b02284c95cb67a623dd7a6a3..3e7f16a0565341551f3c95cf0e09f88d264bb5d0 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -566,7 +566,7 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, { int ret; - ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + ret = migrate_page_move_mapping(mapping, newpage, page, 0); if (ret != MIGRATEPAGE_SUCCESS) return ret; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index cce8de32779fb4d125d347058b98278eb22ac175..0b815178126e51571550e893cc268b9141a156bd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -45,8 +45,6 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #include <linux/sysctl.h> -static int zero; - struct ctl_table inotify_table[] = { { .procname = "max_user_instances", @@ -54,7 +52,7 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "max_user_watches", @@ -62,7 +60,7 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "max_queued_events", @@ -70,7 +68,7 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = SYSCTL_ZERO }, { } }; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 36ad1b0d625975383f48d469a0e278efb96c1dfb..d80989b6c3448db78c06e94c5e2323f261b7f4e4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -22,6 +22,10 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* shared constants to be used in various sysctls */ +const int sysctl_vals[] = { 0, 1, INT_MAX }; +EXPORT_SYMBOL(sysctl_vals); + /* Support for permanently empty directories */ struct ctl_table sysctl_mount_point[] = { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 818cedbed95fec30f236a914deac04b6c85eb394..731642e0f5a0fb42c13873bf3abfc21cedb67dd9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -832,7 +832,8 @@ static int show_smap(struct seq_file *m, void *v) __show_smap(m, &mss, false); - seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); + seq_printf(m, "THPeligible: %d\n", + transparent_hugepage_enabled(vma)); if (arch_pkeys_enabled()) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index e5f8de62fc51d5836f62da9a4a02a22f98f1552d..400970d740bb8b628b979f515711df963bf85448 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1470,7 +1470,7 @@ static int ubifs_migrate_page(struct address_space *mapping, { int rc; - rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7cd5c150c21d9ebed9aeba2c0306a7d88b4233a7..45ede62aa85be47370719dae4004c88ec1796e06 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -121,6 +121,23 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) bool transparent_hugepage_enabled(struct vm_area_struct *vma); +#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) + +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, + unsigned long haddr) +{ + /* Don't have to check pgoff for anonymous vma */ + if (!vma_is_anonymous(vma)) { + if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != + (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) + return false; + } + + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return false; + return true; +} + #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) @@ -271,6 +288,12 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) return false; } +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, + unsigned long haddr) +{ + return false; +} + static inline void prep_transhuge_page(struct page *page) {} #define transparent_hugepage_flags 0UL diff --git a/include/linux/memory.h b/include/linux/memory.h index e1dc1bb2b787756d833266697161edfb69ceb7cb..02e633f3ede05e83bde2ed499462661f08b9c7b5 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -111,16 +111,15 @@ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -int hotplug_memory_register(int nid, struct mem_section *section); -#ifdef CONFIG_MEMORY_HOTREMOVE -extern void unregister_memory_section(struct mem_section *); -#endif +int create_memory_block_devices(unsigned long start, unsigned long size); +void remove_memory_block_devices(unsigned long start, unsigned long size); extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); -extern struct memory_block *find_memory_block_hinted(struct mem_section *, - struct memory_block *); extern struct memory_block *find_memory_block(struct mem_section *); +typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); +extern int walk_memory_blocks(unsigned long start, unsigned long size, + void *arg, walk_memory_blocks_func_t func); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 988fde33cd7f549db917a36ffdd7110c3f61e476..f46ea71b4ffde05acb3a7e7bce3a77ba74b61939 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -123,20 +123,10 @@ static inline bool movable_node_is_enabled(void) return movable_node_enabled; } -#ifdef CONFIG_MEMORY_HOTREMOVE extern void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap); extern void __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -#endif /* CONFIG_MEMORY_HOTREMOVE */ - -/* - * Do we want sysfs memblock files created. This will allow userspace to online - * and offline memory explicitly. Lack of this bit means that the caller has to - * call move_pfn_range_to_zone to finish the initialization. - */ - -#define MHP_MEMBLOCK_API (1<<0) /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, @@ -350,17 +340,16 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern void __ref free_area_init_core_hotplug(int nid); -extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, - void *arg, int (*func)(struct memory_block *, void *)); extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern bool is_memblock_offlined(struct memory_block *mem); -extern int sparse_add_one_section(int nid, unsigned long start_pfn, - struct vmem_altmap *altmap); -extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +extern int sparse_add_section(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); +extern void sparse_remove_section(struct mem_section *ms, + unsigned long pfn, unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e13d9bf2f9a5ac29ad685ce4ebf18e59d62b854e..7f04754c7f2b8e2df34d6f7cc76ed8421976fdde 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -77,8 +77,7 @@ extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode, - int extra_count); + struct page *newpage, struct page *page, int extra_count); #else static inline void putback_movable_pages(struct list_head *l) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index bd6512559bed3f828cf8a766a0a08755376b174a..0334ca97c584d8dc85e2038399bf1dfb2432c557 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -541,6 +541,23 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } +static inline bool vma_is_anonymous(struct vm_area_struct *vma) +{ + return !vma->vm_ops; +} + +#ifdef CONFIG_SHMEM +/* + * The vma_is_shmem is not inline because it is used only by slow + * paths in userfault. + */ +bool vma_is_shmem(struct vm_area_struct *vma); +#else +static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } +#endif + +int vma_is_stack_for_current(struct vm_area_struct *vma); + /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } @@ -1620,23 +1637,6 @@ int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -static inline bool vma_is_anonymous(struct vm_area_struct *vma) -{ - return !vma->vm_ops; -} - -#ifdef CONFIG_SHMEM -/* - * The vma_is_shmem is not inline because it is used only by slow - * paths in userfault. - */ -bool vma_is_shmem(struct vm_area_struct *vma); -#else -static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } -#endif - -int vma_is_stack_for_current(struct vm_area_struct *vma); - extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, @@ -2767,8 +2767,8 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) #endif void *sparse_buffer_alloc(unsigned long size); -struct page *sparse_mem_map_populate(unsigned long pnum, int nid, - struct vmem_altmap *altmap); +struct page * __populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 70394cabaf4e0fe6efc3c18701796bb04c71168b..d77d717c620cbe3362b5787086b546f08b938f46 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -855,18 +855,6 @@ static inline int local_memory_node(int node_id) { return node_id; }; */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) -#ifdef CONFIG_ZONE_DEVICE -static inline bool is_dev_zone(const struct zone *zone) -{ - return zone_idx(zone) == ZONE_DEVICE; -} -#else -static inline bool is_dev_zone(const struct zone *zone) -{ - return false; -} -#endif - /* * Returns true if a zone has pages managed by the buddy allocator. * All the reclaim decisions have to use this function rather than @@ -1160,6 +1148,29 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) +#define SUBSECTION_SHIFT 21 + +#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) +#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) +#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) + +#if SUBSECTION_SHIFT > SECTION_SIZE_BITS +#error Subsection size exceeds section size +#else +#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) +#endif + +#define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION) +#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) + +struct mem_section_usage { + DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); + /* See declaration of similar field in struct zone */ + unsigned long pageblock_flags[0]; +}; + +void subsection_map_init(unsigned long pfn, unsigned long nr_pages); + struct page; struct page_ext; struct mem_section { @@ -1177,8 +1188,7 @@ struct mem_section { */ unsigned long section_mem_map; - /* See declaration of similar field in struct zone */ - unsigned long *pageblock_flags; + struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use @@ -1209,6 +1219,11 @@ extern struct mem_section **mem_section; extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif +static inline unsigned long *section_to_usemap(struct mem_section *ms) +{ + return ms->usage->pageblock_flags; +} + static inline struct mem_section *__nr_to_section(unsigned long nr) { #ifdef CONFIG_SPARSEMEM_EXTREME @@ -1219,8 +1234,8 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return NULL; return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } -extern int __section_nr(struct mem_section* ms); -extern unsigned long usemap_size(void); +extern unsigned long __section_nr(struct mem_section *ms); +extern size_t mem_section_usage_size(void); /* * We use the lower bits of the mem_map pointer to store @@ -1238,7 +1253,8 @@ extern unsigned long usemap_size(void); #define SECTION_MARKED_PRESENT (1UL<<0) #define SECTION_HAS_MEM_MAP (1UL<<1) #define SECTION_IS_ONLINE (1UL<<2) -#define SECTION_MAP_LAST_BIT (1UL<<3) +#define SECTION_IS_EARLY (1UL<<3) +#define SECTION_MAP_LAST_BIT (1UL<<4) #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) #define SECTION_NID_SHIFT 3 @@ -1264,6 +1280,11 @@ static inline int valid_section(struct mem_section *section) return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } +static inline int early_section(struct mem_section *section) +{ + return (section && (section->section_mem_map & SECTION_IS_EARLY)); +} + static inline int valid_section_nr(unsigned long nr) { return valid_section(__nr_to_section(nr)); @@ -1291,14 +1312,42 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) return __nr_to_section(pfn_to_section_nr(pfn)); } -extern int __highest_present_section_nr; +extern unsigned long __highest_present_section_nr; + +static inline int subsection_map_index(unsigned long pfn) +{ + return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; +} + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) +{ + int idx = subsection_map_index(pfn); + + return test_bit(idx, ms->usage->subsection_map); +} +#else +static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) +{ + return 1; +} +#endif #ifndef CONFIG_HAVE_ARCH_PFN_VALID static inline int pfn_valid(unsigned long pfn) { + struct mem_section *ms; + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); + ms = __nr_to_section(pfn_to_section_nr(pfn)); + if (!valid_section(ms)) + return 0; + /* + * Traditionally early sections always returned pfn_valid() for + * the entire section-sized span. + */ + return early_section(ms) || pfn_section_valid(ms, pfn); } #endif @@ -1330,6 +1379,7 @@ void sparse_init(void); #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) #define pfn_present pfn_valid +#define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ /* diff --git a/include/linux/node.h b/include/linux/node.h index 1a557c589ecb9772a63214e2c5e2c39f1211a95e..4866f32a02d8d4a00d68cdd43fdf3839819c68e7 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -137,10 +137,7 @@ static inline int register_one_node(int nid) extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); -extern int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg); -extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index); +extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, @@ -171,15 +168,8 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { return 0; } -static inline int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg) +static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - return 0; -} -static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) -{ - return 0; } static inline void register_hugetlbfs_with_node(node_registration_func_t reg, diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index aadd310769d080f1d45db14b2a72fc8ad36f1196..6df477329b76e57dbcb4d1cf9e78ce2e97c0bb69 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -37,6 +37,13 @@ struct ctl_table_root; struct ctl_table_header; struct ctl_dir; +/* Keep the same order as in fs/proc/proc_sysctl.c */ +#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) +#define SYSCTL_ONE ((void *)&sysctl_vals[1]) +#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) + +extern const int sysctl_vals[]; + typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 2b14ce8ce73fa08f694e166c1ceb98ebeb946d44..affd66537e87550a3fb88c144957506f9140dcd1 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -113,9 +113,6 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, #define proc_ipc_sem_dointvec NULL #endif -static int zero; -static int one = 1; -static int int_max = INT_MAX; int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; @@ -141,7 +138,7 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.shm_ctlmni), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, { @@ -150,8 +147,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax_orphans, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "msgmax", @@ -159,8 +156,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.msg_ctlmax), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "msgmni", @@ -168,7 +165,7 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.msg_ctlmni), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, { @@ -177,8 +174,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_ipc_auto_msgmni, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "msgmnb", @@ -186,8 +183,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sem", @@ -203,8 +200,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "msg_next_id", @@ -212,8 +209,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "shm_next_id", @@ -221,8 +218,8 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, #endif {} diff --git a/kernel/memremap.c b/kernel/memremap.c index bea6f887adad100f7419efcb71c589578c5f6a9e..6ee03a816d67a889729e834b8c614675ac4cf94c 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -54,7 +54,7 @@ static void pgmap_array_delete(struct resource *res) static unsigned long pfn_first(struct dev_pagemap *pgmap) { - return (pgmap->res.start >> PAGE_SHIFT) + + return PHYS_PFN(pgmap->res.start) + vmem_altmap_offset(pgmap_altmap(pgmap)); } @@ -98,7 +98,6 @@ static void devm_memremap_pages_release(void *data) struct dev_pagemap *pgmap = data; struct device *dev = pgmap->dev; struct resource *res = &pgmap->res; - resource_size_t align_start, align_size; unsigned long pfn; int nid; @@ -108,25 +107,21 @@ static void devm_memremap_pages_release(void *data) dev_pagemap_cleanup(pgmap); /* pages are dead and unused, undo the arch mapping */ - align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - - align_start; - - nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT)); + nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start))); mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - pfn = align_start >> PAGE_SHIFT; + pfn = PHYS_PFN(res->start); __remove_pages(page_zone(pfn_to_page(pfn)), pfn, - align_size >> PAGE_SHIFT, NULL); + PHYS_PFN(resource_size(res)), NULL); } else { - arch_remove_memory(nid, align_start, align_size, + arch_remove_memory(nid, res->start, resource_size(res), pgmap_altmap(pgmap)); - kasan_remove_zero_shadow(__va(align_start), align_size); + kasan_remove_zero_shadow(__va(res->start), resource_size(res)); } mem_hotplug_done(); - untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res)); pgmap_array_delete(res); dev_WARN_ONCE(dev, pgmap->altmap.alloc, "%s: failed to free all reserved pages\n", __func__); @@ -162,13 +157,12 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref) */ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { - resource_size_t align_start, align_size, align_end; struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; struct mhp_restrictions restrictions = { /* * We do not want any optional features only our own memmap - */ + */ .altmap = pgmap_altmap(pgmap), }; pgprot_t pgprot = PAGE_KERNEL; @@ -225,12 +219,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) return ERR_PTR(error); } - align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - - align_start; - align_end = align_start + align_size - 1; - - conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_start), NULL); + conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL); if (conflict_pgmap) { dev_WARN(dev, "Conflicting mapping in same section\n"); put_dev_pagemap(conflict_pgmap); @@ -238,7 +227,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) goto err_array; } - conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL); + conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL); if (conflict_pgmap) { dev_WARN(dev, "Conflicting mapping in same section\n"); put_dev_pagemap(conflict_pgmap); @@ -246,7 +235,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) goto err_array; } - is_ram = region_intersects(align_start, align_size, + is_ram = region_intersects(res->start, resource_size(res), IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); if (is_ram != REGION_DISJOINT) { @@ -267,8 +256,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0, - align_size); + error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0, + resource_size(res)); if (error) goto err_pfn_remap; @@ -286,16 +275,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) * arch_add_memory(). */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { - error = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, &restrictions); + error = add_pages(nid, PHYS_PFN(res->start), + PHYS_PFN(resource_size(res)), &restrictions); } else { - error = kasan_add_zero_shadow(__va(align_start), align_size); + error = kasan_add_zero_shadow(__va(res->start), resource_size(res)); if (error) { mem_hotplug_done(); goto err_kasan; } - error = arch_add_memory(nid, align_start, align_size, + error = arch_add_memory(nid, res->start, resource_size(res), &restrictions); } @@ -303,8 +292,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) struct zone *zone; zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; - move_pfn_range_to_zone(zone, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, pgmap_altmap(pgmap)); + move_pfn_range_to_zone(zone, PHYS_PFN(res->start), + PHYS_PFN(resource_size(res)), restrictions.altmap); } mem_hotplug_done(); @@ -316,8 +305,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) * to allow us to do the work while not holding the hotplug lock. */ memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], - align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, pgmap); + PHYS_PFN(res->start), + PHYS_PFN(resource_size(res)), pgmap); percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap)); error = devm_add_action_or_reset(dev, devm_memremap_pages_release, @@ -328,9 +317,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) return __va(res->start); err_add_memory: - kasan_remove_zero_shadow(__va(align_start), align_size); + kasan_remove_zero_shadow(__va(res->start), resource_size(res)); err_kasan: - untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res)); err_pfn_remap: pgmap_array_delete(res); err_array: diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6d726cef241cc82a252090ad70bde2c4af0cff94..a6a79f85c81a8972d1c9c3674aea9aeff691f192 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -291,14 +291,13 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, } extern int pid_max; -static int zero = 0; static struct ctl_table pid_ns_ctl_table[] = { { .procname = "ns_last_pid", .maxlen = sizeof(int), .mode = 0666, /* permissions are checked in the handler */ .proc_handler = pid_ns_ctl_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &pid_max, }, { } diff --git a/kernel/resource.c b/kernel/resource.c index d22423e85cf801ba358581f08390041c0929a2cf..7ea4306503c5787e840b87fe7b1fad0d3c64b8a8 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -326,7 +326,7 @@ EXPORT_SYMBOL(release_resource); * * If a resource is found, returns 0 and @*res is overwritten with the part * of the resource that's within [@start..@end]; if none is found, returns - * -1 or -EINVAL for other invalid parameters. + * -ENODEV. Returns -EINVAL for invalid parameters. * * This function walks the whole tree and not just first level children * unless @first_lvl is true. @@ -342,6 +342,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end, unsigned long flags, unsigned long desc, bool first_lvl, struct resource *res) { + bool siblings_only = true; struct resource *p; if (!res) @@ -352,29 +353,43 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end, read_lock(&resource_lock); - for (p = iomem_resource.child; p; p = next_resource(p, first_lvl)) { - if ((p->flags & flags) != flags) - continue; - if ((desc != IORES_DESC_NONE) && (desc != p->desc)) - continue; + for (p = iomem_resource.child; p; p = next_resource(p, siblings_only)) { + /* If we passed the resource we are looking for, stop */ if (p->start > end) { p = NULL; break; } - if ((p->end >= start) && (p->start <= end)) - break; + + /* Skip until we find a range that matches what we look for */ + if (p->end < start) + continue; + + /* + * Now that we found a range that matches what we look for, + * check the flags and the descriptor. If we were not asked to + * use only the first level, start looking at children as well. + */ + siblings_only = first_lvl; + + if ((p->flags & flags) != flags) + continue; + if ((desc != IORES_DESC_NONE) && (desc != p->desc)) + continue; + + /* Found a match, break */ + break; + } + + if (p) { + /* copy data */ + res->start = max(start, p->start); + res->end = min(end, p->end); + res->flags = p->flags; + res->desc = p->desc; } read_unlock(&resource_lock); - if (!p) - return -1; - - /* copy data */ - res->start = max(start, p->start); - res->end = min(end, p->end); - res->flags = p->flags; - res->desc = p->desc; - return 0; + return p ? 0 : -ENODEV; } static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 43186ccfa139662417e5bbaba78915cb6b121b6c..078950d9605ba2f6c109c5837f52b1a6cc07d186 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -125,9 +125,6 @@ static int sixty = 60; #endif static int __maybe_unused neg_one = -1; - -static int zero; -static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; static unsigned long zero_ul; @@ -385,8 +382,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sysctl_schedstats, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SCHEDSTATS */ #endif /* CONFIG_SMP */ @@ -418,7 +415,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, { .procname = "numa_balancing", @@ -426,8 +423,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sysctl_numa_balancing, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ @@ -475,8 +472,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_CFS_BANDWIDTH @@ -486,7 +483,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, #endif #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) @@ -496,8 +493,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_energy_aware_handler, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_PROVE_LOCKING @@ -562,7 +559,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &neg_one, - .extra2 = &one, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_LATENCYTOP @@ -696,8 +693,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, /* only handle a transition from default "0" to "1" */ .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &one, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_MODULES @@ -715,8 +712,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, /* only handle a transition from default "0" to "1" */ .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &one, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_UEVENT_HELPER @@ -875,7 +872,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &ten_thousand, }, { @@ -891,8 +888,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax_sysadmin, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "kptr_restrict", @@ -900,7 +897,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax_sysadmin, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, #endif @@ -925,8 +922,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_watchdog, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "watchdog_thresh", @@ -934,7 +931,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_watchdog_thresh, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &sixty, }, { @@ -943,8 +940,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = NMI_WATCHDOG_SYSCTL_PERM, .proc_handler = proc_nmi_watchdog, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "watchdog_cpumask", @@ -960,8 +957,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_soft_watchdog, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "softlockup_panic", @@ -969,8 +966,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_SMP { @@ -979,8 +976,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SMP */ #endif @@ -991,8 +988,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_SMP { @@ -1001,8 +998,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SMP */ #endif @@ -1115,8 +1112,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "hung_task_check_count", @@ -1124,7 +1121,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "hung_task_timeout_secs", @@ -1201,7 +1198,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, .proc_handler = perf_proc_update_handler, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, { .procname = "perf_cpu_time_max_percent", @@ -1209,7 +1206,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(sysctl_perf_cpu_time_max_percent), .mode = 0644, .proc_handler = perf_cpu_time_max_percent_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, { @@ -1218,7 +1215,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(sysctl_perf_event_max_stack), .mode = 0644, .proc_handler = perf_event_max_stack_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &six_hundred_forty_kb, }, { @@ -1227,7 +1224,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack), .mode = 0644, .proc_handler = perf_event_max_stack_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_thousand, }, #endif @@ -1237,8 +1234,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) { @@ -1247,8 +1244,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = timer_migration_handler, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_BPF_SYSCALL @@ -1259,8 +1256,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, /* only handle a transition from default "0" to "1" */ .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &one, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE, }, { .procname = "bpf_stats_enabled", @@ -1277,8 +1274,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(sysctl_panic_on_rcu_stall), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE @@ -1288,8 +1285,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = stack_erasing_sysctl, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { } @@ -1302,7 +1299,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_overcommit_memory), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -1311,7 +1308,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_panic_on_oom), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -1348,7 +1345,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "dirty_background_ratio", @@ -1356,7 +1353,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(dirty_background_ratio), .mode = 0644, .proc_handler = dirty_background_ratio_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, { @@ -1373,7 +1370,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(vm_dirty_ratio), .mode = 0644, .proc_handler = dirty_ratio_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, { @@ -1397,7 +1394,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(dirty_expire_interval), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "dirtytime_expire_seconds", @@ -1405,7 +1402,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(dirtytime_expire_interval), .mode = 0644, .proc_handler = dirtytime_interval_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "swappiness", @@ -1413,7 +1410,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(vm_swappiness), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, #ifdef CONFIG_HUGETLB_PAGE @@ -1438,8 +1435,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = sysctl_vm_numa_stat_handler, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { @@ -1470,7 +1467,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = drop_caches_sysctl_handler, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &four, }, #ifdef CONFIG_COMPACTION @@ -1496,8 +1493,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_COMPACTION */ @@ -1507,7 +1504,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(min_free_kbytes), .mode = 0644, .proc_handler = min_free_kbytes_sysctl_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "watermark_boost_factor", @@ -1515,7 +1512,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(watermark_boost_factor), .mode = 0644, .proc_handler = watermark_boost_factor_sysctl_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "watermark_scale_factor", @@ -1523,7 +1520,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(watermark_scale_factor), .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &one_thousand, }, { @@ -1532,7 +1529,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(percpu_pagelist_fraction), .mode = 0644, .proc_handler = percpu_pagelist_fraction_sysctl_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #ifdef CONFIG_MMU { @@ -1541,7 +1538,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_max_map_count), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #else { @@ -1550,7 +1547,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_nr_trim_pages), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #endif { @@ -1566,7 +1563,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(block_dump), .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "vfs_cache_pressure", @@ -1574,7 +1571,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_vfs_cache_pressure), .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT { @@ -1583,7 +1580,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_legacy_va_layout), .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #endif #ifdef CONFIG_NUMA @@ -1593,7 +1590,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(node_reclaim_mode), .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "min_unmapped_ratio", @@ -1601,7 +1598,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_min_unmapped_ratio), .mode = 0644, .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, { @@ -1610,7 +1607,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_min_slab_ratio), .mode = 0644, .proc_handler = sysctl_min_slab_ratio_sysctl_handler, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_hundred, }, #endif @@ -1661,7 +1658,7 @@ static struct ctl_table vm_table[] = { #endif .mode = 0644, .proc_handler = proc_dointvec, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #endif #ifdef CONFIG_HIGHMEM @@ -1671,8 +1668,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(vm_highmem_is_dirtyable), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_MEMORY_FAILURE @@ -1682,8 +1679,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_memory_failure_early_kill), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "memory_failure_recovery", @@ -1691,8 +1688,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_memory_failure_recovery), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { @@ -1738,8 +1735,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(sysctl_unprivileged_userfaultfd), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { } @@ -1875,8 +1872,8 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "protected_hardlinks", @@ -1884,8 +1881,8 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "protected_fifos", @@ -1893,7 +1890,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -1902,7 +1899,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -1911,7 +1908,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax_coredump, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) @@ -1948,7 +1945,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, { } }; @@ -1970,8 +1967,8 @@ static struct ctl_table debug_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_kprobes_optimization_handler, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { } @@ -3395,8 +3392,8 @@ int proc_do_static_key(struct ctl_table *table, int write, .data = &val, .maxlen = sizeof(val), .mode = table->mode, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }; if (write && !capable(CAP_SYS_ADMIN)) diff --git a/kernel/ucount.c b/kernel/ucount.c index feb128c7b5d96954068f1ca9bb37cf5c96df4d79..a53cc2b4179c089c2f2e8e80de74741b9b3c7c26 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -52,16 +52,14 @@ static struct ctl_table_root set_root = { .permissions = set_permissions, }; -static int zero = 0; -static int int_max = INT_MAX; #define UCOUNT_ENTRY(name) \ { \ .procname = name, \ .maxlen = sizeof(int), \ .mode = 0644, \ .proc_handler = proc_dointvec_minmax, \ - .extra1 = &zero, \ - .extra2 = &int_max, \ + .extra1 = SYSCTL_ZERO, \ + .extra2 = SYSCTL_INT_MAX, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 885642c82aaa2f1423ad2fd74b59239d5effee94..1334ede667a82aa1d5701526836c7f54487e9996 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -63,10 +63,15 @@ struct page *huge_zero_page __read_mostly; bool transparent_hugepage_enabled(struct vm_area_struct *vma) { + /* The addr is used to check if the vma size fits */ + unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE; + + if (!transhuge_vma_suitable(vma, addr)) + return false; if (vma_is_anonymous(vma)) return __transparent_hugepage_enabled(vma); - if (vma_is_shmem(vma) && shmem_huge_enabled(vma)) - return __transparent_hugepage_enabled(vma); + if (vma_is_shmem(vma)) + return shmem_huge_enabled(vma); return false; } @@ -689,7 +694,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) struct page *page; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + if (!transhuge_vma_suitable(vma, haddr)) return VM_FAULT_FALLBACK; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; diff --git a/mm/memory.c b/mm/memory.c index 89325f9c617381da6ec2a4a295c0da6c8af0dcb9..e2bb51b6242e5814896b79cce637008f73a073bb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3162,19 +3162,6 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf) } #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE - -#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long haddr) -{ - if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != - (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) - return false; - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) - return false; - return true; -} - static void deposit_prealloc_pte(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4ebe696138e8597d6a4538ef82c412028e8ec006..2a9bbddb0e55427125e90dde4795b6460cdbdae1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -166,9 +166,10 @@ void put_page_bootmem(struct page *page) #ifndef CONFIG_SPARSEMEM_VMEMMAP static void register_page_bootmem_info_section(unsigned long start_pfn) { - unsigned long *usemap, mapsize, section_nr, i; + unsigned long mapsize, section_nr, i; struct mem_section *ms; struct page *page, *memmap; + struct mem_section_usage *usage; section_nr = pfn_to_section_nr(start_pfn); ms = __nr_to_section(section_nr); @@ -188,10 +189,10 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, SECTION_INFO); - usemap = ms->pageblock_flags; - page = virt_to_page(usemap); + usage = ms->usage; + page = virt_to_page(usage); - mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT; for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, MIX_SECTION_INFO); @@ -200,9 +201,10 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) #else /* CONFIG_SPARSEMEM_VMEMMAP */ static void register_page_bootmem_info_section(unsigned long start_pfn) { - unsigned long *usemap, mapsize, section_nr, i; + unsigned long mapsize, section_nr, i; struct mem_section *ms; struct page *page, *memmap; + struct mem_section_usage *usage; section_nr = pfn_to_section_nr(start_pfn); ms = __nr_to_section(section_nr); @@ -211,10 +213,10 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); - usemap = ms->pageblock_flags; - page = virt_to_page(usemap); + usage = ms->usage; + page = virt_to_page(usage); - mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT; for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, MIX_SECTION_INFO); @@ -250,22 +252,31 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) } #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ -static int __meminit __add_section(int nid, unsigned long phys_start_pfn, - struct vmem_altmap *altmap, bool want_memblock) +static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, + const char *reason) { - int ret; - - if (pfn_valid(phys_start_pfn)) - return -EEXIST; - - ret = sparse_add_one_section(nid, phys_start_pfn, altmap); - if (ret < 0) - return ret; - - if (!want_memblock) - return 0; - - return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn)); + /* + * Disallow all operations smaller than a sub-section and only + * allow operations smaller than a section for + * SPARSEMEM_VMEMMAP. Note that check_hotplug_memory_range() + * enforces a larger memory_block_size_bytes() granularity for + * memory that will be marked online, so this check should only + * fire for direct arch_{add,remove}_memory() users outside of + * add_memory_resource(). + */ + unsigned long min_align; + + if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + min_align = PAGES_PER_SUBSECTION; + else + min_align = PAGES_PER_SECTION; + if (!IS_ALIGNED(pfn, min_align) + || !IS_ALIGNED(nr_pages, min_align)) { + WARN(1, "Misaligned __%s_pages start: %#lx end: #%lx\n", + reason, pfn, pfn + nr_pages - 1); + return -EINVAL; + } + return 0; } /* @@ -274,62 +285,54 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * call this function after deciding the zone to which to * add the new pages. */ -int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) +int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, + struct mhp_restrictions *restrictions) { - unsigned long i; - int err = 0; - int start_sec, end_sec; + int err; + unsigned long nr, start_sec, end_sec; struct vmem_altmap *altmap = restrictions->altmap; - /* during initialize mem_map, align hot-added range to section */ - start_sec = pfn_to_section_nr(phys_start_pfn); - end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - if (altmap) { /* * Validate altmap is within bounds of the total request */ - if (altmap->base_pfn != phys_start_pfn + if (altmap->base_pfn != pfn || vmem_altmap_offset(altmap) > nr_pages) { pr_warn_once("memory add fail, invalid altmap\n"); - err = -EINVAL; - goto out; + return -EINVAL; } altmap->alloc = 0; } - for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), altmap, - restrictions->flags & MHP_MEMBLOCK_API); + err = check_pfn_span(pfn, nr_pages, "add"); + if (err) + return err; - /* - * EEXIST is finally dealt with by ioresource collision - * check. see add_memory() => register_memory_resource() - * Warning will be printed if there is collision. - */ - if (err && (err != -EEXIST)) + start_sec = pfn_to_section_nr(pfn); + end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + for (nr = start_sec; nr <= end_sec; nr++) { + unsigned long pfns; + + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + err = sparse_add_section(nid, pfn, pfns, altmap); + if (err) break; - err = 0; + pfn += pfns; + nr_pages -= pfns; cond_resched(); } vmemmap_populate_print_last(); -out: return err; } -#ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; - - for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(start_pfn); - - if (unlikely(!valid_section(ms))) + for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(start_pfn))) continue; if (unlikely(pfn_to_nid(start_pfn) != nid)) @@ -349,15 +352,12 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; unsigned long pfn; /* pfn is the end pfn of a memory section. */ pfn = end_pfn - 1; - for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(pfn))) continue; if (unlikely(pfn_to_nid(pfn) != nid)) @@ -379,7 +379,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */ unsigned long zone_end_pfn = z; unsigned long pfn; - struct mem_section *ms; int nid = zone_to_nid(zone); zone_span_writelock(zone); @@ -416,17 +415,15 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, * it check the zone has only hole or not. */ pfn = zone_start_pfn; - for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(pfn))) continue; if (page_zone(pfn_to_page(pfn)) != zone) continue; - /* If the section is current section, it continues the loop */ - if (start_pfn == pfn) + /* Skip range to be removed */ + if (pfn >= start_pfn && pfn < end_pfn) continue; /* If we find valid section, we have nothing to do */ @@ -447,7 +444,6 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */ unsigned long pgdat_end_pfn = p; unsigned long pfn; - struct mem_section *ms; int nid = pgdat->node_id; if (pgdat_start_pfn == start_pfn) { @@ -484,17 +480,15 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, * has only hole or not. */ pfn = pgdat_start_pfn; - for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(pfn))) continue; if (pfn_to_nid(pfn) != nid) continue; - /* If the section is current section, it continues the loop */ - if (start_pfn == pfn) + /* Skip range to be removed */ + if (pfn >= start_pfn && pfn < end_pfn) continue; /* If we find valid section, we have nothing to do */ @@ -506,10 +500,10 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, pgdat->node_spanned_pages = 0; } -static void __remove_zone(struct zone *zone, unsigned long start_pfn) +static void __remove_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; - int nr_pages = PAGES_PER_SECTION; unsigned long flags; pgdat_resize_lock(zone->zone_pgdat, &flags); @@ -518,29 +512,23 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) pgdat_resize_unlock(zone->zone_pgdat, &flags); } -static void __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, - struct vmem_altmap *altmap) +static void __remove_section(struct zone *zone, unsigned long pfn, + unsigned long nr_pages, unsigned long map_offset, + struct vmem_altmap *altmap) { - unsigned long start_pfn; - int scn_nr; + struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); if (WARN_ON_ONCE(!valid_section(ms))) return; - unregister_memory_section(ms); - - scn_nr = __section_nr(ms); - start_pfn = section_nr_to_pfn((unsigned long)scn_nr); - __remove_zone(zone, start_pfn); - - sparse_remove_one_section(zone, ms, map_offset, altmap); + __remove_zone(zone, pfn, nr_pages); + sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap); } /** * __remove_pages() - remove sections of pages from a zone * @zone: zone from which pages need to be removed - * @phys_start_pfn: starting pageframe (must be aligned to start of a section) + * @pfn: starting pageframe (must be aligned to start of a section) * @nr_pages: number of pages to remove (must be multiple of section size) * @altmap: alternative device page map or %NULL if default memmap is used * @@ -549,38 +537,35 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). */ -void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, +void __remove_pages(struct zone *zone, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { - unsigned long i; unsigned long map_offset = 0; - int sections_to_remove; + unsigned long nr, start_sec, end_sec; - /* In the ZONE_DEVICE case device driver owns the memory region */ - if (is_dev_zone(zone)) - map_offset = vmem_altmap_offset(altmap); + map_offset = vmem_altmap_offset(altmap); clear_zone_contiguous(zone); - /* - * We can only remove entire sections - */ - BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); - BUG_ON(nr_pages % PAGES_PER_SECTION); + if (check_pfn_span(pfn, nr_pages, "remove")) + return; - sections_to_remove = nr_pages / PAGES_PER_SECTION; - for (i = 0; i < sections_to_remove; i++) { - unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; + start_sec = pfn_to_section_nr(pfn); + end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + for (nr = start_sec; nr <= end_sec; nr++) { + unsigned long pfns; cond_resched(); - __remove_section(zone, __pfn_to_section(pfn), map_offset, - altmap); + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + __remove_section(zone, pfn, pfns, map_offset, altmap); + pfn += pfns; + nr_pages -= pfns; map_offset = 0; } set_zone_contiguous(zone); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ int set_online_page_callback(online_page_callback_t callback) { @@ -1049,16 +1034,11 @@ int try_online_node(int nid) static int check_hotplug_memory_range(u64 start, u64 size) { - unsigned long block_sz = memory_block_size_bytes(); - u64 block_nr_pages = block_sz >> PAGE_SHIFT; - u64 nr_pages = size >> PAGE_SHIFT; - u64 start_pfn = PFN_DOWN(start); - /* memory range must be block size aligned */ - if (!nr_pages || !IS_ALIGNED(start_pfn, block_nr_pages) || - !IS_ALIGNED(nr_pages, block_nr_pages)) { + if (!size || !IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes())) { pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx", - block_sz, start, size); + memory_block_size_bytes(), start, size); return -EINVAL; } @@ -1078,9 +1058,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_restrictions restrictions = { - .flags = MHP_MEMBLOCK_API, - }; + struct mhp_restrictions restrictions = {}; u64 start, size; bool new_node = false; int ret; @@ -1112,6 +1090,13 @@ int __ref add_memory_resource(int nid, struct resource *res) if (ret < 0) goto error; + /* create memory block devices after memory was added */ + ret = create_memory_block_devices(start, size); + if (ret) { + arch_remove_memory(nid, start, size, NULL); + goto error; + } + if (new_node) { /* If sysfs file of new node can't be created, cpu on the node * can't be hot-added. There is no rollback way now. @@ -1135,8 +1120,7 @@ int __ref add_memory_resource(int nid, struct resource *res) /* online pages if requested */ if (memhp_auto_online) - walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), - NULL, online_memory_block); + walk_memory_blocks(start, size, NULL, online_memory_block); return ret; error: @@ -1671,58 +1655,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) { return __offline_pages(start_pfn, start_pfn + nr_pages); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ -/** - * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn) - * @start_pfn: start pfn of the memory range - * @end_pfn: end pfn of the memory range - * @arg: argument passed to func - * @func: callback for each memory section walked - * - * This function walks through all present mem sections in range - * [start_pfn, end_pfn) and call func on each mem section. - * - * Returns the return value of func. - */ -int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, - void *arg, int (*func)(struct memory_block *, void *)) -{ - struct memory_block *mem = NULL; - struct mem_section *section; - unsigned long pfn, section_nr; - int ret; - - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - section_nr = pfn_to_section_nr(pfn); - if (!present_section_nr(section_nr)) - continue; - - section = __nr_to_section(section_nr); - /* same memblock? */ - if (mem) - if ((section_nr >= mem->start_section_nr) && - (section_nr <= mem->end_section_nr)) - continue; - - mem = find_memory_block_hinted(section, mem); - if (!mem) - continue; - - ret = func(mem, arg); - if (ret) { - kobject_put(&mem->dev.kobj); - return ret; - } - } - - if (mem) - kobject_put(&mem->dev.kobj); - - return 0; -} - -#ifdef CONFIG_MEMORY_HOTREMOVE static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); @@ -1833,8 +1766,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * whether all memory blocks in question are offline and return error * if this is not the case. */ - rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, - check_memblock_offlined_cb); + rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); if (rc) goto done; @@ -1843,6 +1775,9 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); + /* remove memory block devices before removing memory */ + remove_memory_block_devices(start, size); + arch_remove_memory(nid, start, size, NULL); __release_memory_resource(start, size); diff --git a/mm/migrate.c b/mm/migrate.c index 3445747e229d0ba8566597973e2b67fab46bfc6d..8992741f10aad4bb1e56c4635ef84aac57176550 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -394,8 +394,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode, - int extra_count) + struct page *newpage, struct page *page, int extra_count) { XA_STATE(xas, &mapping->i_pages, page_index(page)); struct zone *oldzone, *newzone; @@ -681,7 +680,7 @@ int migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -780,7 +779,7 @@ static int __buffer_migrate_page(struct address_space *mapping, } } - rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, 0); if (rc != MIGRATEPAGE_SUCCESS) goto unlock_buffers; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e515bfcf7f288fbea352cbafc7daf4f0f2320ca3..272c6de1bf4ead31c22aca202940da00bc8eaf8b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -450,7 +450,7 @@ static inline unsigned long *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM - return __pfn_to_section(pfn)->pageblock_flags; + return section_to_usemap(__pfn_to_section(pfn)); #else return page_zone(page)->pageblock_flags; #endif /* CONFIG_SPARSEMEM */ @@ -5926,7 +5926,7 @@ void __ref memmap_init_zone_device(struct zone *zone, unsigned long start = jiffies; int nid = pgdat->node_id; - if (WARN_ON_ONCE(!pgmap || !is_dev_zone(zone))) + if (WARN_ON_ONCE(!pgmap || zone_idx(zone) != ZONE_DEVICE)) return; /* @@ -5974,7 +5974,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * pfn out of zone. * * Please note that MEMMAP_HOTPLUG path doesn't clear memmap - * because this is done early in sparse_add_one_section + * because this is done early in section_activate() */ if (!(pfn & (pageblock_nr_pages - 1))) { set_pageblock_migratetype(page, MIGRATE_MOVABLE); @@ -7351,12 +7351,18 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) (u64)zone_movable_pfn[i] << PAGE_SHIFT); } - /* Print out the early node map */ + /* + * Print out the early node map, and initialize the + * subsection-map relative to active online memory ranges to + * enable future "sub-section" extensions of the memory map. + */ pr_info("Early memory node ranges\n"); - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1); + subsection_map_init(start_pfn, end_pfn - start_pfn); + } /* Initialise every node */ mminit_verify_pageflags_layout(); diff --git a/mm/shmem.c b/mm/shmem.c index 99497cb32e71aab00fcd566e6daaa47aaa0639dd..c88a30919ae5b220e66a39f027b55dfd80f6cf2b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3874,6 +3874,9 @@ bool shmem_huge_enabled(struct vm_area_struct *vma) loff_t i_size; pgoff_t off; + if ((vma->vm_flags & VM_NOHUGEPAGE) || + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + return false; if (shmem_huge == SHMEM_HUGE_FORCE) return true; if (shmem_huge == SHMEM_HUGE_DENY) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 7fec057967966c27fdedf3ccffc082ce12eb7ef7..200aef686722675d73450977c6ce7f2c80210453 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -245,19 +245,26 @@ int __meminit vmemmap_populate_basepages(unsigned long start, return 0; } -struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid, - struct vmem_altmap *altmap) +struct page * __meminit __populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { unsigned long start; unsigned long end; - struct page *map; - map = pfn_to_page(pnum * PAGES_PER_SECTION); - start = (unsigned long)map; - end = (unsigned long)(map + PAGES_PER_SECTION); + /* + * The minimum granularity of memmap extensions is + * PAGES_PER_SUBSECTION as allocations are tracked in the + * 'subsection_map' bitmap of the section. + */ + end = ALIGN(pfn + nr_pages, PAGES_PER_SUBSECTION); + pfn &= PAGE_SUBSECTION_MASK; + nr_pages = end - pfn; + + start = (unsigned long) pfn_to_page(pfn); + end = start + nr_pages * sizeof(struct page); if (vmemmap_populate(start, end, nid, altmap)) return NULL; - return map; + return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index fd13166949b52c81e725fdda3b413516aa251fe3..72f010d9bff5014a47fd0ff5703e73ce5be1cf48 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -83,8 +83,15 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; + /* + * An existing section is possible in the sub-section hotplug + * case. First hot-add instantiates, follow-on hot-add reuses + * the existing section. + * + * The mem_hotplug_lock resolves the apparent race below. + */ if (mem_section[root]) - return -EEXIST; + return 0; section = sparse_index_alloc(nid); if (!section) @@ -102,7 +109,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) #endif #ifdef CONFIG_SPARSEMEM_EXTREME -int __section_nr(struct mem_section* ms) +unsigned long __section_nr(struct mem_section *ms) { unsigned long root_nr; struct mem_section *root = NULL; @@ -121,9 +128,9 @@ int __section_nr(struct mem_section* ms) return (root_nr * SECTIONS_PER_ROOT) + (ms - root); } #else -int __section_nr(struct mem_section* ms) +unsigned long __section_nr(struct mem_section *ms) { - return (int)(ms - mem_section[0]); + return (unsigned long)(ms - mem_section[0]); } #endif @@ -178,10 +185,10 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, * Keeping track of this gives us an easy way to break out of * those loops early. */ -int __highest_present_section_nr; +unsigned long __highest_present_section_nr; static void section_mark_present(struct mem_section *ms) { - int section_nr = __section_nr(ms); + unsigned long section_nr = __section_nr(ms); if (section_nr > __highest_present_section_nr) __highest_present_section_nr = section_nr; @@ -189,7 +196,7 @@ static void section_mark_present(struct mem_section *ms) ms->section_mem_map |= SECTION_MARKED_PRESENT; } -static inline int next_present_section_nr(int section_nr) +static inline unsigned long next_present_section_nr(unsigned long section_nr) { do { section_nr++; @@ -210,6 +217,41 @@ static inline unsigned long first_present_section_nr(void) return next_present_section_nr(-1); } +void subsection_mask_set(unsigned long *map, unsigned long pfn, + unsigned long nr_pages) +{ + int idx = subsection_map_index(pfn); + int end = subsection_map_index(pfn + nr_pages - 1); + + bitmap_set(map, idx, end - idx + 1); +} + +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) +{ + int end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + unsigned long nr, start_sec = pfn_to_section_nr(pfn); + + if (!nr_pages) + return; + + for (nr = start_sec; nr <= end_sec; nr++) { + struct mem_section *ms; + unsigned long pfns; + + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + ms = __nr_to_section(nr); + subsection_mask_set(ms->usage->subsection_map, pfn, pfns); + + pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr, + pfns, subsection_map_index(pfn), + subsection_map_index(pfn + pfns - 1)); + + pfn += pfns; + nr_pages -= pfns; + } +} + /* Record a memory area against a node. */ void __init memory_present(int nid, unsigned long start, unsigned long end) { @@ -288,33 +330,31 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn static void __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, - unsigned long *pageblock_bitmap) + struct mem_section_usage *usage, unsigned long flags) { ms->section_mem_map &= ~SECTION_MAP_MASK; - ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | - SECTION_HAS_MEM_MAP; - ms->pageblock_flags = pageblock_bitmap; + ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) + | SECTION_HAS_MEM_MAP | flags; + ms->usage = usage; } -unsigned long usemap_size(void) +static unsigned long usemap_size(void) { return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); } -#ifdef CONFIG_MEMORY_HOTPLUG -static unsigned long *__kmalloc_section_usemap(void) +size_t mem_section_usage_size(void) { - return kmalloc(usemap_size(), GFP_KERNEL); + return sizeof(struct mem_section_usage) + usemap_size(); } -#endif /* CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_MEMORY_HOTREMOVE -static unsigned long * __init +static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { + struct mem_section_usage *usage; unsigned long goal, limit; - unsigned long *p; int nid; /* * A page may contain usemaps for other sections preventing the @@ -330,15 +370,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: - p = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); - if (!p && limit) { + usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); + if (!usage && limit) { limit = 0; goto again; } - return p; + return usage; } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { unsigned long usemap_snr, pgdat_snr; static unsigned long old_usemap_snr; @@ -352,7 +393,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) old_pgdat_snr = NR_MEM_SECTIONS; } - usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); + usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; @@ -380,14 +421,15 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) usemap_snr, pgdat_snr, nid); } #else -static unsigned long * __init +static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id); } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ @@ -404,8 +446,8 @@ static unsigned long __init section_map_size(void) return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION); } -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, - struct vmem_altmap *altmap) +struct page __init *__populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { unsigned long size = section_map_size(); struct page *map = sparse_buffer_alloc(size); @@ -474,32 +516,35 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum_end, unsigned long map_count) { - unsigned long pnum, usemap_longs, *usemap; + struct mem_section_usage *usage; + unsigned long pnum; struct page *map; - usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS); - usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), - usemap_size() * - map_count); - if (!usemap) { + usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), + mem_section_usage_size() * map_count); + if (!usage) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } sparse_buffer_init(map_count * section_map_size(), nid); for_each_present_section_nr(pnum_begin, pnum) { + unsigned long pfn = section_nr_to_pfn(pnum); + if (pnum >= pnum_end) break; - map = sparse_mem_map_populate(pnum, nid, NULL); + map = __populate_section_memmap(pfn, PAGES_PER_SECTION, + nid, NULL); if (!map) { pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", __func__, nid); pnum_begin = pnum; goto failed; } - check_usemap_section_nr(nid, usemap); - sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap); - usemap += usemap_longs; + check_usemap_section_nr(nid, usage); + sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, + SECTION_IS_EARLY); + usage = (void *) usage + mem_section_usage_size(); } sparse_buffer_fini(); return; @@ -590,21 +635,20 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, - struct vmem_altmap *altmap) +static struct page *populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { - /* This will make the necessary allocations eventually. */ - return sparse_mem_map_populate(pnum, nid, altmap); + return __populate_section_memmap(pfn, nr_pages, nid, altmap); } -static void __kfree_section_memmap(struct page *memmap, + +static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { - unsigned long start = (unsigned long)memmap; - unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); + unsigned long start = (unsigned long) pfn_to_page(pfn); + unsigned long end = start + nr_pages * sizeof(struct page); vmemmap_free(start, end, altmap); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long start = (unsigned long)memmap; @@ -612,9 +656,9 @@ static void free_map_bootmem(struct page *memmap) vmemmap_free(start, end, NULL); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #else -static struct page *__kmalloc_section_memmap(void) +struct page *populate_section_memmap(unsigned long pfn, + unsigned long nr_pages, int nid, struct vmem_altmap *altmap) { struct page *page, *ret; unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION; @@ -635,15 +679,11 @@ static struct page *__kmalloc_section_memmap(void) return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, +static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { - return __kmalloc_section_memmap(); -} + struct page *memmap = pfn_to_page(pfn); -static void __kfree_section_memmap(struct page *memmap, - struct vmem_altmap *altmap) -{ if (is_vmalloc_addr(memmap)) vfree(memmap); else @@ -651,7 +691,6 @@ static void __kfree_section_memmap(struct page *memmap, get_order(sizeof(struct page) * PAGES_PER_SECTION)); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long maps_section_nr, removing_section_nr, i; @@ -681,13 +720,122 @@ static void free_map_bootmem(struct page *memmap) put_page_bootmem(page); } } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ +static void section_deactivate(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap) +{ + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; + DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 }; + struct mem_section *ms = __pfn_to_section(pfn); + bool section_is_early = early_section(ms); + struct page *memmap = NULL; + unsigned long *subsection_map = ms->usage + ? &ms->usage->subsection_map[0] : NULL; + + subsection_mask_set(map, pfn, nr_pages); + if (subsection_map) + bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION); + + if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION), + "section already deactivated (%#lx + %ld)\n", + pfn, nr_pages)) + return; + + /* + * There are 3 cases to handle across two configurations + * (SPARSEMEM_VMEMMAP={y,n}): + * + * 1/ deactivation of a partial hot-added section (only possible + * in the SPARSEMEM_VMEMMAP=y case). + * a/ section was present at memory init + * b/ section was hot-added post memory init + * 2/ deactivation of a complete hot-added section + * 3/ deactivation of a complete section from memory init + * + * For 1/, when subsection_map does not empty we will not be + * freeing the usage map, but still need to free the vmemmap + * range. + * + * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified + */ + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION); + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { + unsigned long section_nr = pfn_to_section_nr(pfn); + + if (!section_is_early) { + kfree(ms->usage); + ms->usage = NULL; + } + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr); + } + + if (section_is_early && memmap) + free_map_bootmem(memmap); + else + depopulate_section_memmap(pfn, nr_pages, altmap); +} + +static struct page * __meminit section_activate(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) +{ + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; + struct mem_section *ms = __pfn_to_section(pfn); + struct mem_section_usage *usage = NULL; + unsigned long *subsection_map; + struct page *memmap; + int rc = 0; + + subsection_mask_set(map, pfn, nr_pages); + + if (!ms->usage) { + usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); + if (!usage) + return ERR_PTR(-ENOMEM); + ms->usage = usage; + } + subsection_map = &ms->usage->subsection_map[0]; + + if (bitmap_empty(map, SUBSECTIONS_PER_SECTION)) + rc = -EINVAL; + else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION)) + rc = -EEXIST; + else + bitmap_or(subsection_map, map, subsection_map, + SUBSECTIONS_PER_SECTION); + + if (rc) { + if (usage) + ms->usage = NULL; + kfree(usage); + return ERR_PTR(rc); + } + + /* + * The early init code does not consider partially populated + * initial sections, it simply assumes that memory will never be + * referenced. If we hot-add memory into such a section then we + * do not need to populate the memmap and can simply reuse what + * is already there. + */ + if (nr_pages < PAGES_PER_SECTION && early_section(ms)) + return pfn_to_page(pfn); + + memmap = populate_section_memmap(pfn, nr_pages, nid, altmap); + if (!memmap) { + section_deactivate(pfn, nr_pages, altmap); + return ERR_PTR(-ENOMEM); + } + + return memmap; +} + /** - * sparse_add_one_section - add a memory section + * sparse_add_section - add a memory section, or populate an existing one * @nid: The node to add section on * @start_pfn: start pfn of the memory range + * @nr_pages: number of pfns to add in the section * @altmap: device page map * * This is only intended for hotplug. @@ -697,56 +845,40 @@ static void free_map_bootmem(struct page *memmap) * * -EEXIST - Section has been present. * * -ENOMEM - Out of memory. */ -int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, - struct vmem_altmap *altmap) +int __meminit sparse_add_section(int nid, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; struct page *memmap; - unsigned long *usemap; int ret; - /* - * no locking for this, because it does its own - * plus, it does a kmalloc - */ ret = sparse_index_init(section_nr, nid); - if (ret < 0 && ret != -EEXIST) + if (ret < 0) return ret; - ret = 0; - memmap = kmalloc_section_memmap(section_nr, nid, altmap); - if (!memmap) - return -ENOMEM; - usemap = __kmalloc_section_usemap(); - if (!usemap) { - __kfree_section_memmap(memmap, altmap); - return -ENOMEM; - } - ms = __pfn_to_section(start_pfn); - if (ms->section_mem_map & SECTION_MARKED_PRESENT) { - ret = -EEXIST; - goto out; - } + memmap = section_activate(nid, start_pfn, nr_pages, altmap); + if (IS_ERR(memmap)) + return PTR_ERR(memmap); /* * Poison uninitialized struct pages in order to catch invalid flags * combinations. */ - page_init_poison(memmap, sizeof(struct page) * PAGES_PER_SECTION); + page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages); + ms = __pfn_to_section(start_pfn); + set_section_nid(section_nr, nid); section_mark_present(ms); - sparse_init_one_section(ms, section_nr, memmap, usemap); -out: - if (ret < 0) { - kfree(usemap); - __kfree_section_memmap(memmap, altmap); - } - return ret; + /* Align memmap to section boundary in the subsection case */ + if (section_nr_to_pfn(section_nr) != start_pfn) + memmap = pfn_to_kaddr(section_nr_to_pfn(section_nr)); + sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0); + + return 0; } -#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_FAILURE static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) { @@ -777,51 +909,12 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap, +void sparse_remove_section(struct mem_section *ms, unsigned long pfn, + unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap) { - struct page *usemap_page; - - if (!usemap) - return; - - usemap_page = virt_to_page(usemap); - /* - * Check to see if allocation came from hot-plug-add - */ - if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); - if (memmap) - __kfree_section_memmap(memmap, altmap); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) - free_map_bootmem(memmap); -} - -void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, struct vmem_altmap *altmap) -{ - struct page *memmap = NULL; - unsigned long *usemap = NULL; - - if (ms->section_mem_map) { - usemap = ms->pageblock_flags; - memmap = sparse_decode_mem_map(ms->section_mem_map, - __section_nr(ms)); - ms->section_mem_map = 0; - ms->pageblock_flags = NULL; - } - - clear_hwpoisoned_pages(memmap + map_offset, - PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap, altmap); + clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset, + nr_pages - map_offset); + section_deactivate(pfn, nr_pages, altmap); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 742cea4ce72e4ecc308aafa18fa474476e2a2004..26da97359d5b55b0f3c4d018420c3944dbdf4908 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3374,8 +3374,6 @@ void neigh_app_ns(struct neighbour *n) EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL -static int zero; -static int int_max = INT_MAX; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, @@ -3384,7 +3382,7 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, int size, ret; struct ctl_table tmp = *ctl; - tmp.extra1 = &zero; + tmp.extra1 = SYSCTL_ZERO; tmp.extra2 = &unres_qlen_max; tmp.data = &size; @@ -3449,8 +3447,8 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, struct ctl_table tmp = *ctl; int ret; - tmp.extra1 = &zero; - tmp.extra2 = &int_max; + tmp.extra1 = SYSCTL_ZERO; + tmp.extra2 = SYSCTL_INT_MAX; ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); @@ -3595,24 +3593,24 @@ static struct neigh_sysctl_table { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, {}, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f9204719aeeeb4700582d03fda244f2f8961f8a7..8da5b3a54dac40c3eb939408486b6e1036306747 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -22,8 +22,6 @@ #include <net/busy_poll.h> #include <net/pkt_sched.h> -static int zero = 0; -static int one = 1; static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; @@ -390,10 +388,10 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax_bpf_enable, # ifdef CONFIG_BPF_JIT_ALWAYS_ON - .extra1 = &one, - .extra2 = &one, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_ONE, # else - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, # endif }, @@ -404,7 +402,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec_minmax_bpf_restricted, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -413,8 +411,8 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_dointvec_minmax_bpf_restricted, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, # endif { @@ -461,8 +459,8 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, #ifdef CONFIG_RPS { @@ -493,7 +491,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "busy_read", @@ -501,7 +499,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #endif #ifdef CONFIG_NET_SCHED @@ -533,7 +531,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &max_skb_frags, }, { @@ -542,7 +540,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "fb_tunnels_only_for_init_net", @@ -550,8 +548,8 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "devconf_inherit_init_net", @@ -559,7 +557,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -578,7 +576,7 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .proc_handler = proc_dointvec_minmax }, { } diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index b59040f268a9988e2b882b8062dd68b195279898..ee8d4f5afa72262c96b220bfdd5ef954d11daa38 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -16,9 +16,7 @@ #endif /* Boundary values */ -static int zero = 0, - one = 1, - u8_max = 0xFF; +static int u8_max = 0xFF; static unsigned long seqw_min = DCCPF_SEQ_WMIN, seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ @@ -38,7 +36,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_rx_ccid), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &u8_max, /* RFC 4340, 10. */ }, { @@ -47,7 +45,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_tx_ccid), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &u8_max, /* RFC 4340, 10. */ }, { @@ -56,7 +54,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &u8_max, }, { @@ -65,7 +63,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_retries1), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &u8_max, }, { @@ -74,7 +72,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_retries2), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &u8_max, }, { @@ -83,7 +81,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_tx_qlen), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "sync_ratelimit", diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7d66306b5f39719ac432178948ceac256018d52a..0b980e84192738362e19ce3a9e921cff23a1449b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,8 +28,6 @@ #include <net/protocol.h> #include <net/netevent.h> -static int zero; -static int one = 1; static int two = 2; static int four = 4; static int thousand = 1000; @@ -576,7 +574,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "icmp_msgs_burst", @@ -584,7 +582,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "udp_mem", @@ -674,8 +672,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { @@ -763,8 +761,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = ipv4_fwd_update_priority, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_nonlocal_bind", @@ -794,8 +792,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { @@ -864,7 +862,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one + .extra1 = SYSCTL_ONE }, #endif { @@ -969,7 +967,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &two, }, { @@ -1011,7 +1009,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { @@ -1020,8 +1018,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "fib_multipath_hash_policy", @@ -1029,8 +1027,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, - .extra1 = &zero, - .extra2 = &two, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { @@ -1047,8 +1045,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif { @@ -1078,7 +1076,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &four, }, { @@ -1222,7 +1220,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &gso_max_segs, }, { @@ -1231,7 +1229,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &one_day_secs }, { @@ -1240,8 +1238,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "tcp_invalid_ratelimit", @@ -1256,7 +1254,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &thousand, }, { @@ -1265,7 +1263,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &thousand, }, { @@ -1274,7 +1272,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, { .procname = "tcp_rmem", @@ -1282,7 +1280,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, { .procname = "tcp_comp_sack_delay_ns", @@ -1297,7 +1295,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &comp_sack_nr_max, }, { @@ -1306,7 +1304,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one + .extra1 = SYSCTL_ONE }, { .procname = "udp_wmem_min", @@ -1314,7 +1312,7 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one + .extra1 = SYSCTL_ONE }, { } }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 521e3203e83a473d3234151e61c97a81baa02da3..dc73888c7859a2d0d499fabd14aa8b71e12b0e77 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6432,8 +6432,6 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, } static int minus_one = -1; -static const int zero = 0; -static const int one = 1; static const int two_five_five = 255; static const struct ctl_table addrconf_sysctl[] = { @@ -6450,7 +6448,7 @@ static const struct ctl_table addrconf_sysctl[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&one, + .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&two_five_five, }, { @@ -6809,7 +6807,7 @@ static const struct ctl_table addrconf_sysctl[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&zero, + .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)&two_five_five, }, { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d2e6b31a8d66877c66f8075828ae9f3d1b439ca..8b0c33fb19a2667174f58f0f92ffbefe12b42932 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6031,9 +6031,6 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, return 0; } -static int zero; -static int one = 1; - static struct ctl_table ipv6_route_table_template[] = { { .procname = "flush", @@ -6111,8 +6108,8 @@ static struct ctl_table ipv6_route_table_template[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index dc4c91e0bfb8efcc9599b7505f526d443527e62d..ec8fcfc60a27ba2a238cf05ecae292e2db4daedc 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,8 +21,6 @@ #include <net/calipso.h> #endif -static int zero; -static int one = 1; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; @@ -115,7 +113,7 @@ static struct ctl_table ipv6_table_template[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &flowlabel_reflect_max, }, { @@ -152,8 +150,8 @@ static struct ctl_table ipv6_table_template[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "seg6_flowlabel", @@ -179,7 +177,7 @@ static struct ctl_table ipv6_rotable[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one + .extra1 = SYSCTL_ONE }, #ifdef CONFIG_NETLABEL { diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 198ec4fe4148b897a58ec74aa04ec6dfd9b3078e..c312741df2ce9edb84ab5e60697d98bf0286d500 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -37,8 +37,6 @@ #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) -static int zero = 0; -static int one = 1; static int label_limit = (1 << 20) - 1; static int ttl_max = 255; @@ -2607,7 +2605,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write, .data = &platform_labels, .maxlen = sizeof(int), .mode = table->mode, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &label_limit, }; @@ -2636,8 +2634,8 @@ static const struct ctl_table mpls_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "default_ttl", @@ -2645,7 +2643,7 @@ static const struct ctl_table mpls_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &ttl_max, }, { } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 07e0967bf1296671657bda8f77e311cb28d3cb99..060565e7d227a6b904941301aea2ba0c507d893b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1726,7 +1726,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) #ifdef CONFIG_SYSCTL -static int zero; static int three = 3; static int @@ -1935,7 +1934,7 @@ static struct ctl_table vs_vars[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &three, }, { diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 1e3fa67d91aa8ee05b25f1e29a185329670a8c76..2bbb38161851a48965272ef2984e23f76b4cb308 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -11,7 +11,6 @@ #include "ar-internal.h" static struct ctl_table_header *rxrpc_sysctl_reg_table; -static const unsigned int one = 1; static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; @@ -97,7 +96,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&one, + .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&rxrpc_max_client_connections, }, { @@ -115,7 +114,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&one, + .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&n_max_acks, }, { @@ -124,7 +123,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&one, + .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&n_65535, }, { @@ -133,7 +132,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)&one, + .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&four, }, diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 9a19147902f175aeb8963d7a93b8a5c47d68524e..1250751bca1bc2283307aaf668a788013d7149b4 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -25,10 +25,7 @@ #include <net/sctp/sctp.h> #include <linux/sysctl.h> -static int zero = 0; -static int one = 1; static int timer_max = 86400000; /* ms in one day */ -static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = SCTP_SCOPE_POLICY_MAX; @@ -92,7 +89,7 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { @@ -101,7 +98,7 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_min, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &init_net.sctp.rto_max }, { @@ -137,8 +134,8 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "cookie_preserve_enable", @@ -160,7 +157,7 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { @@ -178,7 +175,7 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { @@ -187,8 +184,8 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "path_max_retrans", @@ -196,8 +193,8 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "max_init_retransmits", @@ -205,8 +202,8 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "pf_retrans", @@ -214,8 +211,8 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &int_max + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sndbuf_policy", @@ -286,7 +283,7 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &addr_scope_max, }, { @@ -295,7 +292,7 @@ static struct ctl_table sctp_net_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, .extra2 = &rwnd_scale_max, }, { diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 52abddac19e5c4402e46e1f2285c8b1f71243e4a..2ec349ed47702e1ad9f75638a23779662407ef9e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -80,7 +80,6 @@ static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; static unsigned int min_inline_size = RPCRDMA_MIN_INLINE; static unsigned int max_inline_size = RPCRDMA_MAX_INLINE; -static unsigned int zero; static unsigned int max_padding = PAGE_SIZE; static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; static unsigned int max_memreg = RPCRDMA_LAST - 1; @@ -122,7 +121,7 @@ static struct ctl_table xr_tunables_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &max_padding, }, { diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 9df82a573aa7768f583999e740022ce00295bbd4..6159d327db76a577607928142c8e881e3d1cac30 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -38,8 +38,6 @@ #include <linux/sysctl.h> -static int zero; -static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -49,7 +47,7 @@ static struct ctl_table tipc_table[] = { .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = SYSCTL_ONE, }, { .procname = "named_timeout", @@ -57,7 +55,7 @@ static struct ctl_table tipc_table[] = { .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "sk_filter", diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c index dd1e21fab827a51fd66a7363e9463f5c197aeaa0..b46b651b3c4cc65bcb0409e9a3e3c1e74604ac9d 100644 --- a/security/keys/sysctl.c +++ b/security/keys/sysctl.c @@ -9,8 +9,6 @@ #include <linux/sysctl.h> #include "internal.h" -static const int zero, one = 1, max = INT_MAX; - struct ctl_table key_sysctls[] = { { .procname = "maxkeys", @@ -18,8 +16,8 @@ struct ctl_table key_sysctls[] = { .maxlen = sizeof(unsigned), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *) &one, - .extra2 = (void *) &max, + .extra1 = (void *) SYSCTL_ONE, + .extra2 = (void *) SYSCTL_INT_MAX, }, { .procname = "maxbytes", @@ -27,8 +25,8 @@ struct ctl_table key_sysctls[] = { .maxlen = sizeof(unsigned), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *) &one, - .extra2 = (void *) &max, + .extra1 = (void *) SYSCTL_ONE, + .extra2 = (void *) SYSCTL_INT_MAX, }, { .procname = "root_maxkeys", @@ -36,8 +34,8 @@ struct ctl_table key_sysctls[] = { .maxlen = sizeof(unsigned), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *) &one, - .extra2 = (void *) &max, + .extra1 = (void *) SYSCTL_ONE, + .extra2 = (void *) SYSCTL_INT_MAX, }, { .procname = "root_maxbytes", @@ -45,8 +43,8 @@ struct ctl_table key_sysctls[] = { .maxlen = sizeof(unsigned), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *) &one, - .extra2 = (void *) &max, + .extra1 = (void *) SYSCTL_ONE, + .extra2 = (void *) SYSCTL_INT_MAX, }, { .procname = "gc_delay", @@ -54,8 +52,8 @@ struct ctl_table key_sysctls[] = { .maxlen = sizeof(unsigned), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *) &zero, - .extra2 = (void *) &max, + .extra1 = (void *) SYSCTL_ZERO, + .extra2 = (void *) SYSCTL_INT_MAX, }, #ifdef CONFIG_PERSISTENT_KEYRINGS { @@ -64,8 +62,8 @@ struct ctl_table key_sysctls[] = { .maxlen = sizeof(unsigned), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *) &zero, - .extra2 = (void *) &max, + .extra1 = (void *) SYSCTL_ZERO, + .extra2 = (void *) SYSCTL_INT_MAX, }, #endif { } diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 81519c804888c613fb86afc164f67238fcd333ba..ee5cb944f4ad1bdd70c2a91001c53bc72ba1b2e1 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -43,8 +43,6 @@ static struct super_block *pinned_root; static DEFINE_SPINLOCK(pinned_root_spinlock); #ifdef CONFIG_SYSCTL -static int zero; -static int one = 1; static struct ctl_path loadpin_sysctl_path[] = { { .procname = "kernel", }, @@ -59,8 +57,8 @@ static struct ctl_table loadpin_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 01c6239c4493a53a0b5584a8e9ad05574174dc33..94dc346370b106a73a78da6d3acfbcb82c297541 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -445,7 +445,6 @@ static int yama_dointvec_minmax(struct ctl_table *table, int write, return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); } -static int zero; static int max_scope = YAMA_SCOPE_NO_ATTACH; static struct ctl_path yama_sysctl_path[] = { @@ -461,7 +460,7 @@ static struct ctl_table yama_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = yama_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, .extra2 = &max_scope, }, { }