diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst index b2391b829169..cb8862659178 100644 --- a/Documentation/dev-tools/kmemleak.rst +++ b/Documentation/dev-tools/kmemleak.rst @@ -150,6 +150,7 @@ See the include/linux/kmemleak.h header for the functions prototype. - ``kmemleak_init`` - initialize kmemleak - ``kmemleak_alloc`` - notify of a memory block allocation - ``kmemleak_alloc_percpu`` - notify of a percpu memory block allocation +- ``kmemleak_vmalloc`` - notify of a vmalloc() memory allocation - ``kmemleak_free`` - notify of a memory block freeing - ``kmemleak_free_part`` - notify of a partial memory block freeing - ``kmemleak_free_percpu`` - notify of a percpu memory block freeing diff --git a/Documentation/devicetree/bindings/gpu/adreno.txt b/Documentation/devicetree/bindings/gpu/adreno.txt index a4e9ba7b057f..95bbbc0eaf70 100644 --- a/Documentation/devicetree/bindings/gpu/adreno.txt +++ b/Documentation/devicetree/bindings/gpu/adreno.txt @@ -99,7 +99,6 @@ Optional Properties: - qcom,chipid: If it exists this property is used to replace the chip identification read from the GPU hardware. This is used to override faulty hardware readings. -- qcom,disable-wake-on-touch: Boolean. Disables the GPU power up on a touch input event. - qcom,disable-busy-time-burst: Boolean. Disables the busy time burst to avoid switching of power level for large frames based on the busy time limit. @@ -206,6 +205,10 @@ Optional Properties: Specify the name of GPU temperature sensor. This name will be used to get the temperature from the thermal driver API. +- tzone-names: + Specify the names of GPU thermal zones. These will be used + to get the temperature from the thermal driver API. + - qcom,enable-midframe-timer: Boolean. Enables the use of midframe sampling timer. This timer samples the GPU powerstats if the cmdbatch expiry takes longer than diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX index 8c5e6aa78004..b367200c94b5 100644 --- a/Documentation/kbuild/00-INDEX +++ b/Documentation/kbuild/00-INDEX @@ -1,5 +1,7 @@ 00-INDEX - this file: info on the kernel build process +llvm.txt + - how to compile Linux with llvm headers_install.txt - how to export Linux headers for use by userspace kbuild.txt diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index 0ff6a466a05b..a6aea8f9a161 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -238,3 +238,8 @@ KBUILD_VMLINUX_MAIN All object files for the main part of vmlinux. KBUILD_VMLINUX_INIT and KBUILD_VMLINUX_MAIN together specify all the object files used to link vmlinux. + +LLVM +---- +If this variable is set to 1, Kbuild will use Clang and LLVM utilities instead +of GCC and GNU binutils to build the kernel. diff --git a/Documentation/kbuild/llvm.txt b/Documentation/kbuild/llvm.txt new file mode 100644 index 000000000000..51c009baa35a --- /dev/null +++ b/Documentation/kbuild/llvm.txt @@ -0,0 +1,86 @@ +Building Linux with Clang/LLVM +============================== + +This document covers how to build the Linux kernel with Clang and LLVM +utilities. + +About +----- + +The Linux kernel has always traditionally been compiled with GNU +toolchains such as GCC and binutils. Ongoing work has allowed for +[Clang](https://clang.llvm.org/) and [LLVM](https://llvm.org/) utilities +to be used as viable substitutes. Distributions such as +[Android](https://www.android.com/), +[ChromeOS](https://www.chromium.org/chromium-os), and +[OpenMandriva](https://www.openmandriva.org/) use Clang built kernels. +[LLVM is a collection of toolchain components implemented in terms of +C++ objects](https://www.aosabook.org/en/llvm.html). Clang is a +front-end to LLVM that supports C and the GNU C extensions required by +the kernel, and is pronounced "klang," not "see-lang." + +Clang +----- + +The compiler used can be swapped out via [CC=]{.title-ref} command line +argument to [make]{.title-ref}. [CC=]{.title-ref} should be set when +selecting a config and during a build. + +> make CC=clang defconfig +> +> make CC=clang + +Cross Compiling +--------------- + +A single Clang compiler binary will typically contain all supported +backends, which can help simplify cross compiling. + +> ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang + +[CROSS\_COMPILE]{.title-ref} is not used to prefix the Clang compiler +binary, instead [CROSS\_COMPILE]{.title-ref} is used to set a command +line flag: [--target ]{.title-ref}. For example: + +> clang --target aarch64-linux-gnu foo.c + +LLVM Utilities +-------------- + +LLVM has substitutes for GNU binutils utilities. These can be invoked as +additional parameters to [make]{.title-ref}. + +> make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \\ +> OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \\ +> READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \\ +> HOSTLD=ld.lld + +Currently, the integrated assembler is disabled by default. You can pass +`LLVM_IAS=1` to enable it. + +Getting Help +------------ + +- [Website](https://clangbuiltlinux.github.io/) +- [Mailing + List](https://groups.google.com/forum/#!forum/clang-built-linux): + +- [Issue Tracker](https://github.com/ClangBuiltLinux/linux/issues) +- IRC: \#clangbuiltlinux on chat.freenode.net +- [Telegram](https://t.me/ClangBuiltLinux): \@ClangBuiltLinux +- [Wiki](https://github.com/ClangBuiltLinux/linux/wiki) +- [Beginner + Bugs](https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) + +Getting LLVM +------------ + +- +- +- +- +- +- +- +- +- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 91753a14d74d..a849971d241e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -676,16 +676,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ccw_timeout_log [S390] See Documentation/s390/CommonIO for details. - cgroup_disable= [KNL] Disable a particular controller - Format: {name of the controller(s) to disable} + cgroup_disable= [KNL] Disable a particular controller or optional feature + Format: {name of the controller(s) or feature(s) to disable} The effects of cgroup_disable=foo are: - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem + - if foo is an optional feature then the feature is + disabled and corresponding cgroup files are not + created {Currently only "memory" controller deal with this and cut the overhead, others just disable the usage. So only cgroup_disable=memory is actually worthy} + Specifying "pressure" disables per-cgroup pressure + stall information accounting feature cgroup_no_v1= [KNL] Disable one, multiple, all cgroup controllers in v1 Format: { controller[,controller...] | "all" } diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index 285c54f66779..574c2fdcb9e8 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -73,6 +73,10 @@ Description They just have to "refill" this credit if they consume extra bytes. This is an implementation design choice independent on the algorithm or encoding. +For maximum compatibility, both versions are available under different names +(lzo and lzo-rle). Differences in the encoding are noted in this document with +e.g.: version 1 only. + Byte sequences First byte encoding : @@ -134,6 +138,11 @@ Byte sequences state = S (copy S literals after this block) End of stream is reached if distance == 16384 + In version 1 only, this instruction is also used to encode a run of + zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1. + In this case, it is followed by a fourth byte, X. + run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4. + 0 0 1 L L L L L (32..63) Copy of small block within 16kB distance (preferably less than 34B) length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte) @@ -158,7 +167,9 @@ Byte sequences Authors This document was written by Willy Tarreau on 2014/07/19 during an - analysis of the decompression code available in Linux 3.16-rc5. The code is - tricky, it is possible that this document contains mistakes or that a few - corner cases were overlooked. In any case, please report any doubt, fix, or - proposed updates to the author(s) so that the document can be updated. + analysis of the decompression code available in Linux 3.16-rc5, and updated + by Dave Rodgman on 2018/10/30 to introduce run-length + encoding. The code is tricky, it is possible that this document contains + mistakes or that a few corner cases were overlooked. In any case, please + report any doubt, fix, or proposed updates to the author(s) so that the + document can be updated. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 206c9b071cf2..823903a51dbf 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -50,6 +50,7 @@ Currently, these files are in /proc/sys/vm: - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks +- reap_mem_on_sigkill - oom_kill_allocating_task - overcommit_kbytes - overcommit_memory @@ -634,6 +635,24 @@ The default value is 1 (enabled). ============================================================== +reap_mem_on_sigkill + +This enables or disables the memory reaping for a SIGKILL received +process and that the sending process must have the CAP_KILL capabilities. + +If this is set to 1, when a process receives SIGKILL from a process +that has the capability, CAP_KILL, the process is added into the oom_reaper +queue which can be picked up by the oom_reaper thread to reap the memory of +that process. This reaps for the process which received SIGKILL through +either sys_kill from user or kill_pid from kernel. + +If this is set to 0, we are not reaping memory of a SIGKILL, sent through +either sys_kill from user or kill_pid from kernel, received process. + +The default value is 1 (enabled). + +============================================================== + oom_kill_allocating_task This enables or disables killing the OOM-triggering task in diff --git a/Makefile b/Makefile index d32900fe677c..069c52f77e20 100644 --- a/Makefile +++ b/Makefile @@ -254,7 +254,7 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ # "make" in the configured kernel build directory always uses that. # Default value for CROSS_COMPILE is not to prefix executables # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile -ARCH ?= $(SUBARCH) +ARCH := arm64 CROSS_COMPILE ?= $(CONFIG_CROSS_COMPILE:"%"=%) # Architecture as present in compile.h @@ -301,9 +301,15 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ else echo sh; fi ; fi) +ifneq ($(LLVM),) +HOSTCC = clang +HOSTCXX = clang++ +else HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 +endif + +HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 -pipe HOSTCXXFLAGS = -O2 # Decide whether to build built-in, modular, or both. @@ -337,18 +343,28 @@ scripts/Kbuild.include: ; include scripts/Kbuild.include # Make variables (CC, etc...) +LDGOLD = $(CROSS_COMPILE)ld.gold +CPP = $(CC) -E +ifneq ($(LLVM),) +CC = clang +LD = ld.lld +AR = llvm-ar +NM = llvm-nm +OBJCOPY = llvm-objcopy +OBJDUMP = llvm-objdump +READELF = llvm-readelf +OBJSIZE = llvm-size +STRIP = llvm-strip +else AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld -REAL_CC = $(CROSS_COMPILE)gcc -LDGOLD = $(CROSS_COMPILE)ld.gold -REAL_CC = $(CROSS_COMPILE)gcc CC = $(CROSS_COMPILE)gcc -CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump +endif DTC = scripts/dtc/dtc AWK = awk GENKSYMS = scripts/genksyms/genksyms @@ -358,16 +374,12 @@ PERL = perl PYTHON = python CHECK = sparse -# Use the wrapper for the compiler. This wrapper scans for new -# warnings and causes the build to stop upon encountering them -CC = $(PYTHON) $(srctree)/scripts/gcc-wrapper.py $(REAL_CC) - CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void $(CF) NOSTDINC_FLAGS = CFLAGS_MODULE = AFLAGS_MODULE = -LDFLAGS_MODULE = +LDFLAGS_MODULE = --strip-debug CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = @@ -396,7 +408,7 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common -fshort-wchar \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 + -std=gnu89 -pipe KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := @@ -404,6 +416,7 @@ KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds GCC_PLUGINS_CFLAGS := +LDFLAGS := CLANG_FLAGS := # Read KERNELRELEASE from include/config/kernel.release (if it exists) @@ -523,7 +536,9 @@ endif ifneq ($(GCC_TOOLCHAIN),) CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN) endif +ifneq ($(LLVM_IAS),1) CLANG_FLAGS += -no-integrated-as +endif CLANG_FLAGS += -Werror=unknown-warning-option KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) @@ -642,12 +657,14 @@ all: vmlinux # Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure # ar/cc/ld-* macros return correct values. ifdef CONFIG_LTO_CLANG +ifneq ($(ld-name),lld) # use GNU gold with LLVMgold for LTO linking, and LD for vmlinux_link LDFINAL_vmlinux := $(LD) LD := $(LDGOLD) LDFLAGS += -plugin LLVMgold.so LDFLAGS += -plugin-opt=-function-sections LDFLAGS += -plugin-opt=-data-sections +endif # use llvm-ar for building symbol tables from IR files, and llvm-dis instead # of objdump for processing symbol versions and exports LLVM_AR := llvm-ar @@ -740,7 +757,7 @@ endif ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os else -KBUILD_CFLAGS += -O2 +KBUILD_CFLAGS += -O3 endif ifdef CONFIG_CC_WERROR @@ -915,9 +932,6 @@ KBUILD_CFLAGS += $(call cc-option,-fmerge-constants) # Make sure -fstack-check isn't enabled (like gentoo apparently did) KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) -# conserve stack if available -KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) - # disallow errors like 'EXPORT_GPL(foo);' with missing header KBUILD_CFLAGS += $(call cc-option,-Werror=implicit-int) @@ -1202,8 +1216,10 @@ ifdef CONFIG_LTO_CLANG ifneq ($(call clang-ifversion, -ge, 0500, y), y) @echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1 endif - ifneq ($(call gold-ifversion, -ge, 112000000, y), y) - @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + ifneq ($(ld-name),lld) + ifneq ($(call gold-ifversion, -ge, 112000000, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + endif endif endif # Make sure compiler supports LTO flags diff --git a/arch/Kconfig b/arch/Kconfig index eab393fb6ddb..6fc0c37d2145 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -227,6 +227,9 @@ config ARCH_HAS_FORTIFY_SOURCE An architecture should select this when it can successfully build and run with CONFIG_FORTIFY_SOURCE. +config ARCH_HAS_SET_MEMORY + bool + config FORTIFY_COMPILE_CHECK depends on ARCH_HAS_FORTIFY_SOURCE bool diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index d3d18e77655f..fa06bb8dded9 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -55,7 +55,7 @@ config FRAME_POINTER config OLD_MCOUNT bool depends on FUNCTION_TRACER && FRAME_POINTER - default y + default n config DEBUG_USER bool "Verbose user fault messages" diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 27e2309978b0..c61f657b18b1 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_ABLK_HELPER + select CRYPTO_SIMD help Use an implementation of AES in CBC, CTR and XTS modes that uses ARMv8 Crypto Extensions diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index d4ebf5679f1f..0b6e1040f9e7 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -106,6 +106,7 @@ static inline u32 arch_timer_get_cntkctl(void) static inline void arch_timer_set_cntkctl(u32 cntkctl) { asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl)); + isb(); } #endif diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 8e349ce8230c..4c825169668c 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -16,6 +16,7 @@ #include #include #include +#include #define CACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT) diff --git a/arch/arm/include/asm/set_memory.h b/arch/arm/include/asm/set_memory.h new file mode 100644 index 000000000000..5aa4315abe91 --- /dev/null +++ b/arch/arm/include/asm/set_memory.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 1999-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASMARM_SET_MEMORY_H +#define _ASMARM_SET_MEMORY_H + +#ifdef CONFIG_MMU +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif + +#ifdef CONFIG_STRICT_KERNEL_RWX +void set_kernel_text_rw(void); +void set_kernel_text_ro(void); +#else +static inline void set_kernel_text_rw(void) { } +static inline void set_kernel_text_ro(void) { } +#endif + +#endif diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 3da63a6a2ac6..66e5d8765601 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -92,8 +92,7 @@ void __init add_static_vm_early(struct static_vm *svm) void *vaddr; vm = &svm->vm; - if (!vm_area_check_early(vm)) - vm_area_add_early(vm); + vm_area_add_early(vm); vaddr = vm->addr; list_for_each_entry(curr_svm, &static_vmlist, list) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 21980d28c38d..bc000444aca2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1462,21 +1462,12 @@ static void __init map_lowmem(void) phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); #endif phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); - struct static_vm *svm; - phys_addr_t start; - phys_addr_t end; - unsigned long vaddr; - unsigned long pfn; - unsigned long length; - unsigned int type; - int nr = 0; /* Map all the lowmem memory banks. */ for_each_memblock(memory, reg) { + phys_addr_t start = reg->base; + phys_addr_t end = start + reg->size; struct map_desc map; - start = reg->base; - end = start + reg->size; - nr++; if (memblock_is_nomap(reg)) continue; @@ -1528,34 +1519,6 @@ static void __init map_lowmem(void) } } } - svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm)); - - for_each_memblock(memory, reg) { - struct vm_struct *vm; - - start = reg->base; - end = start + reg->size; - - if (end > arm_lowmem_limit) - end = arm_lowmem_limit; - if (start >= end) - break; - - vm = &svm->vm; - pfn = __phys_to_pfn(start); - vaddr = __phys_to_virt(start); - length = end - start; - type = MT_MEMORY_RW; - - vm->addr = (void *)(vaddr & PAGE_MASK); - vm->size = PAGE_ALIGN(length + (vaddr & ~PAGE_MASK)); - vm->phys_addr = __pfn_to_phys(pfn); - vm->flags = VM_LOWMEM; - vm->flags |= VM_ARM_MTYPE(type); - vm->caller = map_lowmem; - add_static_vm_early(svm++); - mark_vmalloc_reserved_area(vm->addr, vm->size); - } } #ifdef CONFIG_ARM_PV_FIXUP diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f50f0ce7d3f4..13c00fc1af18 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -9,6 +9,7 @@ config ARM64 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE @@ -23,6 +24,7 @@ config ARM64 select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_SET_MEMORY select ARM_AMBA select ARM_ARCH_TIMER select HAVE_KERNEL_GZIP @@ -37,6 +39,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK if !ARCH_QCOM select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select EDAC_SUPPORT select FRAME_POINTER diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 99d7f5127ef0..9f526cb676d2 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X -z norelro CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 @@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour # for relative relocs, since this leads to better Image compression # with the relocation offsets always being zero. -LDFLAGS_vmlinux += -pie -shared -Bsymbolic \ +LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ $(call ld-option, --no-apply-dynamic-relocs) endif @@ -100,15 +100,19 @@ ifeq ($(CONFIG_SHADOW_CALL_STACK), y) KBUILD_CFLAGS += -ffixed-x18 endif +KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128) + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian AS += -EB -LD += -EB +# Prefer the baremetal ELF build target, but not all toolchains include +LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL -LD += -EL +# Same as above, prefer ELF but fall back to linux target if needed. +LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) UTS_MACHINE := aarch64 endif diff --git a/arch/arm64/boot/dts/google/dsi-panel-s6e3ha8-dsc-wqhd-cmd.dtsi b/arch/arm64/boot/dts/google/dsi-panel-s6e3ha8-dsc-wqhd-cmd.dtsi index 3f2fa1c4a3d3..839b1807a65f 100644 --- a/arch/arm64/boot/dts/google/dsi-panel-s6e3ha8-dsc-wqhd-cmd.dtsi +++ b/arch/arm64/boot/dts/google/dsi-panel-s6e3ha8-dsc-wqhd-cmd.dtsi @@ -30,6 +30,7 @@ qcom,mdss-dsi-lane-2-state; qcom,mdss-dsi-lane-3-state; qcom,mdss-dsi-reset-sequence = <1 10>, <0 1>, <1 0>; + qcom,mdss-mdp-transfer-time-us = <7000>; qcom,mdss-dsi-bl-pmic-control-type = "bl_ctrl_dcs"; qcom,mdss-dsi-bl-vr-min-safe-level = <0xa>; @@ -53,11 +54,13 @@ /* qcom,esd-check-enabled; */ qcom,mdss-dsi-panel-status-check-mode = "te_signal_check"; - qcom,mdss-dsi-init-delay-us = <5000>; + qcom,mdss-dsi-init-delay-us = <2500>; qcom,mdss-pan-physical-width-dimension = <70>; qcom,mdss-pan-physical-height-dimension = <144>; + qcom,ulps-enabled; + qcom,mdss-dsi-display-timings { timing@0 { qcom,mdss-dsi-panel-framerate = <60>; @@ -74,6 +77,7 @@ qcom,mdss-dsi-h-right-border = <0>; qcom,mdss-dsi-v-top-border = <0>; qcom,mdss-dsi-v-bottom-border = <0>; + qcom,mdss-dsi-panel-jitter = <0x5 0x1>; qcom,mdss-dsi-on-command = [ 07 01 00 00 00 00 01 01 /* Compression Enable */ 05 01 00 00 78 00 01 11 /* sleep out, Wait 120 ms */ diff --git a/arch/arm64/boot/dts/google/dsi-panel-sw43408-dsc-fhd-cmd.dtsi b/arch/arm64/boot/dts/google/dsi-panel-sw43408-dsc-fhd-cmd.dtsi index ce4649dced93..a33a22c90409 100644 --- a/arch/arm64/boot/dts/google/dsi-panel-sw43408-dsc-fhd-cmd.dtsi +++ b/arch/arm64/boot/dts/google/dsi-panel-sw43408-dsc-fhd-cmd.dtsi @@ -61,6 +61,8 @@ qcom,mdss-dsi-panel-blackness-level = <42>; google,mdss-dsi-lp-brightness-compensation = <0>; + qcom,ulps-enabled; + qcom,mdss-dsi-display-timings { timing@0 { qcom,mdss-dsi-panel-framerate = <60>; diff --git a/arch/arm64/boot/dts/google/sdm845-b1-common.dtsi b/arch/arm64/boot/dts/google/sdm845-b1-common.dtsi index c5bca53cb6dc..3b64b6976c0c 100644 --- a/arch/arm64/boot/dts/google/sdm845-b1-common.dtsi +++ b/arch/arm64/boot/dts/google/sdm845-b1-common.dtsi @@ -20,6 +20,10 @@ qcom,dsi-display-active; }; +&mdss_dsi_phy0 { + qcom,panel-allow-phy-poweroff; +}; + &p9221 { fod = [a8 28 86 34 82 38 9a 12 9c 0b 84 65]; fod_epp = [a4 2a 86 36 84 3d 9a 14 9d 0b a2 fb]; diff --git a/arch/arm64/boot/dts/google/sdm845-c1-common.dtsi b/arch/arm64/boot/dts/google/sdm845-c1-common.dtsi index 4e217b060ae6..bbb6c34cce44 100644 --- a/arch/arm64/boot/dts/google/sdm845-c1-common.dtsi +++ b/arch/arm64/boot/dts/google/sdm845-c1-common.dtsi @@ -25,6 +25,10 @@ qcom,dsi-display-active; }; +&mdss_dsi_phy0 { + qcom,panel-allow-phy-poweroff; +}; + /* This overylay is required in order for acdbloader to load different files */ &soc { sound-tavil { diff --git a/arch/arm64/boot/dts/qcom/sdm845-gpu.dtsi b/arch/arm64/boot/dts/qcom/sdm845-gpu.dtsi index e8f85a92f9df..fa913e7255b8 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-gpu.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-gpu.dtsi @@ -61,7 +61,7 @@ qcom,gpu-quirk-hfi-use-reg; qcom,gpu-quirk-secvid-set-once; - qcom,idle-timeout = <80>; //msecs + qcom,idle-timeout = <64>; //msecs qcom,no-nap; qcom,highest-bank-bit = <15>; diff --git a/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi b/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi index 418b48ed1ae9..43d55d2de585 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-v2.dtsi @@ -413,24 +413,24 @@ &energy_costs { CPU_COST_0: core-cost0 { busy-cost-data = < - 300000 21 - 403200 26 - 480000 31 - 576000 36 - 652800 41 - 748800 46 - 825600 53 - 902400 59 - 979200 64 - 1056000 70 - 1132800 79 - 1228800 89 - 1324800 99 - 1420800 108 - 1516800 118 - 1612800 134 - 1689600 143 - 1766400 160 + 300000 29 + 403200 32 + 480000 36 + 576000 46 + 652800 45 + 748800 55 + 825600 60 + 902400 61 + 979200 66 + 1056000 79 + 1132800 86 + 1228800 84 + 1324800 91 + 1420800 93 + 1516800 108 + 1612800 121 + 1689600 113 + 1766400 142 >; idle-cost-data = < 10 8 6 4 @@ -438,40 +438,30 @@ }; CPU_COST_1: core-cost1 { busy-cost-data = < - 300000 90 - 403200 113 - 480000 135 - 576000 157 - 652800 175 - 748800 198 - 825600 210 - 902400 233 - 979200 263 - 1056000 285 - 1132800 322 - 1209600 341 - 1286400 379 - 1363200 410 - 1459200 462 - 1536000 503 - 1612800 542 - 1689600 600 - 1766400 649 - 1843200 717 - 1920000 813 - 1996800 866 - 2092800 1025 - 2169600 1054 - 2246400 1130 - 2323200 1254 - 2400000 1356 - 2476800 1503 - 2553600 1745 - 2649600 2106 - 2745600 2200 - 2764800 2300 - 2784000 2350 - 2803200 2400 + 825600 137 + 902400 137 + 979200 155 + 1056000 158 + 1209600 223 + 1286400 236 + 1363200 224 + 1459200 199 + 1536000 266 + 1612800 354 + 1689600 318 + 1766400 249 + 1843200 266 + 1920000 480 + 1996800 504 + 2092800 409 + 2169600 345 + 2246400 537 + 2323200 404 + 2400000 550 + 2476800 829 + 2553600 884 + 2649600 1071 + 2803200 1237 >; idle-cost-data = < 25 20 15 10 @@ -504,42 +494,30 @@ }; CLUSTER_COST_1: cluster-cost1 { busy-cost-data = < - 300000 24 - 403200 24 - 480000 25 - 576000 25 - 652800 26 - 748800 27 - 825600 28 - 902400 29 - 979200 30 - 1056000 32 - 1132800 34 - 1209600 37 - 1286400 40 - 1363200 45 - 1459200 50 - 1536000 57 - 1612800 64 - 1689600 74 - 1766400 84 - 1843200 96 - 1920000 106 - 1996800 113 - 2092800 120 - 2169600 125 - 2246400 127 - 2323200 130 - 2400000 135 - 2476800 140 - 2553600 145 - 2649600 150 - 2745600 155 - 2764800 160 - 2784000 165 - 2803200 170 - 2841600 180 - 2956800 190 + 825600 24 + 902400 24 + 979200 25 + 1056000 25 + 1209600 26 + 1286400 27 + 1363200 28 + 1459200 29 + 1536000 30 + 1612800 32 + 1689600 34 + 1766400 37 + 1843200 40 + 1920000 45 + 1996800 50 + 2092800 57 + 2169600 64 + 2246400 74 + 2323200 84 + 2400000 96 + 2476800 106 + 2553600 113 + 2649600 120 + 2803200 125 >; idle-cost-data = < 4 3 2 1 @@ -557,7 +535,7 @@ &msm_gpu { /* Updated chip ID */ qcom,chipid = <0x06030001>; - qcom,initial-pwrlevel = <6>; + qcom,initial-pwrlevel = <7>; qcom,gpu-pwrlevels { #address-cells = <1>; @@ -623,6 +601,14 @@ qcom,gpu-pwrlevel@7 { reg = <7>; + qcom,gpu-freq = <180000000>; + qcom,bus-freq = <3>; + qcom,bus-min = <2>; + qcom,bus-max = <4>; + }; + + qcom,gpu-pwrlevel@8 { + reg = <8>; qcom,gpu-freq = <0>; qcom,bus-freq = <0>; qcom,bus-min = <0>; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 212ffd9567a5..e03d7de1e220 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -56,7 +56,7 @@ compatible = "arm,armv8"; reg = <0x0 0x0>; enable-method = "psci"; - efficiency = <1024>; + efficiency = <611>; cache-size = <0x8000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs0>; @@ -93,7 +93,7 @@ compatible = "arm,armv8"; reg = <0x0 0x100>; enable-method = "psci"; - efficiency = <1024>; + efficiency = <611>; cache-size = <0x8000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs0>; @@ -124,7 +124,7 @@ compatible = "arm,armv8"; reg = <0x0 0x200>; enable-method = "psci"; - efficiency = <1024>; + efficiency = <611>; cache-size = <0x8000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs0>; @@ -155,7 +155,7 @@ compatible = "arm,armv8"; reg = <0x0 0x300>; enable-method = "psci"; - efficiency = <1024>; + efficiency = <611>; cache-size = <0x8000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs0>; @@ -186,7 +186,7 @@ compatible = "arm,armv8"; reg = <0x0 0x400>; enable-method = "psci"; - efficiency = <1740>; + efficiency = <1024>; cache-size = <0x20000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs1>; @@ -217,7 +217,7 @@ compatible = "arm,armv8"; reg = <0x0 0x500>; enable-method = "psci"; - efficiency = <1740>; + efficiency = <1024>; cache-size = <0x20000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs1>; @@ -248,7 +248,7 @@ compatible = "arm,armv8"; reg = <0x0 0x600>; enable-method = "psci"; - efficiency = <1740>; + efficiency = <1024>; cache-size = <0x20000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs1>; @@ -279,7 +279,7 @@ compatible = "arm,armv8"; reg = <0x0 0x700>; enable-method = "psci"; - efficiency = <1740>; + efficiency = <1024>; cache-size = <0x20000>; cpu-release-addr = <0x0 0x90000000>; qcom,lmh-dcvs = <&lmh_dcvs1>; @@ -477,7 +477,7 @@ }; chosen { - bootargs = "rcupdate.rcu_expedited=1"; + bootargs = "quiet rcupdate.rcu_normal_after_boot=1 noirqdebug"; }; soc: soc { }; @@ -506,7 +506,7 @@ removed_region: removed_region@85fc0000 { no-map; - reg = <0 0x85fc0000 0 0x2f40000>; + reg = <0 0x85fc0000 0 0x4b40000>; /* enlarge TA memory size from 34M to 62M on 2018/09/03 */ }; qseecom_mem: qseecom_region@0x8ab00000 { @@ -1409,7 +1409,7 @@ /* PM QoS */ qcom,pm-qos-cpu-groups = <0x0f 0xf0>; - qcom,pm-qos-cpu-group-latency-us = <70 70>; + qcom,pm-qos-cpu-group-latency-us = <100 100>; qcom,pm-qos-default-cpu = <0>; pinctrl-names = "dev-reset-assert", "dev-reset-deassert"; @@ -1853,6 +1853,7 @@ qcom,secure-context-bank; iommus = <&apps_smmu 0x140A 0x30>; dma-coherent; + status = "disabled"; }; qcom,msm_fastrpc_compute_cb11 { compatible = "qcom,msm-fastrpc-compute-cb"; @@ -2474,7 +2475,7 @@ qcom_seecom: qseecom@86d00000 { compatible = "qcom,qseecom"; - reg = <0x86d00000 0x2200000>; + reg = <0x86d00000 0x03E00000>; /* enlarge TA memory size from 34M to 62M on 2018/09/03 */ reg-names = "secapp-region"; qcom,hlos-num-ce-hw-instances = <1>; qcom,hlos-ce-hw-instance = <0>; diff --git a/arch/arm64/configs/b1c1_defconfig b/arch/arm64/configs/b1c1_defconfig index 832e964b2774..eafad049d1de 100644 --- a/arch/arm64/configs/b1c1_defconfig +++ b/arch/arm64/configs/b1c1_defconfig @@ -1,3 +1,5 @@ +CONFIG_LOCALVERSION="-📱cybertron-v9⚡" +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_KERNEL_LZ4=y # CONFIG_FHANDLE is not set CONFIG_AUDIT=y @@ -41,13 +43,14 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZ4 is not set # CONFIG_SYSFS_SYSCALL is not set CONFIG_KALLSYMS_ALL=y +# CONFIG_PROC_KALLSYMS is not set CONFIG_BPF_SYSCALL=y CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y -CONFIG_LTO_CLANG=y +# CONFIG_LTO_CLANG is not set CONFIG_CFI_CLANG=y CONFIG_SHADOW_CALL_STACK=y CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 @@ -123,6 +126,28 @@ CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y CONFIG_INET_UDP_DIAG=y CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=y +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_NV is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_TCP_CONG_DCTCP is not set +# CONFIG_TCP_CONG_CDG is not set +CONFIG_TCP_CONG_BBR=y +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_WESTWOOD is not set +CONFIG_DEFAULT_BBR=y +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="bbr" CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y @@ -411,6 +436,7 @@ CONFIG_WATCHDOG=y CONFIG_SOFT_WATCHDOG=y CONFIG_MFD_I2C_PMIC=y CONFIG_MFD_SPMI_PMIC=y +CONFIG_MSM_CDC_PINCTRL=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_PROXY_CONSUMER=y CONFIG_REGULATOR_QPNP_LABIBB=y @@ -433,7 +459,6 @@ CONFIG_CAMERA_FW_UPDATE=y CONFIG_QCOM_KGSL=y CONFIG_DRM=y CONFIG_DRM_SDE_RSC=y -CONFIG_FB_VIRTUAL=y CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y @@ -518,7 +543,7 @@ CONFIG_ION=y CONFIG_ION_MSM=y CONFIG_S2MPB04=y CONFIG_GOOGLE_WLAN_MAC=y -CONFIG_QCA_CLD_WLAN=m +CONFIG_QCA_CLD_WLAN=y CONFIG_GSI=y CONFIG_IPA3=y CONFIG_RMNET_IPA3=y @@ -583,6 +608,7 @@ CONFIG_QCOM_COMMAND_DB=y CONFIG_MSM_CDSP_LOADER=y CONFIG_QCOM_SMCINVOKE=y CONFIG_MSM_EVENT_TIMER=y +CONFIG_MSM_AVTIMER=y CONFIG_MSM_PM=y CONFIG_QTI_RPM_STATS_LOG=y CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC=y @@ -621,6 +647,7 @@ CONFIG_QFMT_V2=y CONFIG_FUSE_FS=y CONFIG_OVERLAY_FS=y CONFIG_VFAT_FS=y +CONFIG_EXFAT_FS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_ECRYPT_FS=y CONFIG_ECRYPT_FS_MESSAGING=y @@ -630,12 +657,13 @@ CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_PMSG=y CONFIG_PSTORE_RAM=y CONFIG_BLDR_DEBUG_LOG=y +CONFIG_EROFS_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_INFO is not set CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 CONFIG_DETECT_HUNG_TASK=y CONFIG_PANIC_TIMEOUT=1 @@ -681,3 +709,63 @@ CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_CRC32_ARM64=y CONFIG_XZ_DEC=y CONFIG_QMI_ENCDEC=y +CONFIG_CIRRUS_SPKR_PROTECTION=y +CONFIG_MSM_ADSP_LOADER=y +CONFIG_MSM_QDSP6_APRV2_GLINK=y +CONFIG_MSM_QDSP6_NOTIFIER=y +CONFIG_MSM_QDSP6_PDR=y +CONFIG_MSM_QDSP6_SSR=y +CONFIG_MSM_QDSP6V2_CODECS=y +CONFIG_PINCTRL_WCD=y +CONFIG_QTI_PP=y +CONFIG_SND_HWDEP_ROUTING=y +CONFIG_SND_SOC_CS35L36=y +CONFIG_SND_SOC_CS35L36_I2S=y +CONFIG_SND_SOC_CS35L36_TDM=y +CONFIG_SND_SOC_MACHINE_SDM845=y +CONFIG_SND_SOC_MACHINE_SDM845_MAX98927=y +CONFIG_SND_SOC_MSM_HOSTLESS_PCM=y +CONFIG_SND_SOC_MSM_QDSP6V2_INTF=y +CONFIG_SND_SOC_MSM_STUB=y +CONFIG_SND_SOC_QDSP6V2=y +CONFIG_SND_SOC_SDM845=y +CONFIG_SND_SOC_WCD934X=y +CONFIG_SND_SOC_WCD9XXX_V2=y +CONFIG_SND_SOC_WCD_SPI=y +CONFIG_WCD9XXX_CODEC_CORE=y +CONFIG_WCD_DSP_GLINK=y +CONFIG_WCD_MICBIAS_USE_PULLUP=y +CONFIG_WCD_SPI_AC=y +CONFIG_VMSTAT_INTERVAL=20 + +# +# MISC +# + +# Klapse +CONFIG_KLAPSE=y + +# Sound Control +CONFIG_SOUND_CONTROL=y + +# Wake Gestures +CONFIG_WAKE_GESTURES=y +CONFIG_BOEFFLA_WL_BLOCKER=y + +CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y +CONFIG_ARM64_SW_TTBR0_PAN=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_CPU_SW_DOMAIN_PAN=y +CONFIG_DM_VERITY=y +CONFIG_FUSE_FS=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_KSM=y +CONFIG_MD=y +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y +CONFIG_QUOTA_TREE=y +CONFIG_RANDOMIZE_BASE=y +CONFIG_TASKSTATS=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_UPROBE_EVENT=y diff --git a/arch/arm64/configs/bonito_defconfig b/arch/arm64/configs/bonito_defconfig index 523caa5341c5..41e96520401f 100644 --- a/arch/arm64/configs/bonito_defconfig +++ b/arch/arm64/configs/bonito_defconfig @@ -121,6 +121,28 @@ CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y CONFIG_INET_UDP_DIAG=y CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=y +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_NV is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_TCP_CONG_DCTCP is not set +# CONFIG_TCP_CONG_CDG is not set +CONFIG_TCP_CONG_BBR=y +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_WESTWOOD is not set +CONFIG_DEFAULT_BBR=y +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="bbr" CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore new file mode 100644 index 000000000000..879df8781ed5 --- /dev/null +++ b/arch/arm64/crypto/.gitignore @@ -0,0 +1,2 @@ +sha256-core.S +sha512-core.S diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2cf32e9887e1..206fe67069fd 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -23,6 +23,16 @@ config CRYPTO_GHASH_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_CRCT10DIF_ARM64_CE + tristate "CRCT10DIF digest algorithm using PMULL instructions" + depends on KERNEL_MODE_NEON && CRC_T10DIF + select CRYPTO_HASH + +config CRYPTO_CRC32_ARM64_CE + tristate "CRC32 and CRC32C digest algorithms using PMULL instructions" + depends on KERNEL_MODE_NEON && CRC32 + select CRYPTO_HASH + config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON @@ -40,14 +50,14 @@ config CRYPTO_AES_ARM64_CE_BLK depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_CE - select CRYPTO_ABLK_HELPER + select CRYPTO_SIMD config CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES - select CRYPTO_ABLK_HELPER + select CRYPTO_SIMD config CRYPTO_CRC32_ARM64 tristate "CRC32 and CRC32C using optional ARMv8 instructions" diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 550e02a5aa32..5879048f2287 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -17,6 +17,12 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o +crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o + +obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o +crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o + obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o aes-ce-cipher-y := aes-ce-cipher-glue.o aes-ce-cipher-core.o CFLAGS_aes-ce-cipher-core.o += -march=armv8-a+crypto -Wa,-march=armv8-a+crypto $(DISABLE_LTO) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index f4bf2f2a014c..cc5515dac74a 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -11,9 +11,9 @@ #include #include #include -#include #include #include +#include #include #include "aes-ce-setkey.h" @@ -149,12 +149,7 @@ static int ccm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); - struct blkcipher_desc desc = { .info = req->iv }; - struct blkcipher_walk walk; - struct scatterlist srcbuf[2]; - struct scatterlist dstbuf[2]; - struct scatterlist *src; - struct scatterlist *dst; + struct skcipher_walk walk; u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen; @@ -172,27 +167,19 @@ static int ccm_encrypt(struct aead_request *req) /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); - src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen); - dst = src; - if (req->src != req->dst) - dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen); - - blkcipher_walk_init(&walk, dst, src, len); - err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, - AES_BLOCK_SIZE); + err = skcipher_walk_aead_encrypt(&walk, req, true); while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; - if (walk.nbytes == len) + if (walk.nbytes == walk.total) tail = 0; ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); - len -= walk.nbytes - tail; - err = blkcipher_walk_done(&desc, &walk, tail); + err = skcipher_walk_done(&walk, tail); } if (!err) ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); @@ -203,7 +190,7 @@ static int ccm_encrypt(struct aead_request *req) return err; /* copy authtag to end of dst */ - scatterwalk_map_and_copy(mac, dst, req->cryptlen, + scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(aead), 1); return 0; @@ -214,12 +201,7 @@ static int ccm_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); unsigned int authsize = crypto_aead_authsize(aead); - struct blkcipher_desc desc = { .info = req->iv }; - struct blkcipher_walk walk; - struct scatterlist srcbuf[2]; - struct scatterlist dstbuf[2]; - struct scatterlist *src; - struct scatterlist *dst; + struct skcipher_walk walk; u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen - authsize; @@ -237,27 +219,19 @@ static int ccm_decrypt(struct aead_request *req) /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); - src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen); - dst = src; - if (req->src != req->dst) - dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen); - - blkcipher_walk_init(&walk, dst, src, len); - err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, - AES_BLOCK_SIZE); + err = skcipher_walk_aead_decrypt(&walk, req, true); while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; - if (walk.nbytes == len) + if (walk.nbytes == walk.total) tail = 0; ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); - len -= walk.nbytes - tail; - err = blkcipher_walk_done(&desc, &walk, tail); + err = skcipher_walk_done(&walk, tail); } if (!err) ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); @@ -268,7 +242,8 @@ static int ccm_decrypt(struct aead_request *req) return err; /* compare calculated auth tag with the stored one */ - scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize, + scatterwalk_map_and_copy(buf, req->src, + req->assoclen + req->cryptlen - authsize, authsize, 0); if (crypto_memneq(mac, buf, authsize)) @@ -287,6 +262,7 @@ static struct aead_alg ccm_aes_alg = { .cra_module = THIS_MODULE, }, .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, .maxauthsize = AES_BLOCK_SIZE, .setkey = ccm_setkey, .setauthsize = ccm_setauthsize, diff --git a/arch/arm64/crypto/aes-ce-cipher-core.c b/arch/arm64/crypto/aes-ce-cipher-core.c index 9f4191774b35..3ec77b767420 100644 --- a/arch/arm64/crypto/aes-ce-cipher-core.c +++ b/arch/arm64/crypto/aes-ce-cipher-core.c @@ -131,7 +131,7 @@ static u32 aes_sub(u32 input) __asm__("dup v1.4s, %w[in] ;" "movi v0.16b, #0 ;" "aese v0.16b, v1.16b ;" - "umov %w[out], v0.4s[0] ;" + "umov %w[out], v0.s[0] ;" : [out] "=r"(ret) : [in] "r"(input) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 6b2aa0fd6cd0..4e3f8adb1793 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -11,8 +11,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -80,13 +80,19 @@ struct crypto_aes_xts_ctx { struct crypto_aes_ctx __aligned(8) key2; }; -static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len); +} + +static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ret = xts_check_key(tfm, in_key, key_len); + ret = xts_verify_key(tfm, in_key, key_len); if (ret) return ret; @@ -97,111 +103,101 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, if (!ret) return 0; - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, true); first = 1; kernel_neon_begin(); @@ -209,17 +205,14 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); first = 0; - nbytes -= blocks * AES_BLOCK_SIZE; - if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) - break; - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes % AES_BLOCK_SIZE) { - u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + if (walk.nbytes) { u8 __aligned(8) tail[AES_BLOCK_SIZE]; + unsigned int nbytes = walk.nbytes; + u8 *tdst = walk.dst.virt.addr; + u8 *tsrc = walk.src.virt.addr; /* * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need @@ -230,227 +223,169 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); memcpy(tdst, tail, nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); return err; } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static struct crypto_alg aes_algs[] = { { - .cra_name = "__ecb-aes-" MODE, - .cra_driver_name = "__driver-ecb-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 0, - .setkey = aes_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, +static struct skcipher_alg aes_algs[] = { { + .base = { + .cra_name = "__ecb(aes)", + .cra_driver_name = "__ecb-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, }, { - .cra_name = "__cbc-aes-" MODE, - .cra_driver_name = "__driver-cbc-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, + .base = { + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, }, { - .cra_name = "__ctr-aes-" MODE, - .cra_driver_name = "__driver-ctr-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_setkey, - .encrypt = ctr_encrypt, - .decrypt = ctr_encrypt, + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, }, { - .cra_name = "__xts-aes-" MODE, - .cra_driver_name = "__driver-xts-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_set_key, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 0, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, } }; -static int __init aes_init(void) +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) { - return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } -static void __exit aes_exit(void) +static int __init aes_init(void) { - crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + + return 0; + +unregister_simds: + aes_exit(); + return err; } #ifdef USE_V8_CRYPTO_EXTENSIONS diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 838dad5c209f..067b4e222f74 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -297,17 +297,19 @@ AES_ENTRY(aes_ctr_encrypt) eor v1.16b, v1.16b, v3.16b st1 {v0.16b-v1.16b}, [x0], #32 #else - ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w8 + add w7, w6, #1 mov v0.16b, v4.16b - add v7.4s, v7.4s, v8.4s + add w8, w6, #2 mov v1.16b, v4.16b - rev32 v8.16b, v7.16b + add w9, w6, #3 mov v2.16b, v4.16b + rev w7, w7 mov v3.16b, v4.16b - mov v1.s[3], v8.s[0] - mov v2.s[3], v8.s[1] - mov v3.s[3], v8.s[2] + rev w8, w8 + mov v1.s[3], w7 + rev w9, w9 + mov v2.s[3], w8 + mov v3.s[3], w9 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ do_encrypt_block4x eor v0.16b, v5.16b, v0.16b diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S new file mode 100644 index 000000000000..18f5a8442276 --- /dev/null +++ b/arch/arm64/crypto/crc32-ce-core.S @@ -0,0 +1,266 @@ +/* + * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. + * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas + * Alexander Boyko + */ + +#include +#include + + .text + .align 6 + .cpu generic+crypto+crc + +.Lcrc32_constants: + /* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ + .octa 0x00000001c6e415960000000154442bd4 + + /* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ + .octa 0x00000000ccaa009e00000001751997d0 + + /* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ + .quad 0x0000000163cd6124 + .quad 0x00000000FFFFFFFF + + /* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` + * = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ + .octa 0x00000001F701164100000001DB710641 + +.Lcrc32c_constants: + .octa 0x000000009e4addf800000000740eef02 + .octa 0x000000014cd00bd600000000f20c0dfe + .quad 0x00000000dd45aab8 + .quad 0x00000000FFFFFFFF + .octa 0x00000000dea713f10000000105ec76f0 + + vCONSTANT .req v0 + dCONSTANT .req d0 + qCONSTANT .req q0 + + BUF .req x0 + LEN .req x1 + CRC .req x2 + + vzr .req v9 + + /** + * Calculate crc32 + * BUF - buffer + * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pmull_le(unsigned char const *buffer, + * size_t len, uint crc32) + */ +ENTRY(crc32_pmull_le) + adr x3, .Lcrc32_constants + b 0f + +ENTRY(crc32c_pmull_le) + adr x3, .Lcrc32c_constants + +0: bic LEN, LEN, #15 + ld1 {v1.16b-v4.16b}, [BUF], #0x40 + movi vzr.16b, #0 + fmov dCONSTANT, CRC + eor v1.16b, v1.16b, vCONSTANT.16b + sub LEN, LEN, #0x40 + cmp LEN, #0x40 + b.lt less_64 + + ldr qCONSTANT, [x3] + +loop_64: /* 64 bytes Full cache line folding */ + sub LEN, LEN, #0x40 + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull2 v6.1q, v2.2d, vCONSTANT.2d + pmull2 v7.1q, v3.2d, vCONSTANT.2d + pmull2 v8.1q, v4.2d, vCONSTANT.2d + + pmull v1.1q, v1.1d, vCONSTANT.1d + pmull v2.1q, v2.1d, vCONSTANT.1d + pmull v3.1q, v3.1d, vCONSTANT.1d + pmull v4.1q, v4.1d, vCONSTANT.1d + + eor v1.16b, v1.16b, v5.16b + ld1 {v5.16b}, [BUF], #0x10 + eor v2.16b, v2.16b, v6.16b + ld1 {v6.16b}, [BUF], #0x10 + eor v3.16b, v3.16b, v7.16b + ld1 {v7.16b}, [BUF], #0x10 + eor v4.16b, v4.16b, v8.16b + ld1 {v8.16b}, [BUF], #0x10 + + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v7.16b + eor v4.16b, v4.16b, v8.16b + + cmp LEN, #0x40 + b.ge loop_64 + +less_64: /* Folding cache line into 128bit */ + ldr qCONSTANT, [x3, #16] + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v2.16b + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v3.16b + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v4.16b + + cbz LEN, fold_64 + +loop_16: /* Folding rest buffer into 128bit */ + subs LEN, LEN, #0x10 + + ld1 {v2.16b}, [BUF], #0x10 + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v2.16b + + b.ne loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + ext v2.16b, v1.16b, v1.16b, #8 + pmull2 v2.1q, v2.2d, vCONSTANT.2d + ext v1.16b, v1.16b, vzr.16b, #8 + eor v1.16b, v1.16b, v2.16b + + /* final 32-bit fold */ + ldr dCONSTANT, [x3, #32] + ldr d3, [x3, #40] + + ext v2.16b, v1.16b, vzr.16b, #4 + and v1.16b, v1.16b, v3.16b + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v2.16b + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ + ldr qCONSTANT, [x3, #48] + + and v2.16b, v1.16b, v3.16b + ext v2.16b, vzr.16b, v2.16b, #8 + pmull2 v2.1q, v2.2d, vCONSTANT.2d + and v2.16b, v2.16b, v3.16b + pmull v2.1q, v2.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v2.16b + mov w0, v1.s[1] + + ret +ENDPROC(crc32_pmull_le) +ENDPROC(crc32c_pmull_le) + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_armv8_le) + __crc32 +ENDPROC(crc32_armv8_le) + + .align 5 +ENTRY(crc32c_armv8_le) + __crc32 c +ENDPROC(crc32c_armv8_le) diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c new file mode 100644 index 000000000000..8594127d5e01 --- /dev/null +++ b/arch/arm64/crypto/crc32-ce-glue.c @@ -0,0 +1,212 @@ +/* + * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#define PMULL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pmull_le_16 */ +#define SCALE_F 16L /* size of NEON register */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len); + +static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len); +static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len); + +static int crc32_pmull_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + return 0; +} + +static int crc32c_pmull_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + return 0; +} + +static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pmull_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crc = shash_desc_ctx(desc); + + *crc = *mctx; + return 0; +} + +static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + unsigned int l; + + if ((u64)data % SCALE_F) { + l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F)); + + *crc = fallback_crc32(*crc, data, l); + + data += l; + length -= l; + } + + if (length >= PMULL_MIN_LEN) { + l = round_down(length, SCALE_F); + + kernel_neon_begin_partial(10); + *crc = crc32_pmull_le(data, l, *crc); + kernel_neon_end(); + + data += l; + length -= l; + } + + if (length > 0) + *crc = fallback_crc32(*crc, data, length); + + return 0; +} + +static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + unsigned int l; + + if ((u64)data % SCALE_F) { + l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F)); + + *crc = fallback_crc32c(*crc, data, l); + + data += l; + length -= l; + } + + if (length >= PMULL_MIN_LEN) { + l = round_down(length, SCALE_F); + + kernel_neon_begin_partial(10); + *crc = crc32c_pmull_le(data, l, *crc); + kernel_neon_end(); + + data += l; + length -= l; + } + + if (length > 0) { + *crc = fallback_crc32c(*crc, data, length); + } + + return 0; +} + +static int crc32_pmull_final(struct shash_desc *desc, u8 *out) +{ + u32 *crc = shash_desc_ctx(desc); + + put_unaligned_le32(*crc, out); + return 0; +} + +static int crc32c_pmull_final(struct shash_desc *desc, u8 *out) +{ + u32 *crc = shash_desc_ctx(desc); + + put_unaligned_le32(~*crc, out); + return 0; +} + +static struct shash_alg crc32_pmull_algs[] = { { + .setkey = crc32_pmull_setkey, + .init = crc32_pmull_init, + .update = crc32_pmull_update, + .final = crc32_pmull_final, + .descsize = sizeof(u32), + .digestsize = sizeof(u32), + + .base.cra_ctxsize = sizeof(u32), + .base.cra_init = crc32_pmull_cra_init, + .base.cra_name = "crc32", + .base.cra_driver_name = "crc32-arm64-ce", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_module = THIS_MODULE, +}, { + .setkey = crc32_pmull_setkey, + .init = crc32_pmull_init, + .update = crc32c_pmull_update, + .final = crc32c_pmull_final, + .descsize = sizeof(u32), + .digestsize = sizeof(u32), + + .base.cra_ctxsize = sizeof(u32), + .base.cra_init = crc32c_pmull_cra_init, + .base.cra_name = "crc32c", + .base.cra_driver_name = "crc32c-arm64-ce", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_module = THIS_MODULE, +} }; + +static int __init crc32_pmull_mod_init(void) +{ + if (elf_hwcap & HWCAP_CRC32) { + fallback_crc32 = crc32_armv8_le; + fallback_crc32c = crc32c_armv8_le; + } else { + fallback_crc32 = crc32_le; + fallback_crc32c = __crc32c_le; + } + + return crypto_register_shashes(crc32_pmull_algs, + ARRAY_SIZE(crc32_pmull_algs)); +} + +static void __exit crc32_pmull_mod_exit(void) +{ + crypto_unregister_shashes(crc32_pmull_algs, + ARRAY_SIZE(crc32_pmull_algs)); +} + +module_cpu_feature_match(PMULL, crc32_pmull_mod_init); +module_exit(crc32_pmull_mod_exit); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S new file mode 100644 index 000000000000..d5b5a8c038c8 --- /dev/null +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -0,0 +1,392 @@ +// +// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions +// +// Copyright (C) 2016 Linaro Ltd +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// + +// +// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions +// +// Copyright (c) 2013, Intel Corporation +// +// Authors: +// Erdinc Ozturk +// Vinodh Gopal +// James Guilford +// Tim Chen +// +// This software is available to you under a choice of one of two +// licenses. You may choose to be licensed under the terms of the GNU +// General Public License (GPL) Version 2, available from the file +// COPYING in the main directory of this source tree, or the +// OpenIB.org BSD license below: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// * Neither the name of the Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// +// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Function API: +// UINT16 crc_t10dif_pcl( +// UINT16 init_crc, //initial CRC value, 16 bits +// const unsigned char *buf, //buffer pointer to calculate CRC on +// UINT64 len //buffer length in bytes (64-bit data) +// ); +// +// Reference paper titled "Fast CRC Computation for Generic +// Polynomials Using PCLMULQDQ Instruction" +// URL: http://www.intel.com/content/dam/www/public/us/en/documents +// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +// +// + +#include +#include + + .text + .cpu generic+crypto + + arg1_low32 .req w0 + arg2 .req x1 + arg3 .req x2 + + vzr .req v13 + +ENTRY(crc_t10dif_pmull) + movi vzr.16b, #0 // init zero register + + // adjust the 16-bit initial_crc value, scale it to 32 bits + lsl arg1_low32, arg1_low32, #16 + + // check if smaller than 256 + cmp arg3, #256 + + // for sizes less than 128, we can't fold 64B at a time... + b.lt _less_than_128 + + // load the initial crc value + // crc value does not need to be byte-reflected, but it needs + // to be moved to the high part of the register. + // because data will be byte-reflected and will align with + // initial crc at correct place. + movi v10.16b, #0 + mov v10.s[3], arg1_low32 // initial crc + + // receive the initial 64B data, xor the initial crc value + ldp q0, q1, [arg2] + ldp q2, q3, [arg2, #0x20] + ldp q4, q5, [arg2, #0x40] + ldp q6, q7, [arg2, #0x60] + add arg2, arg2, #0x80 + +CPU_LE( rev64 v0.16b, v0.16b ) +CPU_LE( rev64 v1.16b, v1.16b ) +CPU_LE( rev64 v2.16b, v2.16b ) +CPU_LE( rev64 v3.16b, v3.16b ) +CPU_LE( rev64 v4.16b, v4.16b ) +CPU_LE( rev64 v5.16b, v5.16b ) +CPU_LE( rev64 v6.16b, v6.16b ) +CPU_LE( rev64 v7.16b, v7.16b ) + +CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) +CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) +CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 ) +CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 ) +CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 ) +CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 ) +CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 ) +CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + + // XOR the initial_crc value + eor v0.16b, v0.16b, v10.16b + + ldr q10, rk3 // xmm10 has rk3 and rk4 + // type of pmull instruction + // will determine which constant to use + + // + // we subtract 256 instead of 128 to save one instruction from the loop + // + sub arg3, arg3, #256 + + // at this section of the code, there is 64*x+y (0<=y<64) bytes of + // buffer. The _fold_64_B_loop will fold 64B at a time + // until we have 64+y Bytes of buffer + + + // fold 64B at a time. This section of the code folds 4 vector + // registers in parallel +_fold_64_B_loop: + + .macro fold64, reg1, reg2 + ldp q11, q12, [arg2], #0x20 + + pmull2 v8.1q, \reg1\().2d, v10.2d + pmull \reg1\().1q, \reg1\().1d, v10.1d + +CPU_LE( rev64 v11.16b, v11.16b ) +CPU_LE( rev64 v12.16b, v12.16b ) + + pmull2 v9.1q, \reg2\().2d, v10.2d + pmull \reg2\().1q, \reg2\().1d, v10.1d + +CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 ) +CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) + + eor \reg1\().16b, \reg1\().16b, v8.16b + eor \reg2\().16b, \reg2\().16b, v9.16b + eor \reg1\().16b, \reg1\().16b, v11.16b + eor \reg2\().16b, \reg2\().16b, v12.16b + .endm + + fold64 v0, v1 + fold64 v2, v3 + fold64 v4, v5 + fold64 v6, v7 + + subs arg3, arg3, #128 + + // check if there is another 64B in the buffer to be able to fold + b.ge _fold_64_B_loop + + // at this point, the buffer pointer is pointing at the last y Bytes + // of the buffer the 64B of folded data is in 4 of the vector + // registers: v0, v1, v2, v3 + + // fold the 8 vector registers to 1 vector register with different + // constants + + ldr q10, rk9 + + .macro fold16, reg, rk + pmull v8.1q, \reg\().1d, v10.1d + pmull2 \reg\().1q, \reg\().2d, v10.2d + .ifnb \rk + ldr q10, \rk + .endif + eor v7.16b, v7.16b, v8.16b + eor v7.16b, v7.16b, \reg\().16b + .endm + + fold16 v0, rk11 + fold16 v1, rk13 + fold16 v2, rk15 + fold16 v3, rk17 + fold16 v4, rk19 + fold16 v5, rk1 + fold16 v6 + + // instead of 64, we add 48 to the loop counter to save 1 instruction + // from the loop instead of a cmp instruction, we use the negative + // flag with the jl instruction + adds arg3, arg3, #(128-16) + b.lt _final_reduction_for_128 + + // now we have 16+y bytes left to reduce. 16 Bytes is in register v7 + // and the rest is in memory. We can fold 16 bytes at a time if y>=16 + // continue folding 16B at a time + +_16B_reduction_loop: + pmull v8.1q, v7.1d, v10.1d + pmull2 v7.1q, v7.2d, v10.2d + eor v7.16b, v7.16b, v8.16b + + ldr q0, [arg2], #16 +CPU_LE( rev64 v0.16b, v0.16b ) +CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) + eor v7.16b, v7.16b, v0.16b + subs arg3, arg3, #16 + + // instead of a cmp instruction, we utilize the flags with the + // jge instruction equivalent of: cmp arg3, 16-16 + // check if there is any more 16B in the buffer to be able to fold + b.ge _16B_reduction_loop + + // now we have 16+z bytes left to reduce, where 0<= z < 16. + // first, we reduce the data in the xmm7 register + +_final_reduction_for_128: + // check if any more data to fold. If not, compute the CRC of + // the final 128 bits + adds arg3, arg3, #16 + b.eq _128_done + + // here we are getting data that is less than 16 bytes. + // since we know that there was data before the pointer, we can + // offset the input pointer before the actual point, to receive + // exactly 16 bytes. after that the registers need to be adjusted. +_get_last_two_regs: + add arg2, arg2, arg3 + ldr q1, [arg2, #-16] +CPU_LE( rev64 v1.16b, v1.16b ) +CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) + + // get rid of the extra data that was loaded before + // load the shift constant + adr x4, tbl_shf_table + 16 + sub x4, x4, arg3 + ld1 {v0.16b}, [x4] + + // shift v2 to the left by arg3 bytes + tbl v2.16b, {v7.16b}, v0.16b + + // shift v7 to the right by 16-arg3 bytes + movi v9.16b, #0x80 + eor v0.16b, v0.16b, v9.16b + tbl v7.16b, {v7.16b}, v0.16b + + // blend + sshr v0.16b, v0.16b, #7 // convert to 8-bit mask + bsl v0.16b, v2.16b, v1.16b + + // fold 16 Bytes + pmull v8.1q, v7.1d, v10.1d + pmull2 v7.1q, v7.2d, v10.2d + eor v7.16b, v7.16b, v8.16b + eor v7.16b, v7.16b, v0.16b + +_128_done: + // compute crc of a 128-bit value + ldr q10, rk5 // rk5 and rk6 in xmm10 + + // 64b fold + ext v0.16b, vzr.16b, v7.16b, #8 + mov v7.d[0], v7.d[1] + pmull v7.1q, v7.1d, v10.1d + eor v7.16b, v7.16b, v0.16b + + // 32b fold + ext v0.16b, v7.16b, vzr.16b, #4 + mov v7.s[3], vzr.s[0] + pmull2 v0.1q, v0.2d, v10.2d + eor v7.16b, v7.16b, v0.16b + + // barrett reduction +_barrett: + ldr q10, rk7 + mov v0.d[0], v7.d[1] + + pmull v0.1q, v0.1d, v10.1d + ext v0.16b, vzr.16b, v0.16b, #12 + pmull2 v0.1q, v0.2d, v10.2d + ext v0.16b, vzr.16b, v0.16b, #12 + eor v7.16b, v7.16b, v0.16b + mov w0, v7.s[1] + +_cleanup: + // scale the result back to 16 bits + lsr x0, x0, #16 + ret + +_less_than_128: + cbz arg3, _cleanup + + movi v0.16b, #0 + mov v0.s[3], arg1_low32 // get the initial crc value + + ldr q7, [arg2], #0x10 +CPU_LE( rev64 v7.16b, v7.16b ) +CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + eor v7.16b, v7.16b, v0.16b // xor the initial crc value + + cmp arg3, #16 + b.eq _128_done // exactly 16 left + b.lt _less_than_16_left + + ldr q10, rk1 // rk1 and rk2 in xmm10 + + // update the counter. subtract 32 instead of 16 to save one + // instruction from the loop + subs arg3, arg3, #32 + b.ge _16B_reduction_loop + + add arg3, arg3, #16 + b _get_last_two_regs + +_less_than_16_left: + // shl r9, 4 + adr x0, tbl_shf_table + 16 + sub x0, x0, arg3 + ld1 {v0.16b}, [x0] + movi v9.16b, #0x80 + eor v0.16b, v0.16b, v9.16b + tbl v7.16b, {v7.16b}, v0.16b + b _128_done +ENDPROC(crc_t10dif_pmull) + +// precomputed constants +// these constants are precomputed from the poly: +// 0x8bb70000 (0x8bb7 scaled to 32 bits) + .align 4 +// Q = 0x18BB70000 +// rk1 = 2^(32*3) mod Q << 32 +// rk2 = 2^(32*5) mod Q << 32 +// rk3 = 2^(32*15) mod Q << 32 +// rk4 = 2^(32*17) mod Q << 32 +// rk5 = 2^(32*3) mod Q << 32 +// rk6 = 2^(32*2) mod Q << 32 +// rk7 = floor(2^64/Q) +// rk8 = Q + +rk1: .octa 0x06df0000000000002d56000000000000 +rk3: .octa 0x7cf50000000000009d9d000000000000 +rk5: .octa 0x13680000000000002d56000000000000 +rk7: .octa 0x000000018bb7000000000001f65a57f8 +rk9: .octa 0xbfd6000000000000ceae000000000000 +rk11: .octa 0x713c0000000000001e16000000000000 +rk13: .octa 0x80a6000000000000f7f9000000000000 +rk15: .octa 0xe658000000000000044c000000000000 +rk17: .octa 0xa497000000000000ad18000000000000 +rk19: .octa 0xe7b50000000000006ee3000000000000 + +tbl_shf_table: +// use these values for shift constants for the tbl/tbx instruction +// different alignments result in values as shown: +// DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 +// DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 +// DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 +// DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 +// DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 +// DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 +// DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 +// DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 +// DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 +// DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 +// DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 +// DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 +// DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 +// DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 +// DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 + + .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 + .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c new file mode 100644 index 000000000000..60cb590c2590 --- /dev/null +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -0,0 +1,95 @@ +/* + * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len); + +static int crct10dif_init(struct shash_desc *desc) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = 0; + return 0; +} + +static int crct10dif_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u16 *crc = shash_desc_ctx(desc); + unsigned int l; + + if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) { + l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE - + ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)); + + *crc = crc_t10dif_generic(*crc, data, l); + + length -= l; + data += l; + } + + if (length > 0) { + kernel_neon_begin_partial(14); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); + } + + return 0; +} + +static int crct10dif_final(struct shash_desc *desc, u8 *out) +{ + u16 *crc = shash_desc_ctx(desc); + + *(u16 *)out = *crc; + return 0; +} + +static struct shash_alg crc_t10dif_alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = crct10dif_init, + .update = crct10dif_update, + .final = crct10dif_final, + .descsize = CRC_T10DIF_DIGEST_SIZE, + + .base.cra_name = "crct10dif", + .base.cra_driver_name = "crct10dif-arm64-ce", + .base.cra_priority = 200, + .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init crc_t10dif_mod_init(void) +{ + return crypto_register_shash(&crc_t10dif_alg); +} + +static void __exit crc_t10dif_mod_exit(void) +{ + crypto_unregister_shash(&crc_t10dif_alg); +} + +module_cpu_feature_match(PMULL, crc_t10dif_mod_init); +module_exit(crc_t10dif_mod_exit); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 28196b18e394..66e779f16c37 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -31,6 +31,7 @@ generic-y += rwsem.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h +generic-y += set_memory.h generic-y += shmbuf.h generic-y += simd.h generic-y += sizes.h @@ -45,4 +46,3 @@ generic-y += types.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h -generic-y += xor.h diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 1b0d7e994e0c..40cb1842eb93 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -129,6 +129,7 @@ static inline u32 arch_timer_get_cntkctl(void) static inline void arch_timer_set_cntkctl(u32 cntkctl) { write_sysreg(cntkctl, cntkctl_el1); + isb(); } static inline u64 arch_counter_get_cntpct(void) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7d5bc694c43c..a053e07e59b8 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -386,21 +386,33 @@ alternative_endif * size: size of the region * Corrupts: kaddr, size, tmp1, tmp2 */ + .macro __dcache_op_workaround_clean_cache, op, kaddr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .endm + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 dcache_line_size \tmp1, \tmp2 add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 9998: - .if (\op == cvau || \op == cvac) -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr -alternative_else - dc civac, \kaddr -alternative_endif + .ifc \op, cvau + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvac + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvap + sys 3, c7, c12, 1, \kaddr // dc cvap .else dc \op, \kaddr .endif + .endif + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 9377bec034fa..11ba1a570394 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -20,6 +20,7 @@ #define __ASM_CACHEFLUSH_H #include +#include /* * This flag is used to indicate that the page pointed to by a pte is clean diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d050d720a1b4..71b1648c2d10 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -95,14 +95,7 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); -#else -static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) -{ - BUILD_BUG(); -} -#endif extern int __cpu_disable(void); diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f37a49c3fbc5..7222200c70e2 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -40,6 +40,7 @@ typedef unsigned long mm_segment_t; */ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long padding[7]; mm_segment_t addr_limit; /* address limit */ #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h new file mode 100644 index 000000000000..856386ad076c --- /dev/null +++ b/arch/arm64/include/asm/xor.h @@ -0,0 +1,73 @@ +/* + * arch/arm64/include/asm/xor.h + * + * Authors: Jackie Liu + * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#ifdef CONFIG_KERNEL_MODE_NEON + +extern struct xor_block_template const xor_block_inner_neon; + +static void +xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_2(bytes, p1, p2); + kernel_neon_end(); +} + +static void +xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_3(bytes, p1, p2, p3); + kernel_neon_end(); +} + +static void +xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); + kernel_neon_end(); +} + +static void +xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); + kernel_neon_end(); +} + +static struct xor_block_template xor_block_arm64 = { + .name = "arm64_neon", + .do_2 = xor_neon_2, + .do_3 = xor_neon_3, + .do_4 = xor_neon_4, + .do_5 = xor_neon_5 +}; +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (cpu_has_neon()) { \ + xor_speed(&xor_block_arm64);\ + } \ + } while (0) + +#endif /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 3101d1b61713..ed09ffbec9fc 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -61,7 +61,7 @@ NOKPROBE_SYMBOL(mdscr_read); * Allow root to disable self-hosted debug from userspace. * This is useful if you want to connect an external JTAG debugger. */ -static bool debug_enabled = true; +static bool debug_enabled; static int create_debug_debugfs_entry(void) { diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 464fcccb23bf..7d5ba532c1c5 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -133,7 +133,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) /* Save the mpidr of the cpu we called cpu_suspend() on... */ if (sleep_cpu < 0) { - pr_err("Failing to hibernate on an unkown CPU.\n"); + pr_err("Failing to hibernate on an unknown CPU.\n"); return -ENODEV; } hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu); @@ -549,7 +549,7 @@ int swsusp_arch_resume(void) int hibernate_resume_nonboot_cpu_disable(void) { if (sleep_cpu < 0) { - pr_err("Failing to resume from hibernate on an unkown CPU.\n"); + pr_err("Failing to resume from hibernate on an unknown CPU.\n"); return -ENODEV; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index e6f2efd67513..ae0860974e3c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -1233,3 +1234,32 @@ static struct platform_driver armv8_pmu_driver = { }; builtin_platform_driver(armv8_pmu_driver); + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + u32 freq; + u32 shift; + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + freq = arch_timer_get_rate(); + userpg->cap_user_time = 1; + + clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, + NSEC_PER_SEC, 0); + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (shift == 32) { + shift = 31; + userpg->time_mult >>= 1; + } + userpg->time_shift = (u16)shift; + userpg->time_offset = -now; +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5818630395a7..695200060d95 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -272,7 +272,7 @@ asmlinkage notrace void secondary_start_kernel(void) * the CPU migration code to notice that the CPU is online * before we continue. */ - pr_info("CPU%u: Booted secondary processor [%08x]\n", + pr_debug("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); update_cpu_boot_status(CPU_BOOT_SUCCESS); set_cpu_online(cpu, true); @@ -818,12 +818,10 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call_common(cpumask_of(cpu), IPI_CALL_FUNC); } -#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL void arch_send_wakeup_ipi_mask(const struct cpumask *mask) { smp_cross_call_common(mask, IPI_WAKEUP); } -#endif #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) @@ -974,13 +972,8 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif -#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL case IPI_WAKEUP: - WARN_ONCE(!acpi_parking_protocol_valid(cpu), - "CPU%u: Wake-up IPI outside the ACPI parking protocol\n", - cpu); break; -#endif case IPI_CPU_BACKTRACE: ipi_cpu_backtrace(cpu, regs); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index e3bc87826c18..91e556883d35 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -372,7 +372,7 @@ static void update_cpu_power(unsigned int cpu) set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); - pr_info("CPU%u: update cpu_power %lu\n", + pr_debug("CPU%u: update cpu_power %lu\n", cpu, arch_scale_freq_power(NULL, cpu)); } @@ -444,7 +444,7 @@ static void update_cpu_capacity(unsigned int cpu) set_capacity_scale(cpu, capacity); - pr_info("CPU%d: update cpu_capacity %lu\n", + pr_debug("CPU%d: update cpu_capacity %lu\n", cpu, arch_scale_cpu_capacity(NULL, cpu)); } diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 55c9cbba1599..550b690daf96 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -99,7 +99,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ VDSO_LDFLAGS := $(VDSO_CPPFLAGS) # From arm vDSO Makefile VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 -VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096,-zexecstack VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv) VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index c1947a3067e8..010a2ca70b58 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -122,7 +122,8 @@ SECTIONS *(.discard) *(.discard.*) *(.interp .dynamic) - *(.dynsym .dynstr .hash) + *(.dynsym .dynstr .hash .gnu.hash) + *(.eh_frame) } . = KIMAGE_VADDR + TEXT_OFFSET; @@ -193,12 +194,12 @@ SECTIONS .altinstr_replacement : { KEEP(*(.altinstr_replacement)) } - .rela : ALIGN(8) { + .rela.dyn : ALIGN(8) { *(.rela .rela*) } - __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela); + __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); + __rela_size = SIZEOF(.rela.dyn); . = ALIGN(SEGMENT_ALIGN); __init_end = .; diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 7d3ccc4c184e..d5da03d4c954 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -2,7 +2,13 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ - strchr.o strrchr.o + strchr.o strrchr.o tishift.o + +ifeq ($(CONFIG_KERNEL_MODE_NEON), y) +obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o +CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only +CFLAGS_xor-neon.o += -ffreestanding +endif # Tell the compiler to treat all general purpose registers (with the # exception of the IP registers, which are already handled by the caller @@ -16,3 +22,5 @@ CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..facb28b6e361 --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,98 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + + .cpu generic+crc + + .macro __crc32, c + cmp x2, #16 + b.lt 8f // less than 16 bytes + + and x7, x2, #0x1f + and x2, x2, #~0x1f + cbz x7, 32f // multiple of 32 bytes + + and x8, x7, #0xf + ldp x3, x4, [x1] + add x8, x8, x1 + add x1, x1, x7 + ldp x5, x6, [x8] +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) +CPU_BE( rev x5, x5 ) +CPU_BE( rev x6, x6 ) + + tst x7, #8 + crc32\c\()x w8, w0, x3 + csel x3, x3, x4, eq + csel w0, w0, w8, eq + tst x7, #4 + lsr x4, x3, #32 + crc32\c\()w w8, w0, w3 + csel x3, x3, x4, eq + csel w0, w0, w8, eq + tst x7, #2 + lsr w4, w3, #16 + crc32\c\()h w8, w0, w3 + csel w3, w3, w4, eq + csel w0, w0, w8, eq + tst x7, #1 + crc32\c\()b w8, w0, w3 + csel w0, w0, w8, eq + tst x7, #16 + crc32\c\()x w8, w0, x5 + crc32\c\()x w8, w8, x6 + csel w0, w0, w8, eq + cbz x2, 0f + +32: ldp x3, x4, [x1], #32 + sub x2, x2, #32 + ldp x5, x6, [x1, #-16] +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) +CPU_BE( rev x5, x5 ) +CPU_BE( rev x6, x6 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + crc32\c\()x w0, w0, x5 + crc32\c\()x w0, w0, x6 + cbnz x2, 32b +0: ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) + __crc32 c +ENDPROC(__crc32c_le) diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c index dad4ec9bbfd1..e48ac402e7be 100644 --- a/arch/arm64/lib/delay.c +++ b/arch/arm64/lib/delay.c @@ -24,10 +24,28 @@ #include #include +#include + +#define USECS_TO_CYCLES(time_usecs) \ + xloops_to_cycles((time_usecs) * 0x10C7UL) + +static inline unsigned long xloops_to_cycles(unsigned long xloops) +{ + return (xloops * loops_per_jiffy * HZ) >> 32; +} + void __delay(unsigned long cycles) { cycles_t start = get_cycles(); + if (arch_timer_evtstrm_available()) { + const cycles_t timer_evt_period = + USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US); + + while ((get_cycles() - start + timer_evt_period) < cycles) + wfe(); + } + while ((get_cycles() - start) < cycles) cpu_relax(); } @@ -35,10 +53,7 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - unsigned long loops; - - loops = xloops * loops_per_jiffy * HZ; - __delay(loops >> 32); + __delay(xloops_to_cycles(xloops)); } EXPORT_SYMBOL(__const_udelay); diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 67613937711f..dfedd4ab1a76 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -68,9 +68,8 @@ stp \ptr, \regB, [\regC], \val .endm - .weak memcpy ENTRY(__memcpy) -ENTRY(memcpy) +WEAK(memcpy) #include "copy_template.S" ret ENDPIPROC(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index a5a4459013b1..e3de8f05c21a 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -57,9 +57,8 @@ C_h .req x12 D_l .req x13 D_h .req x14 - .weak memmove ENTRY(__memmove) -ENTRY(memmove) +WEAK(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index f2670a9f218c..316263c47c00 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -54,9 +54,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 - .weak memset ENTRY(__memset) -ENTRY(memset) +WEAK(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S new file mode 100644 index 000000000000..d3db9b2cd479 --- /dev/null +++ b/arch/arm64/lib/tishift.S @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2017 Jason A. Donenfeld . All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +ENTRY(__ashlti3) + cbz x2, 1f + mov x3, #64 + sub x3, x3, x2 + cmp x3, #0 + b.le 2f + lsl x1, x1, x2 + lsr x3, x0, x3 + lsl x2, x0, x2 + orr x1, x1, x3 + mov x0, x2 +1: + ret +2: + neg w1, w3 + mov x2, #0 + lsl x1, x0, x1 + mov x0, x2 + ret +ENDPROC(__ashlti3) + +ENTRY(__ashrti3) + cbz x2, 1f + mov x3, #64 + sub x3, x3, x2 + cmp x3, #0 + b.le 2f + lsr x0, x0, x2 + lsl x3, x1, x3 + asr x2, x1, x2 + orr x0, x0, x3 + mov x1, x2 +1: + ret +2: + neg w0, w3 + asr x2, x1, #63 + asr x0, x1, x0 + mov x1, x2 + ret +ENDPROC(__ashrti3) + +ENTRY(__lshrti3) + cbz x2, 1f + mov x3, #64 + sub x3, x3, x2 + cmp x3, #0 + b.le 2f + lsr x0, x0, x2 + lsl x3, x1, x3 + lsr x2, x1, x2 + orr x0, x0, x3 + mov x1, x2 +1: + ret +2: + neg w0, w3 + mov x2, #0 + lsr x0, x1, x0 + mov x1, x2 + ret +ENDPROC(__lshrti3) diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c new file mode 100644 index 000000000000..131c60c27dff --- /dev/null +++ b/arch/arm64/lib/xor-neon.c @@ -0,0 +1,184 @@ +/* + * arch/arm64/lib/xor-neon.c + * + * Authors: Jackie Liu + * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1, + unsigned long *p2) +{ + uint64_t *dp1 = (uint64_t *)p1; + uint64_t *dp2 = (uint64_t *)p2; + + register uint64x2_t v0, v1, v2, v3; + long lines = bytes / (sizeof(uint64x2_t) * 4); + + do { + /* p1 ^= p2 */ + v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); + v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); + v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); + v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); + + /* store */ + vst1q_u64(dp1 + 0, v0); + vst1q_u64(dp1 + 2, v1); + vst1q_u64(dp1 + 4, v2); + vst1q_u64(dp1 + 6, v3); + + dp1 += 8; + dp2 += 8; + } while (--lines > 0); +} + +void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3) +{ + uint64_t *dp1 = (uint64_t *)p1; + uint64_t *dp2 = (uint64_t *)p2; + uint64_t *dp3 = (uint64_t *)p3; + + register uint64x2_t v0, v1, v2, v3; + long lines = bytes / (sizeof(uint64x2_t) * 4); + + do { + /* p1 ^= p2 */ + v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); + v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); + v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); + v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); + + /* p1 ^= p3 */ + v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); + v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); + v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); + v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); + + /* store */ + vst1q_u64(dp1 + 0, v0); + vst1q_u64(dp1 + 2, v1); + vst1q_u64(dp1 + 4, v2); + vst1q_u64(dp1 + 6, v3); + + dp1 += 8; + dp2 += 8; + dp3 += 8; + } while (--lines > 0); +} + +void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, unsigned long *p4) +{ + uint64_t *dp1 = (uint64_t *)p1; + uint64_t *dp2 = (uint64_t *)p2; + uint64_t *dp3 = (uint64_t *)p3; + uint64_t *dp4 = (uint64_t *)p4; + + register uint64x2_t v0, v1, v2, v3; + long lines = bytes / (sizeof(uint64x2_t) * 4); + + do { + /* p1 ^= p2 */ + v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); + v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); + v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); + v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); + + /* p1 ^= p3 */ + v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); + v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); + v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); + v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); + + /* p1 ^= p4 */ + v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); + v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); + v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); + v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); + + /* store */ + vst1q_u64(dp1 + 0, v0); + vst1q_u64(dp1 + 2, v1); + vst1q_u64(dp1 + 4, v2); + vst1q_u64(dp1 + 6, v3); + + dp1 += 8; + dp2 += 8; + dp3 += 8; + dp4 += 8; + } while (--lines > 0); +} + +void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4, unsigned long *p5) +{ + uint64_t *dp1 = (uint64_t *)p1; + uint64_t *dp2 = (uint64_t *)p2; + uint64_t *dp3 = (uint64_t *)p3; + uint64_t *dp4 = (uint64_t *)p4; + uint64_t *dp5 = (uint64_t *)p5; + + register uint64x2_t v0, v1, v2, v3; + long lines = bytes / (sizeof(uint64x2_t) * 4); + + do { + /* p1 ^= p2 */ + v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0)); + v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2)); + v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4)); + v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6)); + + /* p1 ^= p3 */ + v0 = veorq_u64(v0, vld1q_u64(dp3 + 0)); + v1 = veorq_u64(v1, vld1q_u64(dp3 + 2)); + v2 = veorq_u64(v2, vld1q_u64(dp3 + 4)); + v3 = veorq_u64(v3, vld1q_u64(dp3 + 6)); + + /* p1 ^= p4 */ + v0 = veorq_u64(v0, vld1q_u64(dp4 + 0)); + v1 = veorq_u64(v1, vld1q_u64(dp4 + 2)); + v2 = veorq_u64(v2, vld1q_u64(dp4 + 4)); + v3 = veorq_u64(v3, vld1q_u64(dp4 + 6)); + + /* p1 ^= p5 */ + v0 = veorq_u64(v0, vld1q_u64(dp5 + 0)); + v1 = veorq_u64(v1, vld1q_u64(dp5 + 2)); + v2 = veorq_u64(v2, vld1q_u64(dp5 + 4)); + v3 = veorq_u64(v3, vld1q_u64(dp5 + 6)); + + /* store */ + vst1q_u64(dp1 + 0, v0); + vst1q_u64(dp1 + 2, v1); + vst1q_u64(dp1 + 4, v2); + vst1q_u64(dp1 + 6, v3); + + dp1 += 8; + dp2 += 8; + dp3 += 8; + dp4 += 8; + dp5 += 8; + } while (--lines > 0); +} + +struct xor_block_template const xor_block_inner_neon = { + .name = "__inner_neon__", + .do_2 = xor_arm64_neon_2, + .do_3 = xor_arm64_neon_3, + .do_4 = xor_arm64_neon_4, + .do_5 = xor_arm64_neon_5, +}; +EXPORT_SYMBOL(xor_block_inner_neon); + +MODULE_AUTHOR("Jackie Liu "); +MODULE_DESCRIPTION("ARMv8 XOR Extensions"); +MODULE_LICENSE("GPL"); diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c index 1e7fd45b60f8..c4e0c37200c3 100644 --- a/arch/cris/mm/init.c +++ b/arch/cris/mm/init.c @@ -43,7 +43,7 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); free_page(addr); - totalram_pages++; + totalram_pages_inc(); } printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index bfff6ea45d51..bdc34e5ecbc1 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -343,7 +343,7 @@ void output_pm_defines(void) void output_kvm_defines(void) { - COMMENT(" KVM/MIPS Specfic offsets. "); + COMMENT(" KVM/MIPS Specific offsets. "); OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]); OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]); diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index a0ecdb4abcc8..3d4f5660a2e0 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -218,7 +218,7 @@ void *module_alloc(unsigned long size) * easier than trying to map the text, data, init_text and * init_data correctly */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_HIGHMEM, + GFP_KERNEL, PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 7998c177f0a2..87f40454bad3 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o -obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o +obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 5d8e8b6bb1cc..3630b27a5275 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -139,30 +139,8 @@ static struct cpufreq_governor spu_governor = { .stop = spu_gov_stop, .owner = THIS_MODULE, }; - -/* - * module init and destoy - */ - -static int __init spu_gov_init(void) -{ - int ret; - - ret = cpufreq_register_governor(&spu_governor); - if (ret) - printk(KERN_ERR "registration of governor failed\n"); - return ret; -} - -static void __exit spu_gov_exit(void) -{ - cpufreq_unregister_governor(&spu_governor); -} - - -module_init(spu_gov_init); -module_exit(spu_gov_exit); +cpufreq_governor_init(spu_governor); +cpufreq_governor_exit(spu_governor); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Krafft "); - diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 316eded255e7..44171fb34e23 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -174,7 +174,7 @@ static long cmm_alloc_pages(long nr) pa->page[pa->index++] = addr; loaned_pages++; - totalram_pages--; + totalram_pages_dec(); spin_unlock(&cmm_lock); nr--; } @@ -213,7 +213,7 @@ static long cmm_free_pages(long nr) free_page(addr); loaned_pages--; nr--; - totalram_pages++; + totalram_pages_inc(); } spin_unlock(&cmm_lock); cmm_dbg("End request with %ld pages unfulfilled\n", nr); @@ -257,7 +257,7 @@ static void cmm_get_mpp(void) int rc; struct hvcall_mpp_data mpp_data; signed long active_pages_target, page_loan_request, target; - signed long total_pages = totalram_pages + loaned_pages; + signed long total_pages = totalram_pages() + loaned_pages; signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; rc = h_get_mpp(&mpp_data); @@ -288,7 +288,7 @@ static void cmm_get_mpp(void) cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", page_loan_request, loaned_pages, loaned_pages_target, - oom_freed_pages, totalram_pages); + oom_freed_pages, totalram_pages()); } static struct notifier_block cmm_oom_nb = { @@ -552,7 +552,7 @@ static int cmm_mem_going_offline(void *arg) free_page(pa_curr->page[idx]); freed++; loaned_pages--; - totalram_pages++; + totalram_pages_inc(); pa_curr->page[idx] = pa_last->page[--pa_last->index]; if (pa_last->index == 0) { if (pa_curr == pa_last) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 1a906dd7ca7d..7f4594aa2665 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -42,7 +42,7 @@ struct s390_aes_ctx { int key_len; unsigned long fc; union { - struct crypto_skcipher *blk; + struct crypto_sync_skcipher *blk; struct crypto_cipher *cip; } fallback; }; @@ -52,7 +52,7 @@ struct s390_xts_ctx { u8 pcc_key[32]; int key_len; unsigned long fc; - struct crypto_skcipher *fallback; + struct crypto_sync_skcipher *fallback; }; static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, @@ -171,14 +171,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); unsigned int ret; - crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags & + crypto_sync_skcipher_clear_flags(sctx->fallback.blk, + CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len); + ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len); tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) & + tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) & CRYPTO_TFM_RES_MASK; return ret; @@ -191,9 +192,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc, unsigned int ret; struct crypto_blkcipher *tfm = desc->tfm; struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); - SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - skcipher_request_set_tfm(req, sctx->fallback.blk); + skcipher_request_set_sync_tfm(req, sctx->fallback.blk); skcipher_request_set_callback(req, desc->flags, NULL, NULL); skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); @@ -210,9 +211,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc, unsigned int ret; struct crypto_blkcipher *tfm = desc->tfm; struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm); - SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk); - skcipher_request_set_tfm(req, sctx->fallback.blk); + skcipher_request_set_sync_tfm(req, sctx->fallback.blk); skcipher_request_set_callback(req, desc->flags, NULL, NULL); skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); @@ -293,8 +294,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm) const char *name = tfm->__crt_alg->cra_name; struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - sctx->fallback.blk = crypto_alloc_skcipher(name, 0, - CRYPTO_ALG_ASYNC | + sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(sctx->fallback.blk)) { @@ -310,7 +310,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(sctx->fallback.blk); + crypto_free_sync_skcipher(sctx->fallback.blk); } static struct crypto_alg ecb_aes_alg = { @@ -440,14 +440,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); unsigned int ret; - crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags & + crypto_sync_skcipher_clear_flags(xts_ctx->fallback, + CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len); + ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len); tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) & + tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) & CRYPTO_TFM_RES_MASK; return ret; @@ -459,10 +460,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc, { struct crypto_blkcipher *tfm = desc->tfm; struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); - SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); unsigned int ret; - skcipher_request_set_tfm(req, xts_ctx->fallback); + skcipher_request_set_sync_tfm(req, xts_ctx->fallback); skcipher_request_set_callback(req, desc->flags, NULL, NULL); skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); @@ -478,10 +479,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc, { struct crypto_blkcipher *tfm = desc->tfm; struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm); - SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback); unsigned int ret; - skcipher_request_set_tfm(req, xts_ctx->fallback); + skcipher_request_set_sync_tfm(req, xts_ctx->fallback); skcipher_request_set_callback(req, desc->flags, NULL, NULL); skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); @@ -604,8 +605,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm) const char *name = tfm->__crt_alg->cra_name; struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - xts_ctx->fallback = crypto_alloc_skcipher(name, 0, - CRYPTO_ALG_ASYNC | + xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(xts_ctx->fallback)) { @@ -620,7 +620,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm) { struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(xts_ctx->fallback); + crypto_free_sync_skcipher(xts_ctx->fallback); } static struct crypto_alg xts_aes_alg = { diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h new file mode 100644 index 000000000000..46a4db44c47a --- /dev/null +++ b/arch/s390/include/asm/set_memory.h @@ -0,0 +1,31 @@ +#ifndef _ASMS390_SET_MEMORY_H +#define _ASMS390_SET_MEMORY_H + +#define SET_MEMORY_RO 1UL +#define SET_MEMORY_RW 2UL +#define SET_MEMORY_NX 4UL +#define SET_MEMORY_X 8UL + +int __set_memory(unsigned long addr, int numpages, unsigned long flags); + +static inline int set_memory_ro(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_RO); +} + +static inline int set_memory_rw(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_RW); +} + +static inline int set_memory_nx(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_NX); +} + +static inline int set_memory_x(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_X); +} + +#endif diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index b3e9d18f2ec6..75b263e98378 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -56,7 +56,7 @@ static void __init setup_zero_pages(void) order = 7; /* Limit number of empty zero pages for small memory sizes */ - while (order > 2 && (totalram_pages >> 10) < (1UL << order)) + while (order > 2 && (totalram_pages() >> 10) < (1UL << order)) order--; empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile index accc7ca722e1..252e9fee687f 100644 --- a/arch/sh/kernel/cpu/Makefile +++ b/arch/sh/kernel/cpu/Makefile @@ -1,5 +1,5 @@ # -# Makefile for the Linux/SuperH CPU-specifc backends. +# Makefile for the Linux/SuperH CPU-specific backends. # obj-$(CONFIG_CPU_SH2) = sh2/ diff --git a/arch/sh/kernel/cpu/irq/Makefile b/arch/sh/kernel/cpu/irq/Makefile index f0c7025a67d1..3f8e79402d7d 100644 --- a/arch/sh/kernel/cpu/irq/Makefile +++ b/arch/sh/kernel/cpu/irq/Makefile @@ -1,5 +1,5 @@ # -# Makefile for the Linux/SuperH CPU-specifc IRQ handlers. +# Makefile for the Linux/SuperH CPU-specific IRQ handlers. # obj-$(CONFIG_SUPERH32) += imask.o obj-$(CONFIG_CPU_SH5) += intc-sh5.o diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 3b7092d9ea8f..4dbe42253e75 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -61,7 +61,7 @@ void show_mem(unsigned int filter) show_free_areas(filter); printk("Free swap: %6ldkB\n", get_nr_swap_pages() << (PAGE_SHIFT-10)); - printk("%ld pages of RAM\n", totalram_pages); + printk("%ld pages of RAM\n", totalram_pages()); printk("%ld free pages\n", nr_free_pages()); } diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index adce25462b0d..13c220db307c 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -708,7 +708,7 @@ static void __init set_non_bootmem_pages_init(void) #ifdef CONFIG_HIGHMEM if (idx == ZONE_HIGHMEM) - totalhigh_pages += z->spanned_pages; + totalhigh_pages_add(z->spanned_pages); #endif if (kdata_huge) { unsigned long percpu_pfn = node_percpu_pfn[nid]; diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index e7437ec62710..50813cae069b 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -53,8 +53,8 @@ void __init mem_init(void) /* this will put all low memory onto the freelists */ free_all_bootmem(); - max_low_pfn = totalram_pages; - max_pfn = totalram_pages; + max_low_pfn = totalram_pages(); + max_pfn = totalram_pages(); mem_init_print_info(NULL); kmalloc_ok = 1; } diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index d9ae404f08c9..974e603f552c 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -29,14 +28,14 @@ #include #include #include -#include #include #include #include #include -#include #include #include +#include +#include #include #include #ifdef CONFIG_X86_64 @@ -45,28 +44,26 @@ #define AESNI_ALIGN 16 +#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN))) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1)) #define RFC4106_HASH_SUBKEY_SIZE 16 +#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) +#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA) +#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA) /* This data is stored at the end of the crypto_tfm struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. */ struct aesni_rfc4106_gcm_ctx { - u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN))); - struct crypto_aes_ctx aes_key_expanded - __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 hash_subkey[16] AESNI_ALIGN_ATTR; + struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; u8 nonce[4]; }; -struct aesni_lrw_ctx { - struct lrw_table_ctx lrw_table; - u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; -}; - struct aesni_xts_ctx { - u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; - u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; + u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; + u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; }; asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -360,96 +357,95 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) aesni_dec(ctx, dst, src); } -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) +{ + return aes_set_key_common(crypto_skcipher_tfm(tfm), + crypto_skcipher_ctx(tfm), key, len); +} + +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); return err; } -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); @@ -458,7 +454,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, #ifdef CONFIG_X86_64 static void ctr_crypt_final(struct crypto_aes_ctx *ctx, - struct blkcipher_walk *walk) + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[AES_BLOCK_SIZE]; @@ -491,157 +487,53 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, } #endif -static int ctr_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } if (walk.nbytes) { ctr_crypt_final(ctx, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_fpu_end(); return err; } -#endif - -static int ablk_ecb_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "__driver-ecb-aes-aesni"); -} - -static int ablk_cbc_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "__driver-cbc-aes-aesni"); -} - -#ifdef CONFIG_X86_64 -static int ablk_ctr_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "__driver-ctr-aes-aesni"); -} - -#endif - -#if IS_ENABLED(CONFIG_CRYPTO_PCBC) -static int ablk_pcbc_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))"); -} -#endif - -static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) -{ - aesni_ecb_enc(ctx, blks, blks, nbytes); -} -static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) -{ - aesni_ecb_dec(ctx, blks, blks, nbytes); -} - -static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, +static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { - struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, - keylen - AES_BLOCK_SIZE); + err = xts_verify_key(tfm, key, keylen); if (err) return err; - return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); -} - -static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) -{ - struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), - .crypt_fn = lrw_xts_encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = lrw_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), - .crypt_fn = lrw_xts_decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = lrw_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); - - return ret; -} - -static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; + keylen /= 2; /* first half of xts-key is for crypt */ - err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); + err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, + key, keylen); if (err) return err; /* second half of xts-key is for tweak */ - return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, - keylen / 2); + return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, + key + keylen, keylen); } @@ -650,8 +542,6 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) aesni_enc(ctx, out, in); } -#ifdef CONFIG_X86_64 - static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc)); @@ -698,83 +588,28 @@ static const struct common_glue_ctx aesni_dec_xts = { } } }; -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(aesni_xts_tweak), - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); + return glue_xts_req_128bit(&aesni_enc_xts, req, + XTS_TWEAK_CAST(aesni_xts_tweak), + aes_ctx(ctx->raw_tweak_ctx), + aes_ctx(ctx->raw_crypt_ctx)); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(aesni_xts_tweak), - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); -} - -#else - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), - .tweak_fn = aesni_xts_tweak, - .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), - .crypt_fn = lrw_xts_encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = xts_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); - - return ret; -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), - .tweak_fn = aesni_xts_tweak, - .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), - .crypt_fn = lrw_xts_decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = xts_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return ret; + return glue_xts_req_128bit(&aesni_dec_xts, req, + XTS_TWEAK_CAST(aesni_xts_tweak), + aes_ctx(ctx->raw_tweak_ctx), + aes_ctx(ctx->raw_crypt_ctx)); } -#endif - -#ifdef CONFIG_X86_64 static int rfc4106_init(struct crypto_aead *aead) { struct cryptd_aead *cryptd_tfm; @@ -1077,9 +912,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, .cra_module = THIS_MODULE, .cra_u = { .cipher = { @@ -1091,14 +924,12 @@ static struct crypto_alg aesni_algs[] = { { } } }, { - .cra_name = "__aes-aesni", - .cra_driver_name = "__driver-aes-aesni", - .cra_priority = 0, + .cra_name = "__aes", + .cra_driver_name = "__aes-aesni", + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, .cra_module = THIS_MODULE, .cra_u = { .cipher = { @@ -1109,250 +940,94 @@ static struct crypto_alg aesni_algs[] = { { .cia_decrypt = __aes_decrypt } } -}, { - .cra_name = "__ecb-aes-aesni", - .cra_driver_name = "__driver-ecb-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-aes-aesni", - .cra_driver_name = "__driver-cbc-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_ecb_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, +} }; + +static struct skcipher_alg aesni_skciphers[] = { + { + .base = { + .cra_name = "__ecb(aes)", + .cra_driver_name = "__ecb-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_cbc_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, #ifdef CONFIG_X86_64 -}, { - .cra_name = "__ctr-aes-aesni", - .cra_driver_name = "__driver-ctr-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, + }, { + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-aesni", + .cra_priority = 401, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = XTS_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aesni_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, #endif + } +}; + +struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; + +struct { + const char *algname; + const char *drvname; + const char *basename; + struct simd_skcipher_alg *simd; +} aesni_simd_skciphers2[] = { #if IS_ENABLED(CONFIG_CRYPTO_PCBC) -}, { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_pcbc_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, + { + .algname = "pcbc(aes)", + .drvname = "pcbc-aes-aesni", + .basename = "fpu(pcbc(__aes-aesni))", }, #endif -}, { - .cra_name = "__lrw-aes-aesni", - .cra_driver_name = "__driver-lrw-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesni_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_aesni_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = lrw_aesni_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-aes-aesni", - .cra_driver_name = "__driver-xts-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesni_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_aesni_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; +}; #ifdef CONFIG_X86_64 static struct aead_alg aesni_aead_algs[] = { { @@ -1401,9 +1076,27 @@ static const struct x86_cpu_id aesni_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); +static void aesni_free_simds(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) && + aesni_simd_skciphers[i]; i++) + simd_skcipher_free(aesni_simd_skciphers[i]); + + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) && + aesni_simd_skciphers2[i].simd; i++) + simd_skcipher_free(aesni_simd_skciphers2[i].simd); +} + static int __init aesni_init(void) { + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; int err; + int i; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1445,13 +1138,48 @@ static int __init aesni_init(void) if (err) goto fpu_exit; + err = crypto_register_skciphers(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers)); + if (err) + goto unregister_algs; + err = crypto_register_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); if (err) - goto unregister_algs; + goto unregister_skciphers; + + for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) { + algname = aesni_skciphers[i].base.cra_name + 2; + drvname = aesni_skciphers[i].base.cra_driver_name + 2; + basename = aesni_skciphers[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aesni_simd_skciphers[i] = simd; + } - return err; + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) { + algname = aesni_simd_skciphers2[i].algname; + drvname = aesni_simd_skciphers2[i].drvname; + basename = aesni_simd_skciphers2[i].basename; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + aesni_simd_skciphers2[i].simd = simd; + } + + return 0; + +unregister_simds: + aesni_free_simds(); + crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); +unregister_skciphers: + crypto_unregister_skciphers(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers)); unregister_algs: crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); fpu_exit: @@ -1461,7 +1189,10 @@ static int __init aesni_init(void) static void __exit aesni_exit(void) { + aesni_free_simds(); crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); + crypto_unregister_skciphers(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers)); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); crypto_fpu_exit(); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index e7d679e2a018..be9b3766f241 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -11,143 +11,188 @@ * */ -#include +#include #include #include #include #include #include -#include #include struct crypto_fpu_ctx { - struct crypto_blkcipher *child; + struct crypto_sync_skcipher *child; }; -static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key, +static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent); - struct crypto_blkcipher *child = ctx->child; + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_sync_skcipher *child = ctx->child; int err; - crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_blkcipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(child, + crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_sync_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, + crypto_sync_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_fpu_encrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_sync_skcipher *child = ctx->child; + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); int err; - struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); - struct crypto_blkcipher *child = ctx->child; - struct blkcipher_desc desc = { - .tfm = child, - .info = desc_in->info, - .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, - }; + + skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_callback(subreq, 0, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); kernel_fpu_begin(); - err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes); + err = crypto_skcipher_encrypt(subreq); kernel_fpu_end(); + + skcipher_request_zero(subreq); return err; } -static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_fpu_decrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_sync_skcipher *child = ctx->child; + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); int err; - struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); - struct crypto_blkcipher *child = ctx->child; - struct blkcipher_desc desc = { - .tfm = child, - .info = desc_in->info, - .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, - }; + + skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_callback(subreq, 0, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); kernel_fpu_begin(); - err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes); + err = crypto_skcipher_decrypt(subreq); kernel_fpu_end(); + + skcipher_request_zero(subreq); return err; } -static int crypto_fpu_init_tfm(struct crypto_tfm *tfm) +static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_blkcipher *cipher; + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher_spawn *spawn; + struct crypto_skcipher *cipher; - cipher = crypto_spawn_blkcipher(spawn); + spawn = skcipher_instance_ctx(inst); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctx->child = cipher; + ctx->child = (struct crypto_sync_skcipher *)cipher; + return 0; } -static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm) +static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm) +{ + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_sync_skcipher(ctx->child); +} + +static void crypto_fpu_free(struct skcipher_instance *inst) { - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(ctx->child); + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); } -static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb) +static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; - struct crypto_alg *alg; + struct crypto_skcipher_spawn *spawn; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct skcipher_alg *alg; + const char *cipher_name; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) & + algt->mask) + return -EINVAL; + + if (!(algt->mask & CRYPTO_ALG_INTERNAL)) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC); if (err) - return ERR_PTR(err); - - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); - - inst = crypto_alloc_instance("fpu", alg); - if (IS_ERR(inst)) - goto out_put_alg; - - inst->alg.cra_flags = alg->cra_flags; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = alg->cra_type; - inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx); - inst->alg.cra_init = crypto_fpu_init_tfm; - inst->alg.cra_exit = crypto_fpu_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt; - -out_put_alg: - crypto_mod_put(alg); - return inst; -} + goto out_free_inst; -static void crypto_fpu_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + alg = crypto_skcipher_spawn_alg(spawn); + + err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu", + &alg->base); + if (err) + goto out_drop_skcipher; + + inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = alg->base.cra_blocksize; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; + + inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); + + inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx); + + inst->alg.init = crypto_fpu_init_tfm; + inst->alg.exit = crypto_fpu_exit_tfm; + + inst->alg.setkey = crypto_fpu_setkey; + inst->alg.encrypt = crypto_fpu_encrypt; + inst->alg.decrypt = crypto_fpu_decrypt; + + inst->free = crypto_fpu_free; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto out_drop_skcipher; + +out: + return err; + +out_drop_skcipher: + crypto_drop_skcipher(spawn); +out_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_fpu_tmpl = { .name = "fpu", - .alloc = crypto_fpu_alloc, - .free = crypto_fpu_free, + .create = crypto_fpu_create, .module = THIS_MODULE, }; diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 6a85598931b5..260a060d7275 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -27,10 +27,10 @@ #include #include +#include #include #include #include -#include static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, @@ -339,6 +339,41 @@ static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, return nbytes; } +static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, + void *ctx, + struct skcipher_walk *walk) +{ + const unsigned int bsize = 128 / 8; + unsigned int nbytes = walk->nbytes; + u128 *src = walk->src.virt.addr; + u128 *dst = walk->dst.virt.addr; + unsigned int num_blocks, func_bytes; + unsigned int i; + + /* Process multi-block batch */ + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes >= func_bytes) { + do { + gctx->funcs[i].fn_u.xts(ctx, dst, src, + walk->iv); + + src += num_blocks; + dst += num_blocks; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + goto done; + } + } + +done: + return nbytes; +} + /* for implementations implementing faster XTS IV generator */ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, struct scatterlist *dst, @@ -379,6 +414,43 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); +int glue_xts_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req, + common_glue_func_t tweak_fn, void *tweak_ctx, + void *crypt_ctx) +{ + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + nbytes = walk.nbytes; + if (!nbytes) + return err; + + /* set minimum length to bsize, for tweak_fn */ + fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, + nbytes < bsize ? bsize : nbytes); + + /* calculate first value of T */ + tweak_fn(tweak_ctx, walk.iv, walk.iv); + + while (nbytes) { + nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk); + + err = skcipher_walk_done(&walk, nbytes); + nbytes = walk.nbytes; + } + + glue_fpu_end(fpu_enabled); + + return err; +} +EXPORT_SYMBOL_GPL(glue_xts_req_128bit); + void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, common_glue_func_t fn) { diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 03bb1065c335..29e53ea7d764 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -5,8 +5,8 @@ #ifndef _CRYPTO_GLUE_HELPER_H #define _CRYPTO_GLUE_HELPER_H +#include #include -#include #include #include @@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, return true; } +static inline bool glue_skwalk_fpu_begin(unsigned int bsize, + int fpu_blocks_limit, + struct skcipher_walk *walk, + bool fpu_enabled, unsigned int nbytes) +{ + if (likely(fpu_blocks_limit < 0)) + return false; + + if (fpu_enabled) + return true; + + /* + * Vector-registers are only used when chunk to be processed is large + * enough, so do not enable FPU until it is necessary. + */ + if (nbytes < bsize * (unsigned int)fpu_blocks_limit) + return false; + + /* prevent sleeping if FPU is in use */ + skcipher_walk_atomise(walk); + + kernel_fpu_begin(); + return true; +} + static inline void glue_fpu_end(bool fpu_enabled) { if (fpu_enabled) @@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, common_glue_func_t tweak_fn, void *tweak_ctx, void *crypt_ctx); +extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes, + common_glue_func_t tweak_fn, void *tweak_ctx, + void *crypt_ctx); + +extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req, + common_glue_func_t tweak_fn, void *tweak_ctx, + void *crypt_ctx); + extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, common_glue_func_t fn); diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h new file mode 100644 index 000000000000..eaec6c364e42 --- /dev/null +++ b/arch/x86/include/asm/set_memory.h @@ -0,0 +1,87 @@ +#ifndef _ASM_X86_SET_MEMORY_H +#define _ASM_X86_SET_MEMORY_H + +#include +#include + +/* + * The set_memory_* API can be used to change various attributes of a virtual + * address range. The attributes include: + * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack + * Executability : eXeutable, NoteXecutable + * Read/Write : ReadOnly, ReadWrite + * Presence : NotPresent + * + * Within a category, the attributes are mutually exclusive. + * + * The implementation of this API will take care of various aspects that + * are associated with changing such attributes, such as: + * - Flushing TLBs + * - Flushing CPU caches + * - Making sure aliases of the memory behind the mapping don't violate + * coherency rules as defined by the CPU in the system. + * + * What this API does not do: + * - Provide exclusion between various callers - including callers that + * operation on other mappings of the same physical page + * - Restore default attributes when a page is freed + * - Guarantee that mappings other than the requested one are + * in any state, other than that these do not violate rules for + * the CPU you have. Do not depend on any effects on other mappings, + * CPUs other than the one you have may have more relaxed rules. + * The caller is required to take care of these. + */ + +int _set_memory_uc(unsigned long addr, int numpages); +int _set_memory_wc(unsigned long addr, int numpages); +int _set_memory_wt(unsigned long addr, int numpages); +int _set_memory_wb(unsigned long addr, int numpages); +int set_memory_uc(unsigned long addr, int numpages); +int set_memory_wc(unsigned long addr, int numpages); +int set_memory_wt(unsigned long addr, int numpages); +int set_memory_wb(unsigned long addr, int numpages); +int set_memory_np(unsigned long addr, int numpages); +int set_memory_4k(unsigned long addr, int numpages); + +int set_memory_array_uc(unsigned long *addr, int addrinarray); +int set_memory_array_wc(unsigned long *addr, int addrinarray); +int set_memory_array_wt(unsigned long *addr, int addrinarray); +int set_memory_array_wb(unsigned long *addr, int addrinarray); + +int set_pages_array_uc(struct page **pages, int addrinarray); +int set_pages_array_wc(struct page **pages, int addrinarray); +int set_pages_array_wt(struct page **pages, int addrinarray); +int set_pages_array_wb(struct page **pages, int addrinarray); + +/* + * For legacy compatibility with the old APIs, a few functions + * are provided that work on a "struct page". + * These functions operate ONLY on the 1:1 kernel mapping of the + * memory that the struct page represents, and internally just + * call the set_memory_* function. See the description of the + * set_memory_* function for more details on conventions. + * + * These APIs should be considered *deprecated* and are likely going to + * be removed in the future. + * The reason for this is the implicit operation on the 1:1 mapping only, + * making this not a generally useful API. + * + * Specifically, many users of the old APIs had a virtual address, + * called virt_to_page() or vmalloc_to_page() on that address to + * get a struct page* that the old API required. + * To convert these cases, use set_memory_*() on the original + * virtual address, do not use these functions. + */ + +int set_pages_uc(struct page *page, int numpages); +int set_pages_wb(struct page *page, int numpages); +int set_pages_x(struct page *page, int numpages); +int set_pages_nx(struct page *page, int numpages); +int set_pages_ro(struct page *page, int numpages); +int set_pages_rw(struct page *page, int numpages); + +extern int kernel_set_to_readonly; +void set_kernel_text_rw(void); +void set_kernel_text_ro(void); + +#endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 499bc79fc82a..55f1c48dba05 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -325,8 +325,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, { ssize_t ret = -EINVAL; - if ((len >> PAGE_SHIFT) > totalram_pages) { - pr_err("too much data (max %ld pages)\n", totalram_pages); + if ((len >> PAGE_SHIFT) > totalram_pages()) { + pr_err("too much data (max %ld pages)\n", totalram_pages()); return ret; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 3c09ca384199..d035ec1af1d1 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -85,7 +85,7 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), - MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, + MODULES_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e5f92488c3cd..ff8a391e10c5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -168,8 +168,8 @@ static void recalculate_apic_map(struct kvm *kvm) if (kvm_apic_present(vcpu)) max_id = max(max_id, kvm_apic_id(vcpu->arch.apic)); - new = kvm_kvzalloc(sizeof(struct kvm_apic_map) + - sizeof(struct kvm_lapic *) * ((u64)max_id + 1)); + new = kvzalloc(sizeof(struct kvm_apic_map) + + sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL); if (!new) goto out; diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 85024e0cfaa5..d5bab386c0fb 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -38,8 +38,8 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, int i; for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - slot->arch.gfn_track[i] = kvm_kvzalloc(npages * - sizeof(*slot->arch.gfn_track[i])); + slot->arch.gfn_track[i] = kvzalloc(npages * + sizeof(*slot->arch.gfn_track[i]), GFP_KERNEL); if (!slot->arch.gfn_track[i]) goto track_free; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 007dc3298f88..5cbf52cec4dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8358,13 +8358,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, slot->base_gfn, level) + 1; slot->arch.rmap[i] = - kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i])); + kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL); if (!slot->arch.rmap[i]) goto out_free; if (i == 0) continue; - linfo = kvm_kvzalloc(lpages * sizeof(*linfo)); + linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL); if (!linfo) goto out_free; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 6d18b70ed5a9..8ac7132919a3 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,6 +1,6 @@ #include #include -#include /* for totalram_pages */ +#include /* for totalram_pages() */ #include void *kmap(struct page *page) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1271bc9fa3c6..7d50d46c046f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1897,8 +1897,6 @@ int set_pages_rw(struct page *page, int numpages) return set_memory_rw(addr, numpages); } -#ifdef CONFIG_DEBUG_PAGEALLOC - static int __set_pages_p(struct page *page, int numpages) { unsigned long tempaddr = (unsigned long) page_address(page); @@ -1937,6 +1935,17 @@ static int __set_pages_np(struct page *page, int numpages) return __change_page_attr_set_clr(&cpa, 0); } +int set_direct_map_invalid_noflush(struct page *page) +{ + return __set_pages_np(page, 1); +} + +int set_direct_map_default_noflush(struct page *page) +{ + return __set_pages_p(page, 1); +} + +#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) @@ -1966,7 +1975,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) } #ifdef CONFIG_HIBERNATION - bool kernel_page_present(struct page *page) { unsigned int level; diff --git a/block/blk-mq.c b/block/blk-mq.c index 76b1acad2da1..39f8e1dfa5b3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -364,7 +364,8 @@ static void blk_mq_ipi_complete_request(struct request *rq) } cpu = get_cpu(); - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) || + idle_cpu(ctx->cpu)) shared = cpus_share_cache(cpu, ctx->cpu); if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 06cf9807f49a..21f07a0f5801 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -107,8 +107,13 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU + * + * Refrain from waking up an idle CPU if possible since the exit + * latency of taking req->cpu out of an idle cstate will likely + * exceed the rq->deadline constraint compared to executing the + * request locally instead. */ - if (req->cpu != -1) { + if (req->cpu != -1 && !idle_cpu(req->cpu)) { ccpu = req->cpu; if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) shared = cpus_share_cache(cpu, ccpu); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8ae9e75675a0..d89dc60ff4c5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -234,7 +234,7 @@ queue_show_##name(struct request_queue *q, char *page) \ return queue_var_show(neg ? !bit : bit, page); \ } \ static ssize_t \ -queue_store_##name(struct request_queue *q, const char *page, size_t count) \ +__maybe_unused queue_store_##name(struct request_queue *q, const char *page, size_t count) \ { \ unsigned long val; \ ssize_t ret; \ @@ -499,9 +499,8 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = { }; static struct queue_sysfs_entry queue_iostats_entry = { - .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, + .attr = {.name = "iostats", .mode = S_IRUGO }, .show = queue_show_iostats, - .store = queue_store_iostats, }; static struct queue_sysfs_entry queue_random_entry = { diff --git a/block/blk.h b/block/blk.h index ae076666cc23..980f0f029195 100644 --- a/block/blk.h +++ b/block/blk.h @@ -245,9 +245,7 @@ extern int blk_update_nr_requests(struct request_queue *, unsigned int); */ static inline int blk_do_io_stat(struct request *rq) { - return rq->rq_disk && - (rq->cmd_flags & REQ_IO_STAT) && - (rq->cmd_type == REQ_TYPE_FS); + return false; } /* diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ab70b1a9f652..04551a07c7ef 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -30,7 +30,7 @@ static const int cfq_back_penalty = 2; static const u64 cfq_slice_sync = NSEC_PER_SEC / 10; static u64 cfq_slice_async = NSEC_PER_SEC / 25; static const int cfq_slice_async_rq = 2; -static u64 cfq_slice_idle = NSEC_PER_SEC / 125; +static u64 cfq_slice_idle = 0; static u64 cfq_group_idle = NSEC_PER_SEC / 125; static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; diff --git a/crypto/Kconfig b/crypto/Kconfig index e7c7330dfc4f..1001a84aa561 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -24,7 +24,7 @@ comment "Crypto core or helper" config CRYPTO_FIPS bool "FIPS 200 compliance" depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS - depends on MODULE_SIG + depends on (MODULE_SIG || !MODULES) help This options enables the fips boot option which is required if you want to system to operate in a FIPS 200 @@ -256,10 +256,14 @@ config CRYPTO_ABLK_HELPER tristate select CRYPTO_CRYPTD +config CRYPTO_SIMD + tristate + select CRYPTO_CRYPTD + config CRYPTO_GLUE_HELPER_X86 tristate depends on X86 - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER config CRYPTO_ENGINE tristate @@ -485,7 +489,7 @@ config CRYPTO_CRC32C_INTEL gain performance compared with software implementation. Module will be crc32c-intel. -config CRYPT_CRC32C_VPMSUM +config CRYPTO_CRC32C_VPMSUM tristate "CRC32c CRC algorithm (powerpc64)" depends on PPC64 && ALTIVEC select CRYPTO_HASH @@ -976,14 +980,13 @@ config CRYPTO_AES_X86_64 config CRYPTO_AES_NI_INTEL tristate "AES cipher algorithms (AES-NI)" depends on X86 + select CRYPTO_AEAD select CRYPTO_AES_X86_64 if 64BIT select CRYPTO_AES_586 if !64BIT - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 if 64BIT - select CRYPTO_LRW - select CRYPTO_XTS + select CRYPTO_SIMD help Use Intel AES-NI instructions for AES algorithm. diff --git a/crypto/Makefile b/crypto/Makefile index 6c8ddd79d473..d8074a5bca40 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,7 +121,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o -obj-$(CONFIG_CRYPTO_LZO) += lzo.o +obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o @@ -148,3 +148,5 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx/ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o +crypto_simd-y := simd.o +obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o diff --git a/crypto/aead.c b/crypto/aead.c index 3f5c5ff004ab..186440cc7ad0 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -294,7 +294,7 @@ int aead_init_geniv(struct crypto_aead *aead) if (err) goto out; - ctx->sknull = crypto_get_default_null_skcipher2(); + ctx->sknull = crypto_get_default_null_skcipher(); err = PTR_ERR(ctx->sknull); if (IS_ERR(ctx->sknull)) goto out; @@ -314,7 +314,7 @@ int aead_init_geniv(struct crypto_aead *aead) return err; drop_null: - crypto_put_default_null_skcipher2(); + crypto_put_default_null_skcipher(); goto out; } EXPORT_SYMBOL_GPL(aead_init_geniv); @@ -324,7 +324,7 @@ void aead_exit_geniv(struct crypto_aead *tfm) struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); - crypto_put_default_null_skcipher2(); + crypto_put_default_null_skcipher(); } EXPORT_SYMBOL_GPL(aead_exit_geniv); diff --git a/crypto/ahash.c b/crypto/ahash.c index 9a4e87755a0b..8e0cee83782f 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -579,8 +579,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg) { struct crypto_alg *base = &alg->halg.base; - if (alg->halg.digestsize > PAGE_SIZE / 8 || - alg->halg.statesize > PAGE_SIZE / 8 || + if (alg->halg.digestsize > HASH_MAX_DIGESTSIZE || + alg->halg.statesize > HASH_MAX_STATESIZE || alg->halg.statesize == 0) return -EINVAL; diff --git a/crypto/algapi.c b/crypto/algapi.c index 9e5b24329b41..9bc49e129f62 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -56,9 +57,23 @@ static int crypto_check_alg(struct crypto_alg *alg) if (alg->cra_alignmask & (alg->cra_alignmask + 1)) return -EINVAL; - if (alg->cra_blocksize > PAGE_SIZE / 8) + /* General maximums for all algs. */ + if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK) return -EINVAL; + if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE) + return -EINVAL; + + /* Lower maximums for specific alg types. */ + if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_CIPHER) { + if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK) + return -EINVAL; + + if (alg->cra_blocksize > MAX_CIPHER_BLOCKSIZE) + return -EINVAL; + } + if (alg->cra_priority < 0) return -EINVAL; diff --git a/crypto/algboss.c b/crypto/algboss.c index 5cbc588555ca..1d1dbca91fc8 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -245,12 +245,8 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) memcpy(param->alg, alg->cra_name, sizeof(param->alg)); type = alg->cra_flags; - /* This piece of crap needs to disappear into per-type test hooks. */ - if (!((type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & - CRYPTO_ALG_TYPE_BLKCIPHER_MASK) && !(type & CRYPTO_ALG_GENIV) && - ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize)) + /* Do not test internal algorithms. */ + if (type & CRYPTO_ALG_INTERNAL) type |= CRYPTO_ALG_TESTED; param->type = type; diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index ba818a738f9a..6121a0e70103 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -460,12 +460,13 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg, used -= ctx->aead_assoclen; /* take over all tx sgls from ctx */ - areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur, + areq->tsgl = sock_kmalloc(sk, + sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1), GFP_KERNEL); if (unlikely(!areq->tsgl)) goto free; - sg_init_table(areq->tsgl, sgl->cur); + sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1)); for (i = 0; i < sgl->cur; i++) sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]), sgl->sg[i].length, sgl->sg[i].offset); @@ -561,18 +562,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags) lock_sock(sk); /* - * AEAD memory structure: For encryption, the tag is appended to the - * ciphertext which implies that the memory allocated for the ciphertext - * must be increased by the tag length. For decryption, the tag - * is expected to be concatenated to the ciphertext. The plaintext - * therefore has a memory size of the ciphertext minus the tag length. - * - * The memory structure for cipher operation has the following - * structure: - * AEAD encryption input: assoc data || plaintext - * AEAD encryption output: cipherntext || auth tag - * AEAD decryption input: assoc data || ciphertext || auth tag - * AEAD decryption output: plaintext + * Please see documentation of aead_request_set_crypt for the + * description of the AEAD memory structure expected from the caller. */ if (ctx->more) { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 7ae4f79cc839..eefefa4ea6de 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -240,7 +240,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; struct ahash_request *req = &ctx->req; - char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1]; + char state[HASH_MAX_STATESIZE]; struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 931a777a4ec4..e4854168c435 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -573,8 +573,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, * need to expand */ tmp = kcalloc(tx_nents * 2, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto free; + } sg_init_table(tmp, tx_nents * 2); for (x = 0; x < tx_nents; x++) diff --git a/crypto/api.c b/crypto/api.c index b47796b33a14..85caf1bae699 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -212,8 +212,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) if (!name) return ERR_PTR(-ENOENT); + type &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); - type &= mask; alg = crypto_alg_lookup(name, type, mask); if (!alg && !(mask & CRYPTO_NOLOAD)) { @@ -311,24 +311,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) { const struct crypto_type *type = tfm->__crt_alg->cra_type; - if (type) { - if (tfm->exit) - tfm->exit(tfm); - return; - } - - switch (crypto_tfm_alg_type(tfm)) { - case CRYPTO_ALG_TYPE_CIPHER: - crypto_exit_cipher_ops(tfm); - break; - - case CRYPTO_ALG_TYPE_COMPRESS: - crypto_exit_compress_ops(tfm); - break; - - default: - BUG(); - } + if (type && tfm->exit) + tfm->exit(tfm); } static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 8525fe474abd..d1af69d2ff85 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -123,6 +123,7 @@ int public_key_verify_signature(const struct public_key *pkey, if (ret) goto error_free_req; + ret = -ENOMEM; outlen = crypto_akcipher_maxsize(tfm); output = kmalloc(outlen, GFP_KERNEL); if (!output) diff --git a/crypto/authenc.c b/crypto/authenc.c index f9ab04aa6dd5..59925f90748e 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -33,7 +33,7 @@ struct authenc_instance_ctx { struct crypto_authenc_ctx { struct crypto_ahash *auth; struct crypto_skcipher *enc; - struct crypto_skcipher *null; + struct crypto_sync_skcipher *null; }; struct authenc_request_ctx { @@ -193,9 +193,9 @@ static int crypto_authenc_copy_assoc(struct aead_request *req) { struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); - SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); + SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); - skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_sync_tfm(skreq, ctx->null); skcipher_request_set_callback(skreq, aead_request_flags(req), NULL, NULL); skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, @@ -326,19 +326,19 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; struct crypto_skcipher *enc; - struct crypto_skcipher *null; + struct crypto_sync_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher2(&ictx->enc); + enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; - null = crypto_get_default_null_skcipher2(); + null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; @@ -372,7 +372,7 @@ static void crypto_authenc_exit_tfm(struct crypto_aead *tfm) crypto_free_ahash(ctx->auth); crypto_free_skcipher(ctx->enc); - crypto_put_default_null_skcipher2(); + crypto_put_default_null_skcipher(); } static void crypto_authenc_free(struct aead_instance *inst) @@ -429,9 +429,9 @@ static int crypto_authenc_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 73b12f128ae5..4741fe89ba2c 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -36,7 +36,7 @@ struct crypto_authenc_esn_ctx { unsigned int reqoff; struct crypto_ahash *auth; struct crypto_skcipher *enc; - struct crypto_skcipher *null; + struct crypto_sync_skcipher *null; }; struct authenc_esn_request_ctx { @@ -183,9 +183,9 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len) { struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req); struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn); - SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); + SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); - skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_sync_tfm(skreq, ctx->null); skcipher_request_set_callback(skreq, aead_request_flags(req), NULL, NULL); skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL); @@ -341,19 +341,19 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_ahash *auth; struct crypto_skcipher *enc; - struct crypto_skcipher *null; + struct crypto_sync_skcipher *null; int err; auth = crypto_spawn_ahash(&ictx->auth); if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher2(&ictx->enc); + enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; - null = crypto_get_default_null_skcipher2(); + null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_skcipher; @@ -390,7 +390,7 @@ static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm) crypto_free_ahash(ctx->auth); crypto_free_skcipher(ctx->enc); - crypto_put_default_null_skcipher2(); + crypto_put_default_null_skcipher(); } static void crypto_authenc_esn_free(struct aead_instance *inst) @@ -447,9 +447,9 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; diff --git a/crypto/cbc.c b/crypto/cbc.c index 780ee27b2d43..68f751a41a84 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -1,7 +1,7 @@ /* * CBC: Cipher Block Chaining mode * - * Copyright (c) 2006 Herbert Xu + * Copyright (c) 2006-2016 Herbert Xu * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -10,191 +10,78 @@ * */ -#include +#include +#include #include #include #include #include #include -#include #include struct crypto_cbc_ctx { struct crypto_cipher *child; }; -static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key, +static int crypto_cbc_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_cipher *child = ctx->child; int err; crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) +static inline void crypto_cbc_encrypt_one(struct crypto_skcipher *tfm, + const u8 *src, u8 *dst) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; - - do { - crypto_xor(iv, src, bsize); - fn(crypto_cipher_tfm(tfm), dst, iv); - memcpy(iv, dst, bsize); - - src += bsize; - dst += bsize; - } while ((nbytes -= bsize) >= bsize); - - return nbytes; -} - -static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) -{ - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 *iv = walk->iv; - - do { - crypto_xor(src, iv, bsize); - fn(crypto_cipher_tfm(tfm), src, src); - iv = src; - - src += bsize; - } while ((nbytes -= bsize) >= bsize); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - memcpy(walk->iv, iv, bsize); - - return nbytes; + crypto_cipher_encrypt_one(ctx->child, dst, src); } -static int crypto_cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct crypto_cipher *child = ctx->child; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child); - else - nbytes = crypto_cbc_encrypt_segment(desc, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; + return crypto_cbc_encrypt_walk(req, crypto_cbc_encrypt_one); } -static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) +static inline void crypto_cbc_decrypt_one(struct crypto_skcipher *tfm, + const u8 *src, u8 *dst) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; - - do { - fn(crypto_cipher_tfm(tfm), dst, src); - crypto_xor(dst, iv, bsize); - iv = src; - - src += bsize; - dst += bsize; - } while ((nbytes -= bsize) >= bsize); - - memcpy(walk->iv, iv, bsize); - - return nbytes; -} + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); -static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) -{ - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 last_iv[bsize]; - - /* Start of the last block. */ - src += nbytes - (nbytes & (bsize - 1)) - bsize; - memcpy(last_iv, src, bsize); - - for (;;) { - fn(crypto_cipher_tfm(tfm), src, src); - if ((nbytes -= bsize) < bsize) - break; - crypto_xor(src, src - bsize, bsize); - src -= bsize; - } - - crypto_xor(src, walk->iv, bsize); - memcpy(walk->iv, last_iv, bsize); - - return nbytes; + crypto_cipher_decrypt_one(ctx->child, dst, src); } -static int crypto_cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct crypto_cipher *child = ctx->child; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_walk walk; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes)) { - if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child); - else - nbytes = crypto_cbc_decrypt_segment(desc, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); + while (walk.nbytes) { + err = crypto_cbc_decrypt_blocks(&walk, tfm, + crypto_cbc_decrypt_one); + err = skcipher_walk_done(&walk, err); } return err; } -static int crypto_cbc_init_tfm(struct crypto_tfm *tfm) +static int crypto_cbc_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); @@ -205,72 +92,94 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm) return 0; } -static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm) +static void crypto_cbc_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + crypto_free_cipher(ctx->child); } -static struct crypto_instance *crypto_cbc_alloc(struct rtattr **tb) +static void crypto_cbc_free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; + struct skcipher_instance *inst; + struct crypto_spawn *spawn; struct crypto_alg *alg; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER); if (err) - return ERR_PTR(err); + return err; + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); + err = PTR_ERR(alg); if (IS_ERR(alg)) - return ERR_CAST(alg); + goto err_free_inst; - inst = ERR_PTR(-EINVAL); - if (!is_power_of_2(alg->cra_blocksize)) - goto out_put_alg; + spawn = skcipher_instance_ctx(inst); + err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst), + CRYPTO_ALG_TYPE_MASK); + crypto_mod_put(alg); + if (err) + goto err_free_inst; - inst = crypto_alloc_instance("cbc", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "cbc", alg); + if (err) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + err = -EINVAL; + if (!is_power_of_2(alg->cra_blocksize)) + goto err_drop_spawn; + + inst->alg.base.cra_priority = alg->cra_priority; + inst->alg.base.cra_blocksize = alg->cra_blocksize; + inst->alg.base.cra_alignmask = alg->cra_alignmask; /* We access the data as u32s when xoring. */ - inst->alg.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize; + inst->alg.ivsize = alg->cra_blocksize; + inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; + inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx); + inst->alg.base.cra_ctxsize = sizeof(struct crypto_cbc_ctx); - inst->alg.cra_init = crypto_cbc_init_tfm; - inst->alg.cra_exit = crypto_cbc_exit_tfm; + inst->alg.init = crypto_cbc_init_tfm; + inst->alg.exit = crypto_cbc_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt; + inst->alg.setkey = crypto_cbc_setkey; + inst->alg.encrypt = crypto_cbc_encrypt; + inst->alg.decrypt = crypto_cbc_decrypt; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + inst->free = crypto_cbc_free; -static void crypto_cbc_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_spawn(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_cbc_tmpl = { .name = "cbc", - .alloc = crypto_cbc_alloc, - .free = crypto_cbc_free, + .create = crypto_cbc_create, .module = THIS_MODULE, }; diff --git a/crypto/ccm.c b/crypto/ccm.c index b3ace633fae9..b17ae71db56e 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -464,7 +464,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctr = crypto_spawn_skcipher2(&ictx->ctr); + ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_cipher; @@ -546,9 +546,9 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_cipher; diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index e640da674962..dce194c8a3d7 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -540,7 +540,7 @@ static int chachapoly_init(struct crypto_aead *tfm) if (IS_ERR(poly)) return PTR_ERR(poly); - chacha = crypto_spawn_skcipher2(&ictx->chacha); + chacha = crypto_spawn_skcipher(&ictx->chacha); if (IS_ERR(chacha)) { crypto_free_ahash(poly); return PTR_ERR(chacha); @@ -637,9 +637,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_poly; diff --git a/crypto/cipher.c b/crypto/cipher.c index 39541e0e537d..57836c30a49a 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include @@ -67,7 +68,7 @@ static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *, { unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); unsigned int size = crypto_tfm_alg_blocksize(tfm); - u8 buffer[size + alignmask]; + u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK]; u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(tmp, src, size); @@ -116,7 +117,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) return 0; } - -void crypto_exit_cipher_ops(struct crypto_tfm *tfm) -{ -} diff --git a/crypto/cmac.c b/crypto/cmac.c index 7a8bfbd548f6..04080dca8f0c 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -57,7 +57,8 @@ static int crypto_cmac_digest_setkey(struct crypto_shash *parent, unsigned long alignmask = crypto_shash_alignmask(parent); struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); unsigned int bs = crypto_shash_blocksize(parent); - __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); + __be64 *consts = PTR_ALIGN((void *)ctx->ctx, + (alignmask | (__alignof__(__be64) - 1)) + 1); u64 _const[2]; int i, err = 0; u8 msb_mask, gfmask; @@ -173,7 +174,8 @@ static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out) struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); - u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1); + u8 *consts = PTR_ALIGN((void *)tctx->ctx, + (alignmask | (__alignof__(__be64) - 1)) + 1); u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); u8 *prev = odds + bs; unsigned int offset = 0; @@ -243,6 +245,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) case 8: break; default: + err = -EINVAL; goto out_put_alg; } @@ -257,7 +260,8 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (err) goto out_free_inst; - alignmask = alg->cra_alignmask | (sizeof(long) - 1); + /* We access the data as u32s when xoring. */ + alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); inst->alg.base.cra_alignmask = alignmask; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; @@ -269,7 +273,9 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) + alg->cra_blocksize * 2; inst->alg.base.cra_ctxsize = - ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1) + ALIGN(sizeof(struct cmac_tfm_ctx), crypto_tfm_ctx_alignment()) + + ((alignmask | (__alignof__(__be64) - 1)) & + ~(crypto_tfm_ctx_alignment() - 1)) + alg->cra_blocksize * 2; inst->alg.base.cra_init = cmac_init_tfm; diff --git a/crypto/compress.c b/crypto/compress.c index c33f0763a956..f2d522924a07 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm) return 0; } - -void crypto_exit_compress_ops(struct crypto_tfm *tfm) -{ -} diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 9b66ce7e728d..566ca77d3233 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -17,9 +17,9 @@ * */ -#include #include #include +#include #include #include #include @@ -48,6 +48,11 @@ struct cryptd_instance_ctx { struct cryptd_queue *queue; }; +struct skcipherd_instance_ctx { + struct crypto_skcipher_spawn spawn; + struct cryptd_queue *queue; +}; + struct hashd_instance_ctx { struct crypto_shash_spawn spawn; struct cryptd_queue *queue; @@ -67,6 +72,15 @@ struct cryptd_blkcipher_request_ctx { crypto_completion_t complete; }; +struct cryptd_skcipher_ctx { + atomic_t refcnt; + struct crypto_sync_skcipher *child; +}; + +struct cryptd_skcipher_request_ctx { + crypto_completion_t complete; +}; + struct cryptd_hash_ctx { atomic_t refcnt; struct crypto_shash *child; @@ -122,7 +136,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, { int cpu, err; struct cryptd_cpu_queue *cpu_queue; - struct crypto_tfm *tfm; atomic_t *refcnt; bool may_backlog; @@ -141,7 +154,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, if (!atomic_read(refcnt)) goto out_put_cpu; - tfm = request->tfm; atomic_inc(refcnt); out_put_cpu: @@ -432,6 +444,218 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl, return err; } +static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, + const u8 *key, unsigned int keylen) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_sync_skcipher *child = ctx->child; + int err; + + crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(child, + crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_sync_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, + crypto_sync_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static void cryptd_skcipher_complete(struct skcipher_request *req, int err) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + int refcnt = atomic_read(&ctx->refcnt); + + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_skcipher(tfm); +} + +static void cryptd_skcipher_encrypt(struct crypto_async_request *base, + int err) +{ + struct skcipher_request *req = skcipher_request_cast(base); + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_sync_skcipher *child = ctx->child; + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); + + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); + + req->base.complete = rctx->complete; + +out: + cryptd_skcipher_complete(req, err); +} + +static void cryptd_skcipher_decrypt(struct crypto_async_request *base, + int err) +{ + struct skcipher_request *req = skcipher_request_cast(base); + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_sync_skcipher *child = ctx->child; + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); + + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); + + req->base.complete = rctx->complete; + +out: + cryptd_skcipher_complete(req, err); +} + +static int cryptd_skcipher_enqueue(struct skcipher_request *req, + crypto_completion_t compl) +{ + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_queue *queue; + + queue = cryptd_get_queue(crypto_skcipher_tfm(tfm)); + rctx->complete = req->base.complete; + req->base.complete = compl; + + return cryptd_enqueue_request(queue, &req->base); +} + +static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req) +{ + return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt); +} + +static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req) +{ + return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt); +} + +static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm) +{ + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct crypto_skcipher_spawn *spawn = &ictx->spawn; + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; + + cipher = crypto_spawn_skcipher(spawn); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + ctx->child = (struct crypto_sync_skcipher *)cipher; + crypto_skcipher_set_reqsize( + tfm, sizeof(struct cryptd_skcipher_request_ctx)); + return 0; +} + +static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_sync_skcipher(ctx->child); +} + +static void cryptd_skcipher_free(struct skcipher_instance *inst) +{ + struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(&ctx->spawn); +} + +static int cryptd_create_skcipher(struct crypto_template *tmpl, + struct rtattr **tb, + struct cryptd_queue *queue) +{ + struct skcipherd_instance_ctx *ctx; + struct skcipher_instance *inst; + struct skcipher_alg *alg; + const char *name; + u32 type; + u32 mask; + int err; + + type = 0; + mask = CRYPTO_ALG_ASYNC; + + cryptd_check_internal(tb, &type, &mask); + + name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(name)) + return PTR_ERR(name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + ctx = skcipher_instance_ctx(inst); + ctx->queue = queue; + + crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(&ctx->spawn, name, type, mask); + if (err) + goto out_free_inst; + + alg = crypto_spawn_skcipher_alg(&ctx->spawn); + err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base); + if (err) + goto out_drop_skcipher; + + inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); + + inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg); + inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); + + inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx); + + inst->alg.init = cryptd_skcipher_init_tfm; + inst->alg.exit = cryptd_skcipher_exit_tfm; + + inst->alg.setkey = cryptd_skcipher_setkey; + inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue; + inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue; + + inst->free = cryptd_skcipher_free; + + err = skcipher_register_instance(tmpl, inst); + if (err) { +out_drop_skcipher: + crypto_drop_skcipher(&ctx->spawn); +out_free_inst: + kfree(inst); + } + return err; +} + static int cryptd_hash_init_tfm(struct crypto_tfm *tfm) { struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); @@ -895,7 +1119,11 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb) switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_BLKCIPHER: - return cryptd_create_blkcipher(tmpl, tb, &queue); + if ((algt->type & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return cryptd_create_blkcipher(tmpl, tb, &queue); + + return cryptd_create_skcipher(tmpl, tb, &queue); case CRYPTO_ALG_TYPE_DIGEST: return cryptd_create_hash(tmpl, tb, &queue); case CRYPTO_ALG_TYPE_AEAD: @@ -985,6 +1213,58 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm) } EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher); +struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name, + u32 type, u32 mask) +{ + char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_skcipher_ctx *ctx; + struct crypto_skcipher *tfm; + + if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, + "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-EINVAL); + + tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + + if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { + crypto_free_skcipher(tfm); + return ERR_PTR(-EINVAL); + } + + ctx = crypto_skcipher_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + + return container_of(tfm, struct cryptd_skcipher, base); +} +EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher); + +struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + + return &ctx->child->base; +} +EXPORT_SYMBOL_GPL(cryptd_skcipher_child); + +bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_skcipher_queued); + +void cryptd_free_skcipher(struct cryptd_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + crypto_free_skcipher(&tfm->base); +} +EXPORT_SYMBOL_GPL(cryptd_free_skcipher); + struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask) { diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index 6989ba0046df..f1bf3418d968 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -47,7 +47,7 @@ static void crypto_pump_requests(struct crypto_engine *engine, /* If another context is idling then defer */ if (engine->idling) { - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); goto out; } @@ -58,7 +58,7 @@ static void crypto_pump_requests(struct crypto_engine *engine, /* Only do teardown in the thread */ if (!in_kthread) { - kthread_queue_work(&engine->kworker, + kthread_queue_work(engine->kworker, &engine->pump_requests); goto out; } @@ -189,7 +189,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine, ret = ablkcipher_enqueue_request(&engine->queue, req); if (!engine->busy && need_pump) - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); spin_unlock_irqrestore(&engine->queue_lock, flags); return ret; @@ -231,7 +231,7 @@ int crypto_transfer_hash_request(struct crypto_engine *engine, ret = ahash_enqueue_request(&engine->queue, req); if (!engine->busy && need_pump) - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); spin_unlock_irqrestore(&engine->queue_lock, flags); return ret; @@ -284,7 +284,7 @@ void crypto_finalize_cipher_request(struct crypto_engine *engine, req->base.complete(&req->base, err); - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); } EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request); @@ -321,7 +321,7 @@ void crypto_finalize_hash_request(struct crypto_engine *engine, req->base.complete(&req->base, err); - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); } EXPORT_SYMBOL_GPL(crypto_finalize_hash_request); @@ -345,7 +345,7 @@ int crypto_engine_start(struct crypto_engine *engine) engine->running = true; spin_unlock_irqrestore(&engine->queue_lock, flags); - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); return 0; } @@ -422,11 +422,8 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN); spin_lock_init(&engine->queue_lock); - kthread_init_worker(&engine->kworker); - engine->kworker_task = kthread_run(kthread_worker_fn, - &engine->kworker, "%s", - engine->name); - if (IS_ERR(engine->kworker_task)) { + engine->kworker = kthread_create_worker(0, "%s", engine->name); + if (IS_ERR(engine->kworker)) { dev_err(dev, "failed to create crypto request pump task\n"); return NULL; } @@ -434,7 +431,7 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) if (engine->rt) { dev_info(dev, "will run requests pump with realtime priority\n"); - sched_setscheduler(engine->kworker_task, SCHED_FIFO, ¶m); + sched_setscheduler(engine->kworker->task, SCHED_FIFO, ¶m); } return engine; @@ -455,8 +452,7 @@ int crypto_engine_exit(struct crypto_engine *engine) if (ret) return ret; - kthread_flush_worker(&engine->kworker); - kthread_stop(engine->kworker_task); + kthread_destroy_worker(engine->kworker); return 0; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 20ff2c746e0b..28c6f80a3c13 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -26,7 +26,7 @@ #include static DEFINE_MUTEX(crypto_default_null_skcipher_lock); -static struct crypto_skcipher *crypto_default_null_skcipher; +static struct crypto_sync_skcipher *crypto_default_null_skcipher; static int crypto_default_null_skcipher_refcnt; static int null_compress(struct crypto_tfm *tfm, const u8 *src, @@ -153,16 +153,15 @@ MODULE_ALIAS_CRYPTO("compress_null"); MODULE_ALIAS_CRYPTO("digest_null"); MODULE_ALIAS_CRYPTO("cipher_null"); -struct crypto_skcipher *crypto_get_default_null_skcipher(void) +struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void) { - struct crypto_skcipher *tfm; + struct crypto_sync_skcipher *tfm; mutex_lock(&crypto_default_null_skcipher_lock); tfm = crypto_default_null_skcipher; if (!tfm) { - tfm = crypto_alloc_skcipher("ecb(cipher_null)", - 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0); if (IS_ERR(tfm)) goto unlock; @@ -182,7 +181,7 @@ void crypto_put_default_null_skcipher(void) { mutex_lock(&crypto_default_null_skcipher_lock); if (!--crypto_default_null_skcipher_refcnt) { - crypto_free_skcipher(crypto_default_null_skcipher); + crypto_free_sync_skcipher(crypto_default_null_skcipher); crypto_default_null_skcipher = NULL; } mutex_unlock(&crypto_default_null_skcipher_lock); diff --git a/crypto/ctr.c b/crypto/ctr.c index ff4d21eddb83..171e759d3da4 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -58,7 +58,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk, unsigned int bsize = crypto_cipher_blocksize(tfm); unsigned long alignmask = crypto_cipher_alignmask(tfm); u8 *ctrblk = walk->iv; - u8 tmp[bsize + alignmask]; + u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK]; u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; @@ -107,7 +107,7 @@ static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk, unsigned int nbytes = walk->nbytes; u8 *ctrblk = walk->iv; u8 *src = walk->src.virt.addr; - u8 tmp[bsize + alignmask]; + u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK]; u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); do { @@ -312,7 +312,7 @@ static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm) unsigned long align; unsigned int reqsize; - cipher = crypto_spawn_skcipher2(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -370,9 +370,9 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, spawn = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher2(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; diff --git a/crypto/cts.c b/crypto/cts.c index 51976187b2bf..9891f4a4e37f 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -40,6 +40,7 @@ * rfc3962 includes errata information in its Appendix A. */ +#include #include #include #include @@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[MAX_CIPHER_BLOCKSIZE * 2] __attribute__ ((aligned(__alignof__(u32)))); struct scatterlist *sg; unsigned int offset; int lastn; @@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[MAX_CIPHER_BLOCKSIZE * 2] __attribute__ ((aligned(__alignof__(u32)))); struct scatterlist *sg; unsigned int offset; u8 *space; @@ -290,7 +291,7 @@ static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) unsigned bsize; unsigned align; - cipher = crypto_spawn_skcipher2(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -348,9 +349,9 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) spawn = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher2(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; diff --git a/crypto/dh.c b/crypto/dh.c index 99e20fc63cc9..b00535e148ea 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -114,7 +114,7 @@ static int dh_compute_value(struct kpp_request *req) if (req->src) { base = mpi_read_raw_from_sgl(req->src, req->src_len); if (!base) { - ret = EINVAL; + ret = -EINVAL; goto err_free_val; } } else { diff --git a/crypto/drbg.c b/crypto/drbg.c index 1b94a4306dfc..5374f16e4701 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1862,6 +1862,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, memcpy(outbuf, drbg->outscratchpad, cryptlen); outlen -= cryptlen; + outbuf += cryptlen; } ret = 0; diff --git a/crypto/echainiv.c b/crypto/echainiv.c index e3d889b122e0..cfeb5fb5eb97 100644 --- a/crypto/echainiv.c +++ b/crypto/echainiv.c @@ -47,9 +47,9 @@ static int echainiv_encrypt(struct aead_request *req) info = req->iv; if (req->src != req->dst) { - SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); + SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); - skcipher_request_set_tfm(nreq, ctx->sknull); + skcipher_request_set_sync_tfm(nreq, ctx->sknull); skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(nreq, req->src, req->dst, diff --git a/crypto/gcm.c b/crypto/gcm.c index 398f048c452a..7c9e743963ee 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx { struct crypto_rfc4543_ctx { struct crypto_aead *child; - struct crypto_skcipher *null; + struct crypto_sync_skcipher *null; u8 nonce[4]; }; @@ -573,7 +573,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(ghash)) return PTR_ERR(ghash); - ctr = crypto_spawn_skcipher2(&ictx->ctr); + ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_hash; @@ -661,9 +661,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, goto err_drop_ghash; crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_ghash; @@ -1074,9 +1074,9 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc) unsigned int authsize = crypto_aead_authsize(aead); unsigned int nbytes = req->assoclen + req->cryptlen - (enc ? 0 : authsize); - SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null); + SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null); - skcipher_request_set_tfm(nreq, ctx->null); + skcipher_request_set_sync_tfm(nreq, ctx->null); skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL); @@ -1100,7 +1100,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm) struct crypto_aead_spawn *spawn = &ictx->aead; struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_aead *aead; - struct crypto_skcipher *null; + struct crypto_sync_skcipher *null; unsigned long align; int err = 0; @@ -1108,7 +1108,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm) if (IS_ERR(aead)) return PTR_ERR(aead); - null = crypto_get_default_null_skcipher2(); + null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_aead; @@ -1136,7 +1136,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm) struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); - crypto_put_default_null_skcipher2(); + crypto_put_default_null_skcipher(); } static void crypto_rfc4543_free(struct aead_instance *inst) diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index 5276607c72d0..72015fee533d 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -263,48 +263,6 @@ EXPORT_SYMBOL(gf128mul_bbe); * t[1][BYTE] contains g*x^8*BYTE * .. * t[15][BYTE] contains g*x^120*BYTE */ -struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g) -{ - struct gf128mul_64k *t; - int i, j, k; - - t = kzalloc(sizeof(*t), GFP_KERNEL); - if (!t) - goto out; - - for (i = 0; i < 16; i++) { - t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); - if (!t->t[i]) { - gf128mul_free_64k(t); - t = NULL; - goto out; - } - } - - t->t[0]->t[128] = *g; - for (j = 64; j > 0; j >>= 1) - gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]); - - for (i = 0;;) { - for (j = 2; j < 256; j += j) - for (k = 1; k < j; ++k) - be128_xor(&t->t[i]->t[j + k], - &t->t[i]->t[j], &t->t[i]->t[k]); - - if (++i >= 16) - break; - - for (j = 128; j > 0; j >>= 1) { - t->t[i]->t[j] = t->t[i - 1]->t[j]; - gf128mul_x8_lle(&t->t[i]->t[j]); - } - } - -out: - return t; -} -EXPORT_SYMBOL(gf128mul_init_64k_lle); - struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g) { struct gf128mul_64k *t; @@ -352,24 +310,11 @@ void gf128mul_free_64k(struct gf128mul_64k *t) int i; for (i = 0; i < 16; i++) - kfree(t->t[i]); - kfree(t); + kzfree(t->t[i]); + kzfree(t); } EXPORT_SYMBOL(gf128mul_free_64k); -void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t) -{ - u8 *ap = (u8 *)a; - be128 r[1]; - int i; - - *r = t->t[0]->t[ap[0]]; - for (i = 1; i < 16; ++i) - be128_xor(r, r, &t->t[i]->t[ap[i]]); - *a = *r; -} -EXPORT_SYMBOL(gf128mul_64k_lle); - void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) { u8 *ap = (u8 *)a; diff --git a/crypto/internal.h b/crypto/internal.h index 6184c4226a8f..6262ec0435b4 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); int crypto_init_cipher_ops(struct crypto_tfm *tfm); int crypto_init_compress_ops(struct crypto_tfm *tfm); -void crypto_exit_cipher_ops(struct crypto_tfm *tfm); -void crypto_exit_compress_ops(struct crypto_tfm *tfm); - struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask); void crypto_larval_kill(struct crypto_alg *alg); struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask); diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index c4938497eedb..787dccca3715 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -39,7 +39,6 @@ #include #include -#include #include #include #include diff --git a/crypto/lrw.c b/crypto/lrw.c index fc3d4fec8ddd..50239923e378 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -17,7 +17,8 @@ * * The test vectors are included in the testing module tcrypt.[ch] */ -#include +#include +#include #include #include #include @@ -29,11 +30,30 @@ #include #include +#define LRW_BUFFER_SIZE 128u + struct priv { - struct crypto_cipher *child; + struct crypto_skcipher *child; struct lrw_table_ctx table; }; +struct rctx { + be128 buf[LRW_BUFFER_SIZE / sizeof(be128)]; + + be128 t; + + be128 *ext; + + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct scatterlist *src; + struct scatterlist *dst; + + unsigned int left; + + struct skcipher_request subreq; +}; + static inline void setbit128_bbe(void *b, int bit) { __set_bit(bit ^ (0x80 - @@ -76,32 +96,26 @@ void lrw_free_table(struct lrw_table_ctx *ctx) } EXPORT_SYMBOL_GPL(lrw_free_table); -static int setkey(struct crypto_tfm *parent, const u8 *key, +static int setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct priv *ctx = crypto_tfm_ctx(parent); - struct crypto_cipher *child = ctx->child; + struct priv *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err, bsize = LRW_BLOCK_SIZE; const u8 *tweak = key + keylen - bsize; - crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(child, key, keylen - bsize); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen - bsize); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); if (err) return err; - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); return lrw_init_table(&ctx->table, tweak); } -struct sinfo { - be128 t; - struct crypto_tfm *tfm; - void (*fn)(struct crypto_tfm *, u8 *, const u8 *); -}; - static inline void inc(be128 *iv) { be64_add_cpu(&iv->b, 1); @@ -109,13 +123,6 @@ static inline void inc(be128 *iv) be64_add_cpu(&iv->a, 1); } -static inline void lrw_round(struct sinfo *s, void *dst, const void *src) -{ - be128_xor(dst, &s->t, src); /* PP <- T xor P */ - s->fn(s->tfm, dst, dst); /* CC <- E(Key2,PP) */ - be128_xor(dst, dst, &s->t); /* C <- T xor CC */ -} - /* this returns the number of consequative 1 bits starting * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */ static inline int get_index128(be128 *block) @@ -140,83 +147,263 @@ static inline int get_index128(be128 *block) return 127; } -static int crypt(struct blkcipher_desc *d, - struct blkcipher_walk *w, struct priv *ctx, - void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) +static int post_crypt(struct skcipher_request *req) { + struct rctx *rctx = skcipher_request_ctx(req); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; + const int bs = LRW_BLOCK_SIZE; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned offset; int err; - unsigned int avail; + + subreq = &rctx->subreq; + err = skcipher_walk_virt(&w, subreq, false); + + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wdst; + + wdst = w.dst.virt.addr; + + do { + be128_xor(wdst, buf++, wdst); + wdst++; + } while ((avail -= bs) >= bs); + + err = skcipher_walk_done(&w, avail); + } + + rctx->left -= subreq->cryptlen; + + if (err || !rctx->left) + goto out; + + rctx->dst = rctx->dstbuf; + + scatterwalk_done(&w.out, 0, 1); + sg = w.out.sg; + offset = w.out.offset; + + if (rctx->dst != sg) { + rctx->dst[0] = *sg; + sg_unmark_end(rctx->dst); + scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2); + } + rctx->dst[0].length -= offset - sg->offset; + rctx->dst[0].offset = offset; + +out: + return err; +} + +static int pre_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rctx *rctx = skcipher_request_ctx(req); + struct priv *ctx = crypto_skcipher_ctx(tfm); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; const int bs = LRW_BLOCK_SIZE; - struct sinfo s = { - .tfm = crypto_cipher_tfm(ctx->child), - .fn = fn - }; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned cryptlen; + unsigned offset; be128 *iv; - u8 *wsrc; - u8 *wdst; + bool more; + int err; - err = blkcipher_walk_virt(d, w); - if (!(avail = w->nbytes)) - return err; + subreq = &rctx->subreq; + skcipher_request_set_tfm(subreq, tfm); - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + cryptlen = subreq->cryptlen; + more = rctx->left > cryptlen; + if (!more) + cryptlen = rctx->left; - /* calculate first value of T */ - iv = (be128 *)w->iv; - s.t = *iv; + skcipher_request_set_crypt(subreq, rctx->src, rctx->dst, + cryptlen, req->iv); - /* T <- I*Key2 */ - gf128mul_64k_bbe(&s.t, ctx->table.table); + err = skcipher_walk_virt(&w, subreq, false); + iv = w.iv; - goto first; + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wsrc; + be128 *wdst; + + wsrc = w.src.virt.addr; + wdst = w.dst.virt.addr; - for (;;) { do { + *buf++ = rctx->t; + be128_xor(wdst++, &rctx->t, wsrc++); + /* T <- I*Key2, using the optimization * discussed in the specification */ - be128_xor(&s.t, &s.t, + be128_xor(&rctx->t, &rctx->t, &ctx->table.mulinc[get_index128(iv)]); inc(iv); + } while ((avail -= bs) >= bs); -first: - lrw_round(&s, wdst, wsrc); + err = skcipher_walk_done(&w, avail); + } - wsrc += bs; - wdst += bs; - } while ((avail -= bs) >= bs); + skcipher_request_set_tfm(subreq, ctx->child); + skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst, + cryptlen, NULL); - err = blkcipher_walk_done(d, w, avail); - if (!(avail = w->nbytes)) - break; + if (err || !more) + goto out; - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + rctx->src = rctx->srcbuf; + + scatterwalk_done(&w.in, 0, 1); + sg = w.in.sg; + offset = w.in.offset; + + if (rctx->src != sg) { + rctx->src[0] = *sg; + sg_unmark_end(rctx->src); + scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2); } + rctx->src[0].length -= offset - sg->offset; + rctx->src[0].offset = offset; +out: return err; } -static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int init_crypt(struct skcipher_request *req, crypto_completion_t done) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + gfp_t gfp; + + subreq = &rctx->subreq; + skcipher_request_set_callback(subreq, req->base.flags, done, req); + + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + rctx->ext = NULL; + + subreq->cryptlen = LRW_BUFFER_SIZE; + if (req->cryptlen > LRW_BUFFER_SIZE) { + subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); + rctx->ext = kmalloc(subreq->cryptlen, gfp); + } + + rctx->src = req->src; + rctx->dst = req->dst; + rctx->left = req->cryptlen; + + /* calculate first value of T */ + memcpy(&rctx->t, req->iv, sizeof(rctx->t)); + + /* T <- I*Key2 */ + gf128mul_64k_bbe(&rctx->t, ctx->table.table); - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, - crypto_cipher_alg(ctx->child)->cia_encrypt); + return 0; } -static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void exit_crypt(struct skcipher_request *req) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->left = 0; - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, - crypto_cipher_alg(ctx->child)->cia_decrypt); + if (rctx->ext) + kfree(rctx->ext); +} + +static int do_encrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_encrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } + + exit_crypt(req); + return err; +} + +static void encrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_encrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int encrypt(struct skcipher_request *req) +{ + return do_encrypt(req, init_crypt(req, encrypt_done)); +} + +static int do_decrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_decrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } + + exit_crypt(req); + return err; +} + +static void decrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_decrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int decrypt(struct skcipher_request *req) +{ + return do_decrypt(req, init_crypt(req, decrypt_done)); } int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, @@ -298,95 +485,161 @@ int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, } EXPORT_SYMBOL_GPL(lrw_crypt); -static int init_tfm(struct crypto_tfm *tfm) +static int init_tfm(struct crypto_skcipher *tfm) { - struct crypto_cipher *cipher; - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct priv *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + struct priv *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; - cipher = crypto_spawn_cipher(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); - if (crypto_cipher_blocksize(cipher) != LRW_BLOCK_SIZE) { - *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; - crypto_free_cipher(cipher); - return -EINVAL; - } - ctx->child = cipher; + + crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) + + sizeof(struct rctx)); + return 0; } -static void exit_tfm(struct crypto_tfm *tfm) +static void exit_tfm(struct crypto_skcipher *tfm) { - struct priv *ctx = crypto_tfm_ctx(tfm); + struct priv *ctx = crypto_skcipher_ctx(tfm); lrw_free_table(&ctx->table); - crypto_free_cipher(ctx->child); + crypto_free_skcipher(ctx->child); +} + +static void free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); } -static struct crypto_instance *alloc(struct rtattr **tb) +static int create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; - struct crypto_alg *alg; + struct crypto_skcipher_spawn *spawn; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct skcipher_alg *alg; + const char *cipher_name; + char ecb_name[CRYPTO_MAX_ALG_NAME]; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + if (err == -ENOENT) { + err = -ENAMETOOLONG; + if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", + cipher_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + err = crypto_grab_skcipher(spawn, ecb_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + } + if (err) - return ERR_PTR(err); + goto err_free_inst; - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); + alg = crypto_skcipher_spawn_alg(spawn); - inst = crypto_alloc_instance("lrw", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = -EINVAL; + if (alg->base.cra_blocksize != LRW_BLOCK_SIZE) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; + if (crypto_skcipher_alg_ivsize(alg)) + goto err_drop_spawn; - if (alg->cra_alignmask < 7) inst->alg.cra_alignmask = 7; - else inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw", + &alg->base); + if (err) + goto err_drop_spawn; - if (!(alg->cra_blocksize % 4)) - inst->alg.cra_alignmask |= 3; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = - alg->cra_cipher.cia_min_keysize + alg->cra_blocksize; - inst->alg.cra_blkcipher.max_keysize = - alg->cra_cipher.cia_max_keysize + alg->cra_blocksize; + err = -EINVAL; + cipher_name = alg->base.cra_name; - inst->alg.cra_ctxsize = sizeof(struct priv); + /* Alas we screwed up the naming so we have to mangle the + * cipher name. + */ + if (!strncmp(cipher_name, "ecb(", 4)) { + unsigned len; - inst->alg.cra_init = init_tfm; - inst->alg.cra_exit = exit_tfm; + len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); + if (len < 2 || len >= sizeof(ecb_name)) + goto err_drop_spawn; - inst->alg.cra_blkcipher.setkey = setkey; - inst->alg.cra_blkcipher.encrypt = encrypt; - inst->alg.cra_blkcipher.decrypt = decrypt; + if (ecb_name[len - 1] != ')') + goto err_drop_spawn; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + ecb_name[len - 1] = 0; -static void free_inst(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + } + + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask | + (__alignof__(u64) - 1); + + inst->alg.ivsize = LRW_BLOCK_SIZE; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) + + LRW_BLOCK_SIZE; + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) + + LRW_BLOCK_SIZE; + + inst->alg.base.cra_ctxsize = sizeof(struct priv); + + inst->alg.init = init_tfm; + inst->alg.exit = exit_tfm; + + inst->alg.setkey = setkey; + inst->alg.encrypt = encrypt; + inst->alg.decrypt = decrypt; + + inst->free = free; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_skcipher(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_tmpl = { .name = "lrw", - .alloc = alloc, - .free = free_inst, + .create = create, .module = THIS_MODULE, }; diff --git a/crypto/lz4.c b/crypto/lz4.c index 99c1b2cc2976..71eff9b01b12 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm) static int __lz4_compress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - size_t tmp_len = *dlen; - int err; + int out_len = LZ4_compress_default(src, dst, + slen, *dlen, ctx); - err = lz4_compress(src, slen, dst, &tmp_len, ctx); - - if (err < 0) + if (!out_len) return -EINVAL; - *dlen = tmp_len; + *dlen = out_len; return 0; } @@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - int err; - size_t tmp_len = *dlen; - size_t __slen = slen; + int out_len = LZ4_decompress_safe(src, dst, slen, *dlen); - err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len); - if (err < 0) - return -EINVAL; + if (out_len < 0) + return out_len; - *dlen = tmp_len; - return err; + *dlen = out_len; + return 0; } static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c index 75ffc4a3f786..03a34a8109c0 100644 --- a/crypto/lz4hc.c +++ b/crypto/lz4hc.c @@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm) static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - size_t tmp_len = *dlen; - int err; + int out_len = LZ4_compress_HC(src, dst, slen, + *dlen, LZ4HC_DEFAULT_CLEVEL, ctx); - err = lz4hc_compress(src, slen, dst, &tmp_len, ctx); - - if (err < 0) + if (!out_len) return -EINVAL; - *dlen = tmp_len; + *dlen = out_len; return 0; } @@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) { - int err; - size_t tmp_len = *dlen; - size_t __slen = slen; + int out_len = LZ4_decompress_safe(src, dst, slen, *dlen); - err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len); - if (err < 0) - return -EINVAL; + if (out_len < 0) + return out_len; - *dlen = tmp_len; - return err; + *dlen = out_len; + return 0; } static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src, diff --git a/crypto/lzo-rle.c b/crypto/lzo-rle.c new file mode 100644 index 000000000000..ea9c75b1db49 --- /dev/null +++ b/crypto/lzo-rle.c @@ -0,0 +1,175 @@ +/* + * Cryptographic API. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct lzorle_ctx { + void *lzorle_comp_mem; +}; + +static void *lzorle_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + return ctx; +} + +static int lzorle_init(struct crypto_tfm *tfm) +{ + struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lzorle_comp_mem = lzorle_alloc_ctx(NULL); + if (IS_ERR(ctx->lzorle_comp_mem)) + return -ENOMEM; + + return 0; +} + +static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + kvfree(ctx); +} + +static void lzorle_exit(struct crypto_tfm *tfm) +{ + struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm); + + lzorle_free_ctx(NULL, ctx->lzorle_comp_mem); +} + +static int __lzorle_compress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ + int err; + + err = lzorle1x_1_compress(src, slen, dst, &tmp_len, ctx); + + if (err != LZO_E_OK) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm); + + return __lzorle_compress(src, slen, dst, dlen, ctx->lzorle_comp_mem); +} + +static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lzorle_compress(src, slen, dst, dlen, ctx); +} + +static int __lzorle_decompress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + int err; + size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ + + err = lzo1x_decompress_safe(src, slen, dst, &tmp_len); + + if (err != LZO_E_OK) + return -EINVAL; + + *dlen = tmp_len; + return 0; +} + +static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + return __lzorle_decompress(src, slen, dst, dlen); +} + +static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lzorle_decompress(src, slen, dst, dlen); +} + +static struct crypto_alg alg = { + .cra_name = "lzo-rle", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lzorle_ctx), + .cra_module = THIS_MODULE, + .cra_init = lzorle_init, + .cra_exit = lzorle_exit, + .cra_u = { .compress = { + .coa_compress = lzorle_compress, + .coa_decompress = lzorle_decompress } } +}; + +static struct scomp_alg scomp = { + .alloc_ctx = lzorle_alloc_ctx, + .free_ctx = lzorle_free_ctx, + .compress = lzorle_scompress, + .decompress = lzorle_sdecompress, + .base = { + .cra_name = "lzo-rle", + .cra_driver_name = "lzo-rle-scomp", + .cra_module = THIS_MODULE, + } +}; + +static int __init lzorle_mod_init(void) +{ + int ret; + + ret = crypto_register_alg(&alg); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg); + return ret; + } + + return ret; +} + +static void __exit lzorle_mod_fini(void) +{ + crypto_unregister_alg(&alg); + crypto_unregister_scomp(&scomp); +} + +module_init(lzorle_mod_init); +module_exit(lzorle_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO-RLE Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lzo-rle"); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index de81f716cf26..37d3dfcf871d 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -15,39 +15,37 @@ */ #include +#include #include #include #include #include -#include #include struct crypto_pcbc_ctx { struct crypto_cipher *child; }; -static int crypto_pcbc_setkey(struct crypto_tfm *parent, const u8 *key, +static int crypto_pcbc_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_cipher *child = ctx->child; int err; crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_encrypt_segment(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; @@ -56,7 +54,7 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, do { crypto_xor(iv, src, bsize); - fn(crypto_cipher_tfm(tfm), dst, iv); + crypto_cipher_encrypt_one(tfm, dst, iv); memcpy(iv, dst, bsize); crypto_xor(iv, src, bsize); @@ -67,22 +65,20 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 * const iv = walk->iv; - u8 tmpbuf[bsize]; + u8 tmpbuf[MAX_CIPHER_BLOCKSIZE]; do { memcpy(tmpbuf, src, bsize); crypto_xor(iv, src, bsize); - fn(crypto_cipher_tfm(tfm), src, iv); + crypto_cipher_encrypt_one(tfm, src, iv); memcpy(iv, tmpbuf, bsize); crypto_xor(iv, src, bsize); @@ -92,38 +88,34 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_pcbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *child = ctx->child; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_pcbc_encrypt_inplace(desc, &walk, + nbytes = crypto_pcbc_encrypt_inplace(req, &walk, child); else - nbytes = crypto_pcbc_encrypt_segment(desc, &walk, + nbytes = crypto_pcbc_encrypt_segment(req, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; @@ -131,7 +123,7 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, u8 * const iv = walk->iv; do { - fn(crypto_cipher_tfm(tfm), dst, src); + crypto_cipher_decrypt_one(tfm, dst, src); crypto_xor(dst, iv, bsize); memcpy(iv, src, bsize); crypto_xor(iv, dst, bsize); @@ -143,21 +135,19 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 * const iv = walk->iv; - u8 tmpbuf[bsize]; + u8 tmpbuf[MAX_CIPHER_BLOCKSIZE] __attribute__ ((aligned(__alignof__(u32)))); do { memcpy(tmpbuf, src, bsize); - fn(crypto_cipher_tfm(tfm), src, src); + crypto_cipher_decrypt_one(tfm, src, src); crypto_xor(src, iv, bsize); memcpy(iv, tmpbuf, bsize); crypto_xor(iv, src, bsize); @@ -168,37 +158,35 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_pcbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *child = ctx->child; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_pcbc_decrypt_inplace(desc, &walk, + nbytes = crypto_pcbc_decrypt_inplace(req, &walk, child); else - nbytes = crypto_pcbc_decrypt_segment(desc, &walk, + nbytes = crypto_pcbc_decrypt_segment(req, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm) +static int crypto_pcbc_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); @@ -209,68 +197,98 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm) return 0; } -static void crypto_pcbc_exit_tfm(struct crypto_tfm *tfm) +static void crypto_pcbc_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); + crypto_free_cipher(ctx->child); } -static struct crypto_instance *crypto_pcbc_alloc(struct rtattr **tb) +static void crypto_pcbc_free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct crypto_spawn *spawn; struct crypto_alg *alg; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); - if (err) - return ERR_PTR(err); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if (((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) & + ~CRYPTO_ALG_INTERNAL) + return -EINVAL; + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); + alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER | + (algt->type & CRYPTO_ALG_INTERNAL), + CRYPTO_ALG_TYPE_MASK | + (algt->mask & CRYPTO_ALG_INTERNAL)); + err = PTR_ERR(alg); if (IS_ERR(alg)) - return ERR_CAST(alg); + goto err_free_inst; - inst = crypto_alloc_instance("pcbc", alg); - if (IS_ERR(inst)) - goto out_put_alg; + spawn = skcipher_instance_ctx(inst); + err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst), + CRYPTO_ALG_TYPE_MASK); + crypto_mod_put(alg); + if (err) + goto err_free_inst; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "pcbc", alg); + if (err) + goto err_drop_spawn; + + inst->alg.base.cra_flags = alg->cra_flags & CRYPTO_ALG_INTERNAL; + inst->alg.base.cra_priority = alg->cra_priority; + inst->alg.base.cra_blocksize = alg->cra_blocksize; + inst->alg.base.cra_alignmask = alg->cra_alignmask; /* We access the data as u32s when xoring. */ - inst->alg.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize; + inst->alg.ivsize = alg->cra_blocksize; + inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; + inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_pcbc_ctx); + inst->alg.base.cra_ctxsize = sizeof(struct crypto_pcbc_ctx); - inst->alg.cra_init = crypto_pcbc_init_tfm; - inst->alg.cra_exit = crypto_pcbc_exit_tfm; + inst->alg.init = crypto_pcbc_init_tfm; + inst->alg.exit = crypto_pcbc_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_pcbc_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_pcbc_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_pcbc_decrypt; + inst->alg.setkey = crypto_pcbc_setkey; + inst->alg.encrypt = crypto_pcbc_encrypt; + inst->alg.decrypt = crypto_pcbc_decrypt; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + inst->free = crypto_pcbc_free; -static void crypto_pcbc_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_spawn(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_pcbc_tmpl = { .name = "pcbc", - .alloc = crypto_pcbc_alloc, - .free = crypto_pcbc_free, + .create = crypto_pcbc_create, .module = THIS_MODULE, }; diff --git a/crypto/seqiv.c b/crypto/seqiv.c index c7049231861f..f358c06e77c4 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -73,9 +73,9 @@ static int seqiv_aead_encrypt(struct aead_request *req) info = req->iv; if (req->src != req->dst) { - SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); + SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull); - skcipher_request_set_tfm(nreq, ctx->sknull); + skcipher_request_set_sync_tfm(nreq, ctx->sknull); skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(nreq, req->src, req->dst, diff --git a/crypto/shash.c b/crypto/shash.c index 7eebf3cde7b7..5de87906ff81 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -93,13 +93,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_shash_setkey); -static inline unsigned int shash_align_buffer_size(unsigned len, - unsigned long mask) -{ - typedef u8 __attribute__ ((aligned)) u8_aligned; - return len + (mask & ~(__alignof__(u8_aligned) - 1)); -} - static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -108,11 +101,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, unsigned long alignmask = crypto_shash_alignmask(tfm); unsigned int unaligned_len = alignmask + 1 - ((unsigned long)data & alignmask); - u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)] - __attribute__ ((aligned)); + /* + * We cannot count on __aligned() working for large values: + * https://patchwork.kernel.org/patch/9507697/ + */ + u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; + if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf))) + return -EINVAL; + if (unaligned_len > len) unaligned_len = len; @@ -144,11 +143,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out) unsigned long alignmask = crypto_shash_alignmask(tfm); struct shash_alg *shash = crypto_shash_alg(tfm); unsigned int ds = crypto_shash_digestsize(tfm); - u8 ubuf[shash_align_buffer_size(ds, alignmask)] - __attribute__ ((aligned)); + /* + * We cannot count on __aligned() working for large values: + * https://patchwork.kernel.org/patch/9507697/ + */ + u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; + if (WARN_ON(buf + ds > ubuf + sizeof(ubuf))) + return -EINVAL; + err = shash->final(desc, buf); if (err) goto out; @@ -477,9 +482,9 @@ static int shash_prepare_alg(struct shash_alg *alg) { struct crypto_alg *base = &alg->base; - if (alg->digestsize > PAGE_SIZE / 8 || - alg->descsize > PAGE_SIZE / 8 || - alg->statesize > PAGE_SIZE / 8) + if (alg->digestsize > HASH_MAX_DIGESTSIZE || + alg->descsize > HASH_MAX_DESCSIZE || + alg->statesize > HASH_MAX_STATESIZE) return -EINVAL; base->cra_type = &crypto_shash_type; diff --git a/crypto/simd.c b/crypto/simd.c new file mode 100644 index 000000000000..88203370a62f --- /dev/null +++ b/crypto/simd.c @@ -0,0 +1,226 @@ +/* + * Shared crypto simd helpers + * + * Copyright (c) 2012 Jussi Kivilinna + * Copyright (c) 2016 Herbert Xu + * + * Based on aesni-intel_glue.c by: + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct simd_skcipher_alg { + const char *ialg_name; + struct skcipher_alg alg; +}; + +struct simd_skcipher_ctx { + struct cryptd_skcipher *cryptd_tfm; +}; + +static int simd_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, key_len); + crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int simd_skcipher_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq; + struct crypto_skcipher *child; + + subreq = skcipher_request_ctx(req); + *subreq = *req; + + if (!may_use_simd() || + (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm))) + child = &ctx->cryptd_tfm->base; + else + child = cryptd_skcipher_child(ctx->cryptd_tfm); + + skcipher_request_set_tfm(subreq, child); + + return crypto_skcipher_encrypt(subreq); +} + +static int simd_skcipher_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq; + struct crypto_skcipher *child; + + subreq = skcipher_request_ctx(req); + *subreq = *req; + + if (!may_use_simd() || + (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm))) + child = &ctx->cryptd_tfm->base; + else + child = cryptd_skcipher_child(ctx->cryptd_tfm); + + skcipher_request_set_tfm(subreq, child); + + return crypto_skcipher_decrypt(subreq); +} + +static void simd_skcipher_exit(struct crypto_skcipher *tfm) +{ + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + + cryptd_free_skcipher(ctx->cryptd_tfm); +} + +static int simd_skcipher_init(struct crypto_skcipher *tfm) +{ + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct cryptd_skcipher *cryptd_tfm; + struct simd_skcipher_alg *salg; + struct skcipher_alg *alg; + unsigned reqsize; + + alg = crypto_skcipher_alg(tfm); + salg = container_of(alg, struct simd_skcipher_alg, alg); + + cryptd_tfm = cryptd_alloc_skcipher(salg->ialg_name, + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + + reqsize = sizeof(struct skcipher_request); + reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + + crypto_skcipher_set_reqsize(tfm, reqsize); + + return 0; +} + +struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname, + const char *drvname, + const char *basename) +{ + struct simd_skcipher_alg *salg; + struct crypto_skcipher *tfm; + struct skcipher_alg *ialg; + struct skcipher_alg *alg; + int err; + + tfm = crypto_alloc_skcipher(basename, CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + + ialg = crypto_skcipher_alg(tfm); + + salg = kzalloc(sizeof(*salg), GFP_KERNEL); + if (!salg) { + salg = ERR_PTR(-ENOMEM); + goto out_put_tfm; + } + + salg->ialg_name = basename; + alg = &salg->alg; + + err = -ENAMETOOLONG; + if (snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", algname) >= + CRYPTO_MAX_ALG_NAME) + goto out_free_salg; + + if (snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + drvname) >= CRYPTO_MAX_ALG_NAME) + goto out_free_salg; + + alg->base.cra_flags = CRYPTO_ALG_ASYNC; + alg->base.cra_priority = ialg->base.cra_priority; + alg->base.cra_blocksize = ialg->base.cra_blocksize; + alg->base.cra_alignmask = ialg->base.cra_alignmask; + alg->base.cra_module = ialg->base.cra_module; + alg->base.cra_ctxsize = sizeof(struct simd_skcipher_ctx); + + alg->ivsize = ialg->ivsize; + alg->chunksize = ialg->chunksize; + alg->min_keysize = ialg->min_keysize; + alg->max_keysize = ialg->max_keysize; + + alg->init = simd_skcipher_init; + alg->exit = simd_skcipher_exit; + + alg->setkey = simd_skcipher_setkey; + alg->encrypt = simd_skcipher_encrypt; + alg->decrypt = simd_skcipher_decrypt; + + err = crypto_register_skcipher(alg); + if (err) + goto out_free_salg; + +out_put_tfm: + crypto_free_skcipher(tfm); + return salg; + +out_free_salg: + kfree(salg); + salg = ERR_PTR(err); + goto out_put_tfm; +} +EXPORT_SYMBOL_GPL(simd_skcipher_create_compat); + +struct simd_skcipher_alg *simd_skcipher_create(const char *algname, + const char *basename) +{ + char drvname[CRYPTO_MAX_ALG_NAME]; + + if (snprintf(drvname, CRYPTO_MAX_ALG_NAME, "simd-%s", basename) >= + CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-ENAMETOOLONG); + + return simd_skcipher_create_compat(algname, drvname, basename); +} +EXPORT_SYMBOL_GPL(simd_skcipher_create); + +void simd_skcipher_free(struct simd_skcipher_alg *salg) +{ + crypto_unregister_skcipher(&salg->alg); + kfree(salg); +} +EXPORT_SYMBOL_GPL(simd_skcipher_free); + +MODULE_LICENSE("GPL"); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 93110d70c1d3..06fa495dfad1 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -14,9 +14,12 @@ * */ +#include #include +#include #include #include +#include #include #include #include @@ -24,6 +27,545 @@ #include "internal.h" +enum { + SKCIPHER_WALK_PHYS = 1 << 0, + SKCIPHER_WALK_SLOW = 1 << 1, + SKCIPHER_WALK_COPY = 1 << 2, + SKCIPHER_WALK_DIFF = 1 << 3, + SKCIPHER_WALK_SLEEP = 1 << 4, +}; + +struct skcipher_walk_buffer { + struct list_head entry; + struct scatter_walk dst; + unsigned int len; + u8 *data; + u8 buffer[]; +}; + +static int skcipher_walk_next(struct skcipher_walk *walk); + +static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr) +{ + if (PageHighMem(scatterwalk_page(walk))) + kunmap_atomic(vaddr); +} + +static inline void *skcipher_map(struct scatter_walk *walk) +{ + struct page *page = scatterwalk_page(walk); + + return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) + + offset_in_page(walk->offset); +} + +static inline void skcipher_map_src(struct skcipher_walk *walk) +{ + walk->src.virt.addr = skcipher_map(&walk->in); +} + +static inline void skcipher_map_dst(struct skcipher_walk *walk) +{ + walk->dst.virt.addr = skcipher_map(&walk->out); +} + +static inline void skcipher_unmap_src(struct skcipher_walk *walk) +{ + skcipher_unmap(&walk->in, walk->src.virt.addr); +} + +static inline void skcipher_unmap_dst(struct skcipher_walk *walk) +{ + skcipher_unmap(&walk->out, walk->dst.virt.addr); +} + +static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk) +{ + return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC; +} + +/* Get a spot of the specified length that does not straddle a page. + * The caller needs to ensure that there is enough space for this operation. + */ +static inline u8 *skcipher_get_spot(u8 *start, unsigned int len) +{ + u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK); + + return max(start, end_page); +} + +static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) +{ + u8 *addr; + + addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); + addr = skcipher_get_spot(addr, bsize); + scatterwalk_copychunks(addr, &walk->out, bsize, + (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1); + return 0; +} + +int skcipher_walk_done(struct skcipher_walk *walk, int err) +{ + unsigned int n = walk->nbytes - err; + unsigned int nbytes; + + nbytes = walk->total - n; + + if (unlikely(err < 0)) { + nbytes = 0; + n = 0; + } else if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | + SKCIPHER_WALK_SLOW | + SKCIPHER_WALK_COPY | + SKCIPHER_WALK_DIFF)))) { +unmap_src: + skcipher_unmap_src(walk); + } else if (walk->flags & SKCIPHER_WALK_DIFF) { + skcipher_unmap_dst(walk); + goto unmap_src; + } else if (walk->flags & SKCIPHER_WALK_COPY) { + skcipher_map_dst(walk); + memcpy(walk->dst.virt.addr, walk->page, n); + skcipher_unmap_dst(walk); + } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { + if (WARN_ON(err)) { + err = -EINVAL; + nbytes = 0; + } else + n = skcipher_done_slow(walk, n); + } + + if (err > 0) + err = 0; + + walk->total = nbytes; + walk->nbytes = nbytes; + + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); + scatterwalk_done(&walk->in, 0, nbytes); + scatterwalk_done(&walk->out, 1, nbytes); + + if (nbytes) { + crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ? + CRYPTO_TFM_REQ_MAY_SLEEP : 0); + return skcipher_walk_next(walk); + } + + /* Short-circuit for the common/fast path. */ + if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) + goto out; + + if (walk->flags & SKCIPHER_WALK_PHYS) + goto out; + + if (walk->iv != walk->oiv) + memcpy(walk->oiv, walk->iv, walk->ivsize); + if (walk->buffer != walk->page) + kfree(walk->buffer); + if (walk->page) + free_page((unsigned long)walk->page); + +out: + return err; +} +EXPORT_SYMBOL_GPL(skcipher_walk_done); + +void skcipher_walk_complete(struct skcipher_walk *walk, int err) +{ + struct skcipher_walk_buffer *p, *tmp; + + list_for_each_entry_safe(p, tmp, &walk->buffers, entry) { + u8 *data; + + if (err) + goto done; + + data = p->data; + if (!data) { + data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); + data = skcipher_get_spot(data, walk->chunksize); + } + + scatterwalk_copychunks(data, &p->dst, p->len, 1); + + if (offset_in_page(p->data) + p->len + walk->chunksize > + PAGE_SIZE) + free_page((unsigned long)p->data); + +done: + list_del(&p->entry); + kfree(p); + } + + if (!err && walk->iv != walk->oiv) + memcpy(walk->oiv, walk->iv, walk->ivsize); + if (walk->buffer != walk->page) + kfree(walk->buffer); + if (walk->page) + free_page((unsigned long)walk->page); +} +EXPORT_SYMBOL_GPL(skcipher_walk_complete); + +static void skcipher_queue_write(struct skcipher_walk *walk, + struct skcipher_walk_buffer *p) +{ + p->dst = walk->out; + list_add_tail(&p->entry, &walk->buffers); +} + +static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize) +{ + bool phys = walk->flags & SKCIPHER_WALK_PHYS; + unsigned alignmask = walk->alignmask; + struct skcipher_walk_buffer *p; + unsigned a; + unsigned n; + u8 *buffer; + void *v; + + if (!phys) { + if (!walk->buffer) + walk->buffer = walk->page; + buffer = walk->buffer; + if (buffer) + goto ok; + } + + /* Start with the minimum alignment of kmalloc. */ + a = crypto_tfm_ctx_alignment() - 1; + n = bsize; + + if (phys) { + /* Calculate the minimum alignment of p->buffer. */ + a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1; + n += sizeof(*p); + } + + /* Minimum size to align p->buffer by alignmask. */ + n += alignmask & ~a; + + /* Minimum size to ensure p->buffer does not straddle a page. */ + n += (bsize - 1) & ~(alignmask | a); + + v = kzalloc(n, skcipher_walk_gfp(walk)); + if (!v) + return skcipher_walk_done(walk, -ENOMEM); + + if (phys) { + p = v; + p->len = bsize; + skcipher_queue_write(walk, p); + buffer = p->buffer; + } else { + walk->buffer = v; + buffer = v; + } + +ok: + walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1); + walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize); + walk->src.virt.addr = walk->dst.virt.addr; + + scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0); + + walk->nbytes = bsize; + walk->flags |= SKCIPHER_WALK_SLOW; + + return 0; +} + +static int skcipher_next_copy(struct skcipher_walk *walk) +{ + struct skcipher_walk_buffer *p; + u8 *tmp = walk->page; + + skcipher_map_src(walk); + memcpy(tmp, walk->src.virt.addr, walk->nbytes); + skcipher_unmap_src(walk); + + walk->src.virt.addr = tmp; + walk->dst.virt.addr = tmp; + + if (!(walk->flags & SKCIPHER_WALK_PHYS)) + return 0; + + p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk)); + if (!p) + return -ENOMEM; + + p->data = walk->page; + p->len = walk->nbytes; + skcipher_queue_write(walk, p); + + if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize > + PAGE_SIZE) + walk->page = NULL; + else + walk->page += walk->nbytes; + + return 0; +} + +static int skcipher_next_fast(struct skcipher_walk *walk) +{ + unsigned long diff; + + walk->src.phys.page = scatterwalk_page(&walk->in); + walk->src.phys.offset = offset_in_page(walk->in.offset); + walk->dst.phys.page = scatterwalk_page(&walk->out); + walk->dst.phys.offset = offset_in_page(walk->out.offset); + + if (walk->flags & SKCIPHER_WALK_PHYS) + return 0; + + diff = walk->src.phys.offset - walk->dst.phys.offset; + diff |= walk->src.virt.page - walk->dst.virt.page; + + skcipher_map_src(walk); + walk->dst.virt.addr = walk->src.virt.addr; + + if (diff) { + walk->flags |= SKCIPHER_WALK_DIFF; + skcipher_map_dst(walk); + } + + return 0; +} + +static int skcipher_walk_next(struct skcipher_walk *walk) +{ + unsigned int bsize; + unsigned int n; + int err; + + walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY | + SKCIPHER_WALK_DIFF); + + n = walk->total; + bsize = min(walk->chunksize, max(n, walk->blocksize)); + n = scatterwalk_clamp(&walk->in, n); + n = scatterwalk_clamp(&walk->out, n); + + if (unlikely(n < bsize)) { + if (unlikely(walk->total < walk->blocksize)) + return skcipher_walk_done(walk, -EINVAL); + +slow_path: + err = skcipher_next_slow(walk, bsize); + goto set_phys_lowmem; + } + + if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) { + if (!walk->page) { + gfp_t gfp = skcipher_walk_gfp(walk); + + walk->page = (void *)__get_free_page(gfp); + if (!walk->page) + goto slow_path; + } + + walk->nbytes = min_t(unsigned, n, + PAGE_SIZE - offset_in_page(walk->page)); + walk->flags |= SKCIPHER_WALK_COPY; + err = skcipher_next_copy(walk); + goto set_phys_lowmem; + } + + walk->nbytes = n; + + return skcipher_next_fast(walk); + +set_phys_lowmem: + if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) { + walk->src.phys.page = virt_to_page(walk->src.virt.addr); + walk->dst.phys.page = virt_to_page(walk->dst.virt.addr); + walk->src.phys.offset &= PAGE_SIZE - 1; + walk->dst.phys.offset &= PAGE_SIZE - 1; + } + return err; +} +EXPORT_SYMBOL_GPL(skcipher_walk_next); + +static int skcipher_copy_iv(struct skcipher_walk *walk) +{ + unsigned a = crypto_tfm_ctx_alignment() - 1; + unsigned alignmask = walk->alignmask; + unsigned ivsize = walk->ivsize; + unsigned bs = walk->chunksize; + unsigned aligned_bs; + unsigned size; + u8 *iv; + + aligned_bs = ALIGN(bs, alignmask); + + /* Minimum size to align buffer by alignmask. */ + size = alignmask & ~a; + + if (walk->flags & SKCIPHER_WALK_PHYS) + size += ivsize; + else { + size += aligned_bs + ivsize; + + /* Minimum size to ensure buffer does not straddle a page. */ + size += (bs - 1) & ~(alignmask | a); + } + + walk->buffer = kmalloc(size, skcipher_walk_gfp(walk)); + if (!walk->buffer) + return -ENOMEM; + + iv = PTR_ALIGN(walk->buffer, alignmask + 1); + iv = skcipher_get_spot(iv, bs) + aligned_bs; + + walk->iv = memcpy(iv, walk->iv, walk->ivsize); + return 0; +} + +static int skcipher_walk_first(struct skcipher_walk *walk) +{ + walk->nbytes = 0; + + if (WARN_ON_ONCE(in_irq())) + return -EDEADLK; + + if (unlikely(!walk->total)) + return 0; + + walk->buffer = NULL; + if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { + int err = skcipher_copy_iv(walk); + if (err) + return err; + } + + walk->page = NULL; + walk->nbytes = walk->total; + + return skcipher_walk_next(walk); +} + +static int skcipher_walk_skcipher(struct skcipher_walk *walk, + struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + scatterwalk_start(&walk->in, req->src); + scatterwalk_start(&walk->out, req->dst); + + walk->total = req->cryptlen; + walk->iv = req->iv; + walk->oiv = req->iv; + + walk->flags &= ~SKCIPHER_WALK_SLEEP; + walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? + SKCIPHER_WALK_SLEEP : 0; + + walk->blocksize = crypto_skcipher_blocksize(tfm); + walk->chunksize = crypto_skcipher_chunksize(tfm); + walk->ivsize = crypto_skcipher_ivsize(tfm); + walk->alignmask = crypto_skcipher_alignmask(tfm); + + return skcipher_walk_first(walk); +} + +int skcipher_walk_virt(struct skcipher_walk *walk, + struct skcipher_request *req, bool atomic) +{ + int err; + + walk->flags &= ~SKCIPHER_WALK_PHYS; + + err = skcipher_walk_skcipher(walk, req); + + walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0; + + return err; +} +EXPORT_SYMBOL_GPL(skcipher_walk_virt); + +void skcipher_walk_atomise(struct skcipher_walk *walk) +{ + walk->flags &= ~SKCIPHER_WALK_SLEEP; +} +EXPORT_SYMBOL_GPL(skcipher_walk_atomise); + +int skcipher_walk_async(struct skcipher_walk *walk, + struct skcipher_request *req) +{ + walk->flags |= SKCIPHER_WALK_PHYS; + + INIT_LIST_HEAD(&walk->buffers); + + return skcipher_walk_skcipher(walk, req); +} +EXPORT_SYMBOL_GPL(skcipher_walk_async); + +static int skcipher_walk_aead_common(struct skcipher_walk *walk, + struct aead_request *req, bool atomic) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + int err; + + walk->flags &= ~SKCIPHER_WALK_PHYS; + + scatterwalk_start(&walk->in, req->src); + scatterwalk_start(&walk->out, req->dst); + + scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2); + scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2); + + walk->iv = req->iv; + walk->oiv = req->iv; + + if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) + walk->flags |= SKCIPHER_WALK_SLEEP; + else + walk->flags &= ~SKCIPHER_WALK_SLEEP; + + walk->blocksize = crypto_aead_blocksize(tfm); + walk->chunksize = crypto_aead_chunksize(tfm); + walk->ivsize = crypto_aead_ivsize(tfm); + walk->alignmask = crypto_aead_alignmask(tfm); + + err = skcipher_walk_first(walk); + + if (atomic) + walk->flags &= ~SKCIPHER_WALK_SLEEP; + + return err; +} + +int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req, + bool atomic) +{ + walk->total = req->cryptlen; + + return skcipher_walk_aead_common(walk, req, atomic); +} +EXPORT_SYMBOL_GPL(skcipher_walk_aead); + +int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic) +{ + walk->total = req->cryptlen; + + return skcipher_walk_aead_common(walk, req, atomic); +} +EXPORT_SYMBOL_GPL(skcipher_walk_aead_encrypt); + +int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + + walk->total = req->cryptlen - crypto_aead_authsize(tfm); + + return skcipher_walk_aead_common(walk, req, atomic); +} +EXPORT_SYMBOL_GPL(skcipher_walk_aead_decrypt); + static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) { if (alg->cra_type == &crypto_blkcipher_type) @@ -378,6 +920,30 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, } EXPORT_SYMBOL_GPL(crypto_alloc_skcipher); +struct crypto_sync_skcipher *crypto_alloc_sync_skcipher( + const char *alg_name, u32 type, u32 mask) +{ + struct crypto_skcipher *tfm; + + /* Only sync algorithms allowed. */ + mask |= CRYPTO_ALG_ASYNC; + + tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask); + + /* + * Make sure we do not allocate something that might get used with + * an on-stack request: check the request size. + */ + if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) > + MAX_SYNC_SKCIPHER_REQSIZE)) { + crypto_free_skcipher(tfm); + return ERR_PTR(-EINVAL); + } + + return (struct crypto_sync_skcipher *)tfm; +} +EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher); + int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask) { return crypto_type_has_alg(alg_name, &crypto_skcipher_type2, diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index fb7956150be4..d1125ec06c02 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -73,7 +73,8 @@ static char *check[] = { "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320", - "lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512", + "lzo", "lzo-rle", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", + "sha3-512", NULL }; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 898ef4d736be..3b686f2a9362 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1509,7 +1509,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, struct crypto_aead *tfm; int err = 0; - tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_aead(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: aead: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); @@ -1538,7 +1538,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc, struct crypto_cipher *tfm; int err = 0; - tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_cipher(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: cipher: Failed to load transform for " "%s: %ld\n", driver, PTR_ERR(tfm)); @@ -1567,7 +1567,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc, struct crypto_skcipher *tfm; int err = 0; - tfm = crypto_alloc_skcipher(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_skcipher(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: skcipher: Failed to load transform for " "%s: %ld\n", driver, PTR_ERR(tfm)); @@ -1618,7 +1618,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, struct crypto_ahash *tfm; int err; - tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_ahash(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: hash: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); @@ -1646,7 +1646,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, if (err) goto out; - tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_shash(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); @@ -1688,7 +1688,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver, struct crypto_rng *rng; int err; - rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask); + rng = crypto_alloc_rng(driver, type, mask); if (IS_ERR(rng)) { printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(rng)); @@ -1715,7 +1715,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, if (!buf) return -ENOMEM; - drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask); + drng = crypto_alloc_rng(driver, type, mask); if (IS_ERR(drng)) { printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for " "%s\n", driver); @@ -1909,7 +1909,7 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver, struct crypto_kpp *tfm; int err = 0; - tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_kpp(driver, type, mask); if (IS_ERR(tfm)) { pr_err("alg: kpp: Failed to load tfm for %s: %ld\n", driver, PTR_ERR(tfm)); @@ -2068,7 +2068,7 @@ static int alg_test_akcipher(const struct alg_test_desc *desc, struct crypto_akcipher *tfm; int err = 0; - tfm = crypto_alloc_akcipher(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_akcipher(driver, type, mask); if (IS_ERR(tfm)) { pr_err("alg: akcipher: Failed to load tfm for %s: %ld\n", driver, PTR_ERR(tfm)); @@ -2091,88 +2091,6 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { - .alg = "__cbc-cast5-avx", - .test = alg_test_null, - }, { - .alg = "__cbc-cast6-avx", - .test = alg_test_null, - }, { - .alg = "__cbc-serpent-avx", - .test = alg_test_null, - }, { - .alg = "__cbc-serpent-avx2", - .test = alg_test_null, - }, { - .alg = "__cbc-serpent-sse2", - .test = alg_test_null, - }, { - .alg = "__cbc-twofish-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-aes-aesni", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "__driver-cbc-camellia-aesni", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-camellia-aesni-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-cast5-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-cast6-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-serpent-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-serpent-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-serpent-sse2", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-twofish-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-aes-aesni", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "__driver-ecb-camellia-aesni", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-camellia-aesni-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-cast5-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-cast6-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-serpent-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-serpent-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-serpent-sse2", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-twofish-avx", - .test = alg_test_null, - }, { - .alg = "__driver-gcm-aes-aesni", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "__ghash-pclmulqdqni", - .test = alg_test_null, - .fips_allowed = 1, - }, { .alg = "adiantum(xchacha12,aes)", .test = alg_test_skcipher, .suite = { @@ -2688,55 +2606,6 @@ static const struct alg_test_desc alg_test_descs[] = { .count = CRCT10DIF_TEST_VECTORS } } - }, { - .alg = "cryptd(__driver-cbc-aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "cryptd(__driver-cbc-camellia-aesni)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-cbc-camellia-aesni-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-cbc-serpent-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "cryptd(__driver-ecb-camellia-aesni)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-camellia-aesni-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-cast5-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-cast6-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-serpent-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-serpent-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-serpent-sse2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-twofish-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-gcm-aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "cryptd(__ghash-pclmulqdqni)", - .test = alg_test_null, - .fips_allowed = 1, }, { .alg = "ctr(aes)", .test = alg_test_skcipher, @@ -3063,10 +2932,6 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "drbg_pr_sha512", .fips_allowed = 1, .test = alg_test_null, - }, { - .alg = "ecb(__aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, }, { .alg = "ecb(aes)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 125669f1e725..3256a5a36d9b 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1334,36 +1334,50 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS 3 +#define CRCT10DIF_TEST_VECTORS ARRAY_SIZE(crct10dif_tv_template) static struct hash_testvec crct10dif_tv_template[] = { { - .plaintext = "abc", - .psize = 3, -#ifdef __LITTLE_ENDIAN - .digest = "\x3b\x44", -#else - .digest = "\x44\x3b", -#endif - }, { - .plaintext = "1234567890123456789012345678901234567890" - "123456789012345678901234567890123456789", - .psize = 79, -#ifdef __LITTLE_ENDIAN - .digest = "\x70\x4b", -#else - .digest = "\x4b\x70", -#endif - }, { - .plaintext = - "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", - .psize = 56, -#ifdef __LITTLE_ENDIAN - .digest = "\xe3\x9c", -#else - .digest = "\x9c\xe3", -#endif - .np = 2, - .tap = { 28, 28 } + .plaintext = "abc", + .psize = 3, + .digest = (u8 *)(u16 []){ 0x443b }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 79, + .digest = (u8 *)(u16 []){ 0x4b70 }, + .np = 2, + .tap = { 63, 16 }, + }, { + .plaintext = "abcdddddddddddddddddddddddddddddddddddddddd" + "ddddddddddddd", + .psize = 56, + .digest = (u8 *)(u16 []){ 0x9ce3 }, + .np = 8, + .tap = { 1, 2, 28, 7, 6, 5, 4, 3 }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 319, + .digest = (u8 *)(u16 []){ 0x44c6 }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 319, + .digest = (u8 *)(u16 []){ 0x44c6 }, + .np = 4, + .tap = { 1, 255, 57, 6 }, } }; @@ -37859,31 +37873,62 @@ static struct hash_testvec bfin_crc_tv_template[] = { static struct comp_testvec lz4_comp_tv_template[] = { { - .inlen = 70, - .outlen = 45, - .input = "Join us now and share the software " - "Join us now and share the software ", - .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" - "\x73\x20\x6e\x6f\x77\x20\x61\x6e" - "\x64\x20\x73\x68\x61\x72\x65\x20" - "\x74\x68\x65\x20\x73\x6f\x66\x74" - "\x77\x0d\x00\x0f\x23\x00\x0b\x50" - "\x77\x61\x72\x65\x20", + .inlen = 255, + .outlen = 218, + .input = "LZ4 is lossless compression algorithm, providing" + " compression speed at 400 MB/s per core, scalable " + "with multi-cores CPU. It features an extremely fast " + "decoder, with speed in multiple GB/s per core, " + "typically reaching RAM speed limits on multi-core " + "systems.", + .output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73" + "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73" + "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d" + "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00" + "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30" + "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f" + "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20" + "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00" + "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66" + "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78" + "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20" + "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00" + "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00" + "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72" + "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83" + "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00" + "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e", + }, }; static struct comp_testvec lz4_decomp_tv_template[] = { { - .inlen = 45, - .outlen = 70, - .input = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" - "\x73\x20\x6e\x6f\x77\x20\x61\x6e" - "\x64\x20\x73\x68\x61\x72\x65\x20" - "\x74\x68\x65\x20\x73\x6f\x66\x74" - "\x77\x0d\x00\x0f\x23\x00\x0b\x50" - "\x77\x61\x72\x65\x20", - .output = "Join us now and share the software " - "Join us now and share the software ", + .inlen = 218, + .outlen = 255, + .input = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73" + "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73" + "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d" + "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00" + "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30" + "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f" + "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20" + "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00" + "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66" + "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78" + "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20" + "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00" + "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00" + "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72" + "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83" + "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00" + "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e", + .output = "LZ4 is lossless compression algorithm, providing" + " compression speed at 400 MB/s per core, scalable " + "with multi-cores CPU. It features an extremely fast " + "decoder, with speed in multiple GB/s per core, " + "typically reaching RAM speed limits on multi-core " + "systems.", }, }; @@ -37892,31 +37937,62 @@ static struct comp_testvec lz4_decomp_tv_template[] = { static struct comp_testvec lz4hc_comp_tv_template[] = { { - .inlen = 70, - .outlen = 45, - .input = "Join us now and share the software " - "Join us now and share the software ", - .output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" - "\x73\x20\x6e\x6f\x77\x20\x61\x6e" - "\x64\x20\x73\x68\x61\x72\x65\x20" - "\x74\x68\x65\x20\x73\x6f\x66\x74" - "\x77\x0d\x00\x0f\x23\x00\x0b\x50" - "\x77\x61\x72\x65\x20", + .inlen = 255, + .outlen = 216, + .input = "LZ4 is lossless compression algorithm, providing" + " compression speed at 400 MB/s per core, scalable " + "with multi-cores CPU. It features an extremely fast " + "decoder, with speed in multiple GB/s per core, " + "typically reaching RAM speed limits on multi-core " + "systems.", + .output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73" + "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73" + "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d" + "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00" + "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30" + "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f" + "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20" + "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00" + "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66" + "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78" + "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20" + "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00" + "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00" + "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72" + "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97" + "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20" + "\x73\x79\x73\x74\x65\x6d\x73\x2e", + }, }; static struct comp_testvec lz4hc_decomp_tv_template[] = { { - .inlen = 45, - .outlen = 70, - .input = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75" - "\x73\x20\x6e\x6f\x77\x20\x61\x6e" - "\x64\x20\x73\x68\x61\x72\x65\x20" - "\x74\x68\x65\x20\x73\x6f\x66\x74" - "\x77\x0d\x00\x0f\x23\x00\x0b\x50" - "\x77\x61\x72\x65\x20", - .output = "Join us now and share the software " - "Join us now and share the software ", + .inlen = 216, + .outlen = 255, + .input = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73" + "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73" + "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d" + "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00" + "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30" + "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f" + "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20" + "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00" + "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66" + "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78" + "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20" + "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00" + "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00" + "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72" + "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97" + "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20" + "\x73\x79\x73\x74\x65\x6d\x73\x2e", + .output = "LZ4 is lossless compression algorithm, providing" + " compression speed at 400 MB/s per core, scalable " + "with multi-cores CPU. It features an extremely fast " + "decoder, with speed in multiple GB/s per core, " + "typically reaching RAM speed limits on multi-core " + "systems.", }, }; diff --git a/crypto/xcbc.c b/crypto/xcbc.c index df90b332554c..9b0317b51766 100644 --- a/crypto/xcbc.c +++ b/crypto/xcbc.c @@ -58,15 +58,17 @@ struct xcbc_desc_ctx { u8 ctx[]; }; +#define XCBC_BLOCKSIZE 16 + static int crypto_xcbc_digest_setkey(struct crypto_shash *parent, const u8 *inkey, unsigned int keylen) { unsigned long alignmask = crypto_shash_alignmask(parent); struct xcbc_tfm_ctx *ctx = crypto_shash_ctx(parent); - int bs = crypto_shash_blocksize(parent); u8 *consts = PTR_ALIGN(&ctx->ctx[0], alignmask + 1); int err = 0; - u8 key1[bs]; + u8 key1[XCBC_BLOCKSIZE]; + int bs = sizeof(key1); if ((err = crypto_cipher_setkey(ctx->child, inkey, keylen))) return err; @@ -213,7 +215,7 @@ static int xcbc_create(struct crypto_template *tmpl, struct rtattr **tb) return PTR_ERR(alg); switch(alg->cra_blocksize) { - case 16: + case XCBC_BLOCKSIZE: break; default: goto out_put_alg; diff --git a/crypto/xts.c b/crypto/xts.c index e41ed483e8a5..410a2e299085 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -13,7 +13,8 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. */ -#include +#include +#include #include #include #include @@ -25,140 +26,320 @@ #include #include +#define XTS_BUFFER_SIZE 128u + struct priv { - struct crypto_cipher *child; + struct crypto_skcipher *child; struct crypto_cipher *tweak; }; -static int setkey(struct crypto_tfm *parent, const u8 *key, +struct xts_instance_ctx { + struct crypto_skcipher_spawn spawn; + char name[CRYPTO_MAX_ALG_NAME]; +}; + +struct rctx { + be128 buf[XTS_BUFFER_SIZE / sizeof(be128)]; + + be128 t; + + be128 *ext; + + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct scatterlist *src; + struct scatterlist *dst; + + unsigned int left; + + struct skcipher_request subreq; +}; + +static int setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct priv *ctx = crypto_tfm_ctx(parent); - struct crypto_cipher *child = ctx->tweak; + struct priv *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child; + struct crypto_cipher *tweak; int err; - err = xts_check_key(parent, key, keylen); + err = xts_verify_key(parent, key, keylen); if (err) return err; + keylen /= 2; + /* we need two cipher instances: one to compute the initial 'tweak' * by encrypting the IV (usually the 'plain' iv) and the other * one to encrypt and decrypt the data */ /* tweak cipher, uses Key2 i.e. the second half of *key */ - crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + tweak = ctx->tweak; + crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(child, key + keylen/2, keylen/2); + err = crypto_cipher_setkey(tweak, key + keylen, keylen); + crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(tweak) & + CRYPTO_TFM_RES_MASK); if (err) return err; - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - + /* data cipher, uses Key1 i.e. the first half of *key */ child = ctx->child; + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); - /* data cipher, uses Key1 i.e. the first half of *key */ - crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(child, key, keylen/2); - if (err) - return err; + return err; +} - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); +static int post_crypt(struct skcipher_request *req) +{ + struct rctx *rctx = skcipher_request_ctx(req); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; + const int bs = XTS_BLOCK_SIZE; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned offset; + int err; - return 0; -} + subreq = &rctx->subreq; + err = skcipher_walk_virt(&w, subreq, false); -struct sinfo { - be128 *t; - struct crypto_tfm *tfm; - void (*fn)(struct crypto_tfm *, u8 *, const u8 *); -}; + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wdst; -static inline void xts_round(struct sinfo *s, void *dst, const void *src) -{ - be128_xor(dst, s->t, src); /* PP <- T xor P */ - s->fn(s->tfm, dst, dst); /* CC <- E(Key1,PP) */ - be128_xor(dst, dst, s->t); /* C <- T xor CC */ + wdst = w.dst.virt.addr; + + do { + be128_xor(wdst, buf++, wdst); + wdst++; + } while ((avail -= bs) >= bs); + + err = skcipher_walk_done(&w, avail); + } + + rctx->left -= subreq->cryptlen; + + if (err || !rctx->left) + goto out; + + rctx->dst = rctx->dstbuf; + + scatterwalk_done(&w.out, 0, 1); + sg = w.out.sg; + offset = w.out.offset; + + if (rctx->dst != sg) { + rctx->dst[0] = *sg; + sg_unmark_end(rctx->dst); + scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2); + } + rctx->dst[0].length -= offset - sg->offset; + rctx->dst[0].offset = offset; + +out: + return err; } -static int crypt(struct blkcipher_desc *d, - struct blkcipher_walk *w, struct priv *ctx, - void (*tw)(struct crypto_tfm *, u8 *, const u8 *), - void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) +static int pre_crypt(struct skcipher_request *req) { - int err; - unsigned int avail; + struct rctx *rctx = skcipher_request_ctx(req); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; const int bs = XTS_BLOCK_SIZE; - struct sinfo s = { - .tfm = crypto_cipher_tfm(ctx->child), - .fn = fn - }; - u8 *wsrc; - u8 *wdst; - - err = blkcipher_walk_virt(d, w); - if (!w->nbytes) - return err; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned cryptlen; + unsigned offset; + bool more; + int err; - s.t = (be128 *)w->iv; - avail = w->nbytes; + subreq = &rctx->subreq; + cryptlen = subreq->cryptlen; - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + more = rctx->left > cryptlen; + if (!more) + cryptlen = rctx->left; - /* calculate first value of T */ - tw(crypto_cipher_tfm(ctx->tweak), w->iv, w->iv); + skcipher_request_set_crypt(subreq, rctx->src, rctx->dst, + cryptlen, NULL); - goto first; + err = skcipher_walk_virt(&w, subreq, false); - for (;;) { - do { - gf128mul_x_ble(s.t, s.t); + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wsrc; + be128 *wdst; -first: - xts_round(&s, wdst, wsrc); + wsrc = w.src.virt.addr; + wdst = w.dst.virt.addr; - wsrc += bs; - wdst += bs; + do { + *buf++ = rctx->t; + be128_xor(wdst++, &rctx->t, wsrc++); + gf128mul_x_ble(&rctx->t, &rctx->t); } while ((avail -= bs) >= bs); - err = blkcipher_walk_done(d, w, avail); - if (!w->nbytes) - break; + err = skcipher_walk_done(&w, avail); + } + + skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst, + cryptlen, NULL); - avail = w->nbytes; + if (err || !more) + goto out; - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + rctx->src = rctx->srcbuf; + + scatterwalk_done(&w.in, 0, 1); + sg = w.in.sg; + offset = w.in.offset; + + if (rctx->src != sg) { + rctx->src[0] = *sg; + sg_unmark_end(rctx->src); + scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2); } + rctx->src[0].length -= offset - sg->offset; + rctx->src[0].offset = offset; +out: return err; } -static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int init_crypt(struct skcipher_request *req, crypto_completion_t done) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + gfp_t gfp; + + subreq = &rctx->subreq; + skcipher_request_set_tfm(subreq, ctx->child); + skcipher_request_set_callback(subreq, req->base.flags, done, req); + + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + rctx->ext = NULL; + + subreq->cryptlen = XTS_BUFFER_SIZE; + if (req->cryptlen > XTS_BUFFER_SIZE) { + subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); + rctx->ext = kmalloc(subreq->cryptlen, gfp); + } + + rctx->src = req->src; + rctx->dst = req->dst; + rctx->left = req->cryptlen; - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt, - crypto_cipher_alg(ctx->child)->cia_encrypt); + /* calculate first value of T */ + crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv); + + return 0; } -static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void exit_crypt(struct skcipher_request *req) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->left = 0; - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt, - crypto_cipher_alg(ctx->child)->cia_decrypt); + if (rctx->ext) + kzfree(rctx->ext); +} + +static int do_encrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_encrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } + + exit_crypt(req); + return err; +} + +static void encrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_encrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int encrypt(struct skcipher_request *req) +{ + return do_encrypt(req, init_crypt(req, encrypt_done)); +} + +static int do_decrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_decrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } + + exit_crypt(req); + return err; +} + +static void decrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_decrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int decrypt(struct skcipher_request *req) +{ + return do_decrypt(req, init_crypt(req, decrypt_done)); } int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, @@ -233,112 +414,168 @@ int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, } EXPORT_SYMBOL_GPL(xts_crypt); -static int init_tfm(struct crypto_tfm *tfm) +static int init_tfm(struct crypto_skcipher *tfm) { - struct crypto_cipher *cipher; - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct priv *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - - cipher = crypto_spawn_cipher(spawn); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); - - if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) { - *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; - crypto_free_cipher(cipher); - return -EINVAL; - } + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct priv *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child; + struct crypto_cipher *tweak; - ctx->child = cipher; + child = crypto_spawn_skcipher(&ictx->spawn); + if (IS_ERR(child)) + return PTR_ERR(child); - cipher = crypto_spawn_cipher(spawn); - if (IS_ERR(cipher)) { - crypto_free_cipher(ctx->child); - return PTR_ERR(cipher); - } + ctx->child = child; - /* this check isn't really needed, leave it here just in case */ - if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) { - crypto_free_cipher(cipher); - crypto_free_cipher(ctx->child); - *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; - return -EINVAL; + tweak = crypto_alloc_cipher(ictx->name, 0, 0); + if (IS_ERR(tweak)) { + crypto_free_skcipher(ctx->child); + return PTR_ERR(tweak); } - ctx->tweak = cipher; + ctx->tweak = tweak; + + crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) + + sizeof(struct rctx)); return 0; } -static void exit_tfm(struct crypto_tfm *tfm) +static void exit_tfm(struct crypto_skcipher *tfm) { - struct priv *ctx = crypto_tfm_ctx(tfm); - crypto_free_cipher(ctx->child); + struct priv *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); crypto_free_cipher(ctx->tweak); } -static struct crypto_instance *alloc(struct rtattr **tb) +static void free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; - struct crypto_alg *alg; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct xts_instance_ctx *ctx; + struct skcipher_alg *alg; + const char *cipher_name; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + ctx = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + if (err == -ENOENT) { + err = -ENAMETOOLONG; + if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", + cipher_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + } + if (err) - return ERR_PTR(err); + goto err_free_inst; - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); + alg = crypto_skcipher_spawn_alg(&ctx->spawn); - inst = crypto_alloc_instance("xts", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = -EINVAL; + if (alg->base.cra_blocksize != XTS_BLOCK_SIZE) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; + if (crypto_skcipher_alg_ivsize(alg)) + goto err_drop_spawn; - if (alg->cra_alignmask < 7) - inst->alg.cra_alignmask = 7; - else - inst->alg.cra_alignmask = alg->cra_alignmask; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts", + &alg->base); + if (err) + goto err_drop_spawn; - inst->alg.cra_type = &crypto_blkcipher_type; + err = -EINVAL; + cipher_name = alg->base.cra_name; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = - 2 * alg->cra_cipher.cia_min_keysize; - inst->alg.cra_blkcipher.max_keysize = - 2 * alg->cra_cipher.cia_max_keysize; + /* Alas we screwed up the naming so we have to mangle the + * cipher name. + */ + if (!strncmp(cipher_name, "ecb(", 4)) { + unsigned len; - inst->alg.cra_ctxsize = sizeof(struct priv); + len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name)); + if (len < 2 || len >= sizeof(ctx->name)) + goto err_drop_spawn; - inst->alg.cra_init = init_tfm; - inst->alg.cra_exit = exit_tfm; + if (ctx->name[len - 1] != ')') + goto err_drop_spawn; - inst->alg.cra_blkcipher.setkey = setkey; - inst->alg.cra_blkcipher.encrypt = encrypt; - inst->alg.cra_blkcipher.decrypt = decrypt; + ctx->name[len - 1] = 0; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + } else + goto err_drop_spawn; -static void free_inst(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask | + (__alignof__(u64) - 1); + + inst->alg.ivsize = XTS_BLOCK_SIZE; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2; + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2; + + inst->alg.base.cra_ctxsize = sizeof(struct priv); + + inst->alg.init = init_tfm; + inst->alg.exit = exit_tfm; + + inst->alg.setkey = setkey; + inst->alg.encrypt = encrypt; + inst->alg.decrypt = decrypt; + + inst->free = free; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_skcipher(&ctx->spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_tmpl = { .name = "xts", - .alloc = alloc, - .free = free_inst, + .create = create, .module = THIS_MODULE, }; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 535e7828445a..a3c62f57672a 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -104,7 +104,7 @@ config ACPI_PROCFS_POWER Say N to delete power /proc/acpi/ directories that have moved to /sys/ config ACPI_REV_OVERRIDE_POSSIBLE - bool "Allow supported ACPI revision to be overriden" + bool "Allow supported ACPI revision to be overridden" depends on X86 default y help diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4e178f01b4bf..69feeeffa149 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -136,8 +136,7 @@ enum { BINDER_DEBUG_PRIORITY_CAP = 1U << 13, BINDER_DEBUG_SPINLOCKS = 1U << 14, }; -static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | - BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; +static uint32_t binder_debug_mask = 0; module_param_named(debug_mask, binder_debug_mask, uint, 0644); static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; @@ -159,6 +158,7 @@ static int binder_set_stop_on_user_error(const char *val, module_param_call(stop_on_user_error, binder_set_stop_on_user_error, param_get_int, &binder_stop_on_user_error, 0644); +#ifdef DEBUG #define binder_debug(mask, x...) \ do { \ if (binder_debug_mask & mask) \ @@ -172,7 +172,16 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, if (binder_stop_on_user_error) \ binder_stop_on_user_error = 2; \ } while (0) - +#else +static inline void binder_debug(uint32_t mask, const char *fmt, ...) +{ +} +static inline void binder_user_error(const char *fmt, ...) +{ + if (binder_stop_on_user_error) + binder_stop_on_user_error = 2; +} +#endif #define to_flat_binder_object(hdr) \ container_of(hdr, struct flat_binder_object, hdr) @@ -273,6 +282,14 @@ struct binder_device { struct binder_context context; }; +static struct kmem_cache *binder_node_pool; +static struct kmem_cache *binder_proc_pool; +static struct kmem_cache *binder_ref_death_pool; +static struct kmem_cache *binder_ref_pool; +static struct kmem_cache *binder_thread_pool; +static struct kmem_cache *binder_transaction_pool; +static struct kmem_cache *binder_work_pool; + /** * struct binder_work - work enqueued on a worklist * @entry: node enqueued on list @@ -1382,8 +1399,9 @@ static struct binder_node *binder_init_node_ilocked( static struct binder_node *binder_new_node(struct binder_proc *proc, struct flat_binder_object *fp) { - struct binder_node *node; - struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL); + struct binder_node *node, *new_node; + + new_node = kmem_cache_zalloc(binder_node_pool, GFP_KERNEL); if (!new_node) return NULL; @@ -1394,14 +1412,14 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, /* * The node was already added by another thread */ - kfree(new_node); + kmem_cache_free(binder_node_pool, new_node); return node; } static void binder_free_node(struct binder_node *node) { - kfree(node); + kmem_cache_free(binder_node_pool, node); binder_stats_deleted(BINDER_STAT_NODE); } @@ -1937,8 +1955,9 @@ static void binder_free_ref(struct binder_ref *ref) { if (ref->node) binder_free_node(ref->node); - kfree(ref->death); - kfree(ref); + if (ref->death) + kmem_cache_free(binder_ref_death_pool, ref->death); + kmem_cache_free(binder_ref_pool, ref); } /** @@ -2031,7 +2050,7 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, ref = binder_get_ref_for_node_olocked(proc, node, NULL); if (!ref) { binder_proc_unlock(proc); - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); + new_ref = kmem_cache_zalloc(binder_ref_pool, GFP_KERNEL); if (!new_ref) return -ENOMEM; binder_proc_lock(proc); @@ -2057,7 +2076,7 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, * Another thread created the ref first so * free the one we allocated */ - kfree(new_ref); + kmem_cache_free(binder_ref_pool, new_ref); return ret; } @@ -2200,7 +2219,11 @@ static void binder_free_transaction(struct binder_transaction *t) */ spin_unlock(&t->lock); } - kfree(t); + /* + * If the transaction has no target_proc, then + * t->buffer->transaction has already been cleared. + */ + kmem_cache_free(binder_transaction_pool, t); binder_stats_deleted(BINDER_STAT_TRANSACTION); } @@ -3236,7 +3259,7 @@ static void binder_transaction(struct binder_proc *proc, e->to_proc = target_proc->pid; /* TODO: reuse incoming transaction for reply */ - t = kzalloc(sizeof(*t), GFP_KERNEL); + t = kmem_cache_zalloc(binder_transaction_pool, GFP_KERNEL); if (t == NULL) { return_error = BR_FAILED_REPLY; return_error_param = -ENOMEM; @@ -3246,7 +3269,7 @@ static void binder_transaction(struct binder_proc *proc, binder_stats_created(BINDER_STAT_TRANSACTION); spin_lock_init(&t->lock); - tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); + tcomplete = kmem_cache_zalloc(binder_work_pool, GFP_KERNEL); if (tcomplete == NULL) { return_error = BR_FAILED_REPLY; return_error_param = -ENOMEM; @@ -3603,8 +3626,8 @@ static void binder_transaction(struct binder_proc *proc, BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); binder_enqueue_thread_work_ilocked(target_thread, &t->work); + wake_up_interruptible(&target_thread->wait); binder_inner_proc_unlock(target_proc); - wake_up_interruptible_sync(&target_thread->wait); binder_restore_priority(current, in_reply_to->saved_priority); binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { @@ -3670,10 +3693,10 @@ static void binder_transaction(struct binder_proc *proc, if (secctx) security_release_secctx(secctx, secctx_sz); err_get_secctx_failed: - kfree(tcomplete); + kmem_cache_free(binder_work_pool, tcomplete); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); err_alloc_tcomplete_failed: - kfree(t); + kmem_cache_free(binder_transaction_pool, t); binder_stats_deleted(BINDER_STAT_TRANSACTION); err_alloc_t_failed: err_bad_call_stack: @@ -4032,7 +4055,7 @@ static int binder_thread_write(struct binder_proc *proc, * Allocate memory for death notification * before taking lock */ - death = kzalloc(sizeof(*death), GFP_KERNEL); + death = kmem_cache_zalloc(binder_ref_death_pool, GFP_KERNEL); if (death == NULL) { WARN_ON(thread->return_error.cmd != BR_OK); @@ -4057,7 +4080,8 @@ static int binder_thread_write(struct binder_proc *proc, "BC_CLEAR_DEATH_NOTIFICATION", target); binder_proc_unlock(proc); - kfree(death); + if (death) + kmem_cache_free(binder_ref_death_pool, death); break; } @@ -4078,7 +4102,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid); binder_node_unlock(ref->node); binder_proc_unlock(proc); - kfree(death); + kmem_cache_free(binder_ref_death_pool, death); break; } binder_stats_created(BINDER_STAT_DEATH); @@ -4261,7 +4285,7 @@ static int binder_wait_for_work(struct binder_thread *thread, binder_inner_proc_lock(proc); list_del_init(&thread->waiting_thread_node); if (signal_pending(current)) { - ret = -ERESTARTSYS; + ret = -EINTR; break; } } @@ -4386,7 +4410,7 @@ static int binder_thread_read(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE, "%d:%d BR_TRANSACTION_COMPLETE\n", proc->pid, thread->pid); - kfree(w); + kmem_cache_free(binder_work_pool, w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; case BINDER_WORK_NODE: { @@ -4499,7 +4523,7 @@ static int binder_thread_read(struct binder_proc *proc, (u64)cookie); if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) { binder_inner_proc_unlock(proc); - kfree(death); + kmem_cache_free(binder_ref_death_pool, death); binder_stats_deleted(BINDER_STAT_DEATH); } else { binder_enqueue_work_ilocked( @@ -4673,7 +4697,7 @@ static void binder_release_work(struct binder_proc *proc, case BINDER_WORK_TRANSACTION_COMPLETE: { binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered TRANSACTION_COMPLETE\n"); - kfree(w); + kmem_cache_free(binder_work_pool, w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; case BINDER_WORK_DEAD_BINDER_AND_CLEAR: @@ -4684,7 +4708,7 @@ static void binder_release_work(struct binder_proc *proc, binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "undelivered death notification, %016llx\n", (u64)death->cookie); - kfree(death); + kmem_cache_free(binder_ref_death_pool, death); binder_stats_deleted(BINDER_STAT_DEATH); } break; case BINDER_WORK_NODE: @@ -4747,14 +4771,14 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc) thread = binder_get_thread_ilocked(proc, NULL); binder_inner_proc_unlock(proc); if (!thread) { - new_thread = kzalloc(sizeof(*thread), GFP_KERNEL); + new_thread = kmem_cache_zalloc(binder_thread_pool, GFP_KERNEL); if (new_thread == NULL) return NULL; binder_inner_proc_lock(proc); thread = binder_get_thread_ilocked(proc, new_thread); binder_inner_proc_unlock(proc); if (thread != new_thread) - kfree(new_thread); + kmem_cache_free(binder_thread_pool, new_thread); } return thread; } @@ -4767,7 +4791,7 @@ static void binder_free_proc(struct binder_proc *proc) put_task_struct(proc->tsk); put_cred(proc->cred); binder_stats_deleted(BINDER_STAT_PROC); - kfree(proc); + kmem_cache_free(binder_proc_pool, proc); } static void binder_free_thread(struct binder_thread *thread) @@ -4776,7 +4800,7 @@ static void binder_free_thread(struct binder_thread *thread) binder_stats_deleted(BINDER_STAT_THREAD); binder_proc_dec_tmpref(thread->proc); put_task_struct(thread->task); - kfree(thread); + kmem_cache_free(binder_thread_pool, thread); } static int binder_thread_release(struct binder_proc *proc, @@ -5189,7 +5213,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) thread->looper_need_return = false; wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) - pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); + pr_debug("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); err_unlocked: trace_binder_ioctl_done(ret); return ret; @@ -5281,7 +5305,7 @@ static int binder_open(struct inode *nodp, struct file *filp) binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, current->group_leader->pid, current->pid); - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kmem_cache_zalloc(binder_proc_pool, GFP_KERNEL); if (proc == NULL) return -ENOMEM; spin_lock_init(&proc->inner_lock); @@ -6153,6 +6177,73 @@ static int __init init_binder_device(const char *name) return ret; } +static int __init binder_create_pools(void) +{ + int ret; + + ret = binder_buffer_pool_create(); + if (ret) + return ret; + + binder_node_pool = KMEM_CACHE(binder_node, SLAB_HWCACHE_ALIGN); + if (!binder_node_pool) + goto err_node_pool; + + binder_proc_pool = KMEM_CACHE(binder_proc, SLAB_HWCACHE_ALIGN); + if (!binder_proc_pool) + goto err_proc_pool; + + binder_ref_death_pool = KMEM_CACHE(binder_ref_death, SLAB_HWCACHE_ALIGN); + if (!binder_ref_death_pool) + goto err_ref_death_pool; + + binder_ref_pool = KMEM_CACHE(binder_ref, SLAB_HWCACHE_ALIGN); + if (!binder_ref_pool) + goto err_ref_pool; + + binder_thread_pool = KMEM_CACHE(binder_thread, SLAB_HWCACHE_ALIGN); + if (!binder_thread_pool) + goto err_thread_pool; + + binder_transaction_pool = KMEM_CACHE(binder_transaction, SLAB_HWCACHE_ALIGN); + if (!binder_transaction_pool) + goto err_transaction_pool; + + binder_work_pool = KMEM_CACHE(binder_work, SLAB_HWCACHE_ALIGN); + if (!binder_work_pool) + goto err_work_pool; + + return 0; + +err_work_pool: + kmem_cache_destroy(binder_transaction_pool); +err_transaction_pool: + kmem_cache_destroy(binder_thread_pool); +err_thread_pool: + kmem_cache_destroy(binder_ref_pool); +err_ref_pool: + kmem_cache_destroy(binder_ref_death_pool); +err_ref_death_pool: + kmem_cache_destroy(binder_proc_pool); +err_proc_pool: + kmem_cache_destroy(binder_node_pool); +err_node_pool: + binder_buffer_pool_destroy(); + return -ENOMEM; +} + +static void __init binder_destroy_pools(void) +{ + binder_buffer_pool_destroy(); + kmem_cache_destroy(binder_node_pool); + kmem_cache_destroy(binder_proc_pool); + kmem_cache_destroy(binder_ref_death_pool); + kmem_cache_destroy(binder_ref_pool); + kmem_cache_destroy(binder_thread_pool); + kmem_cache_destroy(binder_transaction_pool); + kmem_cache_destroy(binder_work_pool); +} + static int __init binder_init(void) { int ret; @@ -6160,10 +6251,14 @@ static int __init binder_init(void) struct binder_device *device; struct hlist_node *tmp; - ret = binder_alloc_shrinker_init(); + ret = binder_create_pools(); if (ret) return ret; + ret = binder_alloc_shrinker_init(); + if (ret) + goto err_alloc_shrinker_failed; + atomic_set(&binder_transaction_log.cur, ~0U); atomic_set(&binder_transaction_log_failed.cur, ~0U); @@ -6232,6 +6327,9 @@ static int __init binder_init(void) err_alloc_device_names_failed: debugfs_remove_recursive(binder_debugfs_dir_entry_root); +err_alloc_shrinker_failed: + binder_destroy_pools(); + return ret; } diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 5addcd56afb4..7269bc88b448 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -48,11 +48,33 @@ static uint32_t binder_alloc_debug_mask; module_param_named(debug_mask, binder_alloc_debug_mask, uint, 0644); +#ifdef DEBUG #define binder_alloc_debug(mask, x...) \ do { \ if (binder_alloc_debug_mask & mask) \ pr_info(x); \ } while (0) +#else +static inline void binder_alloc_debug(uint32_t mask, const char *fmt, ...) +{ +} +#endif + +static struct kmem_cache *binder_buffer_pool; + +int binder_buffer_pool_create(void) +{ + binder_buffer_pool = KMEM_CACHE(binder_buffer, SLAB_HWCACHE_ALIGN); + if (!binder_buffer_pool) + return -ENOMEM; + + return 0; +} + +void binder_buffer_pool_destroy(void) +{ + kmem_cache_destroy(binder_buffer_pool); +} static struct binder_buffer *binder_buffer_next(struct binder_buffer *buffer) { @@ -478,7 +500,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked( if (buffer_size != size) { struct binder_buffer *new_buffer; - new_buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + new_buffer = kmem_cache_zalloc(binder_buffer_pool, GFP_KERNEL); if (!new_buffer) { pr_err("%s: %d failed to alloc new buffer struct\n", __func__, alloc->pid); @@ -613,7 +635,7 @@ static void binder_delete_free_buffer(struct binder_alloc *alloc, buffer_start_page(buffer) + PAGE_SIZE); } list_del(&buffer->entry); - kfree(buffer); + kmem_cache_free(binder_buffer_pool, buffer); } static void binder_free_buf_locked(struct binder_alloc *alloc, @@ -741,7 +763,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, } alloc->buffer_size = vma->vm_end - vma->vm_start; - buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + buffer = kmem_cache_zalloc(binder_buffer_pool, GFP_KERNEL); if (!buffer) { ret = -ENOMEM; failure_string = "alloc buffer struct"; @@ -806,7 +828,7 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) list_del(&buffer->entry); WARN_ON_ONCE(!list_empty(&alloc->buffers)); - kfree(buffer); + kmem_cache_free(binder_buffer_pool, buffer); } page_count = 0; diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index fdb220eb55d1..424cc1a61aa7 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -148,6 +148,8 @@ extern void binder_alloc_print_allocated(struct seq_file *m, struct binder_alloc *alloc); void binder_alloc_print_pages(struct seq_file *m, struct binder_alloc *alloc); +extern int binder_buffer_pool_create(void); +extern void binder_buffer_pool_destroy(void); /** * binder_alloc_get_free_async_space() - get free space available for async diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 5998c53280f5..a1d4db450cf1 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp/ obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o +obj-$(CONFIG_BOEFFLA_WL_BLOCKER) += boeffla_wl_blocker.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/boeffla_wl_blocker.c b/drivers/base/power/boeffla_wl_blocker.c new file mode 100644 index 000000000000..e9c93ce97ce9 --- /dev/null +++ b/drivers/base/power/boeffla_wl_blocker.c @@ -0,0 +1,236 @@ +/* + * Author: andip71, 01.09.2017 + * + * Version 1.1.0 + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* + * Change log: + * + * 1.1.0 (01.09.2017) + * - By default, the following wakelocks are blocked in an own list + * qcom_rx_wakelock, wlan, wlan_wow_wl, wlan_extscan_wl, NETLINK + * + * 1.0.1 (29.08.2017) + * - Add killing wakelock when currently active + * + * 1.0.0 (28.08.2017) + * - Initial version + * + */ + +#include +#include +#include +#include +#include +#include +#include "boeffla_wl_blocker.h" + + +/*****************************************/ +// Variables +/*****************************************/ + +char list_wl[LENGTH_LIST_WL] = {0}; +char list_wl_default[LENGTH_LIST_WL_DEFAULT] = {0}; + +extern char list_wl_search[LENGTH_LIST_WL_SEARCH]; +extern bool wl_blocker_active; +extern bool wl_blocker_debug; + + +/*****************************************/ +// internal functions +/*****************************************/ + +static void build_search_string(char *list1, char *list2) +{ + // store wakelock list and search string (with semicolons added at start and end) + sprintf(list_wl_search, ";%s;%s;", list1, list2); + + // set flag if wakelock blocker should be active (for performance reasons) + if (strlen(list_wl_search) > 5) + wl_blocker_active = true; + else + wl_blocker_active = false; +} + + +/*****************************************/ +// sysfs interface functions +/*****************************************/ + +// show list of user configured wakelocks +static ssize_t wakelock_blocker_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + // return list of wakelocks to be blocked + return sprintf(buf, "%s\n", list_wl); +} + + +// store list of user configured wakelocks +static ssize_t wakelock_blocker_store(struct device * dev, struct device_attribute *attr, + const char * buf, size_t n) +{ + int len = n; + + // check if string is too long to be stored + if (len > LENGTH_LIST_WL) + return -EINVAL; + + // store user configured wakelock list and rebuild search string + sscanf(buf, "%s", list_wl); + build_search_string(list_wl_default, list_wl); + + return n; +} + + +// show list of default, predefined wakelocks +static ssize_t wakelock_blocker_default_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + // return list of wakelocks to be blocked + return sprintf(buf, "%s\n", list_wl_default); +} + + +// store list of default, predefined wakelocks +static ssize_t wakelock_blocker_default_store(struct device * dev, struct device_attribute *attr, + const char * buf, size_t n) +{ + int len = n; + + // check if string is too long to be stored + if (len > LENGTH_LIST_WL_DEFAULT) + return -EINVAL; + + // store default, predefined wakelock list and rebuild search string + sscanf(buf, "%s", list_wl_default); + build_search_string(list_wl_default, list_wl); + + return n; +} + + +// show debug information of driver internals +static ssize_t debug_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + // return current debug status + return sprintf(buf, "Debug status: %d\n\nUser list: %s\nDefault list: %s\nSearch list: %s\nActive: %d\n", + wl_blocker_debug, list_wl, list_wl_default, list_wl_search, wl_blocker_active); +} + + +// store debug mode on/off (1/0) +static ssize_t debug_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int ret = -EINVAL; + unsigned int val; + + // check data and store if valid + ret = sscanf(buf, "%d", &val); + + if (ret != 1) + return -EINVAL; + + if (val == 1) + wl_blocker_debug = true; + else + wl_blocker_debug = false; + + return count; +} + + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + // return version information + return sprintf(buf, "%s\n", BOEFFLA_WL_BLOCKER_VERSION); +} + + + +/*****************************************/ +// Initialize sysfs objects +/*****************************************/ + +// define objects +static DEVICE_ATTR(wakelock_blocker, 0644, wakelock_blocker_show, wakelock_blocker_store); +static DEVICE_ATTR(wakelock_blocker_default, 0644, wakelock_blocker_default_show, wakelock_blocker_default_store); +static DEVICE_ATTR(debug, 0664, debug_show, debug_store); +static DEVICE_ATTR(version, 0664, version_show, NULL); + +// define attributes +static struct attribute *boeffla_wl_blocker_attributes[] = { + &dev_attr_wakelock_blocker.attr, + &dev_attr_wakelock_blocker_default.attr, + &dev_attr_debug.attr, + &dev_attr_version.attr, + NULL +}; + +// define attribute group +static struct attribute_group boeffla_wl_blocker_control_group = { + .attrs = boeffla_wl_blocker_attributes, +}; + +// define control device +static struct miscdevice boeffla_wl_blocker_control_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "boeffla_wakelock_blocker", +}; + + +/*****************************************/ +// Driver init and exit functions +/*****************************************/ + +static int boeffla_wl_blocker_init(void) +{ + // register boeffla wakelock blocker control device + misc_register(&boeffla_wl_blocker_control_device); + if (sysfs_create_group(&boeffla_wl_blocker_control_device.this_device->kobj, + &boeffla_wl_blocker_control_group) < 0) { + printk("Boeffla WL blocker: failed to create sys fs object.\n"); + return 0; + } + + // initialize default list + sprintf(list_wl_default, "%s", LIST_WL_DEFAULT); + build_search_string(list_wl_default, list_wl); + + // Print debug info + printk("Boeffla WL blocker: driver version %s started\n", BOEFFLA_WL_BLOCKER_VERSION); + + return 0; +} + + +static void boeffla_wl_blocker_exit(void) +{ + // remove boeffla wakelock blocker control device + sysfs_remove_group(&boeffla_wl_blocker_control_device.this_device->kobj, + &boeffla_wl_blocker_control_group); + + // Print debug info + printk("Boeffla WL blocker: driver stopped\n"); +} + + +/* define driver entry points */ +module_init(boeffla_wl_blocker_init); +module_exit(boeffla_wl_blocker_exit); diff --git a/drivers/base/power/boeffla_wl_blocker.h b/drivers/base/power/boeffla_wl_blocker.h new file mode 100644 index 000000000000..1817f8c41623 --- /dev/null +++ b/drivers/base/power/boeffla_wl_blocker.h @@ -0,0 +1,23 @@ +/* + * Author: andip71, 01.09.2017 + * + * Version 1.1.0 + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define BOEFFLA_WL_BLOCKER_VERSION "1.1.0" + +#define LIST_WL_DEFAULT "" + +#define LENGTH_LIST_WL 255 +#define LENGTH_LIST_WL_DEFAULT 100 +#define LENGTH_LIST_WL_SEARCH LENGTH_LIST_WL + LENGTH_LIST_WL_DEFAULT + 5 diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 23770b8b0aed..7a3f307afbdd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -370,7 +370,7 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) usecs = usecs64; if (usecs == 0) usecs = 1; - pr_info("PM: %s%s%s of devices complete after %ld.%03ld msecs\n", + pr_debug("PM: %s%s%s of devices complete after %ld.%03ld msecs\n", info ?: "", info ? " " : "", pm_verb(state.event), usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC); } @@ -670,6 +670,10 @@ void dpm_resume_early(pm_message_t state) struct device *dev; ktime_t starttime = ktime_get(); +#ifdef CONFIG_BOEFFLA_WL_BLOCKER + pm_print_active_wakeup_sources(); +#endif + trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); mutex_lock(&dpm_list_mtx); pm_transition = state; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 3a1411fa1bb7..51cc4671a536 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -23,6 +23,18 @@ #include "power.h" + +#ifdef CONFIG_BOEFFLA_WL_BLOCKER +#include "boeffla_wl_blocker.h" + +char list_wl_search[LENGTH_LIST_WL_SEARCH] = {0}; +bool wl_blocker_active = false; +bool wl_blocker_debug = false; + +static void wakeup_source_deactivate(struct wakeup_source *ws); +#endif + + /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. @@ -555,19 +567,77 @@ static void wakeup_source_activate(struct wakeup_source *ws) trace_wakeup_source_activate(ws->name, cec); } +#ifdef CONFIG_BOEFFLA_WL_BLOCKER +// AP: Function to check if a wakelock is on the wakelock blocker list +static bool check_for_block(struct wakeup_source *ws) +{ + char wakelock_name[52] = {0}; + int length; + + // if debug mode on, print every wakelock requested + if (wl_blocker_debug) + printk("Boeffla WL blocker: %s requested\n", ws->name); + + // if there is no list of wakelocks to be blocked, exit without futher checking + if (!wl_blocker_active) + return false; + + // only if ws structure is valid + if (ws) + { + // wake lock names handled have maximum length=50 and minimum=1 + length = strlen(ws->name); + if ((length > 50) || (length < 1)) + return false; + + // check if wakelock is in wake lock list to be blocked + sprintf(wakelock_name, ";%s;", ws->name); + + if(strstr(list_wl_search, wakelock_name) == NULL) + return false; + + // wake lock is in list, print it if debug mode on + if (wl_blocker_debug) + printk("Boeffla WL blocker: %s blocked\n", ws->name); + + // if it is currently active, deactivate it immediately + log in debug mode + if (ws->active) + { + wakeup_source_deactivate(ws); + + if (wl_blocker_debug) + printk("Boeffla WL blocker: %s killed\n", ws->name); + } + + // finally block it + return true; + } + + // there was no valid ws structure, do not block by default + return false; +} +#endif + /** * wakeup_source_report_event - Report wakeup event using the given source. * @ws: Wakeup source to report the event for. */ static void wakeup_source_report_event(struct wakeup_source *ws) { - ws->event_count++; - /* This is racy, but the counter is approximate anyway. */ - if (events_check_enabled) - ws->wakeup_count++; - - if (!ws->active) - wakeup_source_activate(ws); +#ifdef CONFIG_BOEFFLA_WL_BLOCKER + if (!check_for_block(ws)) // AP: check if wakelock is on wakelock blocker list + { +#endif + ws->event_count++; + /* This is racy, but the counter is approximate anyway. */ + if (events_check_enabled) + ws->wakeup_count++; + + if (!ws->active) + wakeup_source_activate(ws); +#ifdef CONFIG_BOEFFLA_WL_BLOCKER + } +#endif } /** @@ -593,6 +663,7 @@ void __pm_stay_awake(struct wakeup_source *ws) } EXPORT_SYMBOL_GPL(__pm_stay_awake); + /** * pm_stay_awake - Notify the PM core that a wakeup event is being processed. * @dev: Device the wakeup event is related to. @@ -856,7 +927,10 @@ void pm_print_active_wakeup_sources(void) list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { pr_info("active wakeup source: %s\n", ws->name); - active = 1; +#ifdef CONFIG_BOEFFLA_WL_BLOCKER + if (!check_for_block(ws)) // AP: check if wakelock is on wakelock blocker list +#endif + active = 1; } else if (!active && (!last_activity_ws || ktime_to_ns(ws->last_time) > diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index adbfd3e2a60f..6a4cb87dc815 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -46,7 +46,7 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) char *cipher; char *mode; char *cmsp = cms; /* c-m string pointer */ - struct crypto_skcipher *tfm; + struct crypto_sync_skcipher *tfm; /* encryption breaks for non sector aligned offsets */ @@ -82,13 +82,13 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) *cmsp++ = ')'; *cmsp = 0; - tfm = crypto_alloc_skcipher(cms, 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_sync_skcipher(cms, 0, 0); if (IS_ERR(tfm)) return PTR_ERR(tfm); - err = crypto_skcipher_setkey(tfm, info->lo_encrypt_key, - info->lo_encrypt_key_size); - + err = crypto_sync_skcipher_setkey(tfm, info->lo_encrypt_key, + info->lo_encrypt_key_size); + if (err != 0) goto out_free_tfm; @@ -96,7 +96,7 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) return 0; out_free_tfm: - crypto_free_skcipher(tfm); + crypto_free_sync_skcipher(tfm); out: return err; @@ -111,8 +111,8 @@ cryptoloop_transfer(struct loop_device *lo, int cmd, struct page *loop_page, unsigned loop_off, int size, sector_t IV) { - struct crypto_skcipher *tfm = lo->key_data; - SKCIPHER_REQUEST_ON_STACK(req, tfm); + struct crypto_sync_skcipher *tfm = lo->key_data; + SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); struct scatterlist sg_out; struct scatterlist sg_in; @@ -121,7 +121,7 @@ cryptoloop_transfer(struct loop_device *lo, int cmd, unsigned in_offs, out_offs; int err; - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_sync_tfm(req, tfm); skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -177,9 +177,9 @@ cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg) static int cryptoloop_release(struct loop_device *lo) { - struct crypto_skcipher *tfm = lo->key_data; + struct crypto_sync_skcipher *tfm = lo->key_data; if (tfm != NULL) { - crypto_free_skcipher(tfm); + crypto_free_sync_skcipher(tfm); lo->key_data = NULL; return 0; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index dece26f119d4..a804a4107fbc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -409,7 +409,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN); if (!new_pages) { new_pages = __vmalloc(bytes, - GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO, + GFP_NOIO | __GFP_ZERO, PAGE_KERNEL); if (!new_pages) return NULL; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0c15ae2067b0..93e2b702c8e2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1936,6 +1936,8 @@ static int loop_add(struct loop_device **l, int i) */ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, lo->lo_queue); + err = -ENOMEM; disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index c084a7f9763d..cf8ecacc1a3f 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -20,6 +20,7 @@ static const char * const backends[] = { "lzo", + "lzo-rle", #if IS_ENABLED(CONFIG_CRYPTO_LZ4) "lz4", #endif diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index 38ffb281df97..004a3ce8ba72 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -115,9 +115,9 @@ static int agp_find_max(void) long memory, index, result; #if PAGE_SHIFT < 20 - memory = totalram_pages >> (20 - PAGE_SHIFT); + memory = totalram_pages() >> (20 - PAGE_SHIFT); #else - memory = totalram_pages << (PAGE_SHIFT - 20); + memory = totalram_pages() << (PAGE_SHIFT - 20); #endif index = 1; diff --git a/drivers/char/diag/diag_debugfs.c b/drivers/char/diag/diag_debugfs.c index a7f29e63e214..e9169d224123 100644 --- a/drivers/char/diag/diag_debugfs.c +++ b/drivers/char/diag/diag_debugfs.c @@ -824,7 +824,9 @@ static ssize_t diag_dbgfs_write_debug(struct file *fp, const char __user *buf, if (value < 0) return -EINVAL; +#ifdef DIAG_DEBUG diag_debug_mask = (uint16_t)value; +#endif return count; } diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h index e1c2193f2ed3..129bf59a2728 100644 --- a/drivers/char/tpm/tpm_tis_core.h +++ b/drivers/char/tpm/tpm_tis_core.h @@ -62,10 +62,10 @@ enum tis_defaults { /* Some timeout values are needed before it is known whether the chip is * TPM 1.0 or TPM 2.0. */ -#define TIS_TIMEOUT_A_MAX max(TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_A) -#define TIS_TIMEOUT_B_MAX max(TIS_LONG_TIMEOUT, TPM2_TIMEOUT_B) -#define TIS_TIMEOUT_C_MAX max(TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_C) -#define TIS_TIMEOUT_D_MAX max(TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_D) +#define TIS_TIMEOUT_A_MAX max_t(int, TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_A) +#define TIS_TIMEOUT_B_MAX max_t(int, TIS_LONG_TIMEOUT, TPM2_TIMEOUT_B) +#define TIS_TIMEOUT_C_MAX max_t(int, TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_C) +#define TIS_TIMEOUT_D_MAX max_t(int, TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_D) #define TPM_ACCESS(l) (0x0000 | ((l) << 12)) #define TPM_INT_ENABLE(l) (0x0008 | ((l) << 12)) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6921f89d2a49..e04a67bb7c1f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2632,10 +2632,6 @@ DEFINE_SIMPLE_ATTRIBUTE(clock_enable_fops, clock_debug_enable_get, do { \ if (m) \ seq_printf(m, fmt, ##__VA_ARGS__); \ - else if (c) \ - pr_cont(fmt, ##__VA_ARGS__); \ - else \ - pr_info(fmt, ##__VA_ARGS__); \ } while (0) /* diff --git a/drivers/clk/qcom/clk-cpu-osm.c b/drivers/clk/qcom/clk-cpu-osm.c index f53d1eee6c6e..de8c6efe28fc 100644 --- a/drivers/clk/qcom/clk-cpu-osm.c +++ b/drivers/clk/qcom/clk-cpu-osm.c @@ -97,6 +97,8 @@ struct clk_osm { u32 prev_cycle_counter; u32 max_core_count; u32 mx_turbo_freq; + ktime_t last_update; + struct mutex update_lock; }; static bool is_sdm845v1; @@ -209,7 +211,7 @@ static int clk_cpu_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_osm *c = to_clk_osm(hw); struct clk_hw *p_hw = clk_hw_get_parent(hw); struct clk_osm *parent = to_clk_osm(p_hw); - int core_num, index = 0; + int core_num, index; if (!c || !parent) return -EINVAL; @@ -232,38 +234,6 @@ static int clk_cpu_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } -static int clk_pwrcl_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_hw *p_hw = clk_hw_get_parent(hw); - struct clk_osm *parent = to_clk_osm(p_hw); - int ret, index = 0, count = 40; - u32 curr_lval; - - ret = clk_cpu_set_rate(hw, rate, parent_rate); - if (ret) - return ret; - - index = clk_osm_search_table(parent->osm_table, - parent->num_entries, rate); - if (index < 0) - return -EINVAL; - - /* - * Poll the CURRENT_FREQUENCY value of the PSTATE_STATUS register to - * check if the L_VAL has been updated. - */ - while (count-- > 0) { - curr_lval = CURRENT_LVAL(clk_osm_read_reg(parent, - PSTATE_STATUS)); - if (curr_lval <= parent->osm_table[index].lval) - return 0; - udelay(50); - } - pr_err("cannot set %s to %lu\n", clk_hw_get_name(hw), rate); - return -ETIMEDOUT; -} - static unsigned long clk_cpu_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { @@ -341,7 +311,7 @@ static int l3_clk_set_rate(struct clk_hw *hw, unsigned long rate, cpuclk->rate = rate; return 0; } - udelay(50); + usleep_range(50, 100); } pr_err("cannot set %s to %lu\n", clk_hw_get_name(hw), rate); return -ETIMEDOUT; @@ -378,7 +348,7 @@ static const struct clk_ops clk_ops_l3_osm = { }; static const struct clk_ops clk_ops_pwrcl_core = { - .set_rate = clk_pwrcl_set_rate, + .set_rate = clk_cpu_set_rate, .round_rate = clk_cpu_round_rate, .recalc_rate = clk_cpu_recalc_rate, .debug_init = clk_debug_measure_add, @@ -404,7 +374,7 @@ static struct clk_init_data osm_clks_init[] = { .name = "l3_clk", .parent_names = (const char *[]){ "bi_tcxo_ao" }, .num_parents = 1, - .flags = CLK_CHILD_NO_RATE_PROP, + .flags = CLK_CHILD_NO_RATE_PROP | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_l3_osm, .vdd_class = &vdd_l3_mx_ao, }, @@ -412,6 +382,7 @@ static struct clk_init_data osm_clks_init[] = { .name = "pwrcl_clk", .parent_names = (const char *[]){ "bi_tcxo_ao" }, .num_parents = 1, + .flags = CLK_GET_RATE_NOCACHE, .ops = &clk_ops_cpu_osm, .vdd_class = &vdd_pwrcl_mx_ao, }, @@ -419,6 +390,7 @@ static struct clk_init_data osm_clks_init[] = { .name = "perfcl_clk", .parent_names = (const char *[]){ "bi_tcxo_ao" }, .num_parents = 1, + .flags = CLK_GET_RATE_NOCACHE, .ops = &clk_ops_cpu_osm, }, }; @@ -429,10 +401,10 @@ static struct clk_osm l3_clk = { .hw.init = &osm_clks_init[0], }; -static DEFINE_CLK_VOTER(l3_cluster0_vote_clk, l3_clk, 0); -static DEFINE_CLK_VOTER(l3_cluster1_vote_clk, l3_clk, 0); -static DEFINE_CLK_VOTER(l3_misc_vote_clk, l3_clk, 0); -static DEFINE_CLK_VOTER(l3_gpu_vote_clk, l3_clk, 0); +static DEFINE_CLK_VOTER_NOCACHE(l3_cluster0_vote_clk, l3_clk, 0); +static DEFINE_CLK_VOTER_NOCACHE(l3_cluster1_vote_clk, l3_clk, 0); +static DEFINE_CLK_VOTER_NOCACHE(l3_misc_vote_clk, l3_clk, 0); +static DEFINE_CLK_VOTER_NOCACHE(l3_gpu_vote_clk, l3_clk, 0); static struct clk_osm pwrcl_clk = { .cluster_num = 1, @@ -448,7 +420,7 @@ static struct clk_osm cpu0_pwrcl_clk = { .name = "cpu0_pwrcl_clk", .parent_names = (const char *[]){ "pwrcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_pwrcl_core, }, }; @@ -461,7 +433,7 @@ static struct clk_osm cpu1_pwrcl_clk = { .name = "cpu1_pwrcl_clk", .parent_names = (const char *[]){ "pwrcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_pwrcl_core, }, }; @@ -474,7 +446,7 @@ static struct clk_osm cpu2_pwrcl_clk = { .name = "cpu2_pwrcl_clk", .parent_names = (const char *[]){ "pwrcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_pwrcl_core, }, }; @@ -487,7 +459,7 @@ static struct clk_osm cpu3_pwrcl_clk = { .name = "cpu3_pwrcl_clk", .parent_names = (const char *[]){ "pwrcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_pwrcl_core, }, }; @@ -500,7 +472,7 @@ static struct clk_osm cpu4_pwrcl_clk = { .name = "cpu4_pwrcl_clk", .parent_names = (const char *[]){ "pwrcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_pwrcl_core, }, }; @@ -513,7 +485,7 @@ static struct clk_osm cpu5_pwrcl_clk = { .name = "cpu5_pwrcl_clk", .parent_names = (const char *[]){ "pwrcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_pwrcl_core, }, }; @@ -533,7 +505,7 @@ static struct clk_osm cpu4_perfcl_clk = { .name = "cpu4_perfcl_clk", .parent_names = (const char *[]){ "perfcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_perfcl_core, }, }; @@ -546,7 +518,7 @@ static struct clk_osm cpu5_perfcl_clk = { .name = "cpu5_perfcl_clk", .parent_names = (const char *[]){ "perfcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_perfcl_core, }, }; @@ -559,7 +531,7 @@ static struct clk_osm cpu6_perfcl_clk = { .name = "cpu6_perfcl_clk", .parent_names = (const char *[]){ "perfcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_perfcl_core, }, }; @@ -572,7 +544,7 @@ static struct clk_osm cpu7_perfcl_clk = { .name = "cpu7_perfcl_clk", .parent_names = (const char *[]){ "perfcl_clk" }, .num_parents = 1, - .flags = CLK_SET_RATE_PARENT, + .flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE, .ops = &clk_ops_perfcl_core, }, }; @@ -691,7 +663,10 @@ osm_set_index(struct clk_osm *c, unsigned int index) { struct clk_hw *p_hw = clk_hw_get_parent(&c->hw); struct clk_osm *parent = to_clk_osm(p_hw); - unsigned long rate = 0; + unsigned int current_index; + unsigned long rate; + int core_num; + s64 delta_us; if (index >= OSM_TABLE_SIZE) { pr_err("Passing an index (%u) that's greater than max (%d)\n", @@ -703,7 +678,25 @@ osm_set_index(struct clk_osm *c, unsigned int index) if (!rate) return; + core_num = parent->per_core_dcvs ? c->core_num : 0; + + /* Skip the update if the current rate is the same as the new one */ + mutex_lock(&parent->update_lock); + current_index = clk_osm_read_reg(parent, + DCVS_PERF_STATE_DESIRED_REG(core_num, + is_sdm845v1)); + if (current_index == index) + goto unlock; + + /* The old rate needs time to settle before it can be changed again */ + delta_us = ktime_us_delta(ktime_get_boottime(), parent->last_update); + if (delta_us < 10000) + usleep_range(10000 - delta_us, 11000 - delta_us); + parent->last_update = ktime_get_boottime(); + clk_set_rate(c->hw.clk, clk_round_rate(c->hw.clk, rate)); +unlock: + mutex_unlock(&parent->update_lock); } static int @@ -719,6 +712,7 @@ static unsigned int osm_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); struct clk_osm *c; + u32 curr_lval; u32 index; if (!policy) @@ -727,7 +721,12 @@ static unsigned int osm_cpufreq_get(unsigned int cpu) c = policy->driver_data; index = clk_osm_read_reg(c, DCVS_PERF_STATE_DESIRED_REG(c->core_num, is_sdm845v1)); - return policy->freq_table[index].frequency; + curr_lval = CURRENT_LVAL(clk_osm_read_reg(c, PSTATE_STATUS)); + + if (policy->freq_table[index].frequency == OSM_INIT_RATE / 1000) + return OSM_INIT_RATE / 1000; + else + return XO_RATE * curr_lval / 1000; } static int osm_cpufreq_cpu_init(struct cpufreq_policy *policy) @@ -1317,6 +1316,9 @@ static int clk_cpu_osm_driver_probe(struct platform_device *pdev) spin_lock_init(&l3_clk.lock); spin_lock_init(&pwrcl_clk.lock); spin_lock_init(&perfcl_clk.lock); + mutex_init(&l3_clk.update_lock); + mutex_init(&pwrcl_clk.update_lock); + mutex_init(&perfcl_clk.update_lock); /* Register OSM l3, pwr and perf clocks with Clock Framework */ for (i = 0; i < num_clks; i++) { diff --git a/drivers/clk/qcom/clk-voter.h b/drivers/clk/qcom/clk-voter.h index 0ae2b1aa2444..1de689d9175d 100644 --- a/drivers/clk/qcom/clk-voter.h +++ b/drivers/clk/qcom/clk-voter.h @@ -29,7 +29,7 @@ extern const struct clk_ops clk_ops_voter; #define to_clk_voter(_hw) container_of(_hw, struct clk_voter, hw) -#define __DEFINE_CLK_VOTER(clk_name, _parent_name, _default_rate, _is_branch) \ +#define __DEFINE_CLK_VOTER(clk_name, _parent_name, _default_rate, _is_branch, _flags) \ struct clk_voter clk_name = { \ .is_branch = (_is_branch), \ .rate = _default_rate, \ @@ -38,13 +38,17 @@ extern const struct clk_ops clk_ops_voter; .name = #clk_name, \ .parent_names = (const char *[]){ #_parent_name }, \ .num_parents = 1, \ + .flags = _flags, \ }, \ } #define DEFINE_CLK_VOTER(clk_name, _parent_name, _default_rate) \ - __DEFINE_CLK_VOTER(clk_name, _parent_name, _default_rate, 0) + __DEFINE_CLK_VOTER(clk_name, _parent_name, _default_rate, 0, 0) + +#define DEFINE_CLK_VOTER_NOCACHE(clk_name, _parent_name, _default_rate) \ + __DEFINE_CLK_VOTER(clk_name, _parent_name, _default_rate, 0, CLK_GET_RATE_NOCACHE) #define DEFINE_CLK_BRANCH_VOTER(clk_name, _parent_name) \ - __DEFINE_CLK_VOTER(clk_name, _parent_name, 1000, 1) + __DEFINE_CLK_VOTER(clk_name, _parent_name, 1000, 1, 0) #endif diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d865723e9ed8..a6a9e3da5588 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -85,6 +85,7 @@ static bool arch_timer_c3stop; static bool arch_timer_mem_use_virtual; static bool arch_counter_suspend_stop; +static cpumask_t evtstrm_available = CPU_MASK_NONE; static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); static int __init early_evtstrm_cfg(char *buf) @@ -544,6 +545,7 @@ static void arch_timer_evtstrm_enable(int divider) #ifdef CONFIG_COMPAT compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; #endif + cpumask_set_cpu(smp_processor_id(), &evtstrm_available); } static void arch_timer_configure_evtstream(void) @@ -693,6 +695,16 @@ void arch_timer_mem_get_cval(u32 *lo, u32 *hi) } } +bool arch_timer_evtstrm_available(void) +{ + /* + * We might get called from a preemptible context. This is fine + * because availability of the event stream should be always the same + * for a preemptible context and context where we might resume a task. + */ + return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available); +} + static u64 arch_counter_get_cntvct_mem(void) { u32 vct_lo, vct_hi, tmp_hi; @@ -798,19 +810,27 @@ static int arch_timer_dying_cpu(unsigned int cpu) { struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); + cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); + arch_timer_stop(clk); return 0; } #ifdef CONFIG_CPU_PM -static unsigned int saved_cntkctl; +static DEFINE_PER_CPU(unsigned long, saved_cntkctl); static int arch_timer_cpu_pm_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - if (action == CPU_PM_ENTER) - saved_cntkctl = arch_timer_get_cntkctl(); - else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) - arch_timer_set_cntkctl(saved_cntkctl); + if (action == CPU_PM_ENTER) { + __this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl()); + + cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); + } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { + arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); + + if (elf_hwcap & HWCAP_EVTSTRM) + cpumask_set_cpu(smp_processor_id(), &evtstrm_available); + } return NOTIFY_OK; } @@ -887,7 +907,6 @@ static int __init arch_timer_register(void) if (err) goto out_unreg_notify; - /* Register and immediately configure the timer on the boot CPU */ err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING, "AP_ARM_ARCH_TIMER_STARTING", diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1678cff69c9a..609b32853627 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -252,7 +252,7 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) struct cpufreq_policy *policy = NULL; unsigned long flags; - if (WARN_ON(cpu >= nr_cpu_ids)) + if (cpu >= nr_cpu_ids) return NULL; /* get the cpufreq driver */ @@ -1659,7 +1659,10 @@ unsigned int cpufreq_get(unsigned int cpu) if (policy) { down_read(&policy->rwsem); - ret_freq = __cpufreq_get(policy); + + if (!policy_is_inactive(policy)) + ret_freq = __cpufreq_get(policy); + up_read(&policy->rwsem); cpufreq_cpu_put(policy); @@ -2310,7 +2313,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * because new_policy is a copy of policy with one field updated. */ if (new_policy->min > new_policy->max) - return -EINVAL; + new_policy->min = new_policy->max; /* verify the cpu speed can be set within this limit */ ret = cpufreq_driver->verify(new_policy); @@ -2410,6 +2413,11 @@ int cpufreq_update_policy(unsigned int cpu) down_write(&policy->rwsem); + if (policy_is_inactive(policy)) { + ret = -ENODEV; + goto unlock; + } + pr_debug("updating policy for CPU %u\n", cpu); memcpy(&new_policy, policy, sizeof(*policy)); new_policy.min = policy->user_policy.min; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 0fe251865ac6..eaab72d1ace5 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -316,17 +316,7 @@ struct dbs_governor cs_governor = { .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) - -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); -} +#define CPU_FREQ_GOV_CONSERVATIVE (cs_governor.gov) MODULE_AUTHOR("Alexander Clouter "); MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " @@ -337,11 +327,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_CONSERVATIVE; + return &CPU_FREQ_GOV_CONSERVATIVE; } - -fs_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_CONSERVATIVE); +cpufreq_governor_exit(CPU_FREQ_GOV_CONSERVATIVE); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3a1f49f5f4c6..c40060a3c8a1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -423,7 +423,7 @@ static struct dbs_governor od_dbs_gov = { .start = od_start, }; -#define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) +#define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov) static void od_set_powersave_bias(unsigned int powersave_bias) { @@ -444,7 +444,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) continue; policy = cpufreq_cpu_get_raw(cpu); - if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) + if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND) continue; policy_dbs = policy->governor_data; @@ -476,16 +476,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int __init cpufreq_gov_dbs_init(void) -{ - return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); -} - -static void __exit cpufreq_gov_dbs_exit(void) -{ - cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); -} - MODULE_AUTHOR("Venkatesh Pallipadi "); MODULE_AUTHOR("Alexey Starikovskiy "); MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " @@ -495,11 +485,9 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND struct cpufreq_governor *cpufreq_default_governor(void) { - return CPU_FREQ_GOV_ONDEMAND; + return &CPU_FREQ_GOV_ONDEMAND; } - -fs_initcall(cpufreq_gov_dbs_init); -#else -module_init(cpufreq_gov_dbs_init); #endif -module_exit(cpufreq_gov_dbs_exit); + +cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND); +cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index 399428e40e89..6e94aff1b65a 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -31,16 +31,6 @@ struct cpufreq_governor cpufreq_gov_performance = { .limits = cpufreq_gov_performance_limits, }; -static int __init cpufreq_gov_performance_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_performance); -} - -static void __exit cpufreq_gov_performance_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_performance); -} - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE struct cpufreq_governor *cpufreq_default_governor(void) { @@ -58,5 +48,5 @@ MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); -fs_initcall(cpufreq_gov_performance_init); -module_exit(cpufreq_gov_performance_exit); +cpufreq_governor_init(cpufreq_gov_performance); +cpufreq_governor_exit(cpufreq_gov_performance); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 5daa500fb0a9..686bd66251e8 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -31,16 +31,6 @@ struct cpufreq_governor cpufreq_gov_powersave = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_powersave_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_powersave); -} - -static void __exit cpufreq_gov_powersave_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_powersave); -} - MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); @@ -50,9 +40,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_powersave; } - -fs_initcall(cpufreq_gov_powersave_init); -#else -module_init(cpufreq_gov_powersave_init); #endif -module_exit(cpufreq_gov_powersave_exit); + +cpufreq_governor_init(cpufreq_gov_powersave); +cpufreq_governor_exit(cpufreq_gov_powersave); diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c index 190f7603ee53..b52bf4038e13 100644 --- a/drivers/cpufreq/cpufreq_times.c +++ b/drivers/cpufreq/cpufreq_times.c @@ -30,12 +30,14 @@ static DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS); -static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ -static DEFINE_SPINLOCK(task_concurrent_active_time_lock); - /* task->concurrent_active_time */ -static DEFINE_SPINLOCK(task_concurrent_policy_time_lock); - /* task->concurrent_policy_time */ -static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */ +/* task->time_in_state */ +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(task_time_in_state_lock); +/* task->concurrent_active_time */ +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(task_concurrent_active_time_lock); +/* task->concurrent_policy_time */ +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(task_concurrent_policy_time_lock); +/* uid_hash_table */ +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(uid_lock); struct concurrent_times { atomic64_t active[NR_CPUS]; diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 765166d881bb..63d1ba9a944d 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -133,16 +133,6 @@ struct cpufreq_governor cpufreq_gov_userspace = { .owner = THIS_MODULE, }; -static int __init cpufreq_gov_userspace_init(void) -{ - return cpufreq_register_governor(&cpufreq_gov_userspace); -} - -static void __exit cpufreq_gov_userspace_exit(void) -{ - cpufreq_unregister_governor(&cpufreq_gov_userspace); -} - MODULE_AUTHOR("Dominik Brodowski , " "Russell King "); MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); @@ -153,9 +143,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) { return &cpufreq_gov_userspace; } - -fs_initcall(cpufreq_gov_userspace_init); -#else -module_init(cpufreq_gov_userspace_init); #endif -module_exit(cpufreq_gov_userspace_exit); + +cpufreq_governor_init(cpufreq_gov_userspace); +cpufreq_governor_exit(cpufreq_gov_userspace); diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 7e48eb5bf0a7..cdf3e45fa942 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -3,8 +3,6 @@ menu "CPU Idle" config CPU_IDLE bool "CPU idle PM support" default y if ACPI || PPC_PSERIES - select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE) - select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE) help CPU idle is a generic framework for supporting software-controlled idle processor power management. It includes modular cross-platform diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 3eddf4375929..7c6f34c59d11 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -97,7 +97,17 @@ static int find_deepest_state(struct cpuidle_driver *drv, return ret; } -#ifdef CONFIG_SUSPEND +/* Set the current cpu to use the deepest idle state, override governors */ +void cpuidle_use_deepest_state(bool enable) +{ + struct cpuidle_device *dev; + + preempt_disable(); + dev = cpuidle_get_device(); + dev->use_deepest_state = enable; + preempt_enable(); +} + /** * cpuidle_find_deepest_state - Find the deepest available idle state. * @drv: cpuidle driver for the given CPU. @@ -109,6 +119,7 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv, return find_deepest_state(drv, dev, UINT_MAX, 0, false); } +#ifdef CONFIG_SUSPEND static void enter_freeze_proper(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { @@ -380,9 +391,11 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (ret) return ret; - if (cpuidle_curr_governor->enable && - (ret = cpuidle_curr_governor->enable(drv, dev))) - goto fail_sysfs; + if (cpuidle_curr_governor->enable) { + ret = cpuidle_curr_governor->enable(drv, dev); + if (ret) + goto fail_sysfs; + } smp_wmb(); diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index fb9f511cca23..4e78263e34a4 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -9,7 +9,6 @@ */ #include -#include #include #include "cpuidle.h" @@ -53,14 +52,11 @@ int cpuidle_switch_governor(struct cpuidle_governor *gov) if (cpuidle_curr_governor) { list_for_each_entry(dev, &cpuidle_detected_devices, device_list) cpuidle_disable_device(dev); - module_put(cpuidle_curr_governor->owner); } cpuidle_curr_governor = gov; if (gov) { - if (!try_module_get(cpuidle_curr_governor->owner)) - return -EINVAL; list_for_each_entry(dev, &cpuidle_detected_devices, device_list) cpuidle_enable_device(dev); cpuidle_install_idle_handler(); diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 63bd5a403e22..fe8f08948fcb 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -177,7 +176,6 @@ static struct cpuidle_governor ladder_governor = { .enable = ladder_enable_device, .select = ladder_select_state, .reflect = ladder_reflect, - .owner = THIS_MODULE, }; /** diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 72bc6e63584c..11faed2f23e8 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -20,7 +20,6 @@ #include #include #include -#include /* * Please note when changing the tuning values: @@ -183,7 +182,7 @@ static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned lo /* mult += 2 * get_loadavg(); */ /* for IO wait tasks (per cpu!) we add 5x each */ - mult += 10 * nr_iowaiters; + mult += 2 * nr_iowaiters; return mult; } @@ -486,7 +485,6 @@ static struct cpuidle_governor menu_governor = { .enable = menu_enable_device, .select = menu_select, .reflect = menu_reflect, - .owner = THIS_MODULE, }; /** diff --git a/drivers/cpuidle/lpm-levels-legacy.c b/drivers/cpuidle/lpm-levels-legacy.c index 006a5ef0dfb6..4e9487675952 100644 --- a/drivers/cpuidle/lpm-levels-legacy.c +++ b/drivers/cpuidle/lpm-levels-legacy.c @@ -100,7 +100,7 @@ static const int num_dbg_elements = 0x100; static void cluster_unprepare(struct lpm_cluster *cluster, const struct cpumask *cpu, int child_idx, bool from_idle, - int64_t time); + int64_t time, bool success); static void cluster_prepare(struct lpm_cluster *cluster, const struct cpumask *cpu, int child_idx, bool from_idle, int64_t time); @@ -643,9 +643,11 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx, cpumask_copy(&cpumask, cpumask_of(cpu)); nextcpu = level->disable_dynamic_routing ? NULL : &cpumask; - if (sys_pm_ops && sys_pm_ops->enter) - if ((sys_pm_ops->enter(nextcpu))) - return -EBUSY; + if (sys_pm_ops && sys_pm_ops->enter) { + ret = sys_pm_ops->enter(nextcpu); + if (ret) + goto failed_set_mode; + } if (cluster->no_saw_devices && !use_psci) msm_spm_set_rpm_hs(true); @@ -734,7 +736,7 @@ static void cluster_prepare(struct lpm_cluster *cluster, static void cluster_unprepare(struct lpm_cluster *cluster, const struct cpumask *cpu, int child_idx, bool from_idle, - int64_t end_time) + int64_t end_time, bool success) { struct lpm_cluster_level *level; bool first_cpu; @@ -767,12 +769,12 @@ static void cluster_unprepare(struct lpm_cluster *cluster, if (cluster->stats->sleep_time) cluster->stats->sleep_time = end_time - cluster->stats->sleep_time; - lpm_stats_cluster_exit(cluster->stats, cluster->last_level, true); + lpm_stats_cluster_exit(cluster->stats, cluster->last_level, success); level = &cluster->levels[cluster->last_level]; if (level->notify_rpm) { if (sys_pm_ops && sys_pm_ops->exit) - sys_pm_ops->exit(); + sys_pm_ops->exit(success); /* If RPM bumps up CX to turbo, unvote CX turbo vote * during exit of rpm assisted power collapse to @@ -805,7 +807,7 @@ static void cluster_unprepare(struct lpm_cluster *cluster, cluster_notify(cluster, &cluster->levels[last_level], false); cluster_unprepare(cluster->parent, &cluster->child_cpus, - last_level, from_idle, end_time); + last_level, from_idle, end_time, success); unlock_return: spin_unlock(&cluster->sync_lock); } @@ -1037,7 +1039,7 @@ static int lpm_cpuidle_enter(struct cpuidle_device *dev, end_time = ktime_to_ns(ktime_get()); lpm_stats_cpu_exit(idx, end_time, success); - cluster_unprepare(cluster, cpumask, idx, true, end_time); + cluster_unprepare(cluster, cpumask, idx, true, end_time, success); cpu_unprepare(cluster, idx, true); trace_cpu_idle_exit(idx, success); @@ -1246,6 +1248,7 @@ static int lpm_suspend_enter(suspend_state_t state) struct lpm_cpu *lpm_cpu = cluster->cpu; const struct cpumask *cpumask = get_cpu_mask(cpu); int idx; + bool success = true; for (idx = lpm_cpu->nlevels - 1; idx >= 0; idx--) { @@ -1273,13 +1276,13 @@ static int lpm_suspend_enter(suspend_state_t state) if (!use_psci) msm_cpu_pm_enter_sleep(cluster->cpu->levels[idx].mode, false); else - psci_enter_sleep(cluster, idx, true); + success = psci_enter_sleep(cluster, idx, true); if (idx > 0) update_debug_pc_event(CPU_EXIT, idx, true, 0xdeaffeed, false); - cluster_unprepare(cluster, cpumask, idx, false, 0); + cluster_unprepare(cluster, cpumask, idx, false, 0, success); cpu_unprepare(cluster, idx, false); return 0; } @@ -1302,7 +1305,8 @@ static int lpm_starting_cpu(unsigned int cpu) update_debug_pc_event(CPU_HP_STARTING, cpu, cluster->num_children_in_sync.bits[0], cluster->child_cpus.bits[0], false); - cluster_unprepare(cluster, get_cpu_mask(cpu), NR_LPM_LEVELS, false, 0); + cluster_unprepare(cluster, get_cpu_mask(cpu), NR_LPM_LEVELS, + false, 0, true); return 0; } diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c index eb36dcf0d840..03b5b0ec9309 100644 --- a/drivers/cpuidle/lpm-levels.c +++ b/drivers/cpuidle/lpm-levels.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012-2020, The Linux Foundation. All rights reserved. * Copyright (C) 2006-2007 Adam Belay * Copyright (C) 2009 Intel Corporation * @@ -117,7 +117,7 @@ static const int num_dbg_elements = 0x100; static void cluster_unprepare(struct lpm_cluster *cluster, const struct cpumask *cpu, int child_idx, bool from_idle, - int64_t time); + int64_t time, bool success); static void cluster_prepare(struct lpm_cluster *cluster, const struct cpumask *cpu, int child_idx, bool from_idle, int64_t time); @@ -159,7 +159,7 @@ static uint32_t least_cluster_latency(struct lpm_cluster *cluster, uint32_t latency = 0; int i; - if (!cluster->list.next) { + if (list_empty(&cluster->list)) { for (i = 0; i < cluster->nlevels; i++) { level = &cluster->levels[i]; pwr_params = &level->pwr; @@ -328,7 +328,8 @@ static int lpm_starting_cpu(unsigned int cpu) update_debug_pc_event(CPU_HP_STARTING, cpu, cluster->num_children_in_sync.bits[0], cluster->child_cpus.bits[0], false); - cluster_unprepare(cluster, get_cpu_mask(cpu), NR_LPM_LEVELS, false, 0); + cluster_unprepare(cluster, get_cpu_mask(cpu), NR_LPM_LEVELS, false, + 0, true); return 0; } @@ -642,7 +643,7 @@ static int cpu_power_select(struct cpuidle_device *dev, lvl_latency_us = pwr_params->latency_us; - if (latency_us < lvl_latency_us) + if (latency_us <= lvl_latency_us) break; if (next_event_us) { @@ -979,7 +980,7 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle, &level->num_cpu_votes)) continue; - if (from_idle && latency_us < pwr_params->latency_us) + if (from_idle && latency_us <= pwr_params->latency_us) break; if (sleep_us < pwr_params->time_overhead_us) @@ -1061,7 +1062,7 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx, * LPMs(XO and Vmin). */ if (!from_idle) - clock_debug_print_enabled(true); + clock_debug_print_enabled(false); cpu = get_next_online_cpu(from_idle); cpumask_copy(&cpumask, cpumask_of(cpu)); @@ -1165,7 +1166,7 @@ static void cluster_prepare(struct lpm_cluster *cluster, static void cluster_unprepare(struct lpm_cluster *cluster, const struct cpumask *cpu, int child_idx, bool from_idle, - int64_t end_time) + int64_t end_time, bool success) { struct lpm_cluster_level *level; bool first_cpu; @@ -1202,14 +1203,14 @@ static void cluster_unprepare(struct lpm_cluster *cluster, if (cluster->stats->sleep_time) cluster->stats->sleep_time = end_time - cluster->stats->sleep_time; - lpm_stats_cluster_exit(cluster->stats, cluster->last_level, true); + lpm_stats_cluster_exit(cluster->stats, cluster->last_level, success); level = &cluster->levels[cluster->last_level]; if (level->notify_rpm) if (sys_pm_ops && sys_pm_ops->exit) { spin_lock(&bc_timer_lock); - sys_pm_ops->exit(); + sys_pm_ops->exit(success); spin_unlock(&bc_timer_lock); } @@ -1229,7 +1230,7 @@ static void cluster_unprepare(struct lpm_cluster *cluster, update_cluster_history(&cluster->history, last_level); cluster_unprepare(cluster->parent, &cluster->child_cpus, - last_level, from_idle, end_time); + last_level, from_idle, end_time, success); unlock_return: spin_unlock(&cluster->sync_lock); } @@ -1313,7 +1314,7 @@ static bool psci_enter_sleep(struct lpm_cpu *cpu, int idx, bool from_idle) if (!idx) { stop_critical_timings(); - wfi(); + cpu_do_idle(); start_critical_timings(); return 1; } @@ -1423,7 +1424,7 @@ static int lpm_cpuidle_enter(struct cpuidle_device *dev, end_time = ktime_to_ns(ktime_get()); lpm_stats_cpu_exit(idx, end_time, success); - cluster_unprepare(cpu->parent, cpumask, idx, true, end_time); + cluster_unprepare(cpu->parent, cpumask, idx, true, end_time, success); cpu_unprepare(cpu, idx, true); dev->last_residency = ktime_us_delta(ktime_get(), start); update_history(dev, idx); @@ -1441,6 +1442,7 @@ static void lpm_cpuidle_freeze(struct cpuidle_device *dev, { struct lpm_cpu *cpu = per_cpu(cpu_lpm, dev->cpu); const struct cpumask *cpumask = get_cpu_mask(dev->cpu); + bool success = false; for (; idx >= 0; idx--) { if (lpm_cpu_mode_allow(dev->cpu, idx, false)) @@ -1454,9 +1456,9 @@ static void lpm_cpuidle_freeze(struct cpuidle_device *dev, cpu_prepare(cpu, idx, true); cluster_prepare(cpu->parent, cpumask, idx, false, 0); - psci_enter_sleep(cpu, idx, false); + success = psci_enter_sleep(cpu, idx, false); - cluster_unprepare(cpu->parent, cpumask, idx, false, 0); + cluster_unprepare(cpu->parent, cpumask, idx, false, 0, success); cpu_unprepare(cpu, idx, true); } @@ -1507,7 +1509,6 @@ static struct cpuidle_governor lpm_governor = { .name = "qcom", .rating = 30, .select = lpm_cpuidle_select, - .owner = THIS_MODULE, }; static int cluster_cpuidle_register(struct lpm_cluster *cl) @@ -1671,6 +1672,7 @@ static int lpm_suspend_enter(suspend_state_t state) struct lpm_cluster *cluster = lpm_cpu->parent; const struct cpumask *cpumask = get_cpu_mask(cpu); int idx; + bool success; for (idx = lpm_cpu->nlevels - 1; idx >= 0; idx--) { if (lpm_cpu_mode_allow(cpu, idx, false)) @@ -1683,9 +1685,9 @@ static int lpm_suspend_enter(suspend_state_t state) cpu_prepare(lpm_cpu, idx, false); cluster_prepare(cluster, cpumask, idx, false, 0); - psci_enter_sleep(lpm_cpu, idx, false); + success = psci_enter_sleep(lpm_cpu, idx, false); - cluster_unprepare(cluster, cpumask, idx, false, 0); + cluster_unprepare(cluster, cpumask, idx, false, 0, success); cpu_unprepare(lpm_cpu, idx, false); return 0; } diff --git a/drivers/cpuidle/lpm-levels.h b/drivers/cpuidle/lpm-levels.h index 0b598c08b042..0d9713558341 100644 --- a/drivers/cpuidle/lpm-levels.h +++ b/drivers/cpuidle/lpm-levels.h @@ -137,7 +137,7 @@ uint32_t *get_per_cpu_min_residency(int cpu); extern struct lpm_cluster *lpm_root_node; #if defined(CONFIG_SMP) -extern DEFINE_PER_CPU(bool, pending_ipi); +DECLARE_PER_CPU(bool, pending_ipi); static inline bool is_IPI_pending(const struct cpumask *mask) { unsigned int cpu; diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 3f26a415ef44..8ae89a5f6baa 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -110,7 +110,7 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); - return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); + return crypto_sync_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); } static int ccp_aes_xts_crypt(struct ablkcipher_request *req, @@ -143,12 +143,13 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, + ctx->u.aes.tfm_skcipher); /* Use the fallback to process the request for any * unsupported unit sizes or key sizes */ - skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher); + skcipher_request_set_sync_tfm(subreq, ctx->u.aes.tfm_skcipher); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -195,12 +196,12 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_skcipher *fallback_tfm; + struct crypto_sync_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; ctx->u.aes.key_len = 0; - fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + fallback_tfm = crypto_alloc_sync_skcipher("xts(aes)", 0, CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback_tfm)) { @@ -218,7 +219,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(ctx->u.aes.tfm_skcipher); + crypto_free_sync_skcipher(ctx->u.aes.tfm_skcipher); } static int ccp_register_aes_xts_alg(struct list_head *head, diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index 8335b32e815e..ad2b4df37a75 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -68,7 +68,7 @@ static inline struct ccp_crypto_ahash_alg * /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ - struct crypto_skcipher *tfm_skcipher; + struct crypto_sync_skcipher *tfm_skcipher; /* Cipher used to generate CMAC K1/K2 keys */ struct crypto_cipher *tfm_cipher; diff --git a/drivers/crypto/msm/qcrypto.c b/drivers/crypto/msm/qcrypto.c index fd4257567d39..8b505b866dd3 100644 --- a/drivers/crypto/msm/qcrypto.c +++ b/drivers/crypto/msm/qcrypto.c @@ -420,7 +420,7 @@ struct qcrypto_cipher_ctx { u8 ccm4309_nonce[QCRYPTO_CCM4309_NONCE_LEN]; - struct crypto_skcipher *cipher_aes192_fb; + struct crypto_sync_skcipher *cipher_aes192_fb; struct crypto_ahash *ahash_aead_aes192_fb; }; @@ -981,8 +981,8 @@ static int _qcrypto_cra_aes_ablkcipher_init(struct crypto_tfm *tfm) ctx->cipher_aes192_fb = NULL; return _qcrypto_cra_ablkcipher_init(tfm); } - ctx->cipher_aes192_fb = crypto_alloc_skcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->cipher_aes192_fb = crypto_alloc_sync_skcipher(name, 0, + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->cipher_aes192_fb)) { pr_err("Error allocating fallback algo %s\n", name); ret = PTR_ERR(ctx->cipher_aes192_fb); @@ -1058,7 +1058,7 @@ static int _qcrypto_cra_aead_aes_sha1_init(struct crypto_aead *tfm) ctx->cipher_aes192_fb = NULL; ctx->ahash_aead_aes192_fb = NULL; if (!cp->ce_support.aes_key_192) { - ctx->cipher_aes192_fb = crypto_alloc_skcipher( + ctx->cipher_aes192_fb = crypto_alloc_sync_skcipher( "cbc(aes)", 0, 0); if (IS_ERR(ctx->cipher_aes192_fb)) { ctx->cipher_aes192_fb = NULL; @@ -1067,7 +1067,7 @@ static int _qcrypto_cra_aead_aes_sha1_init(struct crypto_aead *tfm) "hmac(sha1)", 0, 0); if (IS_ERR(ctx->ahash_aead_aes192_fb)) { ctx->ahash_aead_aes192_fb = NULL; - crypto_free_skcipher(ctx->cipher_aes192_fb); + crypto_free_sync_skcipher(ctx->cipher_aes192_fb); ctx->cipher_aes192_fb = NULL; } } @@ -1089,7 +1089,7 @@ static int _qcrypto_cra_aead_aes_sha256_init(struct crypto_aead *tfm) ctx->cipher_aes192_fb = NULL; ctx->ahash_aead_aes192_fb = NULL; if (!cp->ce_support.aes_key_192) { - ctx->cipher_aes192_fb = crypto_alloc_skcipher( + ctx->cipher_aes192_fb = crypto_alloc_sync_skcipher( "cbc(aes)", 0, 0); if (IS_ERR(ctx->cipher_aes192_fb)) { ctx->cipher_aes192_fb = NULL; @@ -1098,7 +1098,7 @@ static int _qcrypto_cra_aead_aes_sha256_init(struct crypto_aead *tfm) "hmac(sha256)", 0, 0); if (IS_ERR(ctx->ahash_aead_aes192_fb)) { ctx->ahash_aead_aes192_fb = NULL; - crypto_free_skcipher(ctx->cipher_aes192_fb); + crypto_free_sync_skcipher(ctx->cipher_aes192_fb); ctx->cipher_aes192_fb = NULL; } } @@ -1121,7 +1121,7 @@ static void _qcrypto_cra_aes_ablkcipher_exit(struct crypto_tfm *tfm) _qcrypto_cra_ablkcipher_exit(tfm); if (ctx->cipher_aes192_fb) - crypto_free_skcipher(ctx->cipher_aes192_fb); + crypto_free_sync_skcipher(ctx->cipher_aes192_fb); ctx->cipher_aes192_fb = NULL; } @@ -1140,7 +1140,7 @@ static void _qcrypto_cra_aead_aes_exit(struct crypto_aead *tfm) if (!list_empty(&ctx->rsp_queue)) pr_err("_qcrypto__cra_aead_exit: requests still outstanding"); if (ctx->cipher_aes192_fb) - crypto_free_skcipher(ctx->cipher_aes192_fb); + crypto_free_sync_skcipher(ctx->cipher_aes192_fb); if (ctx->ahash_aead_aes192_fb) crypto_free_ahash(ctx->ahash_aead_aes192_fb); ctx->cipher_aes192_fb = NULL; @@ -1421,10 +1421,10 @@ static int _qcrypto_setkey_aes_192_fallback(struct crypto_ablkcipher *cipher, int ret; ctx->enc_key_len = AES_KEYSIZE_192; - ctx->cipher_aes192_fb->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->cipher_aes192_fb->base.crt_flags |= + ctx->cipher_aes192_fb->base.base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + ctx->cipher_aes192_fb->base.base.crt_flags |= (cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(ctx->cipher_aes192_fb, key, + ret = crypto_sync_skcipher_setkey(ctx->cipher_aes192_fb, key, AES_KEYSIZE_192); if (ret) { tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; @@ -2480,8 +2480,8 @@ static int _qcrypto_enc_aes_192_fallback(struct ablkcipher_request *req) struct qcrypto_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int err; - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->cipher_aes192_fb); - skcipher_request_set_tfm(subreq, ctx->cipher_aes192_fb); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->cipher_aes192_fb); + skcipher_request_set_sync_tfm(subreq, ctx->cipher_aes192_fb); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -2496,8 +2496,8 @@ static int _qcrypto_dec_aes_192_fallback(struct ablkcipher_request *req) struct qcrypto_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int err; - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->cipher_aes192_fb); - skcipher_request_set_tfm(subreq, ctx->cipher_aes192_fb); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->cipher_aes192_fb); + skcipher_request_set_sync_tfm(subreq, ctx->cipher_aes192_fb); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -3091,8 +3091,8 @@ static int _qcrypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, ctx->auth_key, ctx->auth_key_len); if (ret) goto badkey; - crypto_skcipher_clear_flags(ctx->cipher_aes192_fb, ~0); - ret = crypto_skcipher_setkey(ctx->cipher_aes192_fb, + crypto_sync_skcipher_clear_flags(ctx->cipher_aes192_fb, ~0); + ret = crypto_sync_skcipher_setkey(ctx->cipher_aes192_fb, ctx->enc_key, ctx->enc_key_len); if (ret) goto badkey; @@ -3229,7 +3229,7 @@ static void _aead_aes_fb_stage1_ahash_complete( /* compare icv */ if (err == 0) { - unsigned char tmp[ctx->authsize]; + unsigned char tmp[SHA1_DIGEST_SIZE]; scatterwalk_map_and_copy(tmp, rctx->fb_aes_src, req->cryptlen - ctx->authsize, ctx->authsize, 0); @@ -3289,7 +3289,7 @@ static int _qcrypto_aead_aes_192_fallback(struct aead_request *req, struct scatterlist *src, *dst; rctx->fb_aes_iv = NULL; - aes_req = skcipher_request_alloc(ctx->cipher_aes192_fb, GFP_KERNEL); + aes_req = skcipher_request_alloc(&ctx->cipher_aes192_fb->base, GFP_KERNEL); if (!aes_req) return -ENOMEM; ahash_req = ahash_request_alloc(ctx->ahash_aead_aes192_fb, GFP_KERNEL); @@ -3358,7 +3358,7 @@ static int _qcrypto_aead_aes_192_fallback(struct aead_request *req, rc = crypto_ahash_digest(ahash_req); if (rc == 0) { - unsigned char tmp[ctx->authsize]; + unsigned char tmp[SHA1_DIGEST_SIZE]; /* compare icv */ scatterwalk_map_and_copy(tmp, diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index ad714798f5d8..6f795e334f4d 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -101,7 +101,7 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - struct crypto_skcipher *fallback; + struct crypto_sync_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; @@ -425,10 +425,10 @@ static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm); - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); int ret; - skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_sync_tfm(subreq, ctx->fallback); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->nbytes, req->info); @@ -509,16 +509,16 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, * but is supported by in-kernel software implementation, we use * software fallback. */ - crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(actx->fallback, + crypto_sync_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(actx->fallback, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(actx->fallback, key, len); + ret = crypto_sync_skcipher_setkey(actx->fallback, key, len); if (!ret) return 0; tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) & + tfm->base.crt_flags |= crypto_sync_skcipher_get_flags(actx->fallback) & CRYPTO_TFM_RES_MASK; return ret; @@ -527,11 +527,10 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) { const char *name = crypto_tfm_alg_name(tfm); - const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - struct crypto_skcipher *blk; + struct crypto_sync_skcipher *blk; - blk = crypto_alloc_skcipher(name, 0, flags); + blk = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(blk)) return PTR_ERR(blk); @@ -544,7 +543,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) { struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(actx->fallback); + crypto_free_sync_skcipher(actx->fallback); } /* diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index b3ea6d60c458..f9de81f45079 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -171,7 +171,7 @@ struct spacc_ablk_ctx { * The fallback cipher. If the operation can't be done in hardware, * fallback to a software version. */ - struct crypto_skcipher *sw_cipher; + struct crypto_sync_skcipher *sw_cipher; }; /* AEAD cipher context. */ @@ -797,17 +797,17 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, * Set the fallback transform to use the same request flags as * the hardware transform. */ - crypto_skcipher_clear_flags(ctx->sw_cipher, + crypto_sync_skcipher_clear_flags(ctx->sw_cipher, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(ctx->sw_cipher, + crypto_sync_skcipher_set_flags(ctx->sw_cipher, cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK); - err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); + err = crypto_sync_skcipher_setkey(ctx->sw_cipher, key, len); tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; tfm->crt_flags |= - crypto_skcipher_get_flags(ctx->sw_cipher) & + crypto_sync_skcipher_get_flags(ctx->sw_cipher) & CRYPTO_TFM_RES_MASK; if (err) @@ -912,7 +912,7 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, struct crypto_tfm *old_tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); int err; /* @@ -920,7 +920,7 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, * the ciphering has completed, put the old transform back into the * request. */ - skcipher_request_set_tfm(subreq, ctx->sw_cipher); + skcipher_request_set_sync_tfm(subreq, ctx->sw_cipher); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->nbytes, req->info); @@ -1018,9 +1018,8 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm) ctx->generic.flags = spacc_alg->type; ctx->generic.engine = engine; if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_skcipher( - alg->cra_name, 0, CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->sw_cipher = crypto_alloc_sync_skcipher( + alg->cra_name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->sw_cipher)) { dev_warn(engine->dev, "failed to allocate fallback for %s\n", alg->cra_name); @@ -1039,7 +1038,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm) { struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(ctx->sw_cipher); + crypto_free_sync_skcipher(ctx->sw_cipher); } static int spacc_ablk_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index b04b42f48366..5d4d7335fef7 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, memcpy(ctx->enc_key, key, keylen); return 0; fallback: - ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; return ret; @@ -212,9 +212,9 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt) if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && ctx->enc_keylen != AES_KEYSIZE_256) { - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); - skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_sync_tfm(subreq, ctx->fallback); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -245,9 +245,8 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm) memset(ctx, 0, sizeof(*ctx)); tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx); - ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_sync_skcipher(crypto_tfm_alg_name(tfm), + 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); @@ -258,7 +257,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm) { struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(ctx->fallback); + crypto_free_sync_skcipher(ctx->fallback); } struct qce_ablkcipher_def { diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h index 2b0278bb6e92..ee055bfe98a0 100644 --- a/drivers/crypto/qce/cipher.h +++ b/drivers/crypto/qce/cipher.h @@ -22,7 +22,7 @@ struct qce_cipher_ctx { u8 enc_key[QCE_MAX_KEY_SIZE]; unsigned int enc_keylen; - struct crypto_skcipher *fallback; + struct crypto_sync_skcipher *fallback; }; /** diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 4d0ec7bb3bc9..4bfe691581d7 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -149,7 +149,7 @@ struct sahara_ctx { /* AES-specific context */ int keylen; u8 key[AES_KEYSIZE_128]; - struct crypto_skcipher *fallback; + struct crypto_sync_skcipher *fallback; }; struct sahara_aes_reqctx { @@ -622,14 +622,14 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, /* * The requested key size is not supported by HW, do a fallback. */ - crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & + crypto_sync_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_sync_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen); tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) & + tfm->base.crt_flags |= crypto_sync_skcipher_get_flags(ctx->fallback) & CRYPTO_TFM_RES_MASK; return ret; } @@ -667,9 +667,9 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); - skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_sync_tfm(subreq, ctx->fallback); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -689,9 +689,9 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); - skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_sync_tfm(subreq, ctx->fallback); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -711,9 +711,9 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); - skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_sync_tfm(subreq, ctx->fallback); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -733,9 +733,9 @@ static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); - skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_sync_tfm(subreq, ctx->fallback); skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, @@ -753,8 +753,7 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->fallback = crypto_alloc_skcipher(name, 0, - CRYPTO_ALG_ASYNC | + ctx->fallback = crypto_alloc_sync_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) { pr_err("Error allocating fallback algo %s\n", name); @@ -770,7 +769,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm) { struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_skcipher(ctx->fallback); + crypto_free_sync_skcipher(ctx->fallback); } static u32 sahara_sha_init_hdr(struct sahara_dev *dev, diff --git a/drivers/devfreq/governor_memlat.c b/drivers/devfreq/governor_memlat.c index 12a90d41bc37..259e654d2869 100644 --- a/drivers/devfreq/governor_memlat.c +++ b/drivers/devfreq/governor_memlat.c @@ -251,6 +251,10 @@ static int devfreq_memlat_get_freq(struct devfreq *df, hw->core_stats[i].freq, hw->core_stats[i].stall_pct, ratio); + if (!hw->core_stats[i].inst_count + || !hw->core_stats[i].freq) + continue; + if (ratio <= node->ratio_ceil && hw->core_stats[i].stall_pct >= node->stall_floor && hw->core_stats[i].freq > max_freq) { diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index b1c1b2197a31..114270673dc9 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -16,7 +16,9 @@ obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o obj-$(CONFIG_RASPBERRYPI_FIRMWARE) += raspberrypi.o obj-$(CONFIG_FW_CFG_SYSFS) += qemu_fw_cfg.o +ifneq ($(CONFIG_BOARD_B1C1),y) obj-$(CONFIG_QCOM_SCM) += qcom_scm.o +endif obj-$(CONFIG_QCOM_SCM_64) += qcom_scm-64.o obj-$(CONFIG_QCOM_SCM_32) += qcom_scm-32.o CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQUIRES_SEC=1) -march=armv7-a diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 5060064acd45..45bae1ed6a19 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -21,6 +21,18 @@ menuconfig GPIOLIB if GPIOLIB +config GPIOLIB_FASTPATH_LIMIT + int "Maximum number of GPIOs for fast path" + range 32 512 + default 512 + help + This adjusts the point at which certain APIs will switch from + using a stack allocated buffer to a dynamically allocated buffer. + + You shouldn't need to change this unless you really need to + optimize either stack space or performance. Change this carefully + since setting an incorrect value could cause stack corruption. + config OF_GPIO def_bool y depends on OF diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 73d02f6089d5..af4a1774e10a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -60,6 +60,16 @@ static struct bus_type gpio_bus_type = { .name = "gpio", }; +/* + * Number of GPIOs to use for the fast path in set array + */ +#define FASTPATH_NGPIO CONFIG_GPIOLIB_FASTPATH_LIMIT + +/* + * Number of GPIOs to use for the fast path in set array + */ +#define FASTPATH_NGPIO CONFIG_GPIOLIB_FASTPATH_LIMIT + /* gpio_lock prevents conflicts during gpio_desc[] table updates. * While any GPIO is requested, its gpio_chip is not removable; * each GPIO's "requested" flag serves as a lock and refcount. @@ -386,12 +396,11 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd, vals[i] = !!ghd.values[i]; /* Reuse the array setting function */ - gpiod_set_array_value_complex(false, + return gpiod_set_array_value_complex(false, true, lh->numdescs, lh->descs, vals); - return 0; } return -EINVAL; } @@ -1182,6 +1191,10 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) goto err_free_descs; } + if (chip->ngpio > FASTPATH_NGPIO) + chip_warn(chip, "line cnt %u is greater than fast path cnt %u\n", + chip->ngpio, FASTPATH_NGPIO); + if (chip->label) gdev->label = kstrdup(chip->label, GFP_KERNEL); else @@ -2547,7 +2560,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, } } -void gpiod_set_array_value_complex(bool raw, bool can_sleep, +int gpiod_set_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, int *value_array) @@ -2556,14 +2569,26 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, while (i < array_size) { struct gpio_chip *chip = desc_array[i]->gdev->chip; - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; + unsigned long *mask, *bits; int count = 0; + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { + mask = fastpath; + } else { + mask = kmalloc_array(2 * BITS_TO_LONGS(chip->ngpio), + sizeof(*mask), + can_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (!mask) + return -ENOMEM; + } + + bits = mask + BITS_TO_LONGS(chip->ngpio); + bitmap_zero(mask, chip->ngpio); + if (!can_sleep) WARN_ON(chip->can_sleep); - memset(mask, 0, sizeof(mask)); do { struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); @@ -2594,7 +2619,11 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, /* push collected bits to outputs */ if (count != 0) gpio_chip_set_multiple(chip, mask, bits); + + if (mask != fastpath) + kfree(mask); } + return 0; } /** @@ -2651,13 +2680,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value); * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ -void gpiod_set_raw_array_value(unsigned int array_size, +int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { if (!desc_array) - return; - gpiod_set_array_value_complex(true, false, array_size, desc_array, - value_array); + return -EINVAL; + return gpiod_set_array_value_complex(true, false, array_size, + desc_array, value_array); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); @@ -2673,13 +2702,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ -void gpiod_set_array_value(unsigned int array_size, +int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { if (!desc_array) - return; - gpiod_set_array_value_complex(false, false, array_size, desc_array, - value_array); + return -EINVAL; + return gpiod_set_array_value_complex(false, false, array_size, + desc_array, value_array); } EXPORT_SYMBOL_GPL(gpiod_set_array_value); @@ -2907,14 +2936,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); * * This function is to be called from contexts that can sleep. */ -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { might_sleep_if(extra_checks); if (!desc_array) - return; - gpiod_set_array_value_complex(true, true, array_size, desc_array, + return -EINVAL; + return gpiod_set_array_value_complex(true, true, array_size, desc_array, value_array); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); @@ -2930,15 +2959,15 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); * * This function is to be called from contexts that can sleep. */ -void gpiod_set_array_value_cansleep(unsigned int array_size, +int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { might_sleep_if(extra_checks); if (!desc_array) - return; - gpiod_set_array_value_complex(false, true, array_size, desc_array, - value_array); + return -EINVAL; + return gpiod_set_array_value_complex(false, true, array_size, + desc_array, value_array); } EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 6c4d72872309..69512774cac5 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -168,7 +168,7 @@ static inline bool acpi_can_fallback_to_crs(struct acpi_device *adev, #endif struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum); -void gpiod_set_array_value_complex(bool raw, bool can_sleep, +int gpiod_set_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, int *value_array); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 4e4043fca223..df6c5162340e 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -2004,10 +2004,13 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, * Below we call drm_atomic_state_free for it. */ ret = drm_atomic_check_only(state); - } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) { - ret = drm_atomic_nonblocking_commit(state); } else { - ret = drm_atomic_commit(state); + drm_bridge_enable_all(dev); + + if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) + ret = drm_atomic_nonblocking_commit(state); + else + ret = drm_atomic_commit(state); } out: diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 70e8414e3d42..cd34297d22be 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -26,6 +26,7 @@ #include #include +#include /** * DOC: overview @@ -115,6 +116,7 @@ int drm_bridge_attach(struct drm_device *dev, struct drm_bridge *bridge) return -EBUSY; bridge->dev = dev; + dev->bridge = bridge; if (bridge->funcs->attach) return bridge->funcs->attach(bridge); @@ -144,6 +146,7 @@ void drm_bridge_detach(struct drm_bridge *bridge) if (bridge->funcs->detach) bridge->funcs->detach(bridge); + bridge->dev->bridge = NULL; bridge->dev = NULL; } EXPORT_SYMBOL(drm_bridge_detach); @@ -238,6 +241,8 @@ void drm_bridge_disable(struct drm_bridge *bridge) if (bridge->funcs->disable) bridge->funcs->disable(bridge); + + atomic_set(&bridge->dev->bridges_enabled, 0); } EXPORT_SYMBOL(drm_bridge_disable); @@ -290,6 +295,17 @@ void drm_bridge_mode_set(struct drm_bridge *bridge, } EXPORT_SYMBOL(drm_bridge_mode_set); +void __drm_bridge_pre_enable(struct drm_bridge *bridge) +{ + if (!bridge) + return; + + __drm_bridge_pre_enable(bridge->next); + + if (bridge->funcs->pre_enable) + bridge->funcs->pre_enable(bridge); +} + /** * drm_bridge_pre_enable - calls ->pre_enable() &drm_bridge_funcs op for all * bridges in the encoder chain. @@ -306,13 +322,22 @@ void drm_bridge_pre_enable(struct drm_bridge *bridge) if (!bridge) return; - drm_bridge_pre_enable(bridge->next); - - if (bridge->funcs->pre_enable) - bridge->funcs->pre_enable(bridge); + drm_bridge_enable_all(bridge->dev); + kthread_flush_work(&bridge->dev->bridge_enable_work); } EXPORT_SYMBOL(drm_bridge_pre_enable); +void __drm_bridge_enable(struct drm_bridge *bridge) +{ + if (!bridge) + return; + + if (bridge->funcs->enable) + bridge->funcs->enable(bridge); + + __drm_bridge_enable(bridge->next); +} + /** * drm_bridge_enable - calls ->enable() &drm_bridge_funcs op for all bridges * in the encoder chain. @@ -329,13 +354,20 @@ void drm_bridge_enable(struct drm_bridge *bridge) if (!bridge) return; - if (bridge->funcs->enable) - bridge->funcs->enable(bridge); - - drm_bridge_enable(bridge->next); + drm_bridge_enable_all(bridge->dev); + kthread_flush_work(&bridge->dev->bridge_enable_work); } EXPORT_SYMBOL(drm_bridge_enable); +void drm_bridge_enable_all(struct drm_device *dev) +{ + if (atomic_cmpxchg(&dev->bridges_enabled, 0, 1)) + return; + + kthread_queue_work(&dev->bridge_enable_worker, + &dev->bridge_enable_work); +} + #ifdef CONFIG_OF /** * of_drm_find_bridge - find the bridge corresponding to the device node in diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 80a903bd317d..64838ab21f91 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -469,6 +469,16 @@ static void drm_fs_inode_free(struct inode *inode) } } +static void drm_bridge_enable_work(struct kthread_work *work) +{ + struct drm_device *dev = container_of(work, typeof(*dev), + bridge_enable_work); + struct drm_bridge *bridge = dev->bridge; + + __drm_bridge_pre_enable(bridge); + __drm_bridge_enable(bridge); +} + /** * drm_dev_init - Initialise new DRM device * @dev: DRM device @@ -496,6 +506,9 @@ int drm_dev_init(struct drm_device *dev, struct drm_driver *driver, struct device *parent) { + static const struct sched_param param = { + .sched_priority = MAX_RT_PRIO - 1 + }; int ret; kref_init(&dev->ref); @@ -515,11 +528,24 @@ int drm_dev_init(struct drm_device *dev, mutex_init(&dev->ctxlist_mutex); mutex_init(&dev->master_mutex); + kthread_init_worker(&dev->bridge_enable_worker); + dev->bridge_enable_task = + kthread_run_perf_critical(kthread_worker_fn, + &dev->bridge_enable_worker, + "drm_bridge_enable"); + if (IS_ERR(dev->bridge_enable_task)) { + ret = PTR_ERR(dev->bridge_enable_task); + DRM_ERROR("Cannot create bridge_enable kthread: %d\n", ret); + goto err_free; + } + kthread_init_work(&dev->bridge_enable_work, drm_bridge_enable_work); + sched_setscheduler_nocheck(dev->bridge_enable_task, SCHED_FIFO, ¶m); + dev->anon_inode = drm_fs_inode_new(); if (IS_ERR(dev->anon_inode)) { ret = PTR_ERR(dev->anon_inode); DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret); - goto err_free; + goto err_kthread; } if (drm_core_check_feature(dev, DRIVER_MODESET)) { @@ -571,6 +597,8 @@ int drm_dev_init(struct drm_device *dev, drm_minor_free(dev, DRM_MINOR_RENDER); drm_minor_free(dev, DRM_MINOR_CONTROL); drm_fs_inode_free(dev->anon_inode); +err_kthread: + kthread_stop(dev->bridge_enable_task); err_free: mutex_destroy(&dev->master_mutex); return ret; @@ -634,6 +662,8 @@ static void drm_dev_release(struct kref *ref) drm_minor_free(dev, DRM_MINOR_RENDER); drm_minor_free(dev, DRM_MINOR_CONTROL); + kthread_flush_worker(&dev->bridge_enable_worker); + kthread_stop(dev->bridge_enable_task); mutex_destroy(&dev->master_mutex); kfree(dev->unique); kfree(dev); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 00c815a7c414..f8b3cacc534e 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -448,7 +448,7 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state) int drm_irq_install(struct drm_device *dev, int irq) { int ret; - unsigned long sh_flags = 0; + unsigned long sh_flags = IRQF_PERF_CRITICAL; if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return -EINVAL; diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 515fcf03dd0b..9bfd7eca680a 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -1122,7 +1122,7 @@ int mipi_dsi_dcs_get_display_brightness(struct mipi_dsi_device *dsi, *brightness = payload[0]; break; case 2: - *brightness = payload[0] << 8 || payload[1]; + *brightness = (payload[0] << 8) | payload[1]; break; } diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index b33d39d9dd14..15b62dc87e87 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -158,7 +158,8 @@ void drm_kms_helper_poll_enable_locked(struct drm_device *dev) } if (poll) - schedule_delayed_work(&dev->mode_config.output_poll_work, delay); + queue_delayed_work(system_power_efficient_wq, + &dev->mode_config.output_poll_work, delay); } EXPORT_SYMBOL(drm_kms_helper_poll_enable_locked); @@ -273,8 +274,8 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, */ dev->mode_config.delayed_event = true; if (dev->mode_config.poll_enabled) - schedule_delayed_work(&dev->mode_config.output_poll_work, - 0); + queue_delayed_work(system_power_efficient_wq, + &dev->mode_config.output_poll_work, 0); } /* Re-enable polling in case the global poll config changed. */ @@ -460,7 +461,8 @@ static void output_poll_execute(struct work_struct *work) drm_kms_helper_hotplug_event(dev); if (repoll) - schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD); + queue_delayed_work(system_power_efficient_wq, + delayed_work, DRM_OUTPUT_POLL_PERIOD); } /** diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index ba71ce8466a6..cdb12cbbb357 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -149,3 +149,10 @@ config DRM_SDE_RSC avoids the display core power collapse. A client can also register for display core power collapse events on rsc. +config KLAPSE + bool CONFIG_KLAPSE + depends on DRM_MSM + default n + help + A simple rgb dynamic lapsing module similar to LineageOS' livedisplay + diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index c0b20a19cf78..0c9199c95645 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -33,8 +33,6 @@ msm_drm-y := \ sde/sde_connector.o \ sde/sde_color_processing.o \ sde/sde_vbif.o \ - sde_dbg.o \ - sde_dbg_evtlog.o \ sde_io_util.o \ sde/sde_hw_reg_dma_v1_color_proc.o \ sde/sde_hw_color_proc_v4.o \ @@ -189,6 +187,8 @@ msm_drm-$(CONFIG_DRM_MSM) += \ obj-$(CONFIG_DRM_MSM) += msm_drm.o +msm_drm-$(CONFIG_KLAPSE) += sde/klapse.o + # TODO: remove me b/62058353 subdir-ccflags-y += \ -Wno-header-guard \ diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_backlight.c b/drivers/gpu/drm/msm/dsi-staging/dsi_backlight.c index 13675ddb4095..07c2b086048a 100644 --- a/drivers/gpu/drm/msm/dsi-staging/dsi_backlight.c +++ b/drivers/gpu/drm/msm/dsi-staging/dsi_backlight.c @@ -28,11 +28,79 @@ #include "dsi_display.h" #include "dsi_panel.h" +#ifdef CONFIG_KLAPSE +#include "../sde/klapse.h" +#endif + #define BL_NODE_NAME_SIZE 32 #define BL_STATE_STANDBY BL_CORE_FBBLANK #define BL_STATE_LP BL_CORE_DRIVER1 #define BL_STATE_LP2 BL_CORE_DRIVER2 +#define BL_HBM 1023 + +bool backlight_dimmer = 0; +module_param(backlight_dimmer, bool, 0644); + +static int hbm_enable = 0; +static struct dsi_backlight_config *bl_g; +static struct device *fb0_device; + +static void enable_hbm(int enable) +{ + struct dsi_panel *panel = container_of(bl_g, struct dsi_panel, bl_config); + struct hbm_data *hbm = bl_g->hbm; + struct hbm_range *range = NULL; + u32 target_range = enable ? bl_g->hbm->num_ranges - 1 : 0; + range = hbm->ranges + target_range; + + if (bl_g->bl_device->props.state & BL_CORE_FBBLANK) { + return; + } + + if(dsi_panel_cmd_set_transfer(panel, enable ? &range->entry_cmd : &range->dimming_stop_cmd)) + pr_err("Failed to send command for range %d\n", enable); +} + +static ssize_t hbm_show(struct device *device, struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", hbm_enable); +} + +static ssize_t hbm_store(struct device *device, struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 1) + val = 0; + + hbm_enable = val; + enable_hbm(hbm_enable); + backlight_update_status(bl_g->bl_device); + + return count; +} + +static DEVICE_ATTR_RW(hbm); + +static void fb0_init_device(struct dsi_backlight_config *bl) +{ + bl_g = bl; + fb0_device = device_create(fb_class, NULL, MKDEV(0, 0), NULL, "fb0"); + if (IS_ERR(fb0_device)) { + fb0_device = NULL; + return; + } + + if (device_create_file(fb0_device, &dev_attr_hbm)) + pr_warn("unable to create hbm node\n"); +} struct dsi_backlight_pwm_config { bool pwm_pmi_control; @@ -135,7 +203,7 @@ static u32 dsi_backlight_calculate_normal(struct dsi_backlight_config *bl, /* map UI brightness into driver backlight level rounding it */ rc = dsi_backlight_lerp( 1, bl->brightness_max_level, - bl->bl_min_level ? : 1, bl->bl_max_level, + backlight_dimmer ? 3 : bl->bl_min_level, bl->bl_max_level, brightness, &bl_lvl); if (unlikely(rc)) pr_err("failed to linearly interpolate, brightness unmodified\n"); @@ -385,6 +453,10 @@ static u32 dsi_backlight_calculate(struct dsi_backlight_config *bl, bl_temp = mult_frac(bl_temp, bl->bl_scale_ad, MAX_AD_BL_SCALE_LEVEL); + if (hbm_enable) { + return BL_HBM; + } + if (panel->hbm_mode) bl_lvl = dsi_backlight_calculate_hbm(bl, bl_temp); else @@ -394,6 +466,10 @@ static u32 dsi_backlight_calculate(struct dsi_backlight_config *bl, brightness, bl->bl_scale, bl->bl_scale_ad, bl_lvl, panel->hbm_mode); +#ifdef CONFIG_KLAPSE + set_rgb_slider(bl_lvl); +#endif + return bl_lvl; } @@ -427,9 +503,6 @@ static int dsi_backlight_update_status(struct backlight_device *bd) if (panel->vr_mode && (bl_lvl < bl->bl_vr_min_safe_level)) bl_lvl = bl->bl_vr_min_safe_level; - pr_info("req:%d bl:%d state:0x%x\n", - bd->props.brightness, bl_lvl, bd->props.state); - rc = bl->update_bl(bl, bl_lvl); if (rc) { pr_err("unable to set backlight (%d)\n", rc); @@ -711,6 +784,9 @@ static int dsi_backlight_register(struct dsi_backlight_config *bl) if (sysfs_create_groups(&bl->bl_device->dev.kobj, bl_device_groups)) pr_warn("unable to create device groups\n"); + //make dummy fb0 device so we have the old standard hbm sysfs path + fb0_init_device(bl); + reg = regulator_get(panel->parent, "lab"); if (!PTR_ERR_OR_ZERO(reg)) { pr_info("LAB regulator found\n"); @@ -756,8 +832,6 @@ int dsi_backlight_early_dpms(struct dsi_backlight_config *bl, int power_mode) if (!bd) return 0; - pr_info("power_mode:%d state:0x%0x\n", power_mode, bd->props.state); - mutex_lock(&bl->state_lock); state = get_state_after_dpms(bl, power_mode); diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_ctrl.c b/drivers/gpu/drm/msm/dsi-staging/dsi_ctrl.c index 6f606eaf82fb..b2731d910799 100644 --- a/drivers/gpu/drm/msm/dsi-staging/dsi_ctrl.c +++ b/drivers/gpu/drm/msm/dsi-staging/dsi_ctrl.c @@ -1837,7 +1837,7 @@ static struct platform_driver dsi_ctrl_driver = { }, }; -#if defined(CONFIG_DEBUG_FS) +#if 0 void dsi_ctrl_debug_dump(void) { diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_display.c b/drivers/gpu/drm/msm/dsi-staging/dsi_display.c index 664fd31563cd..3bcdcd046c3d 100644 --- a/drivers/gpu/drm/msm/dsi-staging/dsi_display.c +++ b/drivers/gpu/drm/msm/dsi-staging/dsi_display.c @@ -987,7 +987,7 @@ void dsi_display_esd_irq_mode_switch(struct dsi_display *display, cancel_work_sync(&display->esd_irq_work); /* Schedule ESD status check */ - schedule_delayed_work(&conn->status_work, + queue_delayed_work(system_power_efficient_wq, &conn->status_work, msecs_to_jiffies(STATUS_CHECK_INTERVAL_MS)); conn->esd_status_check = true; } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 41dd815ee8d8..52624617b775 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -772,7 +772,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) kthread_init_worker(&priv->disp_thread[i].worker); priv->disp_thread[i].dev = ddev; priv->disp_thread[i].thread = - kthread_run(kthread_worker_fn, + kthread_run_perf_critical(kthread_worker_fn, &priv->disp_thread[i].worker, "crtc_commit:%d", priv->disp_thread[i].crtc_id); ret = sched_setscheduler(priv->disp_thread[i].thread, @@ -791,7 +791,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) kthread_init_worker(&priv->event_thread[i].worker); priv->event_thread[i].dev = ddev; priv->event_thread[i].thread = - kthread_run(kthread_worker_fn, + kthread_run_perf_critical(kthread_worker_fn, &priv->event_thread[i].worker, "crtc_event:%d", priv->event_thread[i].crtc_id); /** @@ -838,7 +838,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) * other important events. */ kthread_init_worker(&priv->pp_event_worker); - priv->pp_event_thread = kthread_run(kthread_worker_fn, + priv->pp_event_thread = kthread_run_perf_critical(kthread_worker_fn, &priv->pp_event_worker, "pp_event"); ret = sched_setscheduler(priv->pp_event_thread, diff --git a/drivers/gpu/drm/msm/sde/klapse.c b/drivers/gpu/drm/msm/sde/klapse.c new file mode 100644 index 000000000000..7e46ae0434f8 --- /dev/null +++ b/drivers/gpu/drm/msm/sde/klapse.c @@ -0,0 +1,1076 @@ +#include +#include +#include +#include +#include +#include +#include "klapse.h" + +//Add additional headers below here only +#include "sde_hw_color_proc_v4.h" + +/* DEFAULT_ENABLE values : + * 0 = off + * 1 = time-based scaling + * 2 = brightness-based scaling + */ +#define DEFAULT_ENABLE 0 + +// MAX_SCALE : Maximum value of RGB possible +#define MAX_SCALE 256 + +// SCALE_VAL_MIN : Minimum value of RGB recommended +#define SCALE_VAL_MIN 0 + +// MAX_BRIGHTNESS : Maximum value of the display brightness/backlight +#define MAX_BRIGHTNESS 1023 + +// MIN_BRIGHTNESS : Minimum value of the display brightness/backlight +#define MIN_BRIGHTNESS 2 + +/* UPPER_BL_LVL : Initial upper limit for brightness-dependent mode. + * Value <= MAX_BRIGHTNESS && > LOWER_BL_LVL (MUST) + */ +#define UPPER_BL_LVL 200 + +/* LOWER_BL_LVL : Initial lower limit for brightness-dependent mode. + * Value < UPPER_BL_LVL (MUST) + */ +#define LOWER_BL_LVL 2 + +#define LIC "GPLv2" +#define AUT "tanish2k09" +#define VER "4.3" + +MODULE_LICENSE(LIC); +MODULE_AUTHOR(AUT); +MODULE_DESCRIPTION("A simple rgb dynamic lapsing module similar to livedisplay"); +MODULE_VERSION(VER); + + +//Tunables : +static unsigned int daytime_r, daytime_g, daytime_b, target_r, target_g, target_b; +static unsigned int klapse_start_hour, klapse_stop_hour, enable_klapse; +static unsigned int brightness_factor_auto_start_hour, brightness_factor_auto_stop_hour; +static unsigned int brightness_factor; +static unsigned int backlight_lower, backlight_upper; +static unsigned int fadeback_minutes; +static unsigned int pulse_freq; +static bool brightness_factor_auto_enable; +static unsigned int target_minute; // <-- Masked as tunable by using klapse_scaling_rate sysfs name + +/* + *Internal calculation variables : + *WARNING : DO NOT MAKE THEM TUNABLE + */ +static unsigned int b_cache; +static unsigned int current_r, current_g, current_b; +static unsigned int klapse_scaling_rate, active_minutes, last_bl; +static unsigned long local_time; +static struct rtc_time tm; +static struct timeval time; +static struct timer_list pulse_timer; + +// Pulse prototype +static void klapse_pulse(unsigned long data); + +//klapse related functions +static void restart_timer(void) +{ + mod_timer(&pulse_timer, jiffies + msecs_to_jiffies(pulse_freq)); + printk(KERN_INFO "KLapse pulse timer restarted!!!.\n"); +} + +static void flush_timer(void) +{ + if (timer_pending(&pulse_timer)) + mod_timer_pending(&pulse_timer, jiffies); + printk(KERN_INFO "KLapse pulse timer flushed!!!.\n"); +} + +static void calc_active_minutes(void) +{ + bool isPulse = ((enable_klapse == 1) || (brightness_factor_auto_enable == 1)); + + if (isPulse) + flush_timer(); + + if(klapse_start_hour > klapse_stop_hour) + active_minutes = (24 + klapse_stop_hour - klapse_start_hour)*60; + else + active_minutes = (klapse_stop_hour - klapse_start_hour)*60; + + klapse_scaling_rate = (active_minutes*10)/target_minute; + + if (isPulse) + klapse_pulse(0); +} + +static int get_minutes_since_start(void) +{ + int hour, min; + hour = tm.tm_hour - klapse_start_hour; + + if (hour < 0) + hour += 24; + + min = ((hour*60) + tm.tm_min); + return min; +} + +static int get_minutes_before_stop(void) +{ + return (active_minutes - get_minutes_since_start()); +} + +static void set_rgb(int r, int g, int b) +{ + K_RED = r; + K_GREEN = g; + K_BLUE = b; + + current_r = r; + current_g = g; + current_b = b; +} + +static void set_rgb_brightness(int r,int g,int b) +{ + + r = ((r*brightness_factor)/10); + g = ((g*brightness_factor)/10); + b = ((b*brightness_factor)/10); + + if (r < 0) + r = SCALE_VAL_MIN; + else if (r > MAX_SCALE) + r = MAX_SCALE; + if (g < 0) + g = SCALE_VAL_MIN; + else if (g > MAX_SCALE) + g = MAX_SCALE; + if (b < 0) + b = SCALE_VAL_MIN; + else if (b > MAX_SCALE) + b = MAX_SCALE; + + set_rgb(r,g,b); +} + +static bool hour_within_range(int start, int stop, int check) +{ + // The 24-hour system is tricky because 0 comes after 23. + // Handle it here properly + + // Check whether the start hour comes before stop hour. + // if = 1, this would mean they are numerically in order + // if both extremes are same, no time is possible inside so return false. + // else, start hour is actually greater than stop, something like "From 5pm to 7am" + // which translates to "From 17 to 7". It is clear why this could be a problem if not handled. + if (start < stop) { + if ((check >= start) && (check < stop)) + return 1; + else + return 0; + } + else if (start == stop) + return 0; + else { + if ((check < stop) || (check >= start)) + return 1; + else + return 0; + } +} +//klapse calc functions end here. + +// klapse rgb update function +static void klapse_pulse(unsigned long data) +{ + int backtime; + + // Get time + do_gettimeofday(&time); + local_time = (u32)(time.tv_sec - (sys_tz.tz_minuteswest * 60)); + rtc_time_to_tm(local_time, &tm); + + // Check brightness level automation + if ((brightness_factor_auto_enable == 1) && !hour_within_range(brightness_factor_auto_start_hour, brightness_factor_auto_stop_hour, tm.tm_hour)) + { + //Means not in dimmer-time + brightness_factor = 10; + } + else + brightness_factor = b_cache; + + // Check klapse automation, acts as a security measure too + if (enable_klapse == 1) + { + backtime = get_minutes_before_stop(); + + if(hour_within_range(klapse_start_hour, klapse_stop_hour, tm.tm_hour) == 0) //Means not in klapse time period. + { + set_rgb_brightness(daytime_r,daytime_g,daytime_b); + if (!timer_pending(&pulse_timer)) + restart_timer(); + return; + } + else if (backtime > fadeback_minutes) + { + backtime = get_minutes_since_start(); + + // For optimisation, this >= can be turned to an == + // But doing so will break reverse "time jumps" due to clock change + // And a wrong RGB value will be calculated. + if (backtime >= target_minute) + { + current_r = target_r; + current_g = target_g; + current_b = target_b; + } + else if (backtime < target_minute) + { + current_r = daytime_r - (((daytime_r - target_r)*backtime*klapse_scaling_rate)/(active_minutes*10)); + current_g = daytime_g - (((daytime_g - target_g)*backtime*klapse_scaling_rate)/(active_minutes*10)); + current_b = daytime_b - (((daytime_b - target_b)*backtime*klapse_scaling_rate)/(active_minutes*10)); + } + } + else + { + current_r = target_r + (((daytime_r - target_r)*(fadeback_minutes - backtime))/fadeback_minutes); + current_g = target_g + (((daytime_g - target_g)*(fadeback_minutes - backtime))/fadeback_minutes); + current_b = target_b + (((daytime_b - target_b)*(fadeback_minutes - backtime))/fadeback_minutes); + } + + set_rgb_brightness(current_r, current_g, current_b); + } + else + { + set_rgb_brightness(K_RED, K_GREEN, K_BLUE); + } + + if (!timer_pending(&pulse_timer)) + restart_timer(); +} + +// Brightness-based mode +void set_rgb_slider(u32 bl_lvl) +{ + if (bl_lvl >= MIN_BRIGHTNESS) + { + if ((enable_klapse == 2) && (bl_lvl <= MAX_BRIGHTNESS)) + { + if (bl_lvl > backlight_upper) + set_rgb_brightness(daytime_r, daytime_g, daytime_b); + else if (bl_lvl <= backlight_lower) + set_rgb_brightness(target_r, target_g, target_b); + else { + current_r = daytime_r - ((daytime_r - target_r)*(backlight_upper - bl_lvl)/(backlight_upper - backlight_lower)); + current_g = daytime_g - ((daytime_g - target_g)*(backlight_upper - bl_lvl)/(backlight_upper - backlight_lower)); + current_b = daytime_b - ((daytime_b - target_b)*(backlight_upper - bl_lvl)/(backlight_upper - backlight_lower)); + set_rgb_brightness(current_r, current_g, current_b); + } + } + + last_bl = bl_lvl; + } +} + +static void set_enable_klapse(int val) +{ + if ((val <= 2) && (val >= 0)) + { + if ((val == 1) && (enable_klapse != 1)) + { + if (brightness_factor_auto_enable == 0) + { + klapse_pulse(0); + } + } + else if (val == 0) + { + set_rgb_brightness(daytime_r, daytime_g, daytime_b); + current_r = daytime_r; + current_g = daytime_g; + current_b = daytime_b; + + if (brightness_factor_auto_enable == 0) + flush_timer(); + } + else if (enable_klapse == 2) + set_rgb_slider(last_bl); + + enable_klapse = val; + } +} + +//SYSFS node for details : +static ssize_t info_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "Author : %s\nVersion : %s\nLicense : %s\n", AUT, VER, LIC); + + return count; +} + +static ssize_t info_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + return count; +} + +//SYSFS tunables : +static ssize_t enable_klapse_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%d\n", enable_klapse); + + return count; +} + +static ssize_t enable_klapse_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int tmpval = 0; + + if (kstrtoint(buf, 0, &tmpval) != 0) + return -EINVAL; + + set_enable_klapse(tmpval); + + return count; +} + +static ssize_t daytime_r_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", daytime_r); + + return count; +} + +static ssize_t daytime_r_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= (SCALE_VAL_MIN)) && (tmpval <= MAX_SCALE)) + { + if (enable_klapse == 2) + { + daytime_r = tmpval; + set_rgb_slider(last_bl); + } + else if (enable_klapse == 1) + { + flush_timer(); + daytime_r = tmpval; + klapse_pulse(0); + } + else + daytime_r = tmpval; + } + + return count; +} + +static ssize_t daytime_g_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", daytime_g); + + return count; +} + +static ssize_t daytime_g_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= (SCALE_VAL_MIN)) && (tmpval <= MAX_SCALE)) + { + if (enable_klapse == 2) + { + daytime_g = tmpval; + set_rgb_slider(last_bl); + } + else if (enable_klapse == 1) + { + flush_timer(); + daytime_g = tmpval; + klapse_pulse(0); + } + else + daytime_g = tmpval; + } + + return count; +} + +static ssize_t daytime_b_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", daytime_b); + + return count; +} + +static ssize_t daytime_b_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= (SCALE_VAL_MIN)) && (tmpval <= MAX_SCALE)) + { + if (enable_klapse == 2) + { + daytime_b = tmpval; + set_rgb_slider(last_bl); + } + else if (enable_klapse == 1) + { + flush_timer(); + daytime_b = tmpval; + klapse_pulse(0); + } + else + daytime_b = tmpval; + } + + return count; +} + +static ssize_t target_r_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", target_r); + + return count; +} + +static ssize_t target_r_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= (SCALE_VAL_MIN)) && (tmpval <= MAX_SCALE)) + { + if (enable_klapse == 2) + { + target_r = tmpval; + set_rgb_slider(last_bl); + } + else if (enable_klapse == 1) + { + flush_timer(); + target_r = tmpval; + klapse_pulse(0); + } + else + target_r = tmpval; + } + + return count; +} + +static ssize_t target_g_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", target_g); + + return count; +} + +static ssize_t target_g_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= (SCALE_VAL_MIN)) && (tmpval <= MAX_SCALE)) + { + if (enable_klapse == 2) + { + target_g = tmpval; + set_rgb_slider(last_bl); + } + else if (enable_klapse == 1) + { + flush_timer(); + target_g = tmpval; + klapse_pulse(0); + } + else + target_g = tmpval; + } + + return count; +} + +static ssize_t target_b_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", target_b); + + return count; +} + +static ssize_t target_b_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= (SCALE_VAL_MIN)) && (tmpval <= MAX_SCALE)) + { + if (enable_klapse == 2) + { + target_b = tmpval; + set_rgb_slider(last_bl); + } + else if (enable_klapse == 1) + { + flush_timer(); + target_b = tmpval; + klapse_pulse(0); + } + else + target_b = tmpval; + } + + return count; +} + +static ssize_t klapse_start_hour_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", klapse_start_hour); + + return count; +} + +static ssize_t klapse_start_hour_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= 0) && (tmpval < 24) && (tmpval != klapse_stop_hour)) + { + klapse_start_hour = tmpval; + calc_active_minutes(); + } + + return count; +} + +static ssize_t klapse_stop_hour_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", klapse_stop_hour); + + return count; +} + +static ssize_t klapse_stop_hour_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= 0) && (tmpval < 24) && (tmpval != klapse_start_hour)) + { + klapse_stop_hour = tmpval; + calc_active_minutes(); + } + + return count; +} + +static ssize_t klapse_scaling_rate_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", target_minute); + + return count; +} + +static ssize_t klapse_scaling_rate_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval > 0) && (tmpval < active_minutes)) + { + target_minute = tmpval; + klapse_scaling_rate = (active_minutes*10)/target_minute; + } + + return count; +} + +static ssize_t brightness_factor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", b_cache); + + return count; +} + +static ssize_t brightness_factor_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= 2) && (tmpval <= 10) && (tmpval != b_cache)) + { + // At this point, the brightness_factor could already have been changed, so to apply new brightness + // the actual brightness RGB values must be restored. + if (brightness_factor_auto_enable == 0) // The auto-brightness is already disabled, so simply reset actual RGB + { + if (enable_klapse == 1) // Pulse is already running, for thread-safety, stop it and then modify RGB + { + flush_timer(); + set_rgb_brightness((K_RED*10)/b_cache, (K_GREEN*10)/b_cache, (K_BLUE*10)/b_cache); + b_cache = tmpval; + klapse_pulse(0); + } + else // Means pulse isn't running + { + set_rgb_brightness((K_RED*10)/b_cache, (K_GREEN*10)/b_cache, (K_BLUE*10)/b_cache); + b_cache = tmpval; + brightness_factor = tmpval; + if (enable_klapse == 2) + set_rgb_slider(last_bl); + else + set_rgb_brightness(K_RED, K_GREEN, K_BLUE); + } + } + else + { + flush_timer(); + set_rgb_brightness((K_RED*10)/brightness_factor, (K_GREEN*10)/brightness_factor, (K_BLUE*10)/brightness_factor); + b_cache = tmpval; + klapse_pulse(0); + } + } + + return count; +} + + +static ssize_t brightness_factor_auto_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", brightness_factor_auto_enable); + + return count; +} + +static ssize_t brightness_factor_auto_enable_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval == 0) || (tmpval == 1)) + { + if (brightness_factor_auto_enable == tmpval) // Do nothing if the same value is entered + return count; + + // At this point, the brightness_factor could already have been changed, so to apply new brightness + // the actual brightness RGB values must be restored. Here, check whether the current RGB have been reduced : + if (brightness_factor != 10) // Guarantess that the brightness_factor was changed + { + if (brightness_factor_auto_enable == 0) // The auto-brightness is already disabled, so simply reset actual RGB. Also implies tmpval is 1 + { + if (enable_klapse == 1) // Pulse is already running, for thread-safety, stop it and then modify RGB + flush_timer(); + + set_rgb_brightness((K_RED*10)/b_cache, (K_GREEN*10)/b_cache, (K_BLUE*10)/b_cache); + brightness_factor_auto_enable = tmpval; + klapse_pulse(0); + return count; + } + else // Guarantees that pulse is on, and RGB is reduced, and tmpval is 0 + { + flush_timer(); + set_rgb_brightness((K_RED*10)/brightness_factor, (K_GREEN*10)/brightness_factor, (K_BLUE*10)/brightness_factor); + brightness_factor_auto_enable = tmpval; + if (enable_klapse == 1) + klapse_pulse(0); + else + set_rgb_brightness(K_RED, K_GREEN, K_BLUE); + return count; + } + } + else // if brightness_factor is 10, auto_enable could be within active range, or it could be off + { + brightness_factor_auto_enable = tmpval; + + if ((tmpval == 1) || (enable_klapse == 1)) // Force restart pulse, if it is to be used + { + flush_timer(); + klapse_pulse(0); + } + else if (tmpval == 0) // Stop pulse anyways + { + flush_timer(); + set_rgb_brightness(K_RED, K_GREEN, K_BLUE); + } + } + } + + return count; +} + +static ssize_t brightness_factor_auto_start_hour_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", brightness_factor_auto_start_hour); + + return count; +} + +static ssize_t brightness_factor_auto_start_hour_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= 0) && (tmpval < 24) && (tmpval != brightness_factor_auto_stop_hour)) + { + if ((brightness_factor_auto_enable == 1) || (enable_klapse == 1)) + { + flush_timer(); + brightness_factor_auto_start_hour = tmpval; + klapse_pulse(0); + } + else + brightness_factor_auto_start_hour = tmpval; + } + + return count; +} + +static ssize_t brightness_factor_auto_stop_hour_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", brightness_factor_auto_stop_hour); + + return count; +} + +static ssize_t brightness_factor_auto_stop_hour_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmpval = 0; + + if (kstrtouint(buf, 0, &tmpval) != 0) + return -EINVAL; + + if ((tmpval >= 0) && (tmpval < 24) && (tmpval != brightness_factor_auto_start_hour)) + { + if ((brightness_factor_auto_enable == 1) || (enable_klapse == 1)) + { + flush_timer(); + brightness_factor_auto_stop_hour = tmpval; + klapse_pulse(0); + } + else + brightness_factor_auto_stop_hour = tmpval; + } + + return count; +} + +static ssize_t backlight_range_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u %u\n", backlight_lower, backlight_upper); + + return count; +} + +static ssize_t backlight_range_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmp_l = 0, tmp_u = 0, tmp = 0; + + if (sscanf(buf, "%u %u", &tmp_l, &tmp_u) != 2) + return -EINVAL; + + if ((tmp_l >= MIN_BRIGHTNESS) && (tmp_l <= MAX_BRIGHTNESS) && + (tmp_u >= MIN_BRIGHTNESS) && (tmp_u <= MAX_BRIGHTNESS)) + { + // Swap min and max correct + if (tmp_u < tmp_l) + { + tmp = tmp_u; + tmp_u = tmp_l; + tmp_l = tmp; + } + + backlight_lower = tmp_l; + backlight_upper = tmp_u; + + if (enable_klapse == 2) + set_rgb_slider(last_bl); + } + + return count; +} + +static ssize_t pulse_freq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", pulse_freq); + + return count; +} + +static ssize_t pulse_freq_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmp = 0; + + if (kstrtouint(buf, 0, &tmp) != 0) + return -EINVAL; + + if ((tmp >= 1000) && (tmp <= 10*60000)) + { + pulse_freq = tmp; + flush_timer(); + klapse_pulse(0); + } + + return count; +} + +static ssize_t fadeback_minutes_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + size_t count = 0; + + count += snprintf(buf, PAGE_SIZE, "%u\n", fadeback_minutes); + + return count; +} + +static ssize_t fadeback_minutes_dump(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned int tmp = 0; + + if (kstrtouint(buf, 0, &tmp) != 0) + return -EINVAL; + + if ((tmp >= 0) && (tmp <= active_minutes)) + { + if (enable_klapse == 1) + { + flush_timer(); + fadeback_minutes = tmp; + klapse_pulse(0); + } + else + fadeback_minutes = tmp; + } + + return count; +} + + +static struct kobj_attribute enable_klapse_attribute = + __ATTR(enable_klapse, 0644, enable_klapse_show, enable_klapse_dump); +static struct kobj_attribute daytime_r_attribute = + __ATTR(daytime_r, 0644, daytime_r_show, daytime_r_dump); +static struct kobj_attribute daytime_g_attribute = + __ATTR(daytime_g, 0644, daytime_g_show, daytime_g_dump); +static struct kobj_attribute daytime_b_attribute = + __ATTR(daytime_b, 0644, daytime_b_show, daytime_b_dump); +static struct kobj_attribute target_r_attribute = + __ATTR(target_r, 0644, target_r_show, target_r_dump); +static struct kobj_attribute target_g_attribute = + __ATTR(target_g, 0644, target_g_show, target_g_dump); +static struct kobj_attribute target_b_attribute = + __ATTR(target_b, 0644, target_b_show, target_b_dump); +static struct kobj_attribute klapse_start_hour_attribute = + __ATTR(klapse_start_hour, 0644, klapse_start_hour_show, klapse_start_hour_dump); +static struct kobj_attribute klapse_stop_hour_attribute = + __ATTR(klapse_stop_hour, 0644, klapse_stop_hour_show, klapse_stop_hour_dump); +static struct kobj_attribute klapse_scaling_rate_attribute = + __ATTR(klapse_scaling_rate, 0644, klapse_scaling_rate_show, klapse_scaling_rate_dump); +static struct kobj_attribute brightness_factor_attribute = + __ATTR(brightness_factor, 0644, brightness_factor_show, brightness_factor_dump); +static struct kobj_attribute brightness_factor_auto_attribute = + __ATTR(brightness_factor_auto, 0644, brightness_factor_auto_enable_show, brightness_factor_auto_enable_dump); +static struct kobj_attribute brightness_factor_auto_start_hour_attribute = + __ATTR(brightness_factor_auto_start_hour, 0644, brightness_factor_auto_start_hour_show, brightness_factor_auto_start_hour_dump); +static struct kobj_attribute brightness_factor_auto_stop_hour_attribute = + __ATTR(brightness_factor_auto_stop_hour, 0644, brightness_factor_auto_stop_hour_show, brightness_factor_auto_stop_hour_dump); +static struct kobj_attribute backlight_range_attribute = + __ATTR(backlight_range, 0644, backlight_range_show, backlight_range_dump); +static struct kobj_attribute pulse_freq_attribute = + __ATTR(pulse_freq, 0644, pulse_freq_show, pulse_freq_dump); +static struct kobj_attribute fadeback_minutes_attribute = + __ATTR(fadeback_minutes, 0644, fadeback_minutes_show, fadeback_minutes_dump); +static struct kobj_attribute info_attribute = + __ATTR(info, 0444, info_show, info_dump); + +static struct attribute *attrs[] = { + &enable_klapse_attribute.attr, + &daytime_r_attribute.attr, + &daytime_g_attribute.attr, + &daytime_b_attribute.attr, + &target_r_attribute.attr, + &target_g_attribute.attr, + &target_b_attribute.attr, + &klapse_start_hour_attribute.attr, + &klapse_stop_hour_attribute.attr, + &klapse_scaling_rate_attribute.attr, + &brightness_factor_attribute.attr, + &brightness_factor_auto_attribute.attr, + &brightness_factor_auto_start_hour_attribute.attr, + &brightness_factor_auto_stop_hour_attribute.attr, + &backlight_range_attribute.attr, + &pulse_freq_attribute.attr, + &fadeback_minutes_attribute.attr, + &info_attribute.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +static struct kobject *klapse_kobj; + +//INIT +static void values_setup(void) +{ + daytime_r = MAX_SCALE; + daytime_g = MAX_SCALE; + daytime_b = MAX_SCALE; + current_r = MAX_SCALE; + current_g = MAX_SCALE; + current_b = MAX_SCALE; + target_r = MAX_SCALE; + target_g = (MAX_SCALE*79)/100; + target_b = (MAX_SCALE*59)/100; + brightness_factor = 10; + b_cache = brightness_factor; + target_minute = 300; + klapse_start_hour = 17; + klapse_stop_hour = 7; + brightness_factor_auto_start_hour = 23; + brightness_factor_auto_stop_hour = 6; + enable_klapse = DEFAULT_ENABLE; + brightness_factor_auto_enable = 0; + backlight_lower = LOWER_BL_LVL; + backlight_upper = UPPER_BL_LVL; + last_bl = 1023; + pulse_freq = 30000; + fadeback_minutes = 60; + calc_active_minutes(); + + do_gettimeofday(&time); + local_time = (u32)(time.tv_sec - (sys_tz.tz_minuteswest * 60)); + rtc_time_to_tm(local_time, &tm); +} + +static int __init klapse_init(void) +{ + int rc; + printk(KERN_INFO "KLapse init entered!!!.\n"); + + values_setup(); + + klapse_kobj = kobject_create_and_add("klapse", NULL); + if (!klapse_kobj) + return -ENOMEM; + + rc = sysfs_create_group(klapse_kobj, &attr_group); + if (rc) + pr_warn("%s: sysfs_create_group failed\n", __func__); + + setup_timer(&pulse_timer, klapse_pulse, 0); + + printk(KERN_INFO "KLapse init returning!!!.\n"); + + return 0; +} + +static void __exit klapse_exit(void){ + printk(KERN_INFO "KLapse exit entered!!!.\n"); + kobject_del(klapse_kobj); + del_timer(&pulse_timer); + printk(KERN_INFO "KLapse exit finished!!!.\n"); +} + +module_init(klapse_init); +module_exit(klapse_exit); diff --git a/drivers/gpu/drm/msm/sde/klapse.h b/drivers/gpu/drm/msm/sde/klapse.h new file mode 100644 index 000000000000..2da625c97b43 --- /dev/null +++ b/drivers/gpu/drm/msm/sde/klapse.h @@ -0,0 +1,18 @@ +#ifndef _LINUX_KLAPSE_H +#define _LINUX_KLAPSE_H + +/* Required variables for external access. Change as per use */ +extern void set_rgb_slider(u32 bl_lvl); + +// This file uses generalised K_### defines +// The interpretation (right argument) should be the respective color's var that you +// include as extern via the CUSTOM_HEADER above +#define K_RED kcal_red +#define K_GREEN kcal_green +#define K_BLUE kcal_blue + +#define K_TYPE unsigned short + +extern K_TYPE K_RED, K_GREEN, K_BLUE; + +#endif /* _LINUX_KLAPSE_H */ diff --git a/drivers/gpu/drm/msm/sde/sde_connector.c b/drivers/gpu/drm/msm/sde/sde_connector.c index d99834472878..365d5adf84e2 100644 --- a/drivers/gpu/drm/msm/sde/sde_connector.c +++ b/drivers/gpu/drm/msm/sde/sde_connector.c @@ -349,7 +349,7 @@ void sde_connector_schedule_status_work(struct drm_connector *connector, c_conn->esd_status_interval : STATUS_CHECK_INTERVAL_MS; /* Schedule ESD status check */ - schedule_delayed_work(&c_conn->status_work, + queue_delayed_work(system_power_efficient_wq, &c_conn->status_work, msecs_to_jiffies(interval)); c_conn->esd_status_check = true; } else { @@ -1804,7 +1804,7 @@ static void sde_connector_check_status_work(struct work_struct *work) /* If debugfs property is not set then take default value */ interval = conn->esd_status_interval ? conn->esd_status_interval : STATUS_CHECK_INTERVAL_MS; - schedule_delayed_work(&conn->status_work, + queue_delayed_work(system_power_efficient_wq, &conn->status_work, msecs_to_jiffies(interval)); return; } diff --git a/drivers/gpu/drm/msm/sde/sde_encoder_phys.h b/drivers/gpu/drm/msm/sde/sde_encoder_phys.h index 4e9430e0d8f8..6e17355b6ce0 100644 --- a/drivers/gpu/drm/msm/sde/sde_encoder_phys.h +++ b/drivers/gpu/drm/msm/sde/sde_encoder_phys.h @@ -369,6 +369,7 @@ struct sde_encoder_phys_cmd { ktime_t rd_ptr_timestamp; atomic_t pending_vblank_cnt; wait_queue_head_t pending_vblank_wq; + struct work_struct ctl_wait_work; }; /** diff --git a/drivers/gpu/drm/msm/sde/sde_encoder_phys_cmd.c b/drivers/gpu/drm/msm/sde/sde_encoder_phys_cmd.c index f06ceb7e5f5c..55f5b7585a11 100644 --- a/drivers/gpu/drm/msm/sde/sde_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/sde/sde_encoder_phys_cmd.c @@ -1153,6 +1153,15 @@ static int _sde_encoder_phys_cmd_wait_for_ctl_start( return ret; } +static void sde_encoder_phys_cmd_ctl_start_work(struct work_struct *work) +{ + struct sde_encoder_phys_cmd *cmd_enc = container_of(work, + typeof(*cmd_enc), + ctl_wait_work); + + _sde_encoder_phys_cmd_wait_for_ctl_start(&cmd_enc->base); +} + static int sde_encoder_phys_cmd_wait_for_tx_complete( struct sde_encoder_phys *phys_enc) { @@ -1187,9 +1196,9 @@ static int sde_encoder_phys_cmd_wait_for_commit_done( /* only required for master controller */ if (sde_encoder_phys_cmd_is_master(phys_enc)) - rc = _sde_encoder_phys_cmd_wait_for_ctl_start(phys_enc); + queue_work(system_unbound_wq, &cmd_enc->ctl_wait_work); - if (!rc && sde_encoder_phys_cmd_is_master(phys_enc) && + if (sde_encoder_phys_cmd_is_master(phys_enc) && cmd_enc->autorefresh.cfg.enable) rc = _sde_encoder_phys_cmd_wait_for_autorefresh_done(phys_enc); @@ -1274,6 +1283,9 @@ static void sde_encoder_phys_cmd_prepare_commit( if (!sde_encoder_phys_cmd_is_master(phys_enc)) return; + /* Wait for ctl_start interrupt for the previous commit if needed */ + flush_work(&cmd_enc->ctl_wait_work); + SDE_EVT32(DRMID(phys_enc->parent), phys_enc->intf_idx - INTF_0, cmd_enc->autorefresh.cfg.enable); @@ -1452,6 +1464,7 @@ struct sde_encoder_phys *sde_encoder_phys_cmd_init( init_waitqueue_head(&cmd_enc->pending_vblank_wq); atomic_set(&cmd_enc->autorefresh.kickoff_cnt, 0); init_waitqueue_head(&cmd_enc->autorefresh.kickoff_wq); + INIT_WORK(&cmd_enc->ctl_wait_work, sde_encoder_phys_cmd_ctl_start_work); SDE_DEBUG_CMDENC(cmd_enc, "created\n"); diff --git a/drivers/gpu/drm/msm/sde/sde_hw_catalog.c b/drivers/gpu/drm/msm/sde/sde_hw_catalog.c index 44be5c843ee2..ab81674cc891 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_catalog.c +++ b/drivers/gpu/drm/msm/sde/sde_hw_catalog.c @@ -1278,6 +1278,7 @@ static int sde_sspp_parse_dt(struct device_node *np, sde_cfg->mdp[j].clk_ctrls[sspp->clk_ctrl].bit_off = PROP_BITVALUE_ACCESS(prop_value, SSPP_CLK_CTRL, i, 1); + sde_cfg->mdp[j].clk_ctrls[sspp->clk_ctrl].val = -1; } SDE_DEBUG( @@ -1704,6 +1705,7 @@ static int sde_wb_parse_dt(struct device_node *np, struct sde_mdss_cfg *sde_cfg) sde_cfg->mdp[j].clk_ctrls[wb->clk_ctrl].bit_off = PROP_BITVALUE_ACCESS(prop_value, WB_CLK_CTRL, i, 1); + sde_cfg->mdp[j].clk_ctrls[wb->clk_ctrl].val = -1; } wb->format_list = sde_cfg->wb_formats; @@ -1890,6 +1892,7 @@ static void _sde_inline_rot_parse_dt(struct device_node *np, sde_cfg->mdp[j].clk_ctrls[index].bit_off = PROP_BITVALUE_ACCESS(prop_value, INLINE_ROT_CLK_CTRL, i, 1); + sde_cfg->mdp[j].clk_ctrls[index].val = -1; } SDE_DEBUG("rot- xin:%d, num:%d, rd:%d, clk:%d:0x%x/%d\n", diff --git a/drivers/gpu/drm/msm/sde/sde_hw_catalog.h b/drivers/gpu/drm/msm/sde/sde_hw_catalog.h index 65919e9bfbf2..4e18df84e591 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_catalog.h +++ b/drivers/gpu/drm/msm/sde/sde_hw_catalog.h @@ -514,10 +514,12 @@ enum sde_clk_ctrl_type { /* struct sde_clk_ctrl_reg : Clock control register * @reg_off: register offset * @bit_off: bit offset + * @val: current bit value */ struct sde_clk_ctrl_reg { u32 reg_off; u32 bit_off; + int val; }; /* struct sde_mdp_cfg : MDP TOP-BLK instance info diff --git a/drivers/gpu/drm/msm/sde/sde_hw_color_proc_v4.c b/drivers/gpu/drm/msm/sde/sde_hw_color_proc_v4.c index f5aeddf682ef..5e68e51628ce 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_color_proc_v4.c +++ b/drivers/gpu/drm/msm/sde/sde_hw_color_proc_v4.c @@ -1,4 +1,5 @@ /* Copyright (c) 2017, The Linux Foundation. All rights reserved. + * Copyright (c) 2018, Pal Zoltan Illes (tbalden) - kcal rgb * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -9,10 +10,35 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ +#include #include #include "sde_hw_color_proc_common_v4.h" #include "sde_hw_color_proc_v4.h" +#ifdef CONFIG_KLAPSE +#include "klapse.h" + +unsigned short kcal_red = 256; +unsigned short kcal_green = 256; +unsigned short kcal_blue = 256; +#else +static unsigned short kcal_red = 256; +static unsigned short kcal_green = 256; +static unsigned short kcal_blue = 256; +#endif +static unsigned short kcal_hue = 0; +static unsigned short kcal_sat = 255; +static unsigned short kcal_val = 255; +static unsigned short kcal_cont = 255; + +module_param(kcal_red, short, 0644); +module_param(kcal_green, short, 0644); +module_param(kcal_blue, short, 0644); +module_param(kcal_hue, short, 0644); +module_param(kcal_sat, short, 0644); +module_param(kcal_val, short, 0644); +module_param(kcal_cont, short, 0644); + static int sde_write_3d_gamut(struct sde_hw_blk_reg_map *hw, struct drm_msm_3d_gamut *payload, u32 base, u32 *opcode) @@ -172,13 +198,23 @@ void sde_setup_dspp_pccv4(struct sde_hw_dspp *ctx, void *cfg) struct drm_msm_pcc *pcc_cfg; struct drm_msm_pcc_coeff *coeffs = NULL; int i = 0; + int kcal_min = 60; + int kcal_min_safe = 30; u32 base = 0; + u32 opcode = 0, local_opcode = 0; if (!ctx || !cfg) { DRM_ERROR("invalid param ctx %pK cfg %pK\n", ctx, cfg); return; } + if ((kcal_green < kcal_min_safe) && (kcal_blue < kcal_min_safe) && (kcal_red < kcal_min)) + kcal_red = kcal_min; + if ((kcal_green < kcal_min_safe) && (kcal_red < kcal_min_safe) && (kcal_blue < kcal_min)) + kcal_blue = kcal_min; + if ((kcal_blue < kcal_min_safe) && (kcal_red < kcal_min_safe) && (kcal_green < kcal_min)) + kcal_green = kcal_min; + if (!hw_cfg->payload) { DRM_DEBUG_DRIVER("disable pcc feature\n"); SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->pcc.base, 0); @@ -192,7 +228,6 @@ void sde_setup_dspp_pccv4(struct sde_hw_dspp *ctx, void *cfg) } pcc_cfg = hw_cfg->payload; - for (i = 0; i < PCC_NUM_PLANES; i++) { base = ctx->cap->sblk->pcc.base + (i * sizeof(u32)); switch (i) { @@ -229,14 +264,47 @@ void sde_setup_dspp_pccv4(struct sde_hw_dspp *ctx, void *cfg) } SDE_REG_WRITE(&ctx->hw, base + PCC_C_OFF, coeffs->c); - SDE_REG_WRITE(&ctx->hw, base + PCC_R_OFF, coeffs->r); - SDE_REG_WRITE(&ctx->hw, base + PCC_G_OFF, coeffs->g); - SDE_REG_WRITE(&ctx->hw, base + PCC_B_OFF, coeffs->b); + + // RED + SDE_REG_WRITE(&ctx->hw, base + PCC_R_OFF, + i == 0 ? (coeffs->r * kcal_red) / 256 : coeffs->r); + // GREEN + SDE_REG_WRITE(&ctx->hw, base + PCC_G_OFF, + i == 1 ? (coeffs->g * kcal_green) / 256 : coeffs->g); + // BLUE + SDE_REG_WRITE(&ctx->hw, base + PCC_B_OFF, + i == 2 ? (coeffs->b * kcal_blue) / 256 : coeffs->b); + SDE_REG_WRITE(&ctx->hw, base + PCC_RG_OFF, coeffs->rg); SDE_REG_WRITE(&ctx->hw, base + PCC_RB_OFF, coeffs->rb); SDE_REG_WRITE(&ctx->hw, base + PCC_GB_OFF, coeffs->gb); SDE_REG_WRITE(&ctx->hw, base + PCC_RGB_OFF, coeffs->rgb); } + opcode = SDE_REG_READ(&ctx->hw, ctx->cap->sblk->hsic.base); + + // HUE + SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->hsic.base + PA_HUE_OFF, + kcal_hue & PA_HUE_MASK); + local_opcode |= PA_HUE_EN; + + // SATURATION + SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->hsic.base + PA_SAT_OFF, + kcal_sat & PA_SAT_MASK); + local_opcode |= PA_SAT_EN; + + // VALUE + SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->hsic.base + PA_VAL_OFF, + kcal_val & PA_VAL_MASK); + local_opcode |= PA_VAL_EN; + + // CONTRAST + SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->hsic.base + PA_CONT_OFF, + kcal_cont & PA_CONT_MASK); + local_opcode |= PA_CONT_EN; + + opcode |= (local_opcode | PA_EN); + SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->hsic.base, opcode); + SDE_REG_WRITE(&ctx->hw, ctx->cap->sblk->pcc.base, PCC_EN); } diff --git a/drivers/gpu/drm/msm/sde/sde_hw_dspp.c b/drivers/gpu/drm/msm/sde/sde_hw_dspp.c index 4451301cb4e2..ab8382931886 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_dspp.c +++ b/drivers/gpu/drm/msm/sde/sde_hw_dspp.c @@ -60,7 +60,7 @@ static void _setup_dspp_ops(struct sde_hw_dspp *c, unsigned long features) c->ops.setup_pcc = sde_setup_dspp_pcc_v1_7; else if (c->cap->sblk->pcc.version == (SDE_COLOR_PROCESS_VER(0x4, 0x0))) { - ret = reg_dmav1_init_dspp_op_v4(i, c->idx); + ret = 1; // reg_dmav1_init_dspp_op_v4(i, c->idx); use the sde one instead, with correct hsic settings possibilities if (!ret) c->ops.setup_pcc = reg_dmav1_setup_dspp_pccv4; diff --git a/drivers/gpu/drm/msm/sde/sde_hw_rot.c b/drivers/gpu/drm/msm/sde/sde_hw_rot.c index facec3d6c076..494f7467c803 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_rot.c +++ b/drivers/gpu/drm/msm/sde/sde_hw_rot.c @@ -61,16 +61,6 @@ static struct sde_rot_cfg *_rot_offset(enum sde_rot rot, return ERR_PTR(-EINVAL); } -/** - * _sde_hw_rot_reg_dump - perform register dump - * @ptr: private pointer to rotator platform device - * return: None - */ -static void _sde_hw_rot_reg_dump(void *ptr) -{ - sde_rotator_inline_reg_dump((struct platform_device *) ptr); -} - /** * sde_hw_rot_start - start rotator before any commit * @hw: Pointer to rotator hardware driver @@ -88,10 +78,6 @@ static int sde_hw_rot_start(struct sde_hw_rot *hw) pdev = hw->caps->pdev; - rc = sde_dbg_reg_register_cb(hw->name, _sde_hw_rot_reg_dump, pdev); - if (rc) - SDE_ERROR("failed to register debug dump %d\n", rc); - hw->rot_ctx = sde_rotator_inline_open(pdev); if (IS_ERR_OR_NULL(hw->rot_ctx)) { rc = PTR_ERR(hw->rot_ctx); @@ -116,9 +102,6 @@ static void sde_hw_rot_stop(struct sde_hw_rot *hw) sde_rotator_inline_release(hw->rot_ctx); hw->rot_ctx = NULL; - - sde_dbg_reg_unregister_cb(hw->name, _sde_hw_rot_reg_dump, - hw->caps->pdev); } /** diff --git a/drivers/gpu/drm/msm/sde/sde_hw_top.c b/drivers/gpu/drm/msm/sde/sde_hw_top.c index cf646bdbee40..af8f93c5c7a0 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_top.c +++ b/drivers/gpu/drm/msm/sde/sde_hw_top.c @@ -167,6 +167,7 @@ static void sde_hw_setup_cdm_output(struct sde_hw_mdp *mdp, static bool sde_hw_setup_clk_force_ctrl(struct sde_hw_mdp *mdp, enum sde_clk_ctrl_type clk_ctrl, bool enable) { + struct sde_clk_ctrl_reg *ctrl_reg; struct sde_hw_blk_reg_map *c; u32 reg_off, bit_off; u32 reg_val, new_val; @@ -180,8 +181,12 @@ static bool sde_hw_setup_clk_force_ctrl(struct sde_hw_mdp *mdp, if (clk_ctrl <= SDE_CLK_CTRL_NONE || clk_ctrl >= SDE_CLK_CTRL_MAX) return false; - reg_off = mdp->caps->clk_ctrls[clk_ctrl].reg_off; - bit_off = mdp->caps->clk_ctrls[clk_ctrl].bit_off; + ctrl_reg = (struct sde_clk_ctrl_reg *)&mdp->caps->clk_ctrls[clk_ctrl]; + if (cmpxchg(&ctrl_reg->val, !enable, enable) == enable) + return enable; + + reg_off = ctrl_reg->reg_off; + bit_off = ctrl_reg->bit_off; reg_val = SDE_REG_READ(c, reg_off); @@ -468,11 +473,6 @@ struct sde_hw_mdp *sde_hw_mdptop_init(enum sde_mdp idx, goto blk_init_error; } - sde_dbg_reg_register_dump_range(SDE_DBG_NAME, cfg->name, - mdp->hw.blk_off, mdp->hw.blk_off + mdp->hw.length, - mdp->hw.xin_id); - sde_dbg_set_sde_top_offset(mdp->hw.blk_off); - return mdp; blk_init_error: diff --git a/drivers/gpu/drm/msm/sde/sde_hw_util.c b/drivers/gpu/drm/msm/sde/sde_hw_util.c index 08fe5e191ef5..72591de78f3a 100644 --- a/drivers/gpu/drm/msm/sde/sde_hw_util.c +++ b/drivers/gpu/drm/msm/sde/sde_hw_util.c @@ -76,10 +76,6 @@ void sde_reg_write(struct sde_hw_blk_reg_map *c, u32 val, const char *name) { - /* don't need to mutex protect this */ - if (c->log_mask & sde_hw_util_log_mask) - SDE_DEBUG_DRIVER("[%s:0x%X] <= 0x%X\n", - name, c->blk_off + reg_off, val); writel_relaxed(val, c->base_off + c->blk_off + reg_off); } diff --git a/drivers/gpu/drm/msm/sde/sde_kms.h b/drivers/gpu/drm/msm/sde/sde_kms.h index cabef35b7e22..ae33c6885473 100644 --- a/drivers/gpu/drm/msm/sde/sde_kms.h +++ b/drivers/gpu/drm/msm/sde/sde_kms.h @@ -48,10 +48,7 @@ */ #define SDE_DEBUG(fmt, ...) \ do { \ - if (unlikely(drm_debug & DRM_UT_KMS)) \ - DRM_DEBUG(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ + no_printk(fmt, ##__VA_ARGS__); \ } while (0) /** @@ -60,10 +57,7 @@ */ #define SDE_DEBUG_DRIVER(fmt, ...) \ do { \ - if (unlikely(drm_debug & DRM_UT_DRIVER)) \ - DRM_ERROR(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ + no_printk(fmt, ##__VA_ARGS__); \ } while (0) #define SDE_ERROR(fmt, ...) pr_err("[sde error]" fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/sde/sde_plane.h b/drivers/gpu/drm/msm/sde/sde_plane.h index f924aa692392..b11844e4899b 100644 --- a/drivers/gpu/drm/msm/sde/sde_plane.h +++ b/drivers/gpu/drm/msm/sde/sde_plane.h @@ -96,6 +96,7 @@ struct sde_plane_rot_state { #define SDE_PLANE_DIRTY_PERF 0x8 #define SDE_PLANE_DIRTY_FB_TRANSLATION_MODE 0x10 #define SDE_PLANE_DIRTY_ALL 0xFFFFFFFF +#define SDE_PLANE_DIRTY_QOS 0x200 /** * enum sde_plane_sclcheck_state - User scaler data status diff --git a/drivers/gpu/drm/msm/sde_dbg.h b/drivers/gpu/drm/msm/sde_dbg.h index 9efb893d52e9..f4e743b027ae 100644 --- a/drivers/gpu/drm/msm/sde_dbg.h +++ b/drivers/gpu/drm/msm/sde_dbg.h @@ -158,7 +158,7 @@ extern struct sde_dbg_evtlog *sde_dbg_base_evtlog; #define SDE_DBG_CTRL(...) sde_dbg_ctrl(__func__, ##__VA_ARGS__, \ SDE_DBG_DUMP_DATA_LIMITER) -#if defined(CONFIG_DEBUG_FS) +#if 0 /** * sde_evtlog_init - allocate a new event log object @@ -418,7 +418,7 @@ static inline void sde_dbg_reg_register_dump_range(const char *base_name, { } -void sde_dbg_set_sde_top_offset(u32 blk_off) +static inline void sde_dbg_set_sde_top_offset(u32 blk_off) { } diff --git a/drivers/gpu/drm/msm/sde_rsc.c b/drivers/gpu/drm/msm/sde_rsc.c index 8179b10ec858..119ecdd97649 100644 --- a/drivers/gpu/drm/msm/sde_rsc.c +++ b/drivers/gpu/drm/msm/sde_rsc.c @@ -932,7 +932,7 @@ int sde_rsc_client_vote(struct sde_rsc_client *caller_client, } EXPORT_SYMBOL(sde_rsc_client_vote); -#if defined(CONFIG_DEBUG_FS) +#if 0 void sde_rsc_debug_dump(u32 mux_sel) { struct sde_rsc_priv *rsc; diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile index 005822672101..3bb25dee7ff8 100644 --- a/drivers/gpu/msm/Makefile +++ b/drivers/gpu/msm/Makefile @@ -28,16 +28,8 @@ msm_adreno-y += \ adreno_snapshot.o \ adreno_coresight.o \ adreno_trace.o \ - adreno_a3xx.o \ - adreno_a4xx.o \ - adreno_a5xx.o \ adreno_a6xx.o \ - adreno_a3xx_snapshot.o \ - adreno_a4xx_snapshot.o \ - adreno_a5xx_snapshot.o \ adreno_a6xx_snapshot.o \ - adreno_a4xx_preempt.o \ - adreno_a5xx_preempt.o \ adreno_a6xx_preempt.o \ adreno_sysfs.o \ adreno.o \ diff --git a/drivers/gpu/msm/a6xx_reg.h b/drivers/gpu/msm/a6xx_reg.h index 8b96afe8718a..0dc9bc1f0a99 100644 --- a/drivers/gpu/msm/a6xx_reg.h +++ b/drivers/gpu/msm/a6xx_reg.h @@ -1073,5 +1073,16 @@ #define PDC_GPU_TCS3_CMD0_DATA 0x215DB #define PDC_GPU_SEQ_MEM_0 0xA0000 +/* GPU CX_MISC registers */ +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0 0x1 +#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1 0x2 +#define A6XX_LLC_NUM_GPU_SCIDS 5 +#define A6XX_GPU_LLC_SCID_NUM_BITS 5 +#define A6XX_GPU_LLC_SCID_MASK \ + ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1) +#define A6XX_GPUHTW_LLC_SCID_SHIFT 25 +#define A6XX_GPUHTW_LLC_SCID_MASK \ + (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT) + #endif /* _A6XX_REG_H */ diff --git a/drivers/gpu/msm/adreno-gpulist.h b/drivers/gpu/msm/adreno-gpulist.h index 614b1c7697ef..2b988742fd70 100644 --- a/drivers/gpu/msm/adreno-gpulist.h +++ b/drivers/gpu/msm/adreno-gpulist.h @@ -14,350 +14,6 @@ #define ANY_ID (~0) static const struct adreno_gpu_core adreno_gpulist[] = { - { - .gpurev = ADRENO_REV_A306, - .core = 3, - .major = 0, - .minor = 6, - .patchid = 0x00, - .features = ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a300_pm4.fw", - .pfpfw_name = "a300_pfp.fw", - .gpudev = &adreno_a3xx_gpudev, - .gmem_size = SZ_128K, - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A306A, - .core = 3, - .major = 0, - .minor = 6, - .patchid = 0x20, - .features = ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a300_pm4.fw", - .pfpfw_name = "a300_pfp.fw", - .gpudev = &adreno_a3xx_gpudev, - .gmem_size = SZ_128K, - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A304, - .core = 3, - .major = 0, - .minor = 4, - .patchid = 0x00, - .features = ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a300_pm4.fw", - .pfpfw_name = "a300_pfp.fw", - .gpudev = &adreno_a3xx_gpudev, - .gmem_size = (SZ_64K + SZ_32K), - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A405, - .core = 4, - .major = 0, - .minor = 5, - .patchid = ANY_ID, - .features = ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a420_pm4.fw", - .pfpfw_name = "a420_pfp.fw", - .gpudev = &adreno_a4xx_gpudev, - .gmem_size = SZ_256K, - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A420, - .core = 4, - .major = 2, - .minor = 0, - .patchid = ANY_ID, - .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | - ADRENO_USE_BOOTSTRAP | ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a420_pm4.fw", - .pfpfw_name = "a420_pfp.fw", - .gpudev = &adreno_a4xx_gpudev, - .gmem_size = (SZ_1M + SZ_512K), - .pm4_jt_idx = 0x901, - .pm4_jt_addr = 0x300, - .pfp_jt_idx = 0x401, - .pfp_jt_addr = 0x400, - .pm4_bstrp_size = 0x06, - .pfp_bstrp_size = 0x28, - .pfp_bstrp_ver = 0x4ff083, - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A430, - .core = 4, - .major = 3, - .minor = 0, - .patchid = ANY_ID, - .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | - ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC | ADRENO_PPD | - ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION | - ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a420_pm4.fw", - .pfpfw_name = "a420_pfp.fw", - .gpudev = &adreno_a4xx_gpudev, - .gmem_size = (SZ_1M + SZ_512K), - .pm4_jt_idx = 0x901, - .pm4_jt_addr = 0x300, - .pfp_jt_idx = 0x401, - .pfp_jt_addr = 0x400, - .pm4_bstrp_size = 0x06, - .pfp_bstrp_size = 0x28, - .pfp_bstrp_ver = 0x4ff083, - .shader_offset = 0x20000, - .shader_size = 0x10000, - .num_protected_regs = 0x18, - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A418, - .core = 4, - .major = 1, - .minor = 8, - .patchid = ANY_ID, - .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | - ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC | - ADRENO_SOFT_FAULT_DETECT, - .pm4fw_name = "a420_pm4.fw", - .pfpfw_name = "a420_pfp.fw", - .gpudev = &adreno_a4xx_gpudev, - .gmem_size = (SZ_512K), - .pm4_jt_idx = 0x901, - .pm4_jt_addr = 0x300, - .pfp_jt_idx = 0x401, - .pfp_jt_addr = 0x400, - .pm4_bstrp_size = 0x06, - .pfp_bstrp_size = 0x28, - .pfp_bstrp_ver = 0x4ff083, - .shader_offset = 0x20000, /* SP and TP addresses */ - .shader_size = 0x10000, - .num_protected_regs = 0x18, - .busy_mask = 0x7FFFFFFE, - }, - { - .gpurev = ADRENO_REV_A530, - .core = 5, - .major = 3, - .minor = 0, - .patchid = 0, - .pm4fw_name = "a530v1_pm4.fw", - .pfpfw_name = "a530v1_pfp.fw", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = SZ_1M, - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A530, - .core = 5, - .major = 3, - .minor = 0, - .patchid = 1, - .features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM | - ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a530_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = SZ_1M, - .num_protected_regs = 0x20, - .gpmufw_name = "a530_gpmu.fw2", - .gpmu_major = 1, - .gpmu_minor = 0, - .busy_mask = 0xFFFFFFFE, - .lm_major = 3, - .lm_minor = 0, - .gpmu_tsens = 0x00060007, - .max_power = 5448, - .regfw_name = "a530v2_seq.fw2", - }, - { - .gpurev = ADRENO_REV_A530, - .core = 5, - .major = 3, - .minor = 0, - .patchid = ANY_ID, - .features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM | - ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a530_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = SZ_1M, - .num_protected_regs = 0x20, - .gpmufw_name = "a530v3_gpmu.fw2", - .gpmu_major = 1, - .gpmu_minor = 0, - .busy_mask = 0xFFFFFFFE, - .lm_major = 1, - .lm_minor = 0, - .gpmu_tsens = 0x00060007, - .max_power = 5448, - .regfw_name = "a530v3_seq.fw2", - }, - { - .gpurev = ADRENO_REV_A504, - .core = 5, - .major = 0, - .minor = 4, - .patchid = ANY_ID, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a506_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = (SZ_128K + SZ_8K), - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A505, - .core = 5, - .major = 0, - .minor = 5, - .patchid = ANY_ID, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a506_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = (SZ_128K + SZ_8K), - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A506, - .core = 5, - .major = 0, - .minor = 6, - .patchid = ANY_ID, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a506_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = (SZ_128K + SZ_8K), - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A510, - .core = 5, - .major = 1, - .minor = 0, - .patchid = ANY_ID, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = SZ_256K, - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A540, - .core = 5, - .major = 4, - .minor = 0, - .patchid = 0, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | - ADRENO_GPMU | ADRENO_SPTP_PC, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a540_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = SZ_1M, - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - .gpmufw_name = "a540_gpmu.fw2", - .gpmu_major = 3, - .gpmu_minor = 0, - .gpmu_tsens = 0x000C000D, - .max_power = 5448, - }, - { - .gpurev = ADRENO_REV_A540, - .core = 5, - .major = 4, - .minor = 0, - .patchid = ANY_ID, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | - ADRENO_GPMU | ADRENO_SPTP_PC, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a540_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = SZ_1M, - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - .gpmufw_name = "a540_gpmu.fw2", - .gpmu_major = 3, - .gpmu_minor = 0, - .gpmu_tsens = 0x000C000D, - .max_power = 5448, - }, - { - .gpurev = ADRENO_REV_A512, - .core = 5, - .major = 1, - .minor = 2, - .patchid = ANY_ID, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a512_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = (SZ_256K + SZ_16K), - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A508, - .core = 5, - .major = 0, - .minor = 8, - .patchid = ANY_ID, - .features = ADRENO_PREEMPTION | ADRENO_64BIT | - ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION, - .pm4fw_name = "a530_pm4.fw", - .pfpfw_name = "a530_pfp.fw", - .zap_name = "a508_zap", - .gpudev = &adreno_a5xx_gpudev, - .gmem_size = (SZ_128K + SZ_8K), - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - }, - { - .gpurev = ADRENO_REV_A630, - .core = 6, - .major = 3, - .minor = 0, - .patchid = 0, - .features = ADRENO_64BIT | ADRENO_RPMH | - ADRENO_GPMU | ADRENO_CONTENT_PROTECTION | ADRENO_LM, - .sqefw_name = "a630_sqe.fw", - .zap_name = "a630_zap", - .gpudev = &adreno_a6xx_gpudev, - .gmem_size = SZ_1M, - .num_protected_regs = 0x20, - .busy_mask = 0xFFFFFFFE, - .gpmufw_name = "a630_gmu.bin", - .gpmu_major = 0x1, - .gpmu_minor = 0x003, - .gpmu_tsens = 0x000C000D, - .max_power = 5448, - }, { .gpurev = ADRENO_REV_A630, .core = 6, diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 53f156242797..45640b4b6435 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -59,15 +58,15 @@ MODULE_PARM_DESC(swfdetect, "Enable soft fault detection"); #define DRIVER_VERSION_MAJOR 3 #define DRIVER_VERSION_MINOR 1 -#define KGSL_LOG_LEVEL_DEFAULT 3 +#define KGSL_LOG_LEVEL_DEFAULT 0 -static void adreno_input_work(struct work_struct *work); +static void adreno_pwr_on_work(struct work_struct *work); static unsigned int counter_delta(struct kgsl_device *device, unsigned int reg, unsigned int *counter); static struct devfreq_msm_adreno_tz_data adreno_tz_data = { .bus = { - .max = 350, + .max = 1200, }, .device_id = KGSL_DEVICE_3D0, }; @@ -105,8 +104,6 @@ static struct adreno_device device_3d0 = { .ft_policy = KGSL_FT_DEFAULT_POLICY, .ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY, .long_ib_detect = 1, - .input_work = __WORK_INITIALIZER(device_3d0.input_work, - adreno_input_work), .pwrctrl_flag = BIT(ADRENO_SPTP_PC_CTRL) | BIT(ADRENO_PPD_CTRL) | BIT(ADRENO_LM_CTRL) | BIT(ADRENO_HWCG_CTRL) | BIT(ADRENO_THROTTLING_CTRL), @@ -120,6 +117,8 @@ static struct adreno_device device_3d0 = { .skipsaverestore = 1, .usesgmem = 1, }, + .pwr_on_work = __WORK_INITIALIZER(device_3d0.pwr_on_work, + adreno_pwr_on_work), }; /* Ptr to array for the current set of fault detect registers */ @@ -141,9 +140,6 @@ static unsigned int adreno_ft_regs_default[] = { /* Nice level for the higher priority GPU start thread */ int adreno_wake_nice = -7; -/* Number of milliseconds to stay active active after a wake on touch */ -unsigned int adreno_wake_timeout = 100; - /** * adreno_readreg64() - Read a 64bit register by getting its offset from the * offset array defined in gpudev node @@ -426,152 +422,17 @@ void adreno_fault_detect_stop(struct adreno_device *adreno_dev) adreno_dev->fast_hang_detect = 0; } -/* - * A workqueue callback responsible for actually turning on the GPU after a - * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any - * active_count protection to avoid the need to maintain state. Either - * somebody will start using the GPU or the idle timer will fire and put the - * GPU back into slumber. - */ -static void adreno_input_work(struct work_struct *work) +static void adreno_pwr_on_work(struct work_struct *work) { - struct adreno_device *adreno_dev = container_of(work, - struct adreno_device, input_work); + struct adreno_device *adreno_dev = + container_of(work, typeof(*adreno_dev), pwr_on_work); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); mutex_lock(&device->mutex); - - device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; - - /* - * Don't schedule adreno_start in a high priority workqueue, we are - * already in a workqueue which should be sufficient - */ kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - - /* - * When waking up from a touch event we want to stay active long enough - * for the user to send a draw command. The default idle timer timeout - * is shorter than we want so go ahead and push the idle timer out - * further for this special case - */ - mod_timer(&device->idle_timer, - jiffies + msecs_to_jiffies(adreno_wake_timeout)); mutex_unlock(&device->mutex); } -/* - * Process input events and schedule work if needed. At this point we are only - * interested in groking EV_ABS touchscreen events - */ -static void adreno_input_event(struct input_handle *handle, unsigned int type, - unsigned int code, int value) -{ - struct kgsl_device *device = handle->handler->private; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - - /* Only consider EV_ABS (touch) events */ - if (type != EV_ABS) - return; - - /* - * Don't do anything if anything hasn't been rendered since we've been - * here before - */ - - if (device->flags & KGSL_FLAG_WAKE_ON_TOUCH) - return; - - /* - * If the device is in nap, kick the idle timer to make sure that we - * don't go into slumber before the first render. If the device is - * already in slumber schedule the wake. - */ - - if (device->state == KGSL_STATE_NAP) { - /* - * Set the wake on touch bit to keep from coming back here and - * keeping the device in nap without rendering - */ - - device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; - - mod_timer(&device->idle_timer, - jiffies + device->pwrctrl.interval_timeout); - } else if (device->state == KGSL_STATE_SLUMBER) { - schedule_work(&adreno_dev->input_work); - } -} - -#ifdef CONFIG_INPUT -static int adreno_input_connect(struct input_handler *handler, - struct input_dev *dev, const struct input_device_id *id) -{ - struct input_handle *handle; - int ret; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (handle == NULL) - return -ENOMEM; - - handle->dev = dev; - handle->handler = handler; - handle->name = handler->name; - - ret = input_register_handle(handle); - if (ret) { - kfree(handle); - return ret; - } - - ret = input_open_device(handle); - if (ret) { - input_unregister_handle(handle); - kfree(handle); - } - - return ret; -} - -static void adreno_input_disconnect(struct input_handle *handle) -{ - input_close_device(handle); - input_unregister_handle(handle); - kfree(handle); -} -#else -static int adreno_input_connect(struct input_handler *handler, - struct input_dev *dev, const struct input_device_id *id) -{ - return 0; -} -static void adreno_input_disconnect(struct input_handle *handle) {} -#endif - -/* - * We are only interested in EV_ABS events so only register handlers for those - * input devices that have EV_ABS events - */ -static const struct input_device_id adreno_input_ids[] = { - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT, - .evbit = { BIT_MASK(EV_ABS) }, - /* assumption: MT_.._X & MT_.._Y are in the same long */ - .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = - BIT_MASK(ABS_MT_POSITION_X) | - BIT_MASK(ABS_MT_POSITION_Y) }, - }, - { }, -}; - -static struct input_handler adreno_input_handler = { - .event = adreno_input_event, - .connect = adreno_input_connect, - .disconnect = adreno_input_disconnect, - .name = "kgsl", - .id_table = adreno_input_ids, -}; - /* * _soft_reset() - Soft reset GPU * @adreno_dev: Pointer to adreno device @@ -1028,7 +889,7 @@ static void adreno_of_get_initial_pwrlevel(struct adreno_device *adreno_dev, init_level = 1; pwr->active_pwrlevel = init_level; - pwr->default_pwrlevel = init_level; + pwr->default_pwrlevel = 7; } static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, @@ -1172,29 +1033,26 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev, /* get pm-qos-active-latency, set it to default if not found */ if (of_property_read_u32(node, "qcom,pm-qos-active-latency", &device->pwrctrl.pm_qos_active_latency)) - device->pwrctrl.pm_qos_active_latency = 501; + device->pwrctrl.pm_qos_active_latency = 1000; /* get pm-qos-cpu-mask-latency, set it to default if not found */ if (of_property_read_u32(node, "qcom,l2pc-cpu-mask-latency", &device->pwrctrl.pm_qos_cpu_mask_latency)) - device->pwrctrl.pm_qos_cpu_mask_latency = 501; + device->pwrctrl.pm_qos_cpu_mask_latency = 1000; /* get pm-qos-wakeup-latency, set it to default if not found */ if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency", &device->pwrctrl.pm_qos_wakeup_latency)) - device->pwrctrl.pm_qos_wakeup_latency = 101; + device->pwrctrl.pm_qos_wakeup_latency = 100; if (of_property_read_u32(node, "qcom,idle-timeout", &timeout)) - timeout = 80; + timeout = 64; device->pwrctrl.interval_timeout = msecs_to_jiffies(timeout); device->pwrctrl.bus_control = of_property_read_bool(node, "qcom,bus-control"); - device->pwrctrl.input_disable = of_property_read_bool(node, - "qcom,disable-wake-on-touch"); - return 0; } @@ -1263,6 +1121,22 @@ static void adreno_cx_dbgc_probe(struct kgsl_device *device) KGSL_DRV_WARN(device, "cx_dbgc ioremap failed\n"); } +static void adreno_cx_misc_probe(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct resource *res; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "cx_misc"); + + if (res == NULL) + return; + + adreno_dev->cx_misc_len = resource_size(res); + adreno_dev->cx_misc_virt = devm_ioremap(device->dev, + res->start, adreno_dev->cx_misc_len); +} + static void adreno_efuse_read_soc_hw_rev(struct adreno_device *adreno_dev) { unsigned int val; @@ -1383,6 +1257,9 @@ static int adreno_probe(struct platform_device *pdev) /* Probe for the optional CX_DBGC block */ adreno_cx_dbgc_probe(device); + /* Probe for the optional CX_MISC block */ + adreno_cx_misc_probe(device); + /* * qcom,iommu-secure-id is used to identify MMUs that can handle secure * content but that is only part of the story - the GPU also has to be @@ -1434,21 +1311,6 @@ static int adreno_probe(struct platform_device *pdev) PTR_ERR(adreno_dev->gpuhtw_llc_slice)); adreno_dev->gpuhtw_llc_slice = NULL; } - -#ifdef CONFIG_INPUT - if (!device->pwrctrl.input_disable) { - adreno_input_handler.private = device; - /* - * It isn't fatal if we cannot register the input handler. Sad, - * perhaps, but not fatal - */ - if (input_register_handler(&adreno_input_handler)) { - adreno_input_handler.private = NULL; - KGSL_DRV_ERR(device, - "Unable to register the input handler\n"); - } - } -#endif out: if (status) { adreno_ringbuffer_close(adreno_dev); @@ -1500,10 +1362,6 @@ static int adreno_remove(struct platform_device *pdev) /* The memory is fading */ _adreno_free_memories(adreno_dev); -#ifdef CONFIG_INPUT - if (adreno_input_handler.private) - input_unregister_handler(&adreno_input_handler); -#endif adreno_sysfs_close(adreno_dev); adreno_coresight_remove(adreno_dev); @@ -1770,13 +1628,6 @@ int adreno_set_unsecured_mode(struct adreno_device *adreno_dev, if (!adreno_is_a5xx(adreno_dev) && !adreno_is_a6xx(adreno_dev)) return -EINVAL; - if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS) && - adreno_is_a5xx(adreno_dev)) { - ret = a5xx_critical_packet_submit(adreno_dev, rb); - if (ret) - return ret; - } - /* GPU comes up in secured mode, make it unsecured by default */ if (adreno_dev->zap_loaded) ret = adreno_switch_to_unsecure_mode(adreno_dev, rb); @@ -1854,7 +1705,7 @@ static int _adreno_start(struct adreno_device *adreno_dev) status = kgsl_mmu_start(device); if (status) - goto error_pwr_off; + goto error_boot_oob_clear; _set_secvid(device); @@ -1996,6 +1847,17 @@ static int _adreno_start(struct adreno_device *adreno_dev) } } + if (kgsl_gmu_isenabled(device) && adreno_dev->perfctr_ifpc_lo == 0) { + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 4, + &adreno_dev->perfctr_ifpc_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + if (ret) { + WARN_ONCE(1, "Unable to get perf counter for IFPC\n"); + adreno_dev->perfctr_ifpc_lo = 0; + } + } + /* Clear the busy_data stats - we're starting over from scratch */ adreno_dev->busy_data.gpu_busy = 0; adreno_dev->busy_data.bif_ram_cycles = 0; @@ -2004,6 +1866,7 @@ static int _adreno_start(struct adreno_device *adreno_dev) adreno_dev->busy_data.bif_ram_cycles_write_ch1 = 0; adreno_dev->busy_data.bif_starved_ram = 0; adreno_dev->busy_data.bif_starved_ram_ch1 = 0; + adreno_dev->busy_data.num_ifpc = 0; /* Restore performance counter registers with saved values */ adreno_perfcounter_restore(adreno_dev); @@ -2063,6 +1926,12 @@ static int _adreno_start(struct adreno_device *adreno_dev) error_mmu_off: kgsl_mmu_stop(&device->mmu); +error_boot_oob_clear: + if (gpudev->oob_clear && + ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) + gpudev->oob_clear(adreno_dev, + OOB_BOOT_SLUMBER_CLEAR_MASK); + error_pwr_off: /* set the state back to original state */ kgsl_pwrctrl_change_state(device, state); @@ -3035,6 +2904,9 @@ int adreno_spin_idle(struct adreno_device *adreno_dev, unsigned int timeout) if (adreno_isidle(KGSL_DEVICE(adreno_dev))) return 0; + /* relax tight loop */ + cond_resched(); + } while (time_before(jiffies, wait)); /* @@ -3287,6 +3159,54 @@ void adreno_cx_dbgc_regwrite(struct kgsl_device *device, __raw_writel(value, adreno_dev->cx_dbgc_virt + cx_dbgc_offset); } +void adreno_cx_misc_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value) +{ + unsigned int cx_misc_offset; + + cx_misc_offset = (offsetwords << 2); + if (!adreno_dev->cx_misc_virt || + (cx_misc_offset >= adreno_dev->cx_misc_len)) + return; + + *value = __raw_readl(adreno_dev->cx_misc_virt + cx_misc_offset); + + /* + * ensure this read finishes before the next one. + * i.e. act like normal readl() + */ + rmb(); +} + +void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int value) +{ + unsigned int cx_misc_offset; + + cx_misc_offset = (offsetwords << 2); + if (!adreno_dev->cx_misc_virt || + (cx_misc_offset >= adreno_dev->cx_misc_len)) + return; + + /* + * ensure previous writes post before this one, + * i.e. act like normal writel() + */ + wmb(); + __raw_writel(value, adreno_dev->cx_misc_virt + cx_misc_offset); +} + +void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + unsigned int val = 0; + + adreno_cx_misc_regread(adreno_dev, offsetwords, &val); + val &= ~mask; + adreno_cx_misc_regwrite(adreno_dev, offsetwords, val | bits); +} + /** * adreno_waittimestamp - sleep while waiting for the specified timestamp * @device - pointer to a KGSL device structure @@ -3427,6 +3347,47 @@ static int adreno_readtimestamp(struct kgsl_device *device, return status; } +/** + * adreno_device_private_create(): Allocate an adreno_device_private structure + */ +static struct kgsl_device_private *adreno_device_private_create(void) +{ + struct adreno_device_private *adreno_priv = + kzalloc(sizeof(*adreno_priv), GFP_KERNEL); + + if (adreno_priv) { + INIT_LIST_HEAD(&adreno_priv->perfcounter_list); + return &adreno_priv->dev_priv; + } + return NULL; +} + +/** + * adreno_device_private_destroy(): Destroy an adreno_device_private structure + * and release the perfcounters held by the kgsl fd. + * @dev_priv: The kgsl device private structure + */ +static void adreno_device_private_destroy(struct kgsl_device_private *dev_priv) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_device_private *adreno_priv = + container_of(dev_priv, struct adreno_device_private, + dev_priv); + struct adreno_perfcounter_list_node *p, *tmp; + + mutex_lock(&device->mutex); + list_for_each_entry_safe(p, tmp, &adreno_priv->perfcounter_list, node) { + adreno_perfcounter_put(adreno_dev, p->groupid, + p->countable, PERFCOUNTER_FLAG_NONE); + list_del(&p->node); + kfree(p); + } + mutex_unlock(&device->mutex); + + kfree(adreno_priv); +} + static inline s64 adreno_ticks_to_us(u32 ticks, u32 freq) { freq /= 1000000; @@ -3514,6 +3475,17 @@ static void adreno_power_stats(struct kgsl_device *device, stats->ram_time = ram_cycles; stats->ram_wait = starved_ram; } + + if (adreno_dev->perfctr_ifpc_lo != 0) { + uint32_t num_ifpc; + + num_ifpc = counter_delta(device, adreno_dev->perfctr_ifpc_lo, + &busy->num_ifpc); + adreno_dev->ifpc_count += num_ifpc; + if (num_ifpc > 0) + trace_adreno_ifpc_count(adreno_dev->ifpc_count); + } + if (adreno_dev->lm_threshold_count && gpudev->count_throttles) gpudev->count_throttles(adreno_dev, adj); @@ -3614,6 +3586,9 @@ static void adreno_iommu_sync(struct kgsl_device *device, bool sync) struct scm_desc desc = {0}; int ret; + if (!ADRENO_QUIRK(ADRENO_DEVICE(device), ADRENO_QUIRK_IOMMU_SYNC)) + return; + if (sync == true) { mutex_lock(&kgsl_mmu_sync); desc.args[0] = true; @@ -3630,25 +3605,29 @@ static void adreno_iommu_sync(struct kgsl_device *device, bool sync) } } -static void _regulator_disable(struct kgsl_regulator *regulator, bool poll) +static void +_regulator_disable(struct kgsl_regulator *regulator, unsigned int timeout) { - unsigned long wait_time = jiffies + msecs_to_jiffies(200); + unsigned long wait_time; if (IS_ERR_OR_NULL(regulator->reg)) return; regulator_disable(regulator->reg); - if (poll == false) - return; + wait_time = jiffies + msecs_to_jiffies(timeout); + /* Poll for regulator status to ensure it's OFF */ while (!time_after(jiffies, wait_time)) { if (!regulator_is_enabled(regulator->reg)) return; - cpu_relax(); + usleep_range(10, 100); } - KGSL_CORE_ERR("regulator '%s' still on after 200ms\n", regulator->name); + if (!regulator_is_enabled(regulator->reg)) + return; + + KGSL_CORE_ERR("regulator '%s' disable timed out\n", regulator->name); } static void adreno_regulator_disable_poll(struct kgsl_device *device) @@ -3656,18 +3635,13 @@ static void adreno_regulator_disable_poll(struct kgsl_device *device) struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; int i; - - /* Fast path - hopefully we don't need this quirk */ - if (!ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_IOMMU_SYNC)) { - for (i = KGSL_MAX_REGULATORS - 1; i >= 0; i--) - _regulator_disable(&pwr->regulators[i], false); - return; - } + unsigned int timeout = + ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_IOMMU_SYNC) ? 200 : 5000; adreno_iommu_sync(device, true); - for (i = 0; i < KGSL_MAX_REGULATORS; i++) - _regulator_disable(&pwr->regulators[i], true); + for (i = KGSL_MAX_REGULATORS - 1; i >= 0; i--) + _regulator_disable(&pwr->regulators[i], timeout); adreno_iommu_sync(device, false); } @@ -3721,8 +3695,9 @@ static const struct kgsl_functable adreno_functable = { .snapshot = adreno_snapshot, .irq_handler = adreno_irq_handler, .drain = adreno_drain, + .device_private_create = adreno_device_private_create, + .device_private_destroy = adreno_device_private_destroy, /* Optional functions */ - .snapshot_gmu = adreno_snapshot_gmu, .drawctxt_create = adreno_drawctxt_create, .drawctxt_detach = adreno_drawctxt_detach, .drawctxt_destroy = adreno_drawctxt_destroy, @@ -3788,7 +3763,7 @@ static struct platform_driver kgsl_bus_platform_driver = { } }; -static int __init kgsl_3d_init(void) +static int __kgsl_3d_init(void *arg) { int ret; @@ -3803,6 +3778,16 @@ static int __init kgsl_3d_init(void) return ret; } +static int __init kgsl_3d_init(void) +{ + struct task_struct *kgsl_3d_init_task = + kthread_run(__kgsl_3d_init, NULL, "kgsl_3d_init"); + if (IS_ERR(kgsl_3d_init_task)) + return PTR_ERR(kgsl_3d_init_task); + else + return 0; +} + static void __exit kgsl_3d_exit(void) { platform_driver_unregister(&adreno_platform_driver); diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 5cc0274b9631..0215041c836c 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -315,6 +315,7 @@ struct adreno_busy_data { unsigned int bif_ram_cycles_write_ch1; unsigned int bif_starved_ram; unsigned int bif_starved_ram_ch1; + unsigned int num_ifpc; unsigned int throttle_cycles[ADRENO_GPMU_THROTTLE_COUNTERS]; }; @@ -332,6 +333,29 @@ struct adreno_firmware { struct kgsl_memdesc memdesc; }; +/** + * struct adreno_perfcounter_list_node - struct to store perfcounters + * allocated by a process on a kgsl fd. + * @groupid: groupid of the allocated perfcounter + * @countable: countable assigned to the allocated perfcounter + * @node: list node for perfcounter_list of a process + */ +struct adreno_perfcounter_list_node { + unsigned int groupid; + unsigned int countable; + struct list_head node; +}; + +/** + * struct adreno_device_private - Adreno private structure per fd + * @dev_priv: the kgsl device private structure + * @perfcounter_list: list of perfcounters used by the process + */ +struct adreno_device_private { + struct kgsl_device_private dev_priv; + struct list_head perfcounter_list; +}; + /** * struct adreno_gpu_core - A specific GPU core definition * @gpurev: Unique GPU revision identifier @@ -411,6 +435,8 @@ enum gpu_coresight_sources { * @chipid: Chip ID specific to the GPU * @gmem_base: Base physical address of GMEM * @gmem_size: GMEM size + * @cx_misc_len: Length of the CX MISC register block + * @cx_misc_virt: Pointer where the CX MISC block is mapped * @gpucore: Pointer to the adreno_gpu_core structure * @pfp_fw: Buffer which holds the pfp ucode * @pfp_fw_size: Size of pfp ucode buffer @@ -436,7 +462,7 @@ enum gpu_coresight_sources { * @dispatcher: Container for adreno GPU dispatcher * @pwron_fixup: Command buffer to run a post-power collapse shader workaround * @pwron_fixup_dwords: Number of dwords in the command buffer - * @input_work: Work struct for turning on the GPU after a touch event + * @pwr_on_work: Work struct for turning on the GPU * @busy_data: Struct holding GPU VBIF busy stats * @ram_cycles_lo: Number of DDR clock cycles for the monitor session (Only * DDR channel 0 read cycles in case of GBIF) @@ -450,6 +476,7 @@ enum gpu_coresight_sources { * stall cycles in case of GBIF) * @starved_ram_lo_ch1: Number of cycles GBIF is stalled by DDR channel 1 * @perfctr_pwr_lo: GPU busy cycles + * @perfctr_ifpc_lo: IFPC count * @halt: Atomic variable to check whether the GPU is currently halted * @pending_irq_refcnt: Atomic variable to keep track of running IRQ handlers * @ctx_d_debugfs: Context debugfs node @@ -467,6 +494,7 @@ enum gpu_coresight_sources { * @lm_limit: limiting value for LM * @lm_threshold_count: register value for counter for lm threshold breakin * @lm_threshold_cross: number of current peaks exceeding threshold + * @ifpc_count: Number of times the GPU went into IFPC * @speed_bin: Indicate which power level set to use * @csdev: Pointer to a coresight device (if applicable) * @gpmu_throttle_counters - counteers for number of throttled clocks @@ -489,6 +517,8 @@ struct adreno_device { unsigned long cx_dbgc_base; unsigned int cx_dbgc_len; void __iomem *cx_dbgc_virt; + unsigned int cx_misc_len; + void __iomem *cx_misc_virt; const struct adreno_gpu_core *gpucore; struct adreno_firmware fw[2]; size_t gpmu_cmds_size; @@ -507,7 +537,7 @@ struct adreno_device { struct adreno_dispatcher dispatcher; struct kgsl_memdesc pwron_fixup; unsigned int pwron_fixup_dwords; - struct work_struct input_work; + struct work_struct pwr_on_work; struct adreno_busy_data busy_data; unsigned int ram_cycles_lo; unsigned int ram_cycles_lo_ch1_read; @@ -516,6 +546,7 @@ struct adreno_device { unsigned int starved_ram_lo; unsigned int starved_ram_lo_ch1; unsigned int perfctr_pwr_lo; + unsigned int perfctr_ifpc_lo; atomic_t halt; atomic_t pending_irq_refcnt; struct dentry *ctx_d_debugfs; @@ -535,6 +566,7 @@ struct adreno_device { uint32_t lm_limit; uint32_t lm_threshold_count; uint32_t lm_threshold_cross; + uint32_t ifpc_count; unsigned int speed_bin; unsigned int quirks; @@ -924,7 +956,6 @@ struct adreno_gpudev { /* GPU specific function hooks */ void (*irq_trace)(struct adreno_device *, unsigned int status); void (*snapshot)(struct adreno_device *, struct kgsl_snapshot *); - void (*snapshot_gmu)(struct adreno_device *, struct kgsl_snapshot *); void (*platform_setup)(struct adreno_device *); void (*init)(struct adreno_device *); void (*remove)(struct adreno_device *); @@ -1068,13 +1099,9 @@ extern unsigned int *adreno_ft_regs; extern unsigned int adreno_ft_regs_num; extern unsigned int *adreno_ft_regs_val; -extern struct adreno_gpudev adreno_a3xx_gpudev; -extern struct adreno_gpudev adreno_a4xx_gpudev; -extern struct adreno_gpudev adreno_a5xx_gpudev; extern struct adreno_gpudev adreno_a6xx_gpudev; extern int adreno_wake_nice; -extern unsigned int adreno_wake_timeout; int adreno_start(struct kgsl_device *device, int priority); int adreno_soft_reset(struct kgsl_device *device); @@ -1106,9 +1133,6 @@ void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct kgsl_context *context); -void adreno_snapshot_gmu(struct kgsl_device *device, - struct kgsl_snapshot *snapshot); - int adreno_reset(struct kgsl_device *device, int fault); void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev, @@ -1154,17 +1178,24 @@ void adreno_cx_dbgc_regread(struct kgsl_device *adreno_device, void adreno_cx_dbgc_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); u32 adreno_get_ucode_version(const u32 *data); +void adreno_cx_misc_regread(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int *value); +void adreno_cx_misc_regwrite(struct adreno_device *adreno_dev, + unsigned int offsetwords, unsigned int value); +void adreno_cx_misc_regrmw(struct adreno_device *adreno_dev, + unsigned int offsetwords, + unsigned int mask, unsigned int bits); + #define ADRENO_TARGET(_name, _id) \ static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \ { \ - return (ADRENO_GPUREV(adreno_dev) == (_id)); \ + return 0; \ } static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) { - return ((ADRENO_GPUREV(adreno_dev) >= 300) && - (ADRENO_GPUREV(adreno_dev) < 400)); + return 0; } ADRENO_TARGET(a304, ADRENO_REV_A304) @@ -1179,28 +1210,24 @@ ADRENO_TARGET(a330, ADRENO_REV_A330) static inline int adreno_is_a330v2(struct adreno_device *adreno_dev) { - return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0)); + return 0; } static inline int adreno_is_a330v21(struct adreno_device *adreno_dev) { - return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0xF)); + return 0; } static inline int adreno_is_a4xx(struct adreno_device *adreno_dev) { - return ADRENO_GPUREV(adreno_dev) >= 400 && - ADRENO_GPUREV(adreno_dev) < 500; + return 0; } ADRENO_TARGET(a405, ADRENO_REV_A405); static inline int adreno_is_a405v2(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A405) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0x10); + return 0; } ADRENO_TARGET(a418, ADRENO_REV_A418) @@ -1209,14 +1236,12 @@ ADRENO_TARGET(a430, ADRENO_REV_A430) static inline int adreno_is_a430v2(struct adreno_device *adreno_dev) { - return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A430) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1)); + return 0; } static inline int adreno_is_a5xx(struct adreno_device *adreno_dev) { - return ADRENO_GPUREV(adreno_dev) >= 500 && - ADRENO_GPUREV(adreno_dev) < 600; + return 0; } ADRENO_TARGET(a504, ADRENO_REV_A504) @@ -1230,60 +1255,55 @@ ADRENO_TARGET(a540, ADRENO_REV_A540) static inline int adreno_is_a530v1(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0); + return 0; } static inline int adreno_is_a530v2(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); + return 0; } static inline int adreno_is_a530v3(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2); + return 0; } static inline int adreno_is_a504_to_a506(struct adreno_device *adreno_dev) { - return ADRENO_GPUREV(adreno_dev) >= 504 && - ADRENO_GPUREV(adreno_dev) <= 506; + return 0; } static inline int adreno_is_a540v1(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A540) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0); + return 0; } static inline int adreno_is_a540v2(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A540) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); + return 0; } static inline int adreno_is_a6xx(struct adreno_device *adreno_dev) { - return ADRENO_GPUREV(adreno_dev) >= 600 && - ADRENO_GPUREV(adreno_dev) < 700; + return 1; +} + +static inline int adreno_is_a630(struct adreno_device *adreno_dev) +{ + return 1; } ADRENO_TARGET(a615, ADRENO_REV_A615) ADRENO_TARGET(a616, ADRENO_REV_A616) -ADRENO_TARGET(a630, ADRENO_REV_A630) static inline int adreno_is_a630v1(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A630) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0); + return (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0); } static inline int adreno_is_a630v2(struct adreno_device *adreno_dev) { - return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A630) && - (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); + return (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1); } /* @@ -1885,8 +1905,11 @@ static inline int adreno_perfcntr_active_oob_get( ret = gpudev->oob_set(adreno_dev, OOB_PERFCNTR_SET_MASK, OOB_PERFCNTR_CHECK_MASK, OOB_PERFCNTR_CLEAR_MASK); - if (ret) + if (ret) { + adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); kgsl_active_count_put(KGSL_DEVICE(adreno_dev)); + } } return ret; diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c index 4f9891278118..e1f32e8c1419 100644 --- a/drivers/gpu/msm/adreno_a3xx.c +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -156,6 +156,12 @@ static void a3xx_efuse_speed_bin(struct adreno_device *adreno_dev) unsigned int speed_bin[3]; struct kgsl_device *device = &adreno_dev->dev; + if (of_get_property(device->pdev->dev.of_node, + "qcom,gpu-speed-bin-vectors", NULL)) { + adreno_efuse_speed_bin_array(adreno_dev); + return; + } + if (of_property_read_u32_array(device->pdev->dev.of_node, "qcom,gpu-speed-bin", speed_bin, 3)) return; diff --git a/drivers/gpu/msm/adreno_a5xx.c b/drivers/gpu/msm/adreno_a5xx.c index 118f7017fd0b..91fb01104ff4 100644 --- a/drivers/gpu/msm/adreno_a5xx.c +++ b/drivers/gpu/msm/adreno_a5xx.c @@ -2248,7 +2248,7 @@ static int a5xx_microcode_load(struct adreno_device *adreno_dev) desc.args[1] = 13; desc.arginfo = SCM_ARGS(2); - ret = scm_call2(SCM_SIP_FNID(SCM_SVC_BOOT, 0xA), &desc); + ret = scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_BOOT, 0xA), &desc); if (ret) { pr_err("SCM resume call failed with error %d\n", ret); return ret; @@ -2284,6 +2284,7 @@ static int _me_init_ucode_workarounds(struct adreno_device *adreno_dev) switch (ADRENO_GPUREV(adreno_dev)) { case ADRENO_REV_A510: return 0x00000001; /* Ucode workaround for token end syncs */ + case ADRENO_REV_A504: case ADRENO_REV_A505: case ADRENO_REV_A506: case ADRENO_REV_A530: diff --git a/drivers/gpu/msm/adreno_a5xx_snapshot.c b/drivers/gpu/msm/adreno_a5xx_snapshot.c index 4bde8c6fbbae..0de422735b77 100644 --- a/drivers/gpu/msm/adreno_a5xx_snapshot.c +++ b/drivers/gpu/msm/adreno_a5xx_snapshot.c @@ -363,9 +363,9 @@ static const unsigned int a5xx_registers[] = { 0x04E0, 0x04F4, 0X04F8, 0x0529, 0x0531, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807, /* CP */ - 0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0, - 0x0B00, 0x0B12, 0x0B15, 0X0B1C, 0X0B1E, 0x0B28, 0x0B78, 0x0B7F, - 0x0BB0, 0x0BBD, + 0x0800, 0x0803, 0x0806, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, + 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0X0B1C, 0X0B1E, 0x0B28, + 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, /* VSC */ 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, /* GRAS */ diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c index 4d7e21291191..d310da2fa174 100644 --- a/drivers/gpu/msm/adreno_a6xx.c +++ b/drivers/gpu/msm/adreno_a6xx.c @@ -32,17 +32,6 @@ #define MIN_HBB 13 -#define A6XX_LLC_NUM_GPU_SCIDS 5 -#define A6XX_GPU_LLC_SCID_NUM_BITS 5 -#define A6XX_GPU_LLC_SCID_MASK \ - ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1) -#define A6XX_GPUHTW_LLC_SCID_SHIFT 25 -#define A6XX_GPUHTW_LLC_SCID_MASK \ - (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT) - -#define A6XX_GPU_CX_REG_BASE 0x509E000 -#define A6XX_GPU_CX_REG_SIZE 0x1000 - #define GPU_LIMIT_THRESHOLD_ENABLE BIT(31) static int _load_gmu_firmware(struct kgsl_device *device); @@ -994,6 +983,7 @@ static void _set_ordinals(struct adreno_device *adreno_dev, static int a6xx_send_cp_init(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb) { + struct kgsl_device *device = KGSL_DEVICE(adreno_dev); unsigned int *cmds; int ret; @@ -1006,10 +996,17 @@ static int a6xx_send_cp_init(struct adreno_device *adreno_dev, _set_ordinals(adreno_dev, cmds, 11); ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); - if (ret) + if (ret) { adreno_spin_idle_debug(adreno_dev, "CP initialization failed to idle\n"); + if (!adreno_is_a3xx(adreno_dev)) + kgsl_sharedmem_writel(device, &device->scratch, + SCRATCH_RPTR_OFFSET(rb->id), 0); + rb->wptr = 0; + rb->_wptr = 0; + } + return ret; } @@ -2378,7 +2375,7 @@ static int a6xx_rpmh_gpu_pwrctrl(struct adreno_device *adreno_dev, static int a6xx_reset(struct kgsl_device *device, int fault) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - int ret = -EINVAL; + int ret; int i = 0; /* Use the regular reset sequence for No GMU */ @@ -2424,14 +2421,7 @@ static int a6xx_reset(struct kgsl_device *device, int fault) /* since device is officially off now clear start bit */ clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); - /* Keep trying to start the device until it works */ - for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) { - ret = adreno_start(device, 0); - if (!ret) - break; - - msleep(20); - } + ret = adreno_start(device, 0); } clear_bit(ADRENO_DEVICE_HARD_RESET, &adreno_dev->priv); @@ -2439,20 +2429,16 @@ static int a6xx_reset(struct kgsl_device *device, int fault) if (ret) return ret; - if (i != 0) - KGSL_DRV_WARN(device, "Device hard reset tried %d tries\n", i); + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); /* - * If active_cnt is non-zero then the system was active before - * going into a reset - put it back in that state + * If active_cnt is zero, there is no need to keep the GPU active. So, + * we should transition to SLUMBER. */ + if (!atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); - if (atomic_read(&device->active_cnt)) - kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - else - kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP); - - return ret; + return 0; } static void a6xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) @@ -2523,24 +2509,6 @@ static void a6xx_err_callback(struct adreno_device *adreno_dev, int bit) } } -/* GPU System Cache control registers */ -#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0 0x4 -#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1 0x8 - -static inline void _reg_rmw(void __iomem *regaddr, - unsigned int mask, unsigned int bits) -{ - unsigned int val = 0; - - val = __raw_readl(regaddr); - /* Make sure the above read completes before we proceed */ - rmb(); - val &= ~mask; - __raw_writel(val | bits, regaddr); - /* Make sure the above write posts before we proceed*/ - wmb(); -} - /* * a6xx_llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks * @adreno_dev: The adreno device pointer @@ -2550,17 +2518,15 @@ static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev) uint32_t gpu_scid; uint32_t gpu_cntl1_val = 0; int i; - void __iomem *gpu_cx_reg; gpu_scid = adreno_llc_get_scid(adreno_dev->gpu_llc_slice); for (i = 0; i < A6XX_LLC_NUM_GPU_SCIDS; i++) gpu_cntl1_val = (gpu_cntl1_val << A6XX_GPU_LLC_SCID_NUM_BITS) | gpu_scid; - gpu_cx_reg = ioremap(A6XX_GPU_CX_REG_BASE, A6XX_GPU_CX_REG_SIZE); - _reg_rmw(gpu_cx_reg + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + adreno_cx_misc_regrmw(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val); - iounmap(gpu_cx_reg); } /* @@ -2570,15 +2536,13 @@ static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev) static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev) { uint32_t gpuhtw_scid; - void __iomem *gpu_cx_reg; gpuhtw_scid = adreno_llc_get_scid(adreno_dev->gpuhtw_llc_slice); - gpu_cx_reg = ioremap(A6XX_GPU_CX_REG_BASE, A6XX_GPU_CX_REG_SIZE); - _reg_rmw(gpu_cx_reg + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, + adreno_cx_misc_regrmw(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1, A6XX_GPUHTW_LLC_SCID_MASK, gpuhtw_scid << A6XX_GPUHTW_LLC_SCID_SHIFT); - iounmap(gpu_cx_reg); } /* @@ -2587,19 +2551,14 @@ static void a6xx_llc_configure_gpuhtw_scid(struct adreno_device *adreno_dev) */ static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev) { - void __iomem *gpu_cx_reg; - /* * 0x3: readnoallocoverrideen=0 * read-no-alloc=0 - Allocate lines on read miss * writenoallocoverrideen=1 * write-no-alloc=1 - Do not allocates lines on write miss */ - gpu_cx_reg = ioremap(A6XX_GPU_CX_REG_BASE, A6XX_GPU_CX_REG_SIZE); - __raw_writel(0x3, gpu_cx_reg + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0); - /* Make sure the above write posts before we proceed*/ - wmb(); - iounmap(gpu_cx_reg); + adreno_cx_misc_regwrite(adreno_dev, + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0, 0x3); } static const char *fault_block[8] = { @@ -3910,7 +3869,6 @@ struct adreno_gpudev adreno_a6xx_gpudev = { .reg_offsets = &a6xx_reg_offsets, .start = a6xx_start, .snapshot = a6xx_snapshot, - .snapshot_gmu = a6xx_snapshot_gmu, .irq = &a6xx_irq, .snapshot_data = &a6xx_snapshot_data, .irq_trace = trace_kgsl_a5xx_irq_status, diff --git a/drivers/gpu/msm/adreno_a6xx.h b/drivers/gpu/msm/adreno_a6xx.h index 7b8690635f2f..2a7aa5c1e288 100644 --- a/drivers/gpu/msm/adreno_a6xx.h +++ b/drivers/gpu/msm/adreno_a6xx.h @@ -94,8 +94,6 @@ struct cpu_gpu_lock { }; #define A6XX_CP_CTXRECORD_MAGIC_REF 0xAE399D6EUL -/* Size of each CP preemption record */ -#define A6XX_CP_CTXRECORD_SIZE_IN_BYTES (2112 * 1024) /* Size of the preemption counter block (in bytes) */ #define A6XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4) /* Size of the user context record block (in bytes) */ @@ -129,8 +127,5 @@ void a6xx_preemption_context_destroy(struct kgsl_context *context); void a6xx_snapshot(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot); -void a6xx_snapshot_gmu(struct adreno_device *adreno_dev, - struct kgsl_snapshot *snapshot); - void a6xx_crashdump_init(struct adreno_device *adreno_dev); #endif diff --git a/drivers/gpu/msm/adreno_a6xx_preempt.c b/drivers/gpu/msm/adreno_a6xx_preempt.c index ac5db166fdd9..29f31b5bc714 100644 --- a/drivers/gpu/msm/adreno_a6xx_preempt.c +++ b/drivers/gpu/msm/adreno_a6xx_preempt.c @@ -50,8 +50,13 @@ static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer) OOB_PREEMPTION_SET_MASK, OOB_PREEMPTION_CHECK_MASK, OOB_PREEMPTION_CLEAR_MASK); - if (status) + if (status) { + adreno_set_gpu_fault(adreno_dev, + ADRENO_GMU_FAULT); + adreno_dispatcher_schedule( + KGSL_DEVICE(adreno_dev)); return; + } } } @@ -155,7 +160,7 @@ static void _a6xx_preemption_fault(struct adreno_device *adreno_dev) if (kgsl_state_is_awake(device)) { adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status); - if (status == 0) { + if (!(status & 0x1)) { adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE); @@ -165,7 +170,7 @@ static void _a6xx_preemption_fault(struct adreno_device *adreno_dev) } KGSL_DRV_ERR(device, - "Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n", + "Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n", adreno_dev->cur_rb->id, adreno_get_rptr(adreno_dev->cur_rb), adreno_dev->cur_rb->wptr, adreno_dev->next_rb->id, @@ -641,14 +646,23 @@ static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); int ret; + /* + * Reserve CP context record size as + * GMEM size + GPU HW state size i.e 0x110000 + */ ret = kgsl_allocate_global(device, &rb->preemption_desc, - A6XX_CP_CTXRECORD_SIZE_IN_BYTES, 0, KGSL_MEMDESC_PRIVILEGED, + adreno_dev->gpucore->gmem_size + 0x110000, + 0, KGSL_MEMDESC_PRIVILEGED, "preemption_desc"); if (ret) return ret; + /* + * Reserve CP context record size as + * GMEM size + GPU HW state size i.e 0x110000 + */ ret = kgsl_allocate_user(device, &rb->secure_preemption_desc, - A6XX_CP_CTXRECORD_SIZE_IN_BYTES, + adreno_dev->gpucore->gmem_size + 0x110000, KGSL_MEMFLAGS_SECURE | KGSL_MEMDESC_PRIVILEGED); if (ret) return ret; diff --git a/drivers/gpu/msm/adreno_a6xx_snapshot.c b/drivers/gpu/msm/adreno_a6xx_snapshot.c index ecce95657470..639133ae094b 100644 --- a/drivers/gpu/msm/adreno_a6xx_snapshot.c +++ b/drivers/gpu/msm/adreno_a6xx_snapshot.c @@ -1457,7 +1457,7 @@ static void a6xx_snapshot_debugbus(struct kgsl_device *device, * This is where all of the A6XX GMU specific bits and pieces are grabbed * into the snapshot memory */ -void a6xx_snapshot_gmu(struct adreno_device *adreno_dev, +static void a6xx_snapshot_gmu(struct adreno_device *adreno_dev, struct kgsl_snapshot *snapshot) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -1480,8 +1480,6 @@ void a6xx_snapshot_gmu(struct adreno_device *adreno_dev, a6xx_gmu_gx_registers, ARRAY_SIZE(a6xx_gmu_gx_registers) / 2); } - - a6xx_snapshot_debugbus(device, snapshot); } /* a6xx_snapshot_sqe() - Dump SQE data in snapshot */ @@ -1583,6 +1581,15 @@ void a6xx_snapshot(struct adreno_device *adreno_dev, /* GMU TCM data dumped through AHB */ a6xx_snapshot_gmu(adreno_dev, snapshot); + /* + * Dump debugbus data here to capture it for both + * GMU and GPU snapshot. Debugbus data can be accessed + * even if the gx headswitch or sptprac is off. If gx + * headswitch is off, data for gx blocks will show as + * 0x5c00bd00. + */ + a6xx_snapshot_debugbus(device, snapshot); + sptprac_on = gpudev->sptprac_is_on(adreno_dev); /* Return if the GX is off */ diff --git a/drivers/gpu/msm/adreno_cp_parser.h b/drivers/gpu/msm/adreno_cp_parser.h index 1fa46c147c3c..d879e5079f7c 100644 --- a/drivers/gpu/msm/adreno_cp_parser.h +++ b/drivers/gpu/msm/adreno_cp_parser.h @@ -15,9 +15,6 @@ #include "adreno.h" -extern const unsigned int a3xx_cp_addr_regs[]; -extern const unsigned int a4xx_cp_addr_regs[]; - /* * struct adreno_ib_object - Structure containing information about an * address range found in an IB @@ -134,15 +131,7 @@ static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr, static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev, enum adreno_cp_addr_regs reg_enum) { - if (reg_enum == ADRENO_CP_ADDR_MAX) - return -EEXIST; - - if (adreno_is_a3xx(adreno_dev)) - return a3xx_cp_addr_regs[reg_enum]; - else if (adreno_is_a4xx(adreno_dev)) - return a4xx_cp_addr_regs[reg_enum]; - else - return -EEXIST; + return -EEXIST; } /* @@ -160,19 +149,6 @@ static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev, enum adreno_cp_addr_regs start, enum adreno_cp_addr_regs end) { - int i; - const unsigned int *regs; - - if (adreno_is_a4xx(adreno_dev)) - regs = a4xx_cp_addr_regs; - else if (adreno_is_a3xx(adreno_dev)) - regs = a3xx_cp_addr_regs; - else - return -EEXIST; - - for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++) - if (regs[i] == offset) - return i; return -EEXIST; } diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c index 1528aaadc989..260171236658 100644 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -137,8 +137,11 @@ static void sync_event_print(struct seq_file *s, break; } case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { - seq_printf(s, "sync: [%pK] %s", sync_event->handle, - sync_event->fence_name); + int i; + + for (i = 0; i < sync_event->info.num_fences; i++) + seq_printf(s, "sync: %s", + sync_event->info.fences[i].name); break; } default: diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c index 2f8536339fdf..82602ef9f3cd 100644 --- a/drivers/gpu/msm/adreno_dispatch.c +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -17,6 +17,11 @@ #include #include #include +#include +/* The sched_param struct is located elsewhere in newer kernels */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +#include +#endif #include "kgsl.h" #include "kgsl_sharedmem.h" @@ -656,6 +661,17 @@ static int sendcmd(struct adreno_device *adreno_dev, secs = time.ktime; nsecs = do_div(secs, 1000000000); + /* + * For the first submission in any given command queue update the + * expected expire time - this won't actually be used / updated until + * the command queue in question goes current, but universally setting + * it here avoids the possibilty of some race conditions with preempt + */ + + if (dispatch_q->inflight == 1) + dispatch_q->expires = jiffies + + msecs_to_jiffies(adreno_drawobj_timeout); + trace_adreno_cmdbatch_submitted(drawobj, (int) dispatcher->inflight, time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb, adreno_get_rptr(drawctxt->rb)); @@ -668,17 +684,6 @@ static int sendcmd(struct adreno_device *adreno_dev, dispatch_q->tail = (dispatch_q->tail + 1) % ADRENO_DISPATCH_DRAWQUEUE_SIZE; - /* - * For the first submission in any given command queue update the - * expected expire time - this won't actually be used / updated until - * the command queue in question goes current, but universally setting - * it here avoids the possibilty of some race conditions with preempt - */ - - if (dispatch_q->inflight == 1) - dispatch_q->expires = jiffies + - msecs_to_jiffies(adreno_drawobj_timeout); - /* * If we believe ourselves to be current and preemption isn't a thing, * then set up the timer. If this misses, then preemption is indeed a @@ -986,13 +991,6 @@ static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) spin_unlock(&dispatcher->plist_lock); } -static inline void _decrement_submit_now(struct kgsl_device *device) -{ - spin_lock(&device->submit_lock); - device->submit_now--; - spin_unlock(&device->submit_lock); -} - /** * adreno_dispatcher_issuecmds() - Issue commmands from pending contexts * @adreno_dev: Pointer to the adreno device struct @@ -1001,30 +999,7 @@ static inline void _decrement_submit_now(struct kgsl_device *device) */ static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) { - struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; - struct kgsl_device *device = KGSL_DEVICE(adreno_dev); - - spin_lock(&device->submit_lock); - /* If state transition to SLUMBER, schedule the work for later */ - if (device->slumber == true) { - spin_unlock(&device->submit_lock); - goto done; - } - device->submit_now++; - spin_unlock(&device->submit_lock); - - /* If the dispatcher is busy then schedule the work for later */ - if (!mutex_trylock(&dispatcher->mutex)) { - _decrement_submit_now(device); - goto done; - } - - _adreno_dispatcher_issuecmds(adreno_dev); - mutex_unlock(&dispatcher->mutex); - _decrement_submit_now(device); - return; -done: - adreno_dispatcher_schedule(device); + adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); } /** @@ -1184,12 +1159,6 @@ static inline int _verify_cmdobj(struct kgsl_device_private *dev_priv, &ADRENO_CONTEXT(context)->base, ib) == false) return -EINVAL; - /* - * Clear the wake on touch bit to indicate an IB has - * been submitted since the last time we set it. - * But only clear it when we have rendering commands. - */ - device->flags &= ~KGSL_FLAG_WAKE_ON_TOUCH; } /* A3XX does not have support for drawobj profiling */ @@ -1735,8 +1704,9 @@ static void adreno_fault_header(struct kgsl_device *device, ib2base, ib2sz, drawctxt->rb->id); pr_fault(device, drawobj, - "gpu fault ctx %d ts %d status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - drawobj->context->id, drawobj->timestamp, status, + "gpu fault ctx %d ctx_type %s ts %d status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + drawobj->context->id, get_api_type_str(drawctxt->type), + drawobj->timestamp, status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz); if (rb != NULL) @@ -2112,6 +2082,22 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) if (fault == 0) return 0; + mutex_lock(&device->mutex); + + /* + * In the very unlikely case that the power is off, do nothing - the + * state will be reset on power up and everybody will be happy + */ + if (!kgsl_state_is_awake(device)) { + mutex_unlock(&device->mutex); + if (fault & ADRENO_SOFT_FAULT) { + /* Clear the existing register values */ + memset(adreno_ft_regs_val, 0, + adreno_ft_regs_num * sizeof(unsigned int)); + } + return 0; + } + /* Mask all GMU interrupts */ if (kgsl_gmu_isenabled(device)) { adreno_write_gmureg(adreno_dev, @@ -2125,17 +2111,6 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) if (gpudev->gx_is_on) gx_on = gpudev->gx_is_on(adreno_dev); - /* - * In the very unlikely case that the power is off, do nothing - the - * state will be reset on power up and everybody will be happy - */ - - if (!kgsl_state_is_awake(device) && (fault & ADRENO_SOFT_FAULT)) { - /* Clear the existing register values */ - memset(adreno_ft_regs_val, 0, - adreno_ft_regs_num * sizeof(unsigned int)); - return 0; - } /* * On A5xx and A6xx, read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) @@ -2148,11 +2123,11 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) gx_on) { unsigned int val; - mutex_lock(&device->mutex); adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); - mutex_unlock(&device->mutex); - if (val & BIT(24)) + if (val & BIT(24)) { + mutex_unlock(&device->mutex); return 0; + } } /* Turn off all the timers */ @@ -2165,8 +2140,6 @@ static int dispatcher_do_fault(struct adreno_device *adreno_dev) if (adreno_is_preemption_enabled(adreno_dev)) del_timer_sync(&adreno_dev->preempt.timer); - mutex_lock(&device->mutex); - if (gx_on) adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, ADRENO_REG_CP_RB_BASE_HI, &base); @@ -2506,12 +2479,9 @@ static void _dispatcher_power_down(struct adreno_device *adreno_dev) mutex_unlock(&device->mutex); } -static void adreno_dispatcher_work(struct kthread_work *work) +static void adreno_dispatcher_work(struct adreno_device *adreno_dev) { - struct adreno_dispatcher *dispatcher = - container_of(work, struct adreno_dispatcher, work); - struct adreno_device *adreno_dev = - container_of(dispatcher, struct adreno_device, dispatcher); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int count = 0; @@ -2561,12 +2531,39 @@ static void adreno_dispatcher_work(struct kthread_work *work) mutex_unlock(&dispatcher->mutex); } +static int adreno_dispatcher_thread(void *data) +{ + static const struct sched_param sched_rt_prio = { + .sched_priority = 16 + }; + struct adreno_device *adreno_dev = data; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_rt_prio); + + while (1) { + bool should_stop; + + wait_event(dispatcher->cmd_waitq, + (should_stop = kthread_should_stop()) || + atomic_cmpxchg(&dispatcher->send_cmds, 1, 0)); + + if (should_stop) + break; + + adreno_dispatcher_work(adreno_dev); + } + + return 0; +} + void adreno_dispatcher_schedule(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; - kthread_queue_work(&kgsl_driver.worker, &dispatcher->work); + if (!atomic_cmpxchg(&dispatcher->send_cmds, 0, 1)) + wake_up(&dispatcher->cmd_waitq); } /** @@ -2686,6 +2683,8 @@ void adreno_dispatcher_close(struct adreno_device *adreno_dev) int i; struct adreno_ringbuffer *rb; + kthread_stop(dispatcher->thread); + mutex_lock(&dispatcher->mutex); del_timer_sync(&dispatcher->timer); del_timer_sync(&dispatcher->fault_timer); @@ -2862,14 +2861,19 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev) setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer, (unsigned long) adreno_dev); - kthread_init_work(&dispatcher->work, adreno_dispatcher_work); - init_completion(&dispatcher->idle_gate); complete_all(&dispatcher->idle_gate); plist_head_init(&dispatcher->pending); spin_lock_init(&dispatcher->plist_lock); + init_waitqueue_head(&dispatcher->cmd_waitq); + dispatcher->send_cmds = (atomic_t)ATOMIC_INIT(0); + dispatcher->thread = kthread_run(adreno_dispatcher_thread, adreno_dev, + "adreno_dispatch"); + if (IS_ERR(dispatcher->thread)) + return PTR_ERR(dispatcher->thread); + ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher, &device->dev->kobj, "dispatch"); diff --git a/drivers/gpu/msm/adreno_dispatch.h b/drivers/gpu/msm/adreno_dispatch.h index 61bd06f4d373..2526eda6bf4e 100644 --- a/drivers/gpu/msm/adreno_dispatch.h +++ b/drivers/gpu/msm/adreno_dispatch.h @@ -75,12 +75,14 @@ struct adreno_dispatcher_drawqueue { * @fault: Non-zero if a fault was detected. * @pending: Priority list of contexts waiting to submit drawobjs * @plist_lock: Spin lock to protect the pending queue - * @work: work_struct to put the dispatcher in a work queue * @kobj: kobject for the dispatcher directory in the device sysfs node * @idle_gate: Gate to wait on for dispatcher to idle * @disp_preempt_fair_sched: If set then dispatcher will try to be fair to * starving RB's by scheduling them in and enforcing a minimum time slice * for every RB that is scheduled to run on the device + * @thread: Kthread for the command dispatcher + * @cmd_waitq: Waitqueue for the command dispatcher + * @send_cmds: Atomic boolean indicating that commands should be dispatched */ struct adreno_dispatcher { struct mutex mutex; @@ -91,10 +93,12 @@ struct adreno_dispatcher { atomic_t fault; struct plist_head pending; spinlock_t plist_lock; - struct kthread_work work; struct kobject kobj; struct completion idle_gate; unsigned int disp_preempt_fair_sched; + struct task_struct *thread; + wait_queue_head_t cmd_waitq; + atomic_t send_cmds; }; enum adreno_dispatcher_flags { diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c index 7c1d9b2b4ae1..031364ac2fd4 100644 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -472,6 +472,7 @@ void adreno_drawctxt_detach(struct kgsl_context *context) { struct kgsl_device *device; struct adreno_device *adreno_dev; + struct adreno_gpudev *gpudev; struct adreno_context *drawctxt; struct adreno_ringbuffer *rb; int ret, count, i; @@ -482,6 +483,7 @@ void adreno_drawctxt_detach(struct kgsl_context *context) device = context->device; adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); drawctxt = ADRENO_CONTEXT(context); rb = drawctxt->rb; @@ -563,6 +565,9 @@ void adreno_drawctxt_detach(struct kgsl_context *context) mutex_unlock(&device->mutex); + if (gpudev->preemption_context_destroy) + gpudev->preemption_context_destroy(context); + /* wake threads waiting to submit commands from this context */ wake_up_all(&drawctxt->waiting); wake_up_all(&drawctxt->wq); @@ -571,18 +576,10 @@ void adreno_drawctxt_detach(struct kgsl_context *context) void adreno_drawctxt_destroy(struct kgsl_context *context) { struct adreno_context *drawctxt; - struct adreno_device *adreno_dev; - struct adreno_gpudev *gpudev; if (context == NULL) return; - adreno_dev = ADRENO_DEVICE(context->device); - gpudev = ADRENO_GPU_DEVICE(adreno_dev); - - if (gpudev->preemption_context_destroy) - gpudev->preemption_context_destroy(context); - drawctxt = ADRENO_CONTEXT(context); kfree(drawctxt); } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h index eef506ff46f0..4857c7851556 100644 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -139,4 +139,16 @@ void adreno_drawctxt_invalidate(struct kgsl_device *device, void adreno_drawctxt_dump(struct kgsl_device *device, struct kgsl_context *context); +static struct adreno_context_type ctxt_type_table[] = {KGSL_CONTEXT_TYPES}; + +static inline const char *get_api_type_str(unsigned int type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ctxt_type_table); i++) { + if (ctxt_type_table[i].type == type) + return ctxt_type_table[i].str; + } + return "UNKNOWN"; +} #endif /* __ADRENO_DRAWCTXT_H */ diff --git a/drivers/gpu/msm/adreno_ioctl.c b/drivers/gpu/msm/adreno_ioctl.c index 2b580217aec6..31b44a0ad6d0 100644 --- a/drivers/gpu/msm/adreno_ioctl.c +++ b/drivers/gpu/msm/adreno_ioctl.c @@ -17,6 +17,50 @@ #include "adreno.h" #include "adreno_a5xx.h" +/* + * Add a perfcounter to the per-fd list. + * Call with the device mutex held + */ +static int adreno_process_perfcounter_add(struct kgsl_device_private *dev_priv, + unsigned int groupid, unsigned int countable) +{ + struct adreno_device_private *adreno_priv = container_of(dev_priv, + struct adreno_device_private, dev_priv); + struct adreno_perfcounter_list_node *perfctr; + + perfctr = kmalloc(sizeof(*perfctr), GFP_KERNEL); + if (!perfctr) + return -ENOMEM; + + perfctr->groupid = groupid; + perfctr->countable = countable; + + /* add the pair to process perfcounter list */ + list_add(&perfctr->node, &adreno_priv->perfcounter_list); + return 0; +} + +/* + * Remove a perfcounter from the per-fd list. + * Call with the device mutex held + */ +static int adreno_process_perfcounter_del(struct kgsl_device_private *dev_priv, + unsigned int groupid, unsigned int countable) +{ + struct adreno_device_private *adreno_priv = container_of(dev_priv, + struct adreno_device_private, dev_priv); + struct adreno_perfcounter_list_node *p; + + list_for_each_entry(p, &adreno_priv->perfcounter_list, node) { + if (p->groupid == groupid && p->countable == countable) { + list_del(&p->node); + kfree(p); + return 0; + } + } + return -ENODEV; +} + long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -43,6 +87,15 @@ long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, get->groupid, get->countable, &get->offset, &get->offset_hi, PERFCOUNTER_FLAG_NONE); + /* Add the perfcounter into the list */ + if (!result) { + result = adreno_process_perfcounter_add(dev_priv, get->groupid, + get->countable); + if (result) + adreno_perfcounter_put(adreno_dev, get->groupid, + get->countable, PERFCOUNTER_FLAG_NONE); + } + adreno_perfcntr_active_oob_put(adreno_dev); mutex_unlock(&device->mutex); @@ -59,8 +112,15 @@ long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, int result; mutex_lock(&device->mutex); - result = adreno_perfcounter_put(adreno_dev, put->groupid, - put->countable, PERFCOUNTER_FLAG_NONE); + + /* Delete the perfcounter from the process list */ + result = adreno_process_perfcounter_del(dev_priv, put->groupid, + put->countable); + + /* Put the perfcounter refcount */ + if (!result) + adreno_perfcounter_put(adreno_dev, put->groupid, + put->countable, PERFCOUNTER_FLAG_NONE); mutex_unlock(&device->mutex); return (long) result; diff --git a/drivers/gpu/msm/adreno_profile.c b/drivers/gpu/msm/adreno_profile.c index 0e05625ab771..b1e55fcfd964 100644 --- a/drivers/gpu/msm/adreno_profile.c +++ b/drivers/gpu/msm/adreno_profile.c @@ -77,19 +77,6 @@ #define SIZE_PIPE_ENTRY(cnt) (50 + (cnt) * 62) #define SIZE_LOG_ENTRY(cnt) (6 + (cnt) * 5) -static struct adreno_context_type ctxt_type_table[] = {KGSL_CONTEXT_TYPES}; - -static const char *get_api_type_str(unsigned int type) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ctxt_type_table) - 1; i++) { - if (ctxt_type_table[i].type == type) - return ctxt_type_table[i].str; - } - return "UNKNOWN"; -} - static inline uint _ib_start(struct adreno_device *adreno_dev, unsigned int *cmds) { diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 3e2701e77a22..2bdd4a93a408 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -116,8 +116,11 @@ static void adreno_ringbuffer_wptr(struct adreno_device *adreno_dev, rb->wptr = rb->_wptr; spin_unlock_irqrestore(&rb->preempt_lock, flags); - if (ret) - kgsl_device_snapshot(KGSL_DEVICE(adreno_dev), NULL, false); + if (ret) { + /* If WPTR update fails, set the fault and trigger recovery */ + adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev)); + } } diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c index ff199a26ba56..63f424856a60 100644 --- a/drivers/gpu/msm/adreno_snapshot.c +++ b/drivers/gpu/msm/adreno_snapshot.c @@ -413,6 +413,8 @@ static void snapshot_rb_ibs(struct kgsl_device *device, ibsize = rbptr[index + 3]; } + index = (index + 1) % KGSL_RB_DWORDS; + /* Don't parse known global IBs */ if (iommu_is_setstate_addr(device, ibaddr, ibsize)) continue; @@ -423,9 +425,8 @@ static void snapshot_rb_ibs(struct kgsl_device *device, parse_ib(device, snapshot, snapshot->process, ibaddr, ibsize); - } - - index = (index + 1) % KGSL_RB_DWORDS; + } else + index = (index + 1) % KGSL_RB_DWORDS; } } @@ -958,24 +959,6 @@ void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, } -/* adreno_snapshot_gmu - Snapshot the Adreno GMU state - * @device - KGSL device to snapshot - * @snapshot - Pointer to the snapshot instance - * This is a hook function called by kgsl_snapshot to snapshot the - * Adreno specific information for the GMU snapshot. In turn, this function - * calls the GMU specific snapshot function to get core specific information. - */ -void adreno_snapshot_gmu(struct kgsl_device *device, - struct kgsl_snapshot *snapshot) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); - - /* Add GMU specific sections */ - if (gpudev->snapshot_gmu) - gpudev->snapshot_gmu(adreno_dev, snapshot); -} - /* * adreno_snapshot_cp_roq - Dump CP merciu data in snapshot * @device: Device being snapshotted diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c index ae19ccd900df..2ffc3f552f7f 100644 --- a/drivers/gpu/msm/adreno_sysfs.c +++ b/drivers/gpu/msm/adreno_sysfs.c @@ -30,6 +30,13 @@ struct adreno_sysfs_attribute adreno_attr_##_name = { \ .store = _ ## _name ## _store, \ } +#define _ADRENO_SYSFS_FPERM_ATTR(_name, _fperm, __show, __store) \ +struct adreno_sysfs_attribute adreno_attr_##_name = { \ + .attr = __ATTR(_name, _fperm, __show, __store), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + #define _ADRENO_SYSFS_ATTR_RO(_name, __show) \ struct adreno_sysfs_attribute adreno_attr_##_name = { \ .attr = __ATTR(_name, 0444, __show, NULL), \ @@ -279,7 +286,7 @@ static unsigned int _hwcg_show(struct adreno_device *adreno_dev) static int _throttling_store(struct adreno_device *adreno_dev, unsigned int val) { - return _pwrctrl_store(adreno_dev, val, ADRENO_THROTTLING_CTRL); + return 0; } static unsigned int _throttling_show(struct adreno_device *adreno_dev) @@ -342,6 +349,11 @@ static unsigned int _ifpc_show(struct adreno_device *adreno_dev) return kgsl_gmu_isenabled(device) && gmu->idle_level >= GPU_HW_IFPC; } +static unsigned int _ifpc_count_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ifpc_count; +} + static unsigned int _preempt_count_show(struct adreno_device *adreno_dev) { struct adreno_preemption *preempt = &adreno_dev->preempt; @@ -451,6 +463,9 @@ static ssize_t _sysfs_show_bool(struct device *dev, #define ADRENO_SYSFS_BOOL(_name) \ _ADRENO_SYSFS_ATTR(_name, _sysfs_show_bool, _sysfs_store_bool) +#define ADRENO_SYSFS_RO_BOOL(_name) \ + _ADRENO_SYSFS_FPERM_ATTR(_name, 0444, _sysfs_show_bool, _sysfs_store_bool) + #define ADRENO_SYSFS_U32(_name) \ _ADRENO_SYSFS_ATTR(_name, _sysfs_show_u32, _sysfs_store_u32) @@ -469,15 +484,15 @@ static ADRENO_SYSFS_BOOL(gpu_llc_slice_enable); static ADRENO_SYSFS_BOOL(gpuhtw_llc_slice_enable); static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice); -static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout); static ADRENO_SYSFS_BOOL(sptp_pc); static ADRENO_SYSFS_BOOL(lm); static ADRENO_SYSFS_BOOL(preemption); static ADRENO_SYSFS_BOOL(hwcg); -static ADRENO_SYSFS_BOOL(throttling); +static ADRENO_SYSFS_RO_BOOL(throttling); static ADRENO_SYSFS_BOOL(ifpc); static ADRENO_SYSFS_BOOL(perfcounter); +static ADRENO_SYSFS_RO_U32(ifpc_count); @@ -487,7 +502,6 @@ static const struct device_attribute *_attr_list[] = { &adreno_attr_ft_long_ib_detect.attr, &adreno_attr_ft_hang_intr_status.attr, &dev_attr_wake_nice.attr, - &dev_attr_wake_timeout.attr, &adreno_attr_sptp_pc.attr, &adreno_attr_lm.attr, &adreno_attr_preemption.attr, @@ -499,6 +513,7 @@ static const struct device_attribute *_attr_list[] = { &adreno_attr_usesgmem.attr, &adreno_attr_skipsaverestore.attr, &adreno_attr_ifpc.attr, + &adreno_attr_ifpc_count.attr, &adreno_attr_preempt_count.attr, &adreno_attr_perfcounter.attr, NULL, diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h index 7774ca9ef1b9..bf72623cac91 100644 --- a/drivers/gpu/msm/adreno_trace.h +++ b/drivers/gpu/msm/adreno_trace.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -609,6 +609,19 @@ TRACE_EVENT(adreno_preempt_done, __entry->next->id, __entry->cur->id, __entry->level ) ); + +TRACE_EVENT(adreno_ifpc_count, + TP_PROTO(unsigned int ifpc_count), + TP_ARGS(ifpc_count), + TP_STRUCT__entry( + __field(unsigned int, ifpc_count) + ), + TP_fast_assign( + __entry->ifpc_count = ifpc_count; + ), + TP_printk("total times GMU entered IFPC = %d", __entry->ifpc_count) +); + #endif /* _ADRENO_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index a9f4d1bfef20..042e2efff5b2 100644 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -556,7 +556,7 @@ int kgsl_context_init(struct kgsl_device_private *dev_priv, */ spin_lock(&proc_priv->ctxt_count_lock); if (atomic_read(&proc_priv->ctxt_count) > KGSL_MAX_CONTEXTS_PER_PROC) { - KGSL_DRV_ERR(device, + KGSL_DRV_ERR_RATELIMIT(device, "Per process context limit reached for pid %u", pid_nr(dev_priv->process_priv->pid)); spin_unlock(&proc_priv->ctxt_count_lock); @@ -781,7 +781,7 @@ static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) mutex_lock(&device->mutex); status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); - if (status == 0) + if (status == 0 && device->state == KGSL_STATE_SUSPEND) device->ftbl->dispatcher_halt(device); mutex_unlock(&device->mutex); @@ -1148,7 +1148,8 @@ static int kgsl_release(struct inode *inodep, struct file *filep) /* Close down the process wide resources for the file */ kgsl_process_private_close(dev_priv, dev_priv->process_priv); - kfree(dev_priv); + /* Destroy the device-specific structure */ + device->ftbl->device_private_destroy(dev_priv); result = kgsl_close_device(device); pm_runtime_put(&device->pdev->dev); @@ -1216,7 +1217,7 @@ static int kgsl_open(struct inode *inodep, struct file *filep) } result = 0; - dev_priv = kzalloc(sizeof(struct kgsl_device_private), GFP_KERNEL); + dev_priv = device->ftbl->device_private_create(); if (dev_priv == NULL) { result = -ENOMEM; goto err; @@ -1425,6 +1426,45 @@ long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, kgsl_context_put(context); break; } + case KGSL_PROP_SECURE_BUFFER_ALIGNMENT: + { + unsigned int align; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* + * XPUv2 impose the constraint of 1MB memory alignment, + * on the other hand Hypervisor does not have such + * constraints. So driver should fulfill such + * requirements when allocating secure memory. + */ + align = MMU_FEATURE(&dev_priv->device->mmu, + KGSL_MMU_HYP_SECURE_ALLOC) ? PAGE_SIZE : SZ_1M; + + if (copy_to_user(param->value, &align, sizeof(align))) + result = -EFAULT; + + break; + } + case KGSL_PROP_SECURE_CTXT_SUPPORT: + { + unsigned int secure_ctxt; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + + secure_ctxt = dev_priv->device->mmu.secured ? 1 : 0; + + if (copy_to_user(param->value, &secure_ctxt, + sizeof(secure_ctxt))) + result = -EFAULT; + + break; + } default: if (is_compat_task()) result = dev_priv->device->ftbl->getproperty_compat( @@ -2007,7 +2047,7 @@ static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv, } handle = kgsl_sync_fence_async_wait(event.fd, - gpuobj_free_fence_func, entry, NULL, 0); + gpuobj_free_fence_func, entry, NULL); if (IS_ERR(handle)) { kgsl_mem_entry_unset_pend(entry); @@ -4699,6 +4739,8 @@ struct kgsl_driver kgsl_driver = { .stats.secure_max = ATOMIC_LONG_INIT(0), .stats.mapped = ATOMIC_LONG_INIT(0), .stats.mapped_max = ATOMIC_LONG_INIT(0), + .stats.page_free_pending = ATOMIC_LONG_INIT(0), + .stats.page_alloc_pending = ATOMIC_LONG_INIT(0), }; EXPORT_SYMBOL(kgsl_driver); @@ -4834,7 +4876,8 @@ int kgsl_device_platform_probe(struct kgsl_device *device) } status = devm_request_irq(device->dev, device->pwrctrl.interrupt_num, - kgsl_irq_handler, IRQF_TRIGGER_HIGH, + kgsl_irq_handler, + IRQF_TRIGGER_HIGH | IRQF_PERF_CRITICAL, device->name, device); if (status) { KGSL_DRV_ERR(device, "request_irq(%d) failed: %d\n", @@ -4848,7 +4891,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device) device->id, device->reg_phys, device->reg_len); rwlock_init(&device->context_lock); - spin_lock_init(&device->submit_lock); setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device); @@ -4911,7 +4953,7 @@ int kgsl_device_platform_probe(struct kgsl_device *device) } device->events_wq = alloc_workqueue("kgsl-events", - WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_HIGHPRI, 0); /* Initialize the snapshot engine */ kgsl_device_snapshot_init(device); @@ -4992,7 +5034,7 @@ static void kgsl_core_exit(void) static int __init kgsl_core_init(void) { int result = 0; - struct sched_param param = { .sched_priority = 2 }; + struct sched_param param = { .sched_priority = 16 }; /* alloc major and minor device numbers */ result = alloc_chrdev_region(&kgsl_driver.major, 0, KGSL_DEVICE_MAX, @@ -5055,14 +5097,14 @@ static int __init kgsl_core_init(void) INIT_LIST_HEAD(&kgsl_driver.pagetable_list); kgsl_driver.workqueue = alloc_workqueue("kgsl-workqueue", - WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); kgsl_driver.mem_workqueue = alloc_workqueue("kgsl-mementry", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); kthread_init_worker(&kgsl_driver.worker); - kgsl_driver.worker_thread = kthread_run(kthread_worker_fn, + kgsl_driver.worker_thread = kthread_run_perf_critical(kthread_worker_fn, &kgsl_driver.worker, "kgsl_worker_thread"); if (IS_ERR(kgsl_driver.worker_thread)) { diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 420464a04545..f6b841f580a4 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -155,6 +155,8 @@ struct kgsl_driver { atomic_long_t secure_max; atomic_long_t mapped; atomic_long_t mapped_max; + atomic_long_t page_free_pending; + atomic_long_t page_alloc_pending; } stats; unsigned int full_cache_threshold; struct workqueue_struct *workqueue; diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c index 2a6e60883637..6fc5e243f6a6 100644 --- a/drivers/gpu/msm/kgsl_debugfs.c +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -418,8 +418,6 @@ void kgsl_process_init_debugfs(struct kgsl_process_private *private) */ if (IS_ERR_OR_NULL(private->debug_root)) { - WARN((private->debug_root == NULL), - "Unable to create debugfs dir for %s\n", name); private->debug_root = NULL; return; } diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index 556ee78a9100..ec5b6c249c58 100644 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -65,7 +65,6 @@ enum kgsl_event_results { KGSL_EVENT_CANCELLED = 2, }; -#define KGSL_FLAG_WAKE_ON_TOUCH BIT(0) #define KGSL_FLAG_SPARSE BIT(1) /* @@ -98,7 +97,8 @@ enum kgsl_event_results { { KGSL_CONTEXT_TYPE_GL, "GL" }, \ { KGSL_CONTEXT_TYPE_CL, "CL" }, \ { KGSL_CONTEXT_TYPE_C2D, "C2D" }, \ - { KGSL_CONTEXT_TYPE_RS, "RS" } + { KGSL_CONTEXT_TYPE_RS, "RS" }, \ + { KGSL_CONTEXT_TYPE_VK, "VK" } #define KGSL_CONTEXT_ID(_context) \ ((_context != NULL) ? (_context)->id : KGSL_MEMSTORE_GLOBAL) @@ -153,10 +153,10 @@ struct kgsl_functable { unsigned int (*gpuid)(struct kgsl_device *device, unsigned int *chipid); void (*snapshot)(struct kgsl_device *device, struct kgsl_snapshot *snapshot, struct kgsl_context *context); - void (*snapshot_gmu)(struct kgsl_device *device, - struct kgsl_snapshot *snapshot); irqreturn_t (*irq_handler)(struct kgsl_device *device); int (*drain)(struct kgsl_device *device); + struct kgsl_device_private * (*device_private_create)(void); + void (*device_private_destroy)(struct kgsl_device_private *dev_priv); /* * Optional functions - these functions are not mandatory. The * driver will check that the function pointer is not NULL before @@ -280,11 +280,6 @@ struct kgsl_device { struct kgsl_pwrctrl pwrctrl; int open_count; - /* For GPU inline submission */ - uint32_t submit_now; - spinlock_t submit_lock; - bool slumber; - struct mutex mutex; uint32_t state; uint32_t requested_state; diff --git a/drivers/gpu/msm/kgsl_drawobj.c b/drivers/gpu/msm/kgsl_drawobj.c index 34a4cff1a4bf..b0dd4ac598b3 100644 --- a/drivers/gpu/msm/kgsl_drawobj.c +++ b/drivers/gpu/msm/kgsl_drawobj.c @@ -43,6 +43,17 @@ static struct kmem_cache *memobjs_cache; static struct kmem_cache *sparseobjs_cache; +static void free_fence_names(struct kgsl_drawobj_sync *syncobj) +{ + unsigned int i; + + for (i = 0; i < syncobj->numsyncs; i++) { + struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i]; + + if (event->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) + kfree(event->info.fences); + } +} void kgsl_drawobj_destroy_object(struct kref *kref) { @@ -55,6 +66,7 @@ void kgsl_drawobj_destroy_object(struct kref *kref) switch (drawobj->type) { case SYNCOBJ_TYPE: syncobj = SYNCOBJ(drawobj); + free_fence_names(syncobj); kfree(syncobj->synclist); kfree(syncobj); break; @@ -94,11 +106,16 @@ void kgsl_dump_syncpoints(struct kgsl_device *device, retired); break; } - case KGSL_CMD_SYNCPOINT_TYPE_FENCE: - dev_err(device->dev, " fence: %s\n", - event->fence_name); + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { + int j; + struct event_fence_info *info = &event->info; + + for (j = 0; j < info->num_fences; j++) + dev_err(device->dev, "[%d] fence: %s\n", + i, info->fences[j].name); break; } + } } } @@ -146,11 +163,16 @@ static void syncobj_timer(unsigned long data) dev_err(device->dev, " [%d] TIMESTAMP %d:%d\n", i, event->context->id, event->timestamp); break; - case KGSL_CMD_SYNCPOINT_TYPE_FENCE: - dev_err(device->dev, " [%d] FENCE %s\n", - i, event->fence_name); + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: { + int j; + struct event_fence_info *info = &event->info; + + for (j = 0; j < info->num_fences; j++) + dev_err(device->dev, " [%d] FENCE %s\n", + i, info->fences[j].name); break; } + } } kgsl_drawobj_put(drawobj); @@ -333,8 +355,11 @@ EXPORT_SYMBOL(kgsl_drawobj_destroy); static bool drawobj_sync_fence_func(void *priv) { struct kgsl_drawobj_sync_event *event = priv; + int i; - trace_syncpoint_fence_expire(event->syncobj, event->fence_name); + for (i = 0; i < event->info.num_fences; i++) + trace_syncpoint_fence_expire(event->syncobj, + event->info.fences[i].name); /* * Only call kgsl_drawobj_put() if it's not marked for cancellation @@ -360,7 +385,7 @@ static int drawobj_add_sync_fence(struct kgsl_device *device, struct kgsl_cmd_syncpoint_fence *sync = priv; struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj); struct kgsl_drawobj_sync_event *event; - unsigned int id; + unsigned int id, i; kref_get(&drawobj->refcount); @@ -378,7 +403,7 @@ static int drawobj_add_sync_fence(struct kgsl_device *device, event->handle = kgsl_sync_fence_async_wait(sync->fd, drawobj_sync_fence_func, event, - event->fence_name, sizeof(event->fence_name)); + &event->info); if (IS_ERR_OR_NULL(event->handle)) { int ret = PTR_ERR(event->handle); @@ -398,7 +423,8 @@ static int drawobj_add_sync_fence(struct kgsl_device *device, return ret; } - trace_syncpoint_fence(syncobj, event->fence_name); + for (i = 0; i < event->info.num_fences; i++) + trace_syncpoint_fence(syncobj, event->info.fences[i].name); return 0; } diff --git a/drivers/gpu/msm/kgsl_drawobj.h b/drivers/gpu/msm/kgsl_drawobj.h index 06eef7fdd884..bd32f5e503a7 100644 --- a/drivers/gpu/msm/kgsl_drawobj.h +++ b/drivers/gpu/msm/kgsl_drawobj.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -107,6 +107,15 @@ struct kgsl_drawobj_sync { #define KGSL_FENCE_NAME_LEN 74 +struct fence_info { + char name[KGSL_FENCE_NAME_LEN]; +}; + +struct event_fence_info { + struct fence_info *fences; + int num_fences; +}; + /** * struct kgsl_drawobj_sync_event * @id: identifer (positiion within the pending bitmap) @@ -116,8 +125,8 @@ struct kgsl_drawobj_sync { * register this event * @timestamp: Pending timestamp for the event * @handle: Pointer to a sync fence handle - * @fence_name: A fence name string to describe the fence * @device: Pointer to the KGSL device + * @info: structure to hold info about the fence */ struct kgsl_drawobj_sync_event { unsigned int id; @@ -126,8 +135,8 @@ struct kgsl_drawobj_sync_event { struct kgsl_context *context; unsigned int timestamp; struct kgsl_sync_fence_cb *handle; - char fence_name[KGSL_FENCE_NAME_LEN]; struct kgsl_device *device; + struct event_fence_info info; }; /** diff --git a/drivers/gpu/msm/kgsl_gmu.c b/drivers/gpu/msm/kgsl_gmu.c index dbc3d53e08cd..222c86ad5bad 100644 --- a/drivers/gpu/msm/kgsl_gmu.c +++ b/drivers/gpu/msm/kgsl_gmu.c @@ -60,6 +60,8 @@ struct gmu_vma { unsigned int image_start; }; +#define GMU_CM3_CFG_NONMASKINTR_SHIFT 9 + struct gmu_iommu_context { const char *name; struct device *dev; @@ -450,7 +452,7 @@ int gmu_dcvs_set(struct gmu_device *gmu, struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int perf_idx = INVALID_DCVS_IDX, bw_idx = INVALID_DCVS_IDX; - int ret; + int ret = 0; if (gpu_pwrlevel < gmu->num_gpupwrlevels - 1) perf_idx = gmu->num_gpupwrlevels - gpu_pwrlevel - 1; @@ -462,23 +464,22 @@ int gmu_dcvs_set(struct gmu_device *gmu, (bw_idx == INVALID_DCVS_IDX)) return -EINVAL; - if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) { + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) ret = gpudev->rpmh_gpu_pwrctrl(adreno_dev, GMU_DCVS_NOHFI, perf_idx, bw_idx); + else if (test_bit(GMU_HFI_ON, &gmu->flags)) + ret = hfi_send_dcvs_vote(gmu, perf_idx, bw_idx, ACK_NONBLOCK); - if (ret) { - dev_err_ratelimited(&gmu->pdev->dev, - "Failed to set GPU perf idx %d, bw idx %d\n", - perf_idx, bw_idx); - - adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT); - adreno_dispatcher_schedule(device); - } + if (ret) { + dev_err_ratelimited(&gmu->pdev->dev, + "Failed to set GPU perf idx %d, bw idx %d\n", + perf_idx, bw_idx); - return ret; + adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT); + adreno_dispatcher_schedule(device); } - return hfi_send_dcvs_vote(gmu, perf_idx, bw_idx, ACK_NONBLOCK); + return ret; } struct rpmh_arc_vals { @@ -787,12 +788,30 @@ static int gmu_rpmh_init(struct gmu_device *gmu, struct kgsl_pwrctrl *pwr) return rpmh_arc_votes_init(gmu, &cx_arc, &mx_arc, GMU_ARC_VOTE); } +static void send_nmi_to_gmu(struct adreno_device *adreno_dev) +{ + /* Mask so there's no interrupt caused by NMI */ + adreno_write_gmureg(adreno_dev, + ADRENO_REG_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF); + + /* Make sure the interrupt is masked before causing it */ + wmb(); + adreno_write_gmureg(adreno_dev, + ADRENO_REG_GMU_NMI_CONTROL_STATUS, 0); + adreno_write_gmureg(adreno_dev, + ADRENO_REG_GMU_CM3_CFG, + (1 << GMU_CM3_CFG_NONMASKINTR_SHIFT)); + + /* Make sure the NMI is invoked before we proceed*/ + wmb(); +} + static irqreturn_t gmu_irq_handler(int irq, void *data) { struct gmu_device *gmu = data; struct kgsl_device *device = container_of(gmu, struct kgsl_device, gmu); struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int status = 0; + unsigned int mask, status = 0; adreno_read_gmureg(ADRENO_DEVICE(device), ADRENO_REG_GMU_AO_HOST_INTERRUPT_STATUS, &status); @@ -801,6 +820,20 @@ static irqreturn_t gmu_irq_handler(int irq, void *data) /* Ignore GMU_INT_RSCC_COMP and GMU_INT_DBD WAKEUP interrupts */ if (status & GMU_INT_WDOG_BITE) { + /* Temporarily mask the watchdog interrupt to prevent a storm */ + adreno_read_gmureg(adreno_dev, + ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, &mask); + adreno_write_gmureg(adreno_dev, + ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK, + (mask | GMU_INT_WDOG_BITE)); + + send_nmi_to_gmu(adreno_dev); + /* + * There is sufficient delay for the GMU to have finished + * handling the NMI before snapshot is taken, as the fault + * worker is scheduled below. + */ + dev_err_ratelimited(&gmu->pdev->dev, "GMU watchdog expired interrupt received\n"); adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT); @@ -1350,19 +1383,8 @@ void gmu_snapshot(struct kgsl_device *device) struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct gmu_device *gmu = &device->gmu; - /* Mask so there's no interrupt caused by NMI */ - adreno_write_gmureg(adreno_dev, - ADRENO_REG_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF); - - /* Make sure the interrupt is masked before causing it */ - wmb(); - adreno_write_gmureg(adreno_dev, - ADRENO_REG_GMU_NMI_CONTROL_STATUS, 0); - adreno_write_gmureg(adreno_dev, - ADRENO_REG_GMU_CM3_CFG, (1 << 9)); - + send_nmi_to_gmu(adreno_dev); /* Wait for the NMI to be handled */ - wmb(); udelay(100); kgsl_device_snapshot(device, NULL, true); @@ -1662,7 +1684,7 @@ int adreno_gmu_fenced_write(struct adreno_device *adreno_dev, if (!kgsl_gmu_isenabled(KGSL_DEVICE(adreno_dev))) return 0; - for (i = 0; i < GMU_WAKEUP_RETRY_MAX; i++) { + for (i = 0; i < GMU_LONG_WAKEUP_RETRY_LIMIT; i++) { adreno_read_gmureg(adreno_dev, ADRENO_REG_GMU_AHB_FENCE_STATUS, &status); @@ -1677,9 +1699,19 @@ int adreno_gmu_fenced_write(struct adreno_device *adreno_dev, /* Try to write the fenced register again */ adreno_writereg(adreno_dev, offset, val); + + if (i == GMU_SHORT_WAKEUP_RETRY_LIMIT) + dev_err(adreno_dev->dev.dev, + "Waited %d usecs to write fenced register 0x%x. Continuing to wait...\n", + (GMU_SHORT_WAKEUP_RETRY_LIMIT * + GMU_WAKEUP_DELAY_US), + reg_offset); } dev_err(adreno_dev->dev.dev, - "GMU fenced register write timed out: reg 0x%x\n", reg_offset); + "Timed out waiting %d usecs to write fenced register 0x%x\n", + GMU_LONG_WAKEUP_RETRY_LIMIT * GMU_WAKEUP_DELAY_US, + reg_offset); + return -ETIMEDOUT; } diff --git a/drivers/gpu/msm/kgsl_gmu.h b/drivers/gpu/msm/kgsl_gmu.h index 5db2123a401b..0900b5930087 100644 --- a/drivers/gpu/msm/kgsl_gmu.h +++ b/drivers/gpu/msm/kgsl_gmu.h @@ -92,8 +92,15 @@ * the GMU will start shutting down before we try again. */ #define GMU_WAKEUP_DELAY_US 10 -/* Max amount of tries to wake up the GMU. */ -#define GMU_WAKEUP_RETRY_MAX 60 + +/* Max amount of tries to wake up the GMU. The short retry + * limit is half of the long retry limit. After the short + * number of retries, we print an informational message to say + * exiting IFPC is taking longer than expected. We continue + * to retry after this until the long retry limit. + */ +#define GMU_SHORT_WAKEUP_RETRY_LIMIT 100 +#define GMU_LONG_WAKEUP_RETRY_LIMIT 200 /* Bits for the flags field in the gmu structure */ enum gmu_flags { diff --git a/drivers/gpu/msm/kgsl_ioctl.c b/drivers/gpu/msm/kgsl_ioctl.c index 9b02e1993a09..fce411b3c781 100644 --- a/drivers/gpu/msm/kgsl_ioctl.c +++ b/drivers/gpu/msm/kgsl_ioctl.c @@ -17,6 +17,7 @@ #include #include "kgsl_device.h" #include "kgsl_sync.h" +#include "adreno.h" static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY, @@ -168,8 +169,13 @@ long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kgsl_device_private *dev_priv = filep->private_data; struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); long ret; + if (cmd == IOCTL_KGSL_GPU_COMMAND && + READ_ONCE(device->state) != KGSL_STATE_ACTIVE) + kgsl_schedule_work(&adreno_dev->pwr_on_work); + ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs, ARRAY_SIZE(kgsl_ioctl_funcs)); diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c index 56d8933215c7..cc89b70e4dc6 100644 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -786,6 +786,7 @@ static int kgsl_iommu_fault_handler(struct iommu_domain *domain, unsigned int curr_context_id = 0; struct kgsl_context *context; char *fault_type = "unknown"; + bool fault_ret_flag = false; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, @@ -846,9 +847,11 @@ static int kgsl_iommu_fault_handler(struct iommu_domain *domain, * Turn off GPU IRQ so we don't get faults from it too. * The device mutex must be held to change power state */ - mutex_lock(&device->mutex); - kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); - mutex_unlock(&device->mutex); + if (mutex_trylock(&device->mutex)) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + mutex_unlock(&device->mutex); + } else + fault_ret_flag = true; } ptbase = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0); @@ -869,11 +872,21 @@ static int kgsl_iommu_fault_handler(struct iommu_domain *domain, no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr); if (!no_page_fault_log && __ratelimit(&_rs)) { + const char *api_str; + + if (context != NULL) { + struct adreno_context *drawctxt = + ADRENO_CONTEXT(context); + + api_str = get_api_type_str(drawctxt->type); + } else + api_str = "UNKNOWN"; + KGSL_MEM_CRIT(ctx->kgsldev, "GPU PAGE FAULT: addr = %lX pid= %d\n", addr, ptname); KGSL_MEM_CRIT(ctx->kgsldev, - "context=%s TTBR0=0x%llx CIDR=0x%x (%s %s fault)\n", - ctx->name, ptbase, contextidr, + "context=%s ctx_type=%s TTBR0=0x%llx CIDR=0x%x (%s %s fault)\n", + ctx->name, api_str, ptbase, contextidr, write ? "write" : "read", fault_type); if (gpudev->iommu_fault_block) { @@ -909,13 +922,12 @@ static int kgsl_iommu_fault_handler(struct iommu_domain *domain, } } - /* * We do not want the h/w to resume fetching data from an iommu * that has faulted, this is better for debugging as it will stall * the GPU and trigger a snapshot. Return EBUSY error. */ - if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + if (!fault_ret_flag && test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &adreno_dev->ft_pf_policy) && (flags & IOMMU_FAULT_TRANSACTION_STALLED)) { uint32_t sctlr_val; @@ -1259,7 +1271,7 @@ static int _init_global_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) if (ret) { pr_err("SMMU aperture programming call failed with error %d\n", ret); - return ret; + goto done; } } @@ -2073,6 +2085,8 @@ kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu) { u64 val; struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu); + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + /* * We cannot enable or disable the clocks in interrupt context, this * function is called from interrupt context if there is an axi error @@ -2080,9 +2094,11 @@ kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu) if (in_interrupt()) return 0; + if (ctx->regbase == NULL) + return 0; + kgsl_iommu_enable_clk(mmu); - val = KGSL_IOMMU_GET_CTX_REG_Q(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER], - TTBR0); + val = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0); kgsl_iommu_disable_clk(mmu); return val; } @@ -2267,6 +2283,7 @@ static int _insert_gpuaddr(struct kgsl_pagetable *pagetable, else { /* Duplicate entry */ WARN(1, "duplicate gpuaddr: 0x%llx\n", gpuaddr); + kmem_cache_free(addr_entry_cache, new); return -EEXIST; } } diff --git a/drivers/gpu/msm/kgsl_pool.c b/drivers/gpu/msm/kgsl_pool.c index 2f37f1277e1a..be8b0f946b68 100644 --- a/drivers/gpu/msm/kgsl_pool.c +++ b/drivers/gpu/msm/kgsl_pool.c @@ -38,11 +38,11 @@ */ struct kgsl_page_pool { unsigned int pool_order; - int page_count; + atomic_t page_count; unsigned int reserved_pages; bool allocation_allowed; spinlock_t list_lock; - struct list_head page_list; + struct llist_head page_list; }; static struct kgsl_page_pool kgsl_pools[KGSL_MAX_POOLS]; @@ -95,40 +95,33 @@ _kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p) _kgsl_pool_zero_page(p, pool->pool_order); - spin_lock(&pool->list_lock); - list_add_tail(&p->lru, &pool->page_list); - pool->page_count++; - spin_unlock(&pool->list_lock); + llist_add((struct llist_node *)&p->lru, &pool->page_list); + atomic_inc(&pool->page_count); } /* Returns a page from specified pool */ static struct page * _kgsl_pool_get_page(struct kgsl_page_pool *pool) { + struct llist_node *node; struct page *p = NULL; spin_lock(&pool->list_lock); - if (pool->page_count) { - p = list_first_entry(&pool->page_list, struct page, lru); - pool->page_count--; - list_del(&p->lru); - } + node = llist_del_first(&pool->page_list); spin_unlock(&pool->list_lock); + if (node) { + atomic_dec(&pool->page_count); + p = container_of((struct list_head *)node, typeof(*p), lru); + } return p; } /* Returns the number of pages in specified pool */ static int -kgsl_pool_size(struct kgsl_page_pool *kgsl_pool) +kgsl_pool_size(struct kgsl_page_pool *pool) { - int size; - - spin_lock(&kgsl_pool->list_lock); - size = kgsl_pool->page_count * (1 << kgsl_pool->pool_order); - spin_unlock(&kgsl_pool->list_lock); - - return size; + return atomic_read(&pool->page_count) * (1 << pool->pool_order); } /* Returns the number of pages in all kgsl page pools */ @@ -531,7 +524,7 @@ static void kgsl_pool_config(unsigned int order, unsigned int reserved_pages, kgsl_pools[kgsl_num_pools].reserved_pages = reserved_pages; kgsl_pools[kgsl_num_pools].allocation_allowed = allocation_allowed; spin_lock_init(&kgsl_pools[kgsl_num_pools].list_lock); - INIT_LIST_HEAD(&kgsl_pools[kgsl_num_pools].page_list); + init_llist_head(&kgsl_pools[kgsl_num_pools].page_list); kgsl_num_pools++; } diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c index a3b84d7619e3..0fe85927781c 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.c +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -604,8 +604,8 @@ static ssize_t kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev, mutex_lock(&device->mutex); - if (level > pwr->num_pwrlevels - 2) - level = pwr->num_pwrlevels - 2; + if (level > pwr->num_pwrlevels - 1) + level = pwr->num_pwrlevels - 1; pwr->thermal_pwrlevel = level; @@ -683,8 +683,8 @@ static void kgsl_pwrctrl_min_pwrlevel_set(struct kgsl_device *device, struct kgsl_pwrctrl *pwr = &device->pwrctrl; mutex_lock(&device->mutex); - if (level > pwr->num_pwrlevels - 2) - level = pwr->num_pwrlevels - 2; + if (level > pwr->num_pwrlevels - 1) + level = pwr->num_pwrlevels - 1; /* You can't set a minimum power level lower than the maximum */ if (level < pwr->max_pwrlevel) @@ -888,13 +888,20 @@ static ssize_t kgsl_pwrctrl_gpuclk_show(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long freq; struct kgsl_device *device = kgsl_device_from_dev(dev); struct kgsl_pwrctrl *pwr; if (device == NULL) return 0; pwr = &device->pwrctrl; - return snprintf(buf, PAGE_SIZE, "%ld\n", kgsl_pwrctrl_active_freq(pwr)); + + if (device->state == KGSL_STATE_SLUMBER) + freq = pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq; + else + freq = kgsl_pwrctrl_active_freq(pwr); + + return snprintf(buf, PAGE_SIZE, "%lu\n", freq); } static ssize_t __timer_store(struct device *dev, struct device_attribute *attr, @@ -1463,28 +1470,23 @@ static ssize_t kgsl_pwrctrl_temp_show(struct device *dev, struct kgsl_device *device = kgsl_device_from_dev(dev); struct kgsl_pwrctrl *pwr; struct thermal_zone_device *thermal_dev; - int ret, temperature = 0; + int i, max_temp = 0; if (device == NULL) - goto done; + return 0; pwr = &device->pwrctrl; - if (!pwr->tzone_name) - goto done; + for (i = 0; i < KGSL_MAX_TZONE_NAMES; i++) { + int temp = 0; - thermal_dev = thermal_zone_get_zone_by_name((char *)pwr->tzone_name); - if (thermal_dev == NULL) - goto done; - - ret = thermal_zone_get_temp(thermal_dev, &temperature); - if (ret) - goto done; + thermal_dev = thermal_zone_get_zone_by_name( + pwr->tzone_names[i]); + if (!(thermal_zone_get_temp(thermal_dev, &temp))) + max_temp = max_t(int, temp, max_temp); + } - return snprintf(buf, PAGE_SIZE, "%d\n", - temperature); -done: - return 0; + return scnprintf(buf, PAGE_SIZE, "%d\n", max_temp); } static ssize_t kgsl_pwrctrl_pwrscale_store(struct device *dev, @@ -2415,8 +2417,9 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) kgsl_pwrctrl_vbif_init(); /* temperature sensor name */ - of_property_read_string(pdev->dev.of_node, "qcom,tzone-name", - &pwr->tzone_name); + + of_property_read_string_array(pdev->dev.of_node, "tzone-names", + pwr->tzone_names, KGSL_MAX_TZONE_NAMES); return result; @@ -2478,28 +2481,14 @@ void kgsl_idle_check(struct work_struct *work) requested_state = device->requested_state; - if (device->state == KGSL_STATE_ACTIVE - || device->state == KGSL_STATE_NAP) { + if ((requested_state != KGSL_STATE_NONE) && + (device->state == KGSL_STATE_ACTIVE + || device->state == KGSL_STATE_NAP)) { if (!atomic_read(&device->active_cnt)) { - spin_lock(&device->submit_lock); - if (device->submit_now) { - spin_unlock(&device->submit_lock); - goto done; - } - /* Don't allow GPU inline submission in SLUMBER */ - if (requested_state == KGSL_STATE_SLUMBER) - device->slumber = true; - spin_unlock(&device->submit_lock); - ret = kgsl_pwrctrl_change_state(device, device->requested_state); if (ret == -EBUSY) { - if (requested_state == KGSL_STATE_SLUMBER) { - spin_lock(&device->submit_lock); - device->slumber = false; - spin_unlock(&device->submit_lock); - } /* * If the GPU is currently busy, restore * the requested state and reschedule @@ -2510,7 +2499,7 @@ void kgsl_idle_check(struct work_struct *work) kgsl_schedule_work(&device->idle_check_ws); } } -done: + if (!ret) kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); @@ -2761,6 +2750,12 @@ _aware(struct kgsl_device *device) status = gmu_start(device); break; case KGSL_STATE_INIT: + /* if GMU already in FAULT */ + if (kgsl_gmu_isenabled(device) && + test_bit(GMU_FAULT, &gmu->flags)) { + status = -EINVAL; + break; + } status = kgsl_pwrctrl_enable(device); break; /* The following 3 cases shouldn't occur, but don't panic. */ @@ -3035,13 +3030,6 @@ static void kgsl_pwrctrl_set_state(struct kgsl_device *device, trace_kgsl_pwr_set_state(device, state); device->state = state; device->requested_state = KGSL_STATE_NONE; - - spin_lock(&device->submit_lock); - if (state == KGSL_STATE_SLUMBER || state == KGSL_STATE_SUSPEND) - device->slumber = true; - else - device->slumber = false; - spin_unlock(&device->submit_lock); } static void kgsl_pwrctrl_request_state(struct kgsl_device *device, diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h index a8f03ff55920..a3a4f90c47c4 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.h +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -30,6 +30,8 @@ #define KGSL_MAX_PWRLEVELS 10 +#define KGSL_MAX_TZONE_NAMES 2 + /* Only two supported levels, min & max */ #define KGSL_CONSTRAINT_PWR_MAXLEVELS 2 @@ -53,9 +55,9 @@ /* * The effective duration of qos request in usecs at queue time. * After timeout, qos request is cancelled automatically. - * Kept 80ms default, inline with default GPU idle time. + * Kept 64ms default, inline with default GPU idle time. */ -#define KGSL_L2PC_QUEUE_TIMEOUT (80 * 1000) +#define KGSL_L2PC_QUEUE_TIMEOUT (64 * 1000) /* * The effective duration of qos request in usecs at wakeup time. @@ -147,7 +149,6 @@ struct kgsl_regulator { * @pm_qos_req_dma - the power management quality of service structure * @pm_qos_active_latency - allowed CPU latency in microseconds when active * @pm_qos_cpu_mask_latency - allowed CPU mask latency in microseconds - * @input_disable - To disable GPU wakeup on touch input event * @pm_qos_wakeup_latency - allowed CPU latency in microseconds during wakeup * @bus_control - true if the bus calculation is independent * @bus_mod - modifier from the current power level for the bus vote @@ -169,7 +170,7 @@ struct kgsl_regulator { * @sysfs_pwr_limit - pointer to the sysfs limits node * isense_clk_indx - index of isense clock, 0 if no isense * isense_clk_on_level - isense clock rate is XO rate below this level. - * tzone_name - pointer to thermal zone name of GPU temperature sensor + * tzone_names - array of thermal zone names of GPU temperature sensors */ struct kgsl_pwrctrl { @@ -205,7 +206,6 @@ struct kgsl_pwrctrl { unsigned int pm_qos_active_latency; unsigned int pm_qos_cpu_mask_latency; unsigned int pm_qos_wakeup_latency; - bool input_disable; bool bus_control; int bus_mod; unsigned int bus_percent_ab; @@ -226,7 +226,7 @@ struct kgsl_pwrctrl { struct kgsl_pwr_limit *sysfs_pwr_limit; unsigned int gpu_bimc_int_clk_freq; bool gpu_bimc_interface_enabled; - const char *tzone_name; + const char *tzone_names[KGSL_MAX_TZONE_NAMES]; }; int kgsl_pwrctrl_init(struct kgsl_device *device); diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 02b821840244..e112d1860bfc 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -501,7 +501,8 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); } else { - atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + atomic_long_add(memdesc->size, + &kgsl_driver.stats.page_free_pending); } if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) { @@ -519,6 +520,11 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) else kgsl_pool_free_sgt(memdesc->sgt); + if (!(memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)) { + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + atomic_long_sub(memdesc->size, + &kgsl_driver.stats.page_free_pending); + } } /* @@ -826,6 +832,8 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, * are allocated by kgsl. This helps with improving the vm fault * routine by finding the faulted page in constant time. */ + if (!(memdesc->flags & KGSL_MEMFLAGS_SECURE)) + atomic_long_add(size, &kgsl_driver.stats.page_alloc_pending); memdesc->pages = kgsl_malloc(len_alloc * sizeof(struct page *)); memdesc->page_count = 0; @@ -931,6 +939,9 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, &kgsl_driver.stats.page_alloc_max); done: + if (!(memdesc->flags & KGSL_MEMFLAGS_SECURE)) + atomic_long_sub(size, &kgsl_driver.stats.page_alloc_pending); + if (ret) { if (memdesc->pages) { unsigned int count = 1; diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c index 0ed17d859080..76e0e2599315 100644 --- a/drivers/gpu/msm/kgsl_snapshot.c +++ b/drivers/gpu/msm/kgsl_snapshot.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -335,10 +335,8 @@ int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, entry = kgsl_sharedmem_find(process, gpuaddr); - if (entry == NULL) { - KGSL_CORE_ERR("Unable to find GPU buffer 0x%016llX\n", gpuaddr); + if (entry == NULL) return -EINVAL; - } /* We can't freeze external memory, because we don't own it */ if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_MASK) @@ -704,22 +702,20 @@ void kgsl_device_snapshot(struct kgsl_device *device, snapshot->size += sizeof(*header); /* Build the Linux specific header */ - /* We either want to only dump GMU, or we want to dump GPU and GMU */ - if (gmu_fault) { - /* Dump only the GMU */ + if (gmu_fault) kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS, - snapshot, snapshot_os_no_ctxt, NULL); - - if (device->ftbl->snapshot_gmu) - device->ftbl->snapshot_gmu(device, snapshot); - } else { - /* Dump GPU and GMU */ + snapshot, snapshot_os_no_ctxt, NULL); + else kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS, - snapshot, snapshot_os, NULL); + snapshot, snapshot_os, NULL); - if (device->ftbl->snapshot) - device->ftbl->snapshot(device, snapshot, context); - } + /* + * Trigger both GPU and GMU snapshot. GPU specific code + * will take care of whether to dumps full state or only + * GMU state based on current GPU power state. + */ + if (device->ftbl->snapshot) + device->ftbl->snapshot(device, snapshot, context); /* * The timestamp is the seconds since boot so it is easier to match to diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c index d484894e06f2..98c117d018e2 100644 --- a/drivers/gpu/msm/kgsl_sync.c +++ b/drivers/gpu/msm/kgsl_sync.c @@ -438,27 +438,54 @@ static void kgsl_sync_fence_callback(struct fence *fence, struct fence_cb *cb) } } -static void kgsl_get_fence_name(struct fence *fence, - char *fence_name, int name_len) +static void kgsl_get_fence_names(struct fence *fence, + struct event_fence_info *info_ptr) { - char *ptr = fence_name; - char *last = fence_name + name_len; + unsigned int num_fences; + struct fence **fences; + struct fence_array *array; + int i; - ptr += snprintf(ptr, last - ptr, "%s %s", - fence->ops->get_driver_name(fence), - fence->ops->get_timeline_name(fence)); + if (!info_ptr) + return; + + array = to_fence_array(fence); + + if (array != NULL) { + num_fences = array->num_fences; + fences = array->fences; + } else { + num_fences = 1; + fences = &fence; + } - if ((ptr + 2) >= last) + info_ptr->fences = kcalloc(num_fences, sizeof(struct fence_info), + GFP_ATOMIC); + if (info_ptr->fences == NULL) return; - if (fence->ops->fence_value_str) { - ptr += snprintf(ptr, last - ptr, ": "); - fence->ops->fence_value_str(fence, ptr, last - ptr); + info_ptr->num_fences = num_fences; + + for (i = 0; i < num_fences; i++) { + struct fence *f = fences[i]; + struct fence_info *fi = &info_ptr->fences[i]; + int len; + + len = scnprintf(fi->name, sizeof(fi->name), "%s %s", + f->ops->get_driver_name(f), + f->ops->get_timeline_name(f)); + + if (f->ops->fence_value_str) { + len += scnprintf(fi->name + len, sizeof(fi->name) - len, + ": "); + f->ops->fence_value_str(f, fi->name + len, + sizeof(fi->name) - len); + } } } struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, - bool (*func)(void *priv), void *priv, char *fence_name, int name_len) + bool (*func)(void *priv), void *priv, struct event_fence_info *info_ptr) { struct kgsl_sync_fence_cb *kcb; struct fence *fence; @@ -479,8 +506,7 @@ struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, kcb->priv = priv; kcb->func = func; - if (fence_name) - kgsl_get_fence_name(fence, fence_name, name_len); + kgsl_get_fence_names(fence, info_ptr); /* if status then error or signaled */ status = fence_add_callback(fence, &kcb->fence_cb, diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h index 6998b40f77eb..a53cd51733ed 100644 --- a/drivers/gpu/msm/kgsl_sync.h +++ b/drivers/gpu/msm/kgsl_sync.h @@ -1,3 +1,4 @@ + /* Copyright (c) 2012-2014,2017-2018 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify @@ -93,7 +94,7 @@ void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline); struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, bool (*func)(void *priv), void *priv, - char *fence_name, int name_len); + struct event_fence_info *info_ptr); void kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_cb *kcb); @@ -139,7 +140,7 @@ static inline void kgsl_sync_timeline_put(struct kgsl_sync_timeline *ktimeline) static inline struct kgsl_sync_fence_cb *kgsl_sync_fence_async_wait(int fd, bool (*func)(void *priv), void *priv, - char *fence_name, int name_len) + struct event_fence_info *info_ptr) { return NULL; } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index fdf8da929cbe..b1b7deaab49d 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1048,16 +1048,16 @@ static unsigned long compute_balloon_floor(void) * 8192 744 (1/16) * 32768 1512 (1/32) */ - if (totalram_pages < MB2PAGES(128)) - min_pages = MB2PAGES(8) + (totalram_pages >> 1); - else if (totalram_pages < MB2PAGES(512)) - min_pages = MB2PAGES(40) + (totalram_pages >> 2); - else if (totalram_pages < MB2PAGES(2048)) - min_pages = MB2PAGES(104) + (totalram_pages >> 3); - else if (totalram_pages < MB2PAGES(8192)) - min_pages = MB2PAGES(232) + (totalram_pages >> 4); + if (totalram_pages() < MB2PAGES(128)) + min_pages = MB2PAGES(8) + (totalram_pages() >> 1); + else if (totalram_pages() < MB2PAGES(512)) + min_pages = MB2PAGES(40) + (totalram_pages() >> 2); + else if (totalram_pages() < MB2PAGES(2048)) + min_pages = MB2PAGES(104) + (totalram_pages() >> 3); + else if (totalram_pages() < MB2PAGES(8192)) + min_pages = MB2PAGES(232) + (totalram_pages() >> 4); else - min_pages = MB2PAGES(488) + (totalram_pages >> 5); + min_pages = MB2PAGES(488) + (totalram_pages() >> 5); #undef MB2PAGES return min_pages; } diff --git a/drivers/hwmon/qpnp-adc-voltage.c b/drivers/hwmon/qpnp-adc-voltage.c index 2783e4f6d736..ee2865c335f1 100644 --- a/drivers/hwmon/qpnp-adc-voltage.c +++ b/drivers/hwmon/qpnp-adc-voltage.c @@ -1383,7 +1383,7 @@ int32_t qpnp_vbat_sns_comp_result(struct qpnp_vadc_chip *vadc, rc = qpnp_vadc_conv_seq_request(vadc, ADC_SEQ_NONE, DIE_TEMP, &die_temp_result); if (rc < 0) { - pr_err("Error reading die_temp\n"); + pr_debug("Error reading die_temp\n"); return rc; } @@ -2114,7 +2114,7 @@ int32_t qpnp_vadc_read(struct qpnp_vadc_chip *vadc, rc = qpnp_vadc_conv_seq_request(vadc, ADC_SEQ_NONE, DIE_TEMP, &die_temp_result); if (rc < 0) { - pr_err("Error reading die_temp\n"); + pr_debug("Error reading die_temp\n"); return rc; } @@ -2148,7 +2148,7 @@ int32_t qpnp_vadc_read(struct qpnp_vadc_chip *vadc, rc = qpnp_vadc_conv_seq_request(vadc, ADC_SEQ_NONE, channel, result); if (rc < 0) - pr_err("Error reading die_temp\n"); + pr_debug("Error reading die_temp\n"); ret.intval = 0; rc = power_supply_set_property(vadc->vadc_chg_vote, @@ -2824,9 +2824,9 @@ static int qpnp_vadc_suspend_noirq(struct device *dev) status &= QPNP_VADC_STATUS1_REQ_STS_EOC_MASK; pr_debug("vadc conversion status=%d\n", status); if (status != QPNP_VADC_STATUS1_EOC) { - pr_err( + pr_err_ratelimited( "Aborting suspend, adc conversion requested while suspending\n"); - return -EBUSY; + return 0; } } diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 287f9012b390..31218d65ef6e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -425,7 +425,7 @@ static ssize_t out_mode_store(struct device *dev, if (strlen(buf) >= 10) return -EINVAL; - if (sscanf(buf, "%10s", str) != 1) + if (sscanf(buf, "%s", str) != 1) return -EINVAL; mutex_lock(&drvdata->mem_lock); @@ -525,7 +525,7 @@ static ssize_t mem_type_store(struct device *dev, if (strlen(buf) >= 10) return -EINVAL; - if (sscanf(buf, "%10s", str) != 1) + if (sscanf(buf, "%s", str) != 1) return -EINVAL; mutex_lock(&drvdata->mem_lock); diff --git a/drivers/i2c/busses/i2c-msm-v2.c b/drivers/i2c/busses/i2c-msm-v2.c index 631169b4081d..3908b35ea613 100644 --- a/drivers/i2c/busses/i2c-msm-v2.c +++ b/drivers/i2c/busses/i2c-msm-v2.c @@ -2679,11 +2679,9 @@ static int i2c_msm_rsrcs_clk_init(struct i2c_msm_ctrl *ctrl) } ctrl->rsrcs.core_clk = clk_get(ctrl->dev, "core_clk"); - if (IS_ERR(ctrl->rsrcs.core_clk)) { - ret = PTR_ERR(ctrl->rsrcs.core_clk); - dev_err(ctrl->dev, "error on clk_get(core_clk):%d\n", ret); - return ret; - } + if (IS_ERR(ctrl->rsrcs.core_clk)) + return dev_err_probe(ctrl->dev, PTR_ERR(ctrl->rsrcs.core_clk), + "clk_get(core_clk)\n"); ret = clk_set_rate(ctrl->rsrcs.core_clk, ctrl->rsrcs.clk_freq_in); if (ret) { @@ -2694,8 +2692,8 @@ static int i2c_msm_rsrcs_clk_init(struct i2c_msm_ctrl *ctrl) ctrl->rsrcs.iface_clk = clk_get(ctrl->dev, "iface_clk"); if (IS_ERR(ctrl->rsrcs.iface_clk)) { - ret = PTR_ERR(ctrl->rsrcs.iface_clk); - dev_err(ctrl->dev, "error on clk_get(iface_clk):%d\n", ret); + ret = dev_err_probe(ctrl->dev, PTR_ERR(ctrl->rsrcs.iface_clk), + "clk_get(iface_clk)\n"); goto err_set_rate; } @@ -2984,7 +2982,7 @@ static int i2c_msm_probe(struct platform_device *pdev) clk_err: i2c_msm_rsrcs_mem_teardown(ctrl); mem_err: - dev_err(ctrl->dev, "error probe() failed with err:%d\n", ret); + dev_err_probe(ctrl->dev, ret, "error probe() failed with err\n"); devm_kfree(&pdev->dev, ctrl); return ret; } diff --git a/drivers/input/misc/fpr_FingerprintCard/fpc1020_platform_tee.c b/drivers/input/misc/fpr_FingerprintCard/fpc1020_platform_tee.c index b0a3172c0a8a..3be91d909fcf 100644 --- a/drivers/input/misc/fpr_FingerprintCard/fpc1020_platform_tee.c +++ b/drivers/input/misc/fpr_FingerprintCard/fpc1020_platform_tee.c @@ -260,7 +260,7 @@ static int hw_reset(struct fpc1020_data *fpc1020) usleep_range(RESET_HIGH_SLEEP2_MIN_US, RESET_HIGH_SLEEP2_MAX_US); irq_gpio = gpio_get_value(fpc1020->irq_gpio); - dev_info(dev, "IRQ after reset %d\n", irq_gpio); + dev_info(dev, "IRQ after reset %d\n", irq_gpio); exit: return rc; diff --git a/drivers/input/misc/qpnp-power-on.c b/drivers/input/misc/qpnp-power-on.c index 2fa09531083f..c078a9a09509 100644 --- a/drivers/input/misc/qpnp-power-on.c +++ b/drivers/input/misc/qpnp-power-on.c @@ -967,9 +967,6 @@ qpnp_pon_input_dispatch(struct qpnp_pon *pon, u32 pon_type) return -EINVAL; } - pr_info("PMIC input: code=%d, sts=0x%hhx, bit=0x%hhx, type=%d, os=%d\n", - cfg->key_code, pon_rt_sts, pon_rt_bit, cfg->pon_type, - cfg->old_state); key_status = pon_rt_sts & pon_rt_bit; if (pon->kpdpwr_dbc_enable && cfg->pon_type == PON_KPDPWR) { diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 6e0439ab78d6..2743ab3fdd44 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -1317,6 +1317,10 @@ config TOUCHSCREEN_HIMAX_CHIPSET If unsure, say N. +config WAKE_GESTURES + bool "wake gestures" + + source "drivers/input/touchscreen/hxchipset/Kconfig" endif diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index ec331e69708a..39d32699418a 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -112,4 +112,5 @@ obj-$(CONFIG_TOUCHSCREEN_HIMAX_CHIPSET) += hxchipset/ obj-$(CONFIG_TOUCHSCREEN_HEATMAP) += heatmap.o obj-$(CONFIG_TOUCHSCREEN_FTM4) += stm/ obj-$(CONFIG_TOUCHSCREEN_SEC_TS) += sec_ts/ -obj-$(CONFIG_TOUCHSCREEN_TBN) += qmi_tbn_v01.o touch_bus_negotiator.o \ No newline at end of file +obj-$(CONFIG_TOUCHSCREEN_TBN) += qmi_tbn_v01.o touch_bus_negotiator.o +obj-$(CONFIG_WAKE_GESTURES) += wake_gestures.o diff --git a/drivers/input/touchscreen/sec_ts/sec_cmd.c b/drivers/input/touchscreen/sec_ts/sec_cmd.c index 3ac058a8bd26..b8f475c9bbb8 100644 --- a/drivers/input/touchscreen/sec_ts/sec_cmd.c +++ b/drivers/input/touchscreen/sec_ts/sec_cmd.c @@ -87,7 +87,7 @@ static ssize_t sec_cmd_store(struct device *dev, if (*(buf + len - 1) == '\n') len--; - memset(data->cmd, 0x00, ARRAY_SIZE(data->cmd)); + memset(data->cmd, 0x00, sizeof(data->cmd)); memcpy(data->cmd, buf, len); cur = strchr(buf, (int)delim); @@ -119,17 +119,17 @@ static ssize_t sec_cmd_store(struct device *dev, if (cur && cmd_found) { cur++; start = cur; - memset(buff, 0x00, ARRAY_SIZE(buff)); + memset(buff, 0x00, sizeof(buff)); do { if (*cur == delim || cur - buf == len) { end = cur; memcpy(buff, start, end - start); - *(buff + strnlen(buff, ARRAY_SIZE(buff))) = '\0'; + *(buff + strnlen(buff, sizeof(buff))) = '\0'; if (kstrtoint(buff, 10, data->cmd_param + param_cnt) < 0) goto err_out; start = cur + 1; - memset(buff, 0x00, ARRAY_SIZE(buff)); + memset(buff, 0x00, sizeof(buff)); param_cnt++; } cur++; @@ -200,7 +200,7 @@ static void sec_cmd_store_function(struct sec_cmd_data *data) if (*(buf + len - 1) == '\n') len--; - memset(data->cmd, 0x00, ARRAY_SIZE(data->cmd)); + memset(data->cmd, 0x00, sizeof(data->cmd)); memcpy(data->cmd, buf, len); cur = strchr(buf, (int)delim); @@ -232,17 +232,17 @@ static void sec_cmd_store_function(struct sec_cmd_data *data) if (cur && cmd_found) { cur++; start = cur; - memset(buff, 0x00, ARRAY_SIZE(buff)); + memset(buff, 0x00, sizeof(buff)); do { if (*cur == delim || cur - buf == len) { end = cur; memcpy(buff, start, end - start); - *(buff + strnlen(buff, ARRAY_SIZE(buff))) = '\0'; + *(buff + strnlen(buff, sizeof(buff))) = '\0'; if (kstrtoint(buff, 10, data->cmd_param + param_cnt) < 0) return; start = cur + 1; - memset(buff, 0x00, ARRAY_SIZE(buff)); + memset(buff, 0x00, sizeof(buff)); param_cnt++; } cur++; diff --git a/drivers/input/touchscreen/sec_ts/sec_ts.c b/drivers/input/touchscreen/sec_ts/sec_ts.c index f0bb46e7fd4d..ac08e511f159 100644 --- a/drivers/input/touchscreen/sec_ts/sec_ts.c +++ b/drivers/input/touchscreen/sec_ts/sec_ts.c @@ -10,6 +10,15 @@ * published by the Free Software Foundation. */ +#ifdef CONFIG_WAKE_GESTURES +#include +static bool is_suspended; +bool scr_suspended(void) +{ + return is_suspended; +} +#endif + struct sec_ts_data *tsp_info; #include "sec_ts.h" @@ -482,11 +491,6 @@ int sec_ts_wait_for_ready_with_count(struct sec_ts_data *ts, unsigned int ack, sec_ts_delay(20); } - input_info(true, &ts->client->dev, - "%s: %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X [%d]\n", - __func__, tBuff[0], tBuff[1], tBuff[2], tBuff[3], - tBuff[4], tBuff[5], tBuff[6], tBuff[7], retry); - return rc; } @@ -957,6 +961,11 @@ static void sec_ts_read_event(struct sec_ts_data *ts) input_report_key(ts->input_dev, BTN_TOUCH, 1); input_report_key(ts->input_dev, BTN_TOOL_FINGER, 1); +#ifdef CONFIG_WAKE_GESTURES + if (is_suspended) + ts->coord[t_id].x += 5000; +#endif + input_report_abs(ts->input_dev, ABS_MT_POSITION_X, ts->coord[t_id].x); input_report_abs(ts->input_dev, ABS_MT_POSITION_Y, ts->coord[t_id].y); input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, ts->coord[t_id].major); @@ -1268,9 +1277,6 @@ void sec_ts_set_grip_type(struct sec_ts_data *ts, u8 set_type) { u8 mode = G_NONE; - input_info(true, &ts->client->dev, "%s: re-init grip(%d), edh:%d, edg:%d, lan:%d\n", __func__, - set_type, ts->grip_edgehandler_direction, ts->grip_edge_range, ts->grip_landscape_mode); - /* edge handler */ if (ts->grip_edgehandler_direction != 0) mode |= G_SET_EDGE_HANDLER; @@ -1298,8 +1304,6 @@ static int sec_ts_pinctrl_configure(struct sec_ts_data *ts, bool enable) { struct pinctrl_state *state; - input_info(true, &ts->client->dev, "%s: %s\n", __func__, enable ? "ACTIVE" : "SUSPEND"); - if (enable) { state = pinctrl_lookup_state(ts->plat_data->pinctrl, "on_state"); if (IS_ERR(ts->plat_data->pinctrl)) @@ -2949,10 +2953,6 @@ static void sec_set_switch_gpio(struct sec_ts_data *ts, int gpio_value) if (!gpio_is_valid(gpio)) return; - input_info(true, &ts->client->dev, "%s: toggling i2c switch to %s\n", - __func__, gpio_value == SEC_SWITCH_GPIO_VALUE_AP_MASTER ? - "AP" : "SLPI"); - retval = gpio_direction_output(gpio, gpio_value); if (retval < 0) input_err(true, &ts->client->dev, @@ -2966,8 +2966,6 @@ static void sec_ts_suspend_work(struct work_struct *work) suspend_work); int ret = 0; - input_info(true, &ts->client->dev, "%s\n", __func__); - mutex_lock(&ts->device_mutex); reinit_completion(&ts->bus_resumed); @@ -3011,8 +3009,6 @@ static void sec_ts_resume_work(struct work_struct *work) resume_work); int ret = 0; - input_info(true, &ts->client->dev, "%s\n", __func__); - mutex_lock(&ts->device_mutex); #ifdef CONFIG_TOUCHSCREEN_TBN @@ -3200,6 +3196,13 @@ static int sec_ts_screen_state_chg_callback(struct notifier_block *nb, switch (blank) { case MSM_DRM_BLANK_POWERDOWN: case MSM_DRM_BLANK_LP: +#ifdef CONFIG_WAKE_GESTURES + if (wg_switch) { + enable_irq_wake(ts->client->irq); + is_suspended = true; + break; + } +#endif if (val == MSM_DRM_EARLY_EVENT_BLANK) { input_dbg(true, &ts->client->dev, "%s: MSM_DRM_BLANK_POWERDOWN.\n", __func__); @@ -3207,6 +3210,17 @@ static int sec_ts_screen_state_chg_callback(struct notifier_block *nb, } break; case MSM_DRM_BLANK_UNBLANK: +#ifdef CONFIG_WAKE_GESTURES + if (wg_switch) { + disable_irq_wake(ts->client->irq); + is_suspended = false; + break; + } + if (wg_changed) { + wg_switch = wg_switch_temp; + wg_changed = false; + } +#endif if (val == MSM_DRM_EVENT_BLANK) { input_dbg(true, &ts->client->dev, "%s: MSM_DRM_BLANK_UNBLANK.\n", __func__); diff --git a/drivers/input/touchscreen/sec_ts/sec_ts_fw.c b/drivers/input/touchscreen/sec_ts/sec_ts_fw.c index efd9152d6cc0..563c2775a3ff 100644 --- a/drivers/input/touchscreen/sec_ts/sec_ts_fw.c +++ b/drivers/input/touchscreen/sec_ts/sec_ts_fw.c @@ -132,8 +132,6 @@ int sec_ts_sw_reset(struct sec_ts_data *ts) return 0; } - input_info(true, &ts->client->dev, "%s: sw_reset\n", __func__); - /* Sense_on */ ret = ts->sec_ts_i2c_write(ts, SEC_TS_CMD_SENSE_ON, NULL, 0); if (ret < 0) { diff --git a/drivers/input/touchscreen/stm/ftm4_pdc.c b/drivers/input/touchscreen/stm/ftm4_pdc.c index 4fc23c995288..43cc9cf9c46a 100644 --- a/drivers/input/touchscreen/stm/ftm4_pdc.c +++ b/drivers/input/touchscreen/stm/ftm4_pdc.c @@ -333,7 +333,7 @@ static ssize_t store_cmd(struct device *dev, struct device_attribute *devattr, if (info->delayed_cmd_param[0] > 1) info->delayed_cmd_param[1] = buf[19]-'0'; - schedule_delayed_work(&info->cover_cmd_work, + queue_delayed_work(system_power_efficient_wq, &info->cover_cmd_work, msecs_to_jiffies(10)); } return -EBUSY; @@ -350,7 +350,7 @@ static ssize_t store_cmd(struct device *dev, struct device_attribute *devattr, info->delayed_cmd_param[1] = buf[19]-'0'; if (info->delayed_cmd_param[0] == 0) - schedule_delayed_work(&info->cover_cmd_work, + queue_delayed_work(system_power_efficient_wq, &info->cover_cmd_work, msecs_to_jiffies(300)); } } diff --git a/drivers/input/touchscreen/stm/ftm4_ts.c b/drivers/input/touchscreen/stm/ftm4_ts.c index 342d906732ad..8d52fa8ebc22 100644 --- a/drivers/input/touchscreen/stm/ftm4_ts.c +++ b/drivers/input/touchscreen/stm/ftm4_ts.c @@ -492,7 +492,7 @@ int fts_wait_for_ready(struct fts_ts_info *info) __func__); if (info->lowpower_mode) { - schedule_delayed_work(&info->reset_work, + queue_delayed_work(system_power_efficient_wq, &info->reset_work, msecs_to_jiffies(10)); } break; @@ -1808,7 +1808,7 @@ static int fts_input_open(struct input_dev *dev) tsp_debug_info(&info->client->dev, "%s\n", __func__); #ifdef USE_OPEN_DWORK - schedule_delayed_work(&info->open_work, + queue_delayed_work(system_power_efficient_wq, &info->open_work, msecs_to_jiffies(TOUCH_OPEN_DWORK_TIME)); #else retval = fts_start_device(info); diff --git a/drivers/input/touchscreen/stm/fts.c b/drivers/input/touchscreen/stm/fts.c index 963b7d5cfb4c..bb6a2dbbfc74 100644 --- a/drivers/input/touchscreen/stm/fts.c +++ b/drivers/input/touchscreen/stm/fts.c @@ -100,6 +100,15 @@ #define TYPE_B_PROTOCOL #endif +#ifdef CONFIG_WAKE_GESTURES +#include +static bool is_suspended; +bool scr_suspended_blueline(void) +{ + return is_suspended; +} +#endif + extern SysInfo systemInfo; extern TestToDo tests; #ifdef GESTURE_MODE @@ -2555,6 +2564,11 @@ static bool fts_enter_pointer_event_handler(struct fts_ts_info *info, unsigned /* pr_info("%s : TouchID = %d,Touchcount = %d\n", __func__, * touchId,touchcount); */ +#ifdef CONFIG_WAKE_GESTURES + if (is_suspended) + x += 5000; +#endif + input_report_abs(info->input_dev, ABS_MT_POSITION_X, x); input_report_abs(info->input_dev, ABS_MT_POSITION_Y, y); input_report_abs(info->input_dev, ABS_MT_TOUCH_MAJOR, major); @@ -3865,6 +3879,15 @@ static void fts_resume_work(struct work_struct *work) info = container_of(work, struct fts_ts_info, resume_work); +#ifdef CONFIG_WAKE_GESTURES + if (wg_switch) { + disable_irq_wake(info->client->irq); + fts_system_reset(); + release_all_touches(info); + return; + } +#endif + if (!info->sensor_sleep) return; @@ -3905,6 +3928,14 @@ static void fts_suspend_work(struct work_struct *work) info = container_of(work, struct fts_ts_info, suspend_work); +#ifdef CONFIG_WAKE_GESTURES + if (wg_switch) { + enable_irq_wake(info->client->irq); + release_all_touches(info); + return; + } +#endif + if (info->sensor_sleep) return; @@ -4025,12 +4056,28 @@ static int fts_screen_state_chg_callback(struct notifier_block *nb, switch (blank) { case MSM_DRM_BLANK_POWERDOWN: case MSM_DRM_BLANK_LP: +#ifdef CONFIG_WAKE_GESTURES + if (wg_switch) { + is_suspended = true; + break; + } +#endif if (val == MSM_DRM_EARLY_EVENT_BLANK) { pr_info("%s: BLANK\n", __func__); fts_set_bus_ref(info, FTS_BUS_REF_SCREEN_ON, false); } break; case MSM_DRM_BLANK_UNBLANK: +#ifdef CONFIG_WAKE_GESTURES + if (wg_switch) { + is_suspended = false; + break; + } + if (wg_changed) { + wg_switch = wg_switch_temp; + wg_changed = false; + } +#endif if (val == MSM_DRM_EVENT_BLANK) { pr_info("%s: UNBLANK\n", __func__); fts_set_bus_ref(info, FTS_BUS_REF_SCREEN_ON, true); diff --git a/drivers/input/touchscreen/stm/fts_lib/ftsIO.c b/drivers/input/touchscreen/stm/fts_lib/ftsIO.c index 9443fdd6fb4a..4f08fe54e8c3 100644 --- a/drivers/input/touchscreen/stm/fts_lib/ftsIO.c +++ b/drivers/input/touchscreen/stm/fts_lib/ftsIO.c @@ -112,7 +112,7 @@ struct device *getDev(void) * slave * @return client if it was previously set or NULL in all the other cases */ -struct i2c_client *getClient() +struct i2c_client *getClient(void) { if (client != NULL) return (struct i2c_client *)client; diff --git a/drivers/input/touchscreen/touch_bus_negotiator.c b/drivers/input/touchscreen/touch_bus_negotiator.c index 947122071634..6dca53bed77f 100644 --- a/drivers/input/touchscreen/touch_bus_negotiator.c +++ b/drivers/input/touchscreen/touch_bus_negotiator.c @@ -116,8 +116,6 @@ int tbn_release_bus(struct tbn_context *tbn) struct tbn_kernel_release_bus_v01 req; struct tbn_ssc_acquire_bus_v01 rsp; - dev_info(tbn->dev, "kernel releasing bus access from SLPI\n"); - if (!tbn || !tbn->qmi_handle) return -EINVAL; diff --git a/drivers/input/touchscreen/wake_gestures.c b/drivers/input/touchscreen/wake_gestures.c new file mode 100644 index 000000000000..0619a99034f4 --- /dev/null +++ b/drivers/input/touchscreen/wake_gestures.c @@ -0,0 +1,851 @@ +/* + * drivers/input/touchscreen/wake_gestures.c + * + * + * Copyright (c) 2013, Dennis Rassmann + * Copyright (c) 2013-18 Aaron Segaert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Tunables */ +#define WG_DEBUG 0 +#define WG_DEFAULT 0 +#define DT2W_DEFAULT 0 +#define S2W_DEFAULT 0 +#define S2S_DEFAULT 0 +#define WG_PWRKEY_DUR 60 + +/* Crosshatch */ +#define SWEEP_Y_MAX 2960 +#define SWEEP_X_MAX 1440 +#define SWEEP_EDGE 130 +#define SWEEP_Y_LIMIT SWEEP_Y_MAX-SWEEP_EDGE +#define SWEEP_X_LIMIT SWEEP_X_MAX-SWEEP_EDGE +#define SWEEP_X_B1 480 +#define SWEEP_X_B2 940 +#define SWEEP_X_START 720 +#define SWEEP_X_FINAL 360 +#define SWEEP_Y_START 1066 +#define SWEEP_Y_NEXT 300 +#define DT2W_FEATHER 200 +#define DT2W_TIME 500 + +/* Blueline */ +#define SWEEP_Y_MAX_BLUELINE 2160 +#define SWEEP_X_MAX_BLUELINE 1060 +#define SWEEP_EDGE_BLUELINE 90 +#define SWEEP_Y_LIMIT_BLUELINE SWEEP_Y_MAX_BLUELINE-SWEEP_EDGE_BLUELINE +#define SWEEP_X_LIMIT_BLUELINE SWEEP_X_MAX_BLUELINE-SWEEP_EDGE_BLUELINE +#define SWEEP_X_B1_BLUELINE 350 +#define SWEEP_X_B2_BLUELINE 600 +#define SWEEP_Y_START_BLUELINE 800 +#define SWEEP_X_START_BLUELINE 530 +#define SWEEP_X_FINAL_BLUELINE 260 +#define SWEEP_Y_NEXT_BLUELINE 150 + + +/* Wake Gestures */ +#define SWEEP_TIMEOUT 300 +#define TRIGGER_TIMEOUT 500 +#define WAKE_GESTURE 0x0b +#define SWEEP_RIGHT 0x01 +#define SWEEP_LEFT 0x02 +#define SWEEP_UP 0x04 +#define SWEEP_DOWN 0x08 + +#define LOGTAG "WG" +#define BLUELINE 1 +#define CROSSHATCH 2 + +int gestures_switch = WG_DEFAULT; +static struct input_dev *gesture_dev; + +/* Resources */ +static int s2w_switch = S2W_DEFAULT; +static int dt2w_switch = DT2W_DEFAULT; +bool wg_switch = 0; +bool wg_switch_temp; +bool wg_changed = false; +static int s2s_switch = S2S_DEFAULT; +static int touch_x = 0, touch_y = 0; +static bool touch_x_called = false, touch_y_called = false; +static bool exec_countx = true, exec_county = true, exec_count = true; +static bool barrierx[2] = {false, false}, barriery[2] = {false, false}; +static int firstx = 0, firsty = 0; +static unsigned long firstx_time = 0, firsty_time = 0; +static unsigned long pwrtrigger_time[2] = {0, 0}; +static unsigned long long tap_time_pre = 0; +static int touch_nr = 0, x_pre = 0, y_pre = 0; +static bool touch_cnt = true; +static int wake_vibrate = true; +static int sleep_vibrate = false; + +static unsigned int sweep_y_limit = SWEEP_Y_LIMIT; +static unsigned int sweep_x_limit = SWEEP_X_LIMIT; +static unsigned int sweep_x_b1 = SWEEP_X_B1; +static unsigned int sweep_x_b2 = SWEEP_X_B2; +static unsigned int sweep_y_start = SWEEP_Y_START; +static unsigned int sweep_x_start = SWEEP_X_START; +static unsigned int sweep_x_final = SWEEP_X_FINAL; +static unsigned int sweep_y_next = SWEEP_Y_NEXT; +static unsigned int sweep_x_max = SWEEP_X_MAX; +static unsigned int sweep_edge = SWEEP_EDGE; + +static struct input_dev * wake_dev; +static DEFINE_MUTEX(pwrkeyworklock); +static struct workqueue_struct *s2w_input_wq; +static struct workqueue_struct *dt2w_input_wq; +static struct work_struct s2w_input_work; +static struct work_struct dt2w_input_work; + +//get hardware type +static int hw_version = CROSSHATCH; +static int __init get_model(char *cmdline_model) +{ + if (strstr(cmdline_model, "blueline")) { + sweep_y_limit = SWEEP_Y_LIMIT_BLUELINE; + sweep_x_limit = SWEEP_X_LIMIT_BLUELINE; + sweep_x_b1 = SWEEP_X_B1_BLUELINE; + sweep_x_b2 = SWEEP_X_B2_BLUELINE; + sweep_y_start = SWEEP_Y_START_BLUELINE; + sweep_x_start = SWEEP_X_START_BLUELINE; + sweep_x_final = SWEEP_X_FINAL_BLUELINE; + sweep_y_next = SWEEP_Y_NEXT_BLUELINE; + sweep_x_max = SWEEP_X_MAX_BLUELINE; + sweep_edge = SWEEP_EDGE_BLUELINE; + hw_version = BLUELINE; + } + return 0; +} +__setup("androidboot.hardware=", get_model); + +static bool is_suspended(void) +{ + if (hw_version == BLUELINE) + return scr_suspended_blueline(); + else + return scr_suspended(); +} + +/* Wake Gestures */ +static void report_gesture(int gest) +{ + pwrtrigger_time[1] = pwrtrigger_time[0]; + pwrtrigger_time[0] = ktime_to_ms(ktime_get()); + + if (pwrtrigger_time[0] - pwrtrigger_time[1] < TRIGGER_TIMEOUT) + return; + + input_report_rel(gesture_dev, WAKE_GESTURE, gest); + input_sync(gesture_dev); +} + +/* PowerKey work func */ +static void wake_presspwr(struct work_struct * wake_presspwr_work) { + if (!mutex_trylock(&pwrkeyworklock)) + return; + + input_event(wake_dev, EV_KEY, KEY_POWER, 1); + input_event(wake_dev, EV_SYN, 0, 0); + msleep(WG_PWRKEY_DUR); + input_event(wake_dev, EV_KEY, KEY_POWER, 0); + input_event(wake_dev, EV_SYN, 0, 0); + msleep(WG_PWRKEY_DUR); + mutex_unlock(&pwrkeyworklock); + + if ((wake_vibrate && is_suspended()) || + (sleep_vibrate && !is_suspended())) + set_vibrate(); + + return; +} +static DECLARE_WORK(wake_presspwr_work, wake_presspwr); + +/* PowerKey trigger */ +static void wake_pwrtrigger(void) { + pwrtrigger_time[1] = pwrtrigger_time[0]; + pwrtrigger_time[0] = ktime_to_ms(ktime_get()); + + if (pwrtrigger_time[0] - pwrtrigger_time[1] < TRIGGER_TIMEOUT) + return; + + schedule_work(&wake_presspwr_work); + + return; +} + + +/* Doubletap2wake */ + +static void doubletap2wake_reset(void) { + exec_count = true; + touch_nr = 0; + tap_time_pre = 0; + x_pre = 0; + y_pre = 0; +} + +static unsigned int calc_feather(int coord, int prev_coord) { + int calc_coord = 0; + calc_coord = coord-prev_coord; + if (calc_coord < 0) + calc_coord = calc_coord * (-1); + return calc_coord; +} + +/* init a new touch */ +static void new_touch(int x, int y) { + tap_time_pre = ktime_to_ms(ktime_get()); + x_pre = x; + y_pre = y; + touch_nr++; +} + +/* Doubletap2wake main function */ +static void detect_doubletap2wake(int x, int y, bool st) +{ + bool single_touch = st; +#if WG_DEBUG + pr_info(LOGTAG"x,y(%4d,%4d) tap_time_pre:%llu\n", + x, y, tap_time_pre); +#endif + if (x < sweep_edge || x > sweep_x_limit) + return; + if (y < sweep_edge || y > sweep_y_limit) + return; + + if ((single_touch) && (dt2w_switch) && (exec_count) && (touch_cnt)) { + touch_cnt = false; + if (touch_nr == 0) { + new_touch(x, y); + } else if (touch_nr == 1) { + if ((calc_feather(x, x_pre) < DT2W_FEATHER) && + (calc_feather(y, y_pre) < DT2W_FEATHER) && + ((ktime_to_ms(ktime_get())-tap_time_pre) < DT2W_TIME)) + touch_nr++; + else { + doubletap2wake_reset(); + new_touch(x, y); + } + } else { + doubletap2wake_reset(); + new_touch(x, y); + } + if ((touch_nr > 1)) { + exec_count = false; + if (gestures_switch) { + report_gesture(5); + } else { + wake_pwrtrigger(); + } + doubletap2wake_reset(); + } + } +} + + +/* Sweep2wake/Sweep2sleep */ +static void sweep2wake_reset(void) { + + exec_countx = true; + barrierx[0] = false; + barrierx[1] = false; + firstx = 0; + firstx_time = 0; + + exec_county = true; + barriery[0] = false; + barriery[1] = false; + firsty = 0; + firsty_time = 0; +} + +/* Sweep2wake main functions*/ +static void detect_sweep2wake_v(int x, int y, bool st) +{ + int prevy = 0, nexty = 0; + bool single_touch = st; + + if (firsty == 0) { + firsty = y; + firsty_time = ktime_to_ms(ktime_get()); + } + +#if WG_DEBUG + pr_info(LOGTAG"s2w vert x,y(%4d,%4d) single:%s\n", + x, y, (single_touch) ? "true" : "false"); +#endif + + //sweep up + if (firsty > sweep_y_start && single_touch && s2w_switch & SWEEP_UP) { + prevy = firsty; + nexty = prevy - sweep_y_next; + if (barriery[0] == true || (y < prevy && y > nexty)) { + prevy = nexty; + nexty -= sweep_y_next; + barriery[0] = true; + if (barriery[1] == true || (y < prevy && y > nexty)) { + prevy = nexty; + barriery[1] = true; + if (y < prevy) { + if (y < (nexty - sweep_y_next)) { + if (exec_county && (ktime_to_ms(ktime_get()) - firsty_time < SWEEP_TIMEOUT)) { + if (gestures_switch) { + report_gesture(3); + } else { + wake_pwrtrigger(); + } + exec_county = false; + } + } + } + } + } + //sweep down + } else if (firsty <= sweep_y_start && single_touch && s2w_switch & SWEEP_DOWN) { + prevy = firsty; + nexty = prevy + sweep_y_next; + if (barriery[0] == true || (y > prevy && y < nexty)) { + prevy = nexty; + nexty += sweep_y_next; + barriery[0] = true; + if (barriery[1] == true || (y > prevy && y < nexty)) { + prevy = nexty; + barriery[1] = true; + if (y > prevy) { + if (y > (nexty + sweep_y_next)) { + if (exec_county && (ktime_to_ms(ktime_get()) - firsty_time < SWEEP_TIMEOUT)) { + if (gestures_switch) { + report_gesture(4); + } else { + wake_pwrtrigger(); + } + exec_county = false; + } + } + } + } + } + } + +} + +static void detect_sweep2wake_h(int x, int y, bool st, bool scr_suspended) +{ + int prevx = 0, nextx = 0; + bool single_touch = st; + + if (!scr_suspended && y < sweep_y_limit) { + sweep2wake_reset(); + return; + } + + if (x < 0) + return; + + if (firstx == 0) { + firstx = x; + firstx_time = ktime_to_ms(ktime_get()); + } + +#if WG_DEBUG + pr_info(LOGTAG"s2w Horz x,y(%4d,%4d) wake:%s\n", + x, y, (scr_suspended) ? "true" : "false"); +#endif + + //left->right + if (firstx < sweep_x_start && single_touch && + ((scr_suspended && (s2w_switch & SWEEP_RIGHT)) || + (!scr_suspended && (s2s_switch & SWEEP_RIGHT)))) { + prevx = 0; + nextx = sweep_x_b1; + if ((barrierx[0] == true) || + ((x > prevx) && (x < nextx))) { + prevx = nextx; + nextx = sweep_x_b2; + barrierx[0] = true; + if ((barrierx[1] == true) || + ((x > prevx) && (x < nextx))) { + prevx = nextx; + barrierx[1] = true; + if (x > prevx) { + if (x > (sweep_x_max - sweep_x_final)) { + if (exec_countx && (ktime_to_ms(ktime_get()) - firstx_time < SWEEP_TIMEOUT)) { + if (gestures_switch && scr_suspended) { + report_gesture(1); + } else { + wake_pwrtrigger(); + } + exec_countx = false; + } + } + } + } + } + //right->left + } else if (firstx >= sweep_x_start && single_touch && + ((scr_suspended && (s2w_switch & SWEEP_LEFT)) || + (!scr_suspended && (s2s_switch & SWEEP_LEFT)))) { + prevx = sweep_x_max; + nextx = SWEEP_X_B2; + if ((barrierx[0] == true) || + ((x < prevx) && (x > nextx))) { + prevx = nextx; + nextx = sweep_x_b1; + barrierx[0] = true; + if ((barrierx[1] == true) || + ((x < prevx) && (x > nextx))) { + prevx = nextx; + barrierx[1] = true; + if (x < prevx) { + if (x < sweep_x_final) { + if (exec_countx) { + if (gestures_switch && scr_suspended) { + report_gesture(2); + } else { + wake_pwrtrigger(); + } + exec_countx = false; + } + } + } + } + } + } +} + +static void s2w_input_callback(struct work_struct *unused) +{ + detect_sweep2wake_h(touch_x, touch_y, true, is_suspended()); + if (is_suspended()) + detect_sweep2wake_v(touch_x, touch_y, true); + + return; +} + +static void dt2w_input_callback(struct work_struct *unused) +{ + + if (is_suspended() && dt2w_switch) + detect_doubletap2wake(touch_x, touch_y, true); + return; +} + +static void wg_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + if (is_suspended() && code == ABS_MT_POSITION_X) { + value -= 5000; + } + +#if WG_DEBUG + pr_info("wg: code: %s|%u, val: %i\n", + ((code==ABS_MT_POSITION_X) ? "X" : + (code==ABS_MT_POSITION_Y) ? "Y" : + (code==ABS_MT_TRACKING_ID) ? "ID" : + "undef"), code, value); +#endif + + if (code == ABS_MT_SLOT) { + sweep2wake_reset(); + doubletap2wake_reset(); + return; + } + + if (code == ABS_MT_TRACKING_ID && value == -1) { + sweep2wake_reset(); + touch_cnt = true; + queue_work_on(0, dt2w_input_wq, &dt2w_input_work); + return; + } + + if (code == ABS_MT_POSITION_X) { + touch_x = value; + touch_x_called = true; + } + + if (code == ABS_MT_POSITION_Y) { + touch_y = value; + touch_y_called = true; + } + + if (touch_x_called && touch_y_called) { + touch_x_called = false; + touch_y_called = false; + queue_work_on(0, s2w_input_wq, &s2w_input_work); + } else if (!is_suspended() && touch_x_called && !touch_y_called) { + touch_x_called = false; + touch_y_called = false; + queue_work_on(0, s2w_input_wq, &s2w_input_work); + } +} + +static int input_dev_filter(struct input_dev *dev) { + if (strstr(dev->name, "sec_touchscreen")) { + return 0; + } else if (strstr(dev->name, "fts")) { + return 0; + } else { + return 1; + } +} + +static int wg_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) { + struct input_handle *handle; + int error; + + if (input_dev_filter(dev)) + return -ENODEV; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "wg"; + + error = input_register_handle(handle); + if (error) + goto err2; + + error = input_open_device(handle); + if (error) + goto err1; + + return 0; +err1: + input_unregister_handle(handle); +err2: + kfree(handle); + return error; +} + +static void wg_input_disconnect(struct input_handle *handle) { + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id wg_ids[] = { + { .driver_info = 1 }, + { }, +}; + +static struct input_handler wg_input_handler = { + .event = wg_input_event, + .connect = wg_input_connect, + .disconnect = wg_input_disconnect, + .name = "wg_inputreq", + .id_table = wg_ids, +}; + +static void wake_gesture_changed(void) +{ + wg_switch_temp = (s2w_switch || dt2w_switch); + if (!is_suspended()) + wg_switch = wg_switch_temp; + else + wg_changed = true; +} + +/* + * SYSFS stuff below here + */ +static ssize_t sweep2wake_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", s2w_switch); +} + +static ssize_t sweep2wake_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 15) + val = 0; + + s2w_switch = val; + + wake_gesture_changed(); + + return count; +} + +static struct kobj_attribute sweep2wake_attribute = + __ATTR(sweep2wake, 0664, sweep2wake_show, sweep2wake_store); + +static ssize_t sweep2sleep_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", s2s_switch); +} + +static ssize_t sweep2sleep_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 3) + val = 0; + + s2s_switch = val; + + return count; +} + +static struct kobj_attribute sweep2sleep_attribute = + __ATTR(sweep2sleep, 0664, sweep2sleep_show, sweep2sleep_store); + +static ssize_t doubletap2wake_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", dt2w_switch); +} + +static ssize_t doubletap2wake_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 1) + val = 0; + + dt2w_switch = val; + + wake_gesture_changed(); + + return count; +} + +static struct kobj_attribute doubletap2wake_attribute = + __ATTR(doubletap2wake, 0664, doubletap2wake_show, doubletap2wake_store); + +static ssize_t wake_gestures_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", gestures_switch); +} + +static ssize_t wake_gestures_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 1) + val = 0; + + gestures_switch = val; + + return count; +} + +static struct kobj_attribute wake_gestures_attribute = + __ATTR(wake_gestures, 0664, wake_gestures_show, wake_gestures_store); + +static ssize_t wake_vibrate_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", wake_vibrate); +} + +static ssize_t wake_vibrate_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 1) + val = 1; + + wake_vibrate = val; + + return count; +} + +static struct kobj_attribute wake_vibrate_attribute = + __ATTR(wake_vibrate, 0664, wake_vibrate_show, wake_vibrate_store); + +static ssize_t sleep_vibrate_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", sleep_vibrate); +} + +static ssize_t sleep_vibrate_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + + if (val < 0 || val > 1) + val = 1; + + sleep_vibrate = val; + + return count; +} + +static struct kobj_attribute sleep_vibrate_attribute = + __ATTR(sleep_vibrate, 0664, sleep_vibrate_show, sleep_vibrate_store); + +static struct attribute *attrs[] = { + &sweep2sleep_attribute.attr, + &sweep2wake_attribute.attr, + &doubletap2wake_attribute.attr, + &wake_gestures_attribute.attr, + &wake_vibrate_attribute.attr, + &sleep_vibrate_attribute.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +static struct kobject *android_touch_kobj; + +/* + * INIT / EXIT stuff below here + */ + +static int __init wake_gestures_init(void) +{ + int rc = 0; + + wake_dev = input_allocate_device(); + if (!wake_dev) { + pr_err("Failed to allocate wake_dev\n"); + goto err_alloc_dev; + } + + input_set_capability(wake_dev, EV_KEY, KEY_POWER); + wake_dev->name = "wg_pwrkey"; + wake_dev->phys = "wg_pwrkey/input0"; + + rc = input_register_device(wake_dev); + if (rc) { + pr_err("%s: input_register_device err=%d\n", __func__, rc); + goto err_input_dev; + } + + rc = input_register_handler(&wg_input_handler); + if (rc) + pr_err("%s: Failed to register wg_input_handler\n", __func__); + + s2w_input_wq = create_workqueue("s2wiwq"); + if (!s2w_input_wq) { + pr_err("%s: Failed to create s2wiwq workqueue\n", __func__); + return -EFAULT; + } + INIT_WORK(&s2w_input_work, s2w_input_callback); + + dt2w_input_wq = create_workqueue("dt2wiwq"); + if (!dt2w_input_wq) { + pr_err("%s: Failed to create dt2wiwq workqueue\n", __func__); + return -EFAULT; + } + INIT_WORK(&dt2w_input_work, dt2w_input_callback); + + gesture_dev = input_allocate_device(); + if (!gesture_dev) { + pr_err("Failed to allocate gesture_dev\n"); + goto err_alloc_dev; + } + + gesture_dev->name = "wake_gesture"; + gesture_dev->phys = "wake_gesture/input0"; + input_set_capability(gesture_dev, EV_REL, WAKE_GESTURE); + + rc = input_register_device(gesture_dev); + if (rc) { + pr_err("%s: input_register_device err=%d\n", __func__, rc); + goto err_gesture_dev; + } + + android_touch_kobj = kobject_create_and_add("android_touch", NULL); + if (!android_touch_kobj) + return -ENOMEM; + + rc = sysfs_create_group(android_touch_kobj, &attr_group); + if (rc) + pr_warn("%s: sysfs_create_group failed\n", __func__); + +err_gesture_dev: + input_free_device(gesture_dev); +err_input_dev: + input_free_device(wake_dev); +err_alloc_dev: + + wg_switch_temp = (s2w_switch || dt2w_switch); + + return 0; +} + +static void __exit wake_gestures_exit(void) +{ + kobject_del(android_touch_kobj); + input_unregister_handler(&wg_input_handler); + destroy_workqueue(s2w_input_wq); + destroy_workqueue(dt2w_input_wq); + input_unregister_device(wake_dev); + input_free_device(wake_dev); + input_unregister_device(gesture_dev); + input_free_device(gesture_dev); + + return; +} + +module_init(wake_gestures_init); +module_exit(wake_gestures_exit); + diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index 623e57239821..83b72f4f85e4 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -30,12 +30,6 @@ struct gic_quirk { extern bool from_suspend; extern struct irq_chip gic_arch_extn; -#ifdef CONFIG_QCOM_SHOW_RESUME_IRQ -extern int msm_show_resume_irq_mask; -#else -#define msm_show_resume_irq_mask 0 -#endif - int gic_configure_irq(unsigned int irq, unsigned int type, void __iomem *base, void (*sync_access)(void)); void gic_dist_config(void __iomem *base, int gic_irqs, diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 56ddcc3ae6ed..639eecff6c18 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -41,8 +41,6 @@ #include #include -#include - #include "irq-gic-common.h" #define MAX_IRQ 1020U /* Max number of SGI+PPI+SPI */ @@ -690,69 +688,6 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) return 0; } -#ifdef CONFIG_PM - -static int gic_suspend(void) -{ - return 0; -} - -static void gic_show_resume_irq(struct gic_chip_data *gic) -{ - unsigned int i; - u32 enabled; - u32 pending[32]; - void __iomem *base = gic_data.dist_base; - - if (!msm_show_resume_irq_mask) - return; - - for (i = 0; i * 32 < gic->irq_nr; i++) { - enabled = readl_relaxed(base + GICD_ICENABLER + i * 4); - pending[i] = readl_relaxed(base + GICD_ISPENDR + i * 4); - pending[i] &= enabled; - } - - for (i = find_first_bit((unsigned long *)pending, gic->irq_nr); - i < gic->irq_nr; - i = find_next_bit((unsigned long *)pending, gic->irq_nr, i+1)) { - unsigned int irq = irq_find_mapping(gic->domain, i); - struct irq_desc *desc = irq_to_desc(irq); - const char *name = "null"; - - if (desc == NULL) - name = "stray irq"; - else if (desc->action && desc->action->name) - name = desc->action->name; - - pr_warn("%s: %d triggered %s\n", __func__, irq, name); - } -} - -static void gic_resume_one(struct gic_chip_data *gic) -{ - gic_show_resume_irq(gic); -} - -static void gic_resume(void) -{ - gic_resume_one(&gic_data); -} - -static struct syscore_ops gic_syscore_ops = { - .suspend = gic_suspend, - .resume = gic_resume, -}; - -static int __init gic_init_sys(void) -{ - register_syscore_ops(&gic_syscore_ops); - return 0; -} -arch_initcall(gic_init_sys); - -#endif - static u64 gic_mpidr_to_affinity(unsigned long mpidr) { u64 aff; @@ -1026,7 +961,6 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | tlist << ICC_SGI1R_TARGET_LIST_SHIFT); - pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); gic_write_sgi1r(val); } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 54fa8c46633a..e6fb1ccfe371 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -50,7 +50,6 @@ #include #include -#include #include "irq-gic-common.h" #ifdef CONFIG_ARM64 @@ -1232,70 +1231,6 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, return ret; } -#ifdef CONFIG_PM -static int gic_suspend(void) -{ - return 0; -} - -static void gic_show_resume_irq(struct gic_chip_data *gic) -{ - unsigned int i; - u32 enabled; - u32 pending[32]; - void __iomem *base = gic_data_dist_base(gic); - - if (!msm_show_resume_irq_mask) - return; - - for (i = 0; i * 32 < gic->gic_irqs; i++) { - enabled = readl_relaxed(base + GIC_DIST_ENABLE_CLEAR + i * 4); - pending[i] = readl_relaxed(base + GIC_DIST_PENDING_SET + i * 4); - pending[i] &= enabled; - } - - for (i = find_first_bit((unsigned long *)pending, gic->gic_irqs); - i < gic->gic_irqs; - i = find_next_bit((unsigned long *)pending, gic->gic_irqs, i+1)) { - unsigned int irq = irq_find_mapping(gic->domain, i); - struct irq_desc *desc = irq_to_desc(irq); - const char *name = "null"; - - if (desc == NULL) - name = "stray irq"; - else if (desc->action && desc->action->name) - name = desc->action->name; - - pr_warn("%s: %d triggered %s\n", __func__, i, name); - } -} - -static void gic_resume_one(struct gic_chip_data *gic) -{ - gic_show_resume_irq(gic); -} - -static void gic_resume(void) -{ - int i; - - for (i = 0; i < CONFIG_ARM_GIC_MAX_NR; i++) - gic_resume_one(&gic_data[i]); -} - -static struct syscore_ops gic_syscore_ops = { - .suspend = gic_suspend, - .resume = gic_resume, -}; - -static int __init gic_init_sys(void) -{ - register_syscore_ops(&gic_syscore_ops); - return 0; -} -arch_initcall(gic_init_sys); -#endif - void __init gic_init(unsigned int gic_nr, int irq_start, void __iomem *dist_base, void __iomem *cpu_base) { diff --git a/drivers/irqchip/qcom/mpm.c b/drivers/irqchip/qcom/mpm.c index 5e352f4d45a0..813b6b309c5d 100644 --- a/drivers/irqchip/qcom/mpm.c +++ b/drivers/irqchip/qcom/mpm.c @@ -417,7 +417,7 @@ static int msm_get_apps_irq(unsigned int mpm_irq) } -static void system_pm_exit_sleep(void) +static void system_pm_exit_sleep(bool success) { msm_rpm_exit_sleep(); } diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c index ba4beb25d872..298c8dba0321 100644 --- a/drivers/isdn/hisax/q931.c +++ b/drivers/isdn/hisax/q931.c @@ -855,7 +855,7 @@ struct DTag { /* Display tags */ { 0x8c, "Reason" }, { 0x8d, "Calling party name" }, { 0x8e, "Called party name" }, - { 0x8f, "Orignal called name" }, + { 0x8f, "Original called name" }, { 0x90, "Redirecting name" }, { 0x91, "Connected name" }, { 0x92, "Originating restrictions" }, diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 48bb5a879e6f..14b6004653a7 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -421,14 +421,13 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, */ if (gfp_mask & __GFP_NORETRY) { unsigned noio_flag = memalloc_noio_save(); - void *ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, - PAGE_KERNEL); + void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); memalloc_noio_restore(noio_flag); return ptr; } - return __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); } /* @@ -1907,7 +1906,7 @@ static int __init dm_bufio_init(void) memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches); memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names); - mem = (__u64)mult_frac(totalram_pages - totalhigh_pages, + mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(), DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT; if (mem > ULONG_MAX) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9b2cd89fd651..0ab46f63ae86 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -87,7 +87,7 @@ struct crypt_iv_operations { }; struct iv_essiv_private { - struct crypto_ahash *hash_tfm; + struct crypto_shash *hash_tfm; u8 *salt; }; @@ -270,25 +270,22 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_essiv_init(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - AHASH_REQUEST_ON_STACK(req, essiv->hash_tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, essiv->hash_tfm); struct crypto_cipher *essiv_tfm; int err; - sg_init_one(&sg, cc->key, cc->key_size); - ahash_request_set_tfm(req, essiv->hash_tfm); - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - ahash_request_set_crypt(req, &sg, essiv->salt, cc->key_size); + desc->tfm = essiv->hash_tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - err = crypto_ahash_digest(req); - ahash_request_zero(req); + err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt); + shash_desc_zero(desc); if (err) return err; essiv_tfm = cc->iv_private; err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_ahash_digestsize(essiv->hash_tfm)); + crypto_shash_digestsize(essiv->hash_tfm)); if (err) return err; @@ -299,7 +296,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) static int crypt_iv_essiv_wipe(struct crypt_config *cc) { struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - unsigned salt_size = crypto_ahash_digestsize(essiv->hash_tfm); + unsigned salt_size = crypto_shash_digestsize(essiv->hash_tfm); struct crypto_cipher *essiv_tfm; int r, err = 0; @@ -351,7 +348,7 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - crypto_free_ahash(essiv->hash_tfm); + crypto_free_shash(essiv->hash_tfm); essiv->hash_tfm = NULL; kzfree(essiv->salt); @@ -369,7 +366,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { struct crypto_cipher *essiv_tfm = NULL; - struct crypto_ahash *hash_tfm = NULL; + struct crypto_shash *hash_tfm = NULL; u8 *salt = NULL; int err; @@ -379,14 +376,14 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } /* Allocate hash algorithm */ - hash_tfm = crypto_alloc_ahash(opts, 0, CRYPTO_ALG_ASYNC); + hash_tfm = crypto_alloc_shash(opts, 0, 0); if (IS_ERR(hash_tfm)) { ti->error = "Error initializing ESSIV hash"; err = PTR_ERR(hash_tfm); goto bad; } - salt = kzalloc(crypto_ahash_digestsize(hash_tfm), GFP_KERNEL); + salt = kzalloc(crypto_shash_digestsize(hash_tfm), GFP_KERNEL); if (!salt) { ti->error = "Error kmallocing salt storage in ESSIV"; err = -ENOMEM; @@ -397,7 +394,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, cc->iv_gen_private.essiv.hash_tfm = hash_tfm; essiv_tfm = setup_essiv_cpu(cc, ti, salt, - crypto_ahash_digestsize(hash_tfm)); + crypto_shash_digestsize(hash_tfm)); if (IS_ERR(essiv_tfm)) { crypt_iv_essiv_dtr(cc); return PTR_ERR(essiv_tfm); @@ -408,7 +405,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, bad: if (hash_tfm && !IS_ERR(hash_tfm)) - crypto_free_ahash(hash_tfm); + crypto_free_shash(hash_tfm); kfree(salt); return err; } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index fdd4a840b30f..6ec1b8808311 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -84,7 +84,7 @@ static bool __check_shared_memory(size_t alloc_size) a = shared_memory_amount + alloc_size; if (a < shared_memory_amount) return false; - if (a >> PAGE_SHIFT > totalram_pages / DM_STATS_MEMORY_FACTOR) + if (a >> PAGE_SHIFT > totalram_pages() / DM_STATS_MEMORY_FACTOR) return false; #ifdef CONFIG_MMU if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR) @@ -146,12 +146,7 @@ static void *dm_kvzalloc(size_t alloc_size, int node) if (!claim_shared_memory(alloc_size)) return NULL; - if (alloc_size <= KMALLOC_MAX_SIZE) { - p = kzalloc_node(alloc_size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN, node); - if (p) - return p; - } - p = vzalloc_node(alloc_size, node); + p = kvzalloc_node(alloc_size, GFP_KERNEL | __GFP_NOMEMALLOC, node); if (p) return p; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 28193a57bf47..fb5ff428f842 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -348,7 +348,6 @@ static void stripe_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; - char buffer[sc->stripes + 1]; unsigned int sz = 0; unsigned int i; @@ -357,11 +356,12 @@ static void stripe_status(struct dm_target *ti, status_type_t type, DMEMIT("%d ", sc->stripes); for (i = 0; i < sc->stripes; i++) { DMEMIT("%s ", sc->stripe[i].dev->name); - buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ? - 'D' : 'A'; } - buffer[i] = '\0'; - DMEMIT("1 %s", buffer); + DMEMIT("1 "); + for (i = 0; i < sc->stripes; i++) { + DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ? + 'D' : 'A'); + } break; case STATUSTYPE_TABLE: diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 330db48e7c4a..dc7133d63167 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -191,7 +191,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, u8 *want_digest, u8 *data) { - if (unlikely(verity_hash(v, verity_io_hash_desc(v, io), + if (unlikely(verity_hash(v, verity_io_hash_req(v, io), data, 1 << v->data_dev_block_bits, verity_io_real_digest(v, io)))) return 0; @@ -400,7 +400,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, } /* Always re-validate the corrected block against the expected hash */ - r = verity_hash(v, verity_io_hash_desc(v, io), fio->output, + r = verity_hash(v, verity_io_hash_req(v, io), fio->output, 1 << v->data_dev_block_bits, verity_io_real_digest(v, io)); if (unlikely(r < 0)) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 8a34559220e1..bd64f27bac6b 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -95,81 +95,123 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, } /* - * Wrapper for crypto_shash_init, which handles verity salting. + * Callback function for asynchrnous crypto API completion notification */ -static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc) +static void verity_op_done(struct crypto_async_request *base, int err) { - int r; + struct verity_result *res = (struct verity_result *)base->data; - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + if (err == -EINPROGRESS) + return; - r = crypto_shash_init(desc); + res->err = err; + complete(&res->completion); +} - if (unlikely(r < 0)) { - DMERR("crypto_shash_init failed: %d", r); - return r; - } +/* + * Wait for async crypto API callback + */ +static inline int verity_complete_op(struct verity_result *res, int ret) +{ + switch (ret) { + case 0: + break; - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); + case -EINPROGRESS: + case -EBUSY: + ret = wait_for_completion_interruptible(&res->completion); + if (!ret) + ret = res->err; + reinit_completion(&res->completion); + break; - if (unlikely(r < 0)) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } + default: + DMERR("verity_wait_hash: crypto op submission failed: %d", ret); } - return 0; + if (unlikely(ret < 0)) + DMERR("verity_wait_hash: crypto op failed: %d", ret); + + return ret; } -static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc, - const u8 *data, size_t len) +static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, + const u8 *data, size_t len, + struct verity_result *res) { - int r = crypto_shash_update(desc, data, len); + struct scatterlist sg; - if (unlikely(r < 0)) - DMERR("crypto_shash_update failed: %d", r); + sg_init_one(&sg, data, len); + ahash_request_set_crypt(req, &sg, NULL, len); + + return verity_complete_op(res, crypto_ahash_update(req)); +} + +/* + * Wrapper for crypto_ahash_init, which handles verity salting. + */ +static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, + struct verity_result *res) +{ + int r; + + ahash_request_set_tfm(req, v->tfm); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + verity_op_done, (void *)res); + init_completion(&res->completion); + + r = verity_complete_op(res, crypto_ahash_init(req)); + + if (unlikely(r < 0)) { + DMERR("crypto_ahash_init failed: %d", r); + return r; + } + + if (likely(v->salt_size && (v->version >= 1))) + r = verity_hash_update(v, req, v->salt, v->salt_size, res); return r; } -static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc, - u8 *digest) +static int verity_hash_final(struct dm_verity *v, struct ahash_request *req, + u8 *digest, struct verity_result *res) { int r; - if (unlikely(!v->version)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); + if (unlikely(v->salt_size && (!v->version))) { + r = verity_hash_update(v, req, v->salt, v->salt_size, res); if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; + DMERR("verity_hash_final failed updating salt: %d", r); + goto out; } } - r = crypto_shash_final(desc, digest); - - if (unlikely(r < 0)) - DMERR("crypto_shash_final failed: %d", r); - + ahash_request_set_crypt(req, NULL, digest, 0); + r = verity_complete_op(res, crypto_ahash_final(req)); +out: return r; } -int verity_hash(struct dm_verity *v, struct shash_desc *desc, +int verity_hash(struct dm_verity *v, struct ahash_request *req, const u8 *data, size_t len, u8 *digest) { int r; + struct verity_result res; - r = verity_hash_init(v, desc); + r = verity_hash_init(v, req, &res); if (unlikely(r < 0)) - return r; + goto out; - r = verity_hash_update(v, desc, data, len); + r = verity_hash_update(v, req, data, len, &res); if (unlikely(r < 0)) - return r; + goto out; + + r = verity_hash_final(v, req, digest, &res); - return verity_hash_final(v, desc, digest); +out: + return r; } static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, @@ -281,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, goto release_ret_r; } - r = verity_hash(v, verity_io_hash_desc(v, io), + r = verity_hash(v, verity_io_hash_req(v, io), data, 1 << v->hash_dev_block_bits, verity_io_real_digest(v, io)); if (unlikely(r < 0)) @@ -349,6 +391,49 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, return r; } +/* + * Calculates the digest for the given bio + */ +int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter *iter, struct verity_result *res) +{ + unsigned int todo = 1 << v->data_dev_block_bits; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); + struct scatterlist sg; + struct ahash_request *req = verity_io_hash_req(v, io); + + do { + int r; + unsigned int len; + struct bio_vec bv = bio_iter_iovec(bio, *iter); + + sg_init_table(&sg, 1); + + len = bv.bv_len; + + if (likely(len >= todo)) + len = todo; + /* + * Operating on a single page at a time looks suboptimal + * until you consider the typical block size is 4,096B. + * Going through this loops twice should be very rare. + */ + sg_set_page(&sg, bv.bv_page, len, bv.bv_offset); + ahash_request_set_crypt(req, &sg, NULL, len); + r = verity_complete_op(res, crypto_ahash_update(req)); + + if (unlikely(r < 0)) { + DMERR("verity_for_io_block crypto op failed: %d", r); + return r; + } + + bio_advance_iter(bio, iter, len); + todo -= len; + } while (todo); + + return 0; +} + /* * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec * starting from iter. @@ -387,12 +472,6 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } -static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io, - u8 *data, size_t len) -{ - return verity_hash_update(v, verity_io_hash_desc(v, io), data, len); -} - static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -421,11 +500,13 @@ static int verity_verify_io(struct dm_verity_io *io) struct dm_verity *v = io->v; struct bvec_iter start; unsigned b; + struct verity_result res; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); for (b = 0; b < io->n_blocks; b++) { int r; sector_t cur_block = io->block + b; - struct shash_desc *desc = verity_io_hash_desc(v, io); + struct ahash_request *req = verity_io_hash_req(v, io); if (v->validated_blocks && likely(test_bit(cur_block, v->validated_blocks))) { @@ -452,16 +533,17 @@ static int verity_verify_io(struct dm_verity_io *io) continue; } - r = verity_hash_init(v, desc); + r = verity_hash_init(v, req, &res); if (unlikely(r < 0)) return r; start = io->iter; - r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update); + r = verity_for_io_block(v, io, &io->iter, &res); if (unlikely(r < 0)) return r; - r = verity_hash_final(v, desc, verity_io_real_digest(v, io)); + r = verity_hash_final(v, req, verity_io_real_digest(v, io), + &res); if (unlikely(r < 0)) return r; @@ -474,9 +556,17 @@ static int verity_verify_io(struct dm_verity_io *io) else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) continue; - else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + else { + if (bio->bi_error) { + /* + * Error correction failed; Just return error + */ + return -EIO; + } + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, cur_block)) - return -EIO; + return -EIO; + } } return 0; @@ -743,7 +833,7 @@ void verity_dtr(struct dm_target *ti) kfree(v->zero_digest); if (v->tfm) - crypto_free_shash(v->tfm); + crypto_free_ahash(v->tfm); kfree(v->alg_name); @@ -782,7 +872,7 @@ static int verity_alloc_most_once(struct dm_verity *v) static int verity_alloc_zero_digest(struct dm_verity *v) { int r = -ENOMEM; - struct shash_desc *desc; + struct ahash_request *req; u8 *zero_data; v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL); @@ -790,9 +880,9 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!v->zero_digest) return r; - desc = kmalloc(v->shash_descsize, GFP_KERNEL); + req = kmalloc(v->ahash_reqsize, GFP_KERNEL); - if (!desc) + if (!req) return r; /* verity_dtr will free zero_digest */ zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL); @@ -800,11 +890,11 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!zero_data) goto out; - r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits, + r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits, v->zero_digest); out: - kfree(desc); + kfree(req); kfree(zero_data); return r; @@ -988,7 +1078,7 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - v->tfm = crypto_alloc_shash(v->alg_name, 0, 0); + v->tfm = crypto_alloc_ahash(v->alg_name, 0, 0); if (IS_ERR(v->tfm)) { ti->error = "Cannot initialize hash function"; r = PTR_ERR(v->tfm); @@ -1002,16 +1092,16 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) * problems by logging the ->cra_driver_name. */ DMINFO("%s using implementation \"%s\"", v->alg_name, - crypto_shash_alg(v->tfm)->base.cra_driver_name); + crypto_hash_alg_common(v->tfm)->base.cra_driver_name); - v->digest_size = crypto_shash_digestsize(v->tfm); + v->digest_size = crypto_ahash_digestsize(v->tfm); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; r = -EINVAL; goto bad; } - v->shash_descsize = - sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm); + v->ahash_reqsize = sizeof(struct ahash_request) + + crypto_ahash_reqsize(v->tfm); v->root_digest = kmalloc(v->digest_size, GFP_KERNEL); if (!v->root_digest) { @@ -1119,7 +1209,7 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } ti->per_io_data_size = sizeof(struct dm_verity_io) + - v->shash_descsize + v->digest_size * 2; + v->ahash_reqsize + v->digest_size * 2; r = verity_fec_ctr(v); if (r) diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index d216fc76d350..116e91fa50d7 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -37,7 +37,7 @@ struct dm_verity { struct dm_target *ti; struct dm_bufio_client *bufio; char *alg_name; - struct crypto_shash *tfm; + struct crypto_ahash *tfm; u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ u8 *zero_digest; /* digest for a zero block */ @@ -52,7 +52,7 @@ struct dm_verity { unsigned char levels; /* the number of tree levels */ unsigned char version; unsigned digest_size; /* digest size for the current hash algorithm */ - unsigned shash_descsize;/* the size of temporary space for crypto */ + unsigned int ahash_reqsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ enum verity_mode mode; /* mode for handling verification errors */ unsigned corrupted_errs;/* Number of errors for corrupted blocks */ @@ -82,31 +82,36 @@ struct dm_verity_io { /* * Three variably-size fields follow this struct: * - * u8 hash_desc[v->shash_descsize]; + * u8 hash_req[v->ahash_reqsize]; * u8 real_digest[v->digest_size]; * u8 want_digest[v->digest_size]; * - * To access them use: verity_io_hash_desc(), verity_io_real_digest() + * To access them use: verity_io_hash_req(), verity_io_real_digest() * and verity_io_want_digest(). */ }; -static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v, +struct verity_result { + struct completion completion; + int err; +}; + +static inline struct ahash_request *verity_io_hash_req(struct dm_verity *v, struct dm_verity_io *io) { - return (struct shash_desc *)(io + 1); + return (struct ahash_request *)(io + 1); } static inline u8 *verity_io_real_digest(struct dm_verity *v, struct dm_verity_io *io) { - return (u8 *)(io + 1) + v->shash_descsize; + return (u8 *)(io + 1) + v->ahash_reqsize; } static inline u8 *verity_io_want_digest(struct dm_verity *v, struct dm_verity_io *io) { - return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; + return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size; } static inline u8 *verity_io_digest_end(struct dm_verity *v, @@ -121,7 +126,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, struct dm_verity_io *io, u8 *data, size_t len)); -extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, +extern int verity_hash(struct dm_verity *v, struct ahash_request *req, const u8 *data, size_t len, u8 *digest); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index 73a2dba475d0..69785905e7d1 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -569,7 +569,7 @@ int __must_check media_device_register_entity(struct media_device *mdev, if (entity->function == MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN || entity->function == MEDIA_ENT_F_UNKNOWN) - dev_warn(mdev->dev, + dev_dbg(mdev->dev, "Entity type for entity %s was not initialized!\n", entity->name); diff --git a/drivers/media/platform/msm/camera/cam_req_mgr/cam_req_mgr_dev.c b/drivers/media/platform/msm/camera/cam_req_mgr/cam_req_mgr_dev.c index cade169d2d27..a15b0ab8e045 100644 --- a/drivers/media/platform/msm/camera/cam_req_mgr/cam_req_mgr_dev.c +++ b/drivers/media/platform/msm/camera/cam_req_mgr/cam_req_mgr_dev.c @@ -625,12 +625,6 @@ static int cam_req_mgr_probe(struct platform_device *pdev) goto req_mgr_util_fail; } - rc = cam_mem_mgr_init(); - if (rc) { - CAM_ERR(CAM_CRM, "mem mgr init failed"); - goto mem_mgr_init_fail; - } - rc = cam_req_mgr_core_device_init(); if (rc) { CAM_ERR(CAM_CRM, "core device setup failed"); @@ -654,8 +648,6 @@ static int cam_req_mgr_probe(struct platform_device *pdev) return rc; req_mgr_core_fail: - cam_mem_mgr_deinit(); -mem_mgr_init_fail: cam_req_mgr_util_deinit(); req_mgr_util_fail: mutex_destroy(&g_dev.dev_lock); diff --git a/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor/cam_sensor_core.c b/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor/cam_sensor_core.c index ad85f6c679d2..5f2d549e8e88 100644 --- a/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor/cam_sensor_core.c +++ b/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor/cam_sensor_core.c @@ -590,8 +590,8 @@ void cam_sensor_shutdown(struct cam_sensor_ctrl_t *s_ctrl) &s_ctrl->sensordata->power_info; int rc = 0; - if ((s_ctrl->sensor_state == CAM_SENSOR_INIT) && - (s_ctrl->is_probe_succeed == 0)) + s_ctrl->is_probe_succeed = 0; + if (s_ctrl->sensor_state == CAM_SENSOR_INIT) return; cam_sensor_release_resource(s_ctrl); @@ -615,7 +615,6 @@ void cam_sensor_shutdown(struct cam_sensor_ctrl_t *s_ctrl) power_info->power_down_setting_size = 0; s_ctrl->streamon_count = 0; s_ctrl->streamoff_count = 0; - s_ctrl->is_probe_succeed = 0; s_ctrl->sensor_state = CAM_SENSOR_INIT; } @@ -816,15 +815,6 @@ int32_t cam_sensor_driver_cmd(struct cam_sensor_ctrl_t *s_ctrl, struct cam_sensor_acquire_dev sensor_acq_dev; struct cam_create_dev_hdl bridge_params; - if ((s_ctrl->is_probe_succeed == 0) || - (s_ctrl->sensor_state != CAM_SENSOR_INIT)) { - CAM_WARN(CAM_SENSOR, - "Not in right state to aquire %d", - s_ctrl->sensor_state); - rc = -EINVAL; - goto release_mutex; - } - if (s_ctrl->bridge_intf.device_hdl != -1) { CAM_ERR(CAM_SENSOR, "Device is already acquired"); rc = -EINVAL; diff --git a/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor_io/cam_sensor_cci_i2c.c b/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor_io/cam_sensor_cci_i2c.c index b40b51282daf..6b96c56c7999 100644 --- a/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor_io/cam_sensor_cci_i2c.c +++ b/drivers/media/platform/msm/camera/cam_sensor_module/cam_sensor_io/cam_sensor_cci_i2c.c @@ -20,7 +20,7 @@ int32_t cam_cci_i2c_read(struct cam_sensor_cci_client *cci_client, enum camera_sensor_i2c_type data_type) { int32_t rc = -EINVAL; - unsigned char buf[data_type]; + unsigned char buf[8]; struct cam_cci_ctrl cci_ctrl; if (addr_type <= CAMERA_SENSOR_I2C_TYPE_INVALID diff --git a/drivers/media/platform/msm/sde/rotator/sde_rotator_core.c b/drivers/media/platform/msm/sde/rotator/sde_rotator_core.c index e248813f57e5..6e672809d4aa 100644 --- a/drivers/media/platform/msm/sde/rotator/sde_rotator_core.c +++ b/drivers/media/platform/msm/sde/rotator/sde_rotator_core.c @@ -2843,7 +2843,7 @@ static int sde_rotator_get_dt_vreg_data(struct device *dev, mp->vreg_config[i].enable_load, mp->vreg_config[i].disable_load); } - return rc; + return 0; error: if (mp->vreg_config) { diff --git a/drivers/media/platform/msm/vidc/msm_venc.c b/drivers/media/platform/msm/vidc/msm_venc.c index ae826543b977..a8cc8e8c06a5 100644 --- a/drivers/media/platform/msm/vidc/msm_venc.c +++ b/drivers/media/platform/msm/vidc/msm_venc.c @@ -1963,7 +1963,7 @@ int msm_venc_s_ctrl(struct msm_vidc_inst *inst, struct v4l2_ctrl *ctrl) case V4L2_CID_MPEG_VIDC_VIDEO_USELTRFRAME: property_id = HAL_CONFIG_VENC_USELTRFRAME; use_ltr.ref_ltr = ctrl->val; - use_ltr.use_constraint = false; + use_ltr.use_constraint = true; use_ltr.frames = 0; pdata = &use_ltr; break; diff --git a/drivers/media/platform/msm/vidc/msm_vidc_debug.c b/drivers/media/platform/msm/vidc/msm_vidc_debug.c index 74fa3d554842..2319c26909cf 100644 --- a/drivers/media/platform/msm/vidc/msm_vidc_debug.c +++ b/drivers/media/platform/msm/vidc/msm_vidc_debug.c @@ -16,14 +16,14 @@ #include "msm_vidc_debug.h" #include "vidc_hfi_api.h" -int msm_vidc_debug = VIDC_ERR | VIDC_WARN; +int msm_vidc_debug = 0; EXPORT_SYMBOL(msm_vidc_debug); int msm_vidc_debug_out = VIDC_OUT_PRINTK; EXPORT_SYMBOL(msm_vidc_debug_out); int msm_vidc_fw_debug = 0x18; -int msm_vidc_fw_debug_mode = 1; +int msm_vidc_fw_debug_mode = 0; int msm_vidc_fw_low_power_mode = 1; bool msm_vidc_fw_coverage = !true; bool msm_vidc_sys_idle_indicator = !true; diff --git a/drivers/media/platform/msm/vidc/msm_vidc_platform.c b/drivers/media/platform/msm/vidc/msm_vidc_platform.c index 6fc1afb8e923..f975409b6bbb 100644 --- a/drivers/media/platform/msm/vidc/msm_vidc_platform.c +++ b/drivers/media/platform/msm/vidc/msm_vidc_platform.c @@ -120,7 +120,7 @@ static struct msm_vidc_common_data sdm845_common_data[] = { }, { .key = "qcom,max-secure-instances", - .value = 5, + .value = 2, }, { .key = "qcom,max-hw-load", diff --git a/drivers/media/platform/msm/vidc/msm_vidc_res_parse.c b/drivers/media/platform/msm/vidc/msm_vidc_res_parse.c index 530fe3a5703a..1dc6723ea358 100644 --- a/drivers/media/platform/msm/vidc/msm_vidc_res_parse.c +++ b/drivers/media/platform/msm/vidc/msm_vidc_res_parse.c @@ -25,6 +25,7 @@ enum clock_properties { CLOCK_PROP_HAS_SCALING = 1 << 0, CLOCK_PROP_HAS_MEM_RETENTION = 1 << 1, + CLOCK_PROP_DISABLE_MEMCORE_ONLY = 1 << 2, }; #define PERF_GOV "performance" @@ -666,6 +667,11 @@ static int msm_vidc_load_clock_table( else vc->has_mem_retention = false; + if (clock_props[c] & CLOCK_PROP_DISABLE_MEMCORE_ONLY) + vc->disable_memcore_only = true; + else + vc->disable_memcore_only = false; + dprintk(VIDC_DBG, "Found clock %s: scale-able = %s\n", vc->name, vc->count ? "yes" : "no"); } diff --git a/drivers/media/platform/msm/vidc/msm_vidc_resources.h b/drivers/media/platform/msm/vidc/msm_vidc_resources.h index 23e33fe104f2..06d8fa5c8b53 100644 --- a/drivers/media/platform/msm/vidc/msm_vidc_resources.h +++ b/drivers/media/platform/msm/vidc/msm_vidc_resources.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -90,6 +90,7 @@ struct clock_info { u32 count; bool has_scaling; bool has_mem_retention; + bool disable_memcore_only; }; struct clock_set { diff --git a/drivers/media/platform/msm/vidc/venus_hfi.c b/drivers/media/platform/msm/vidc/venus_hfi.c index 2f3d4ea14df5..19c248c02e90 100644 --- a/drivers/media/platform/msm/vidc/venus_hfi.c +++ b/drivers/media/platform/msm/vidc/venus_hfi.c @@ -142,7 +142,7 @@ static void __dump_packet(u8 *packet, enum vidc_msg_prio log_level) * row must contain enough for 0xdeadbaad * 8 to be converted into * "de ad ba ab " * 8 + '\0' */ - char row[3 * row_size]; + char row[3 * 32]; for (c = 0; c * row_size < packet_size; ++c) { int bytes_to_read = ((c + 1) * row_size > packet_size) ? @@ -1118,6 +1118,8 @@ static int __set_clocks(struct venus_hfi_device *device, u32 freq) int rc = 0; venus_hfi_for_each_clock(device, cl) { + if (cl->disable_memcore_only) + continue; if (cl->has_scaling) {/* has_scaling */ device->clk_freq = freq; rc = clk_set_rate(cl->clk, freq); @@ -3414,6 +3416,8 @@ static inline void __disable_unprepare_clks(struct venus_hfi_device *device) } venus_hfi_for_each_clock_reverse(device, cl) { + if (cl->disable_memcore_only) + continue; dprintk(VIDC_DBG, "Clock: %s disable and unprepare\n", cl->name); rc = clk_set_flags(cl->clk, CLKFLAG_NORETAIN_PERIPH); @@ -3443,6 +3447,11 @@ static inline int __prepare_enable_clks(struct venus_hfi_device *device) } venus_hfi_for_each_clock(device, cl) { + /* MEM CORE is ON by default. Unset it for unused clocks*/ + if (cl->disable_memcore_only) { + clk_set_flags(cl->clk, CLKFLAG_NORETAIN_MEM); + continue; + } /* * For the clocks we control, set the rate prior to preparing * them. Since we don't really have a load at this point, scale @@ -3480,6 +3489,8 @@ static inline int __prepare_enable_clks(struct venus_hfi_device *device) fail_clk_enable: venus_hfi_for_each_clock_reverse_continue(device, cl, c) { + if (cl->disable_memcore_only) + continue; dprintk(VIDC_ERR, "Clock: %s disable and unprepare\n", cl->name); clk_disable_unprepare(cl->clk); diff --git a/drivers/media/platform/msm/vidc_3x/msm_v4l2_vidc.c b/drivers/media/platform/msm/vidc_3x/msm_v4l2_vidc.c index 9d05086c86ba..416ccab1522d 100644 --- a/drivers/media/platform/msm/vidc_3x/msm_v4l2_vidc.c +++ b/drivers/media/platform/msm/vidc_3x/msm_v4l2_vidc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "msm_vidc_common.h" #include "msm_vidc_debug.h" @@ -34,6 +35,8 @@ #define BASE_DEVICE_NUMBER 32 + +static struct pm_qos_request msm_v4l2_vidc_pm_qos_request; struct msm_vidc_drv *vidc_driver; uint32_t msm_vidc_pwr_collapse_delay = 2000; @@ -60,6 +63,10 @@ static int msm_v4l2_open(struct file *filp) core->id, vid_dev->type); return -ENOMEM; } + + pm_qos_add_request(&msm_v4l2_vidc_pm_qos_request, + PM_QOS_CPU_DMA_LATENCY, 1000); + clear_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags); filp->private_data = &(vidc_inst->event_handler); trace_msm_v4l2_vidc_open_end("msm_v4l2_open end"); @@ -80,6 +87,12 @@ static int msm_v4l2_close(struct file *filp) "Failed in %s for release output buffers\n", __func__); rc = msm_vidc_close(vidc_inst); + + pm_qos_update_request(&msm_v4l2_vidc_pm_qos_request, + PM_QOS_DEFAULT_VALUE); + pm_qos_remove_request(&msm_v4l2_vidc_pm_qos_request); + + trace_msm_v4l2_vidc_close_end("msm_v4l2_close end"); return rc; } diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c index 4d7e470c1715..58ee3e4c826b 100644 --- a/drivers/media/platform/mtk-vpu/mtk_vpu.c +++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c @@ -841,7 +841,7 @@ static int mtk_vpu_probe(struct platform_device *pdev) /* Set PTCM to 96K and DTCM to 32K */ vpu_cfg_writel(vpu, 0x2, VPU_TCM_CFG); - vpu->enable_4GB = !!(totalram_pages > (SZ_2G >> PAGE_SHIFT)); + vpu->enable_4GB = !!(totalram_pages() > (SZ_2G >> PAGE_SHIFT)); dev_info(dev, "4GB mode %u\n", vpu->enable_4GB); if (vpu->enable_4GB) { diff --git a/drivers/media/usb/dvb-usb-v2/af9015.c b/drivers/media/usb/dvb-usb-v2/af9015.c index 941ceff9b268..29011dfabb11 100644 --- a/drivers/media/usb/dvb-usb-v2/af9015.c +++ b/drivers/media/usb/dvb-usb-v2/af9015.c @@ -1455,7 +1455,7 @@ static const struct usb_device_id af9015_id_table[] = { { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_CONCEPTRONIC_CTVDIGRCU, &af9015_props, "Conceptronic USB2.0 DVB-T CTVDIGRCU V3.0", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_KWORLD_MC810, - &af9015_props, "KWorld Digial MC-810", NULL) }, + &af9015_props, "KWorld Digital MC-810", NULL) }, { DVB_USB_DEVICE(USB_VID_KYE, USB_PID_GENIUS_TVGO_DVB_T03, &af9015_props, "Genius TVGo DVB-T03", NULL) }, { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_KWORLD_399U_2, diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 795a5352761d..abe70ab29ce1 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -45,3 +45,4 @@ ccflags-y += -I$(srctree)/drivers/media/dvb-core ccflags-y += -I$(srctree)/drivers/media/dvb-frontends ccflags-y += -I$(srctree)/drivers/media/tuners +CFLAGS_v4l2-ioctl.o += -Wframe-larger-than=8192 diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 874d759bdcfd..6d2049e241a7 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2917,8 +2917,9 @@ long video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, v4l2_kioctl func) { - char sbuf[128]; - void *mbuf = NULL, *array_buf = NULL; + char sbuf[SZ_4K] __aligned(8); + char mbuf_onstack[SZ_512] __aligned(8); + void *mbuf = NULL; void *parg = (void *)arg; long err = -EINVAL; bool has_array_args; @@ -2973,14 +2974,24 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, has_array_args = err; if (has_array_args) { - array_buf = kmalloc(array_size, GFP_KERNEL); - err = -ENOMEM; - if (array_buf == NULL) - goto out_array_args; + /* + * When adding new types of array args, make sure that the + * parent argument to ioctl (which contains the pointer to the + * array) fits into sbuf (so that mbuf will still remain + * unused up to here). + */ + if (array_size <= ARRAY_SIZE(mbuf_onstack)) { + mbuf = mbuf_onstack; + } else { + mbuf = kmalloc(array_size, GFP_KERNEL); + err = -ENOMEM; + if (NULL == mbuf) + goto out_array_args; + } err = -EFAULT; - if (copy_from_user(array_buf, user_ptr, array_size)) + if (copy_from_user(mbuf, user_ptr, array_size)) goto out_array_args; - *kernel_ptr = array_buf; + *kernel_ptr = mbuf; } /* Handles IOCTL */ @@ -2999,7 +3010,7 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, if (has_array_args) { *kernel_ptr = (void __force *)user_ptr; - if (copy_to_user(user_ptr, array_buf, array_size)) + if (copy_to_user(user_ptr, mbuf, array_size)) err = -EFAULT; goto out_array_args; } @@ -3019,8 +3030,8 @@ video_usercopy(struct file *file, unsigned int cmd, unsigned long arg, } out: - kfree(array_buf); - kfree(mbuf); + if (mbuf != mbuf_onstack) + kfree(mbuf); return err; } EXPORT_SYMBOL(video_usercopy); diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c index 258757e216c4..b1700b5fa640 100644 --- a/drivers/mfd/max77620.c +++ b/drivers/mfd/max77620.c @@ -461,7 +461,7 @@ static int max77620_probe(struct i2c_client *client, chip->rmap = devm_regmap_init_i2c(client, rmap_config); if (IS_ERR(chip->rmap)) { ret = PTR_ERR(chip->rmap); - dev_err(chip->dev, "Failed to intialise regmap: %d\n", ret); + dev_err(chip->dev, "Failed to initialise regmap: %d\n", ret); return ret; } diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 37766e469f68..7794d2d640f5 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -785,7 +785,7 @@ config PANEL_BOOT_MESSAGE config UID_SYS_STATS bool "Per-UID statistics" - depends on PROFILING && TASK_XACCT && TASK_IO_ACCOUNTING + depends on TASK_XACCT && TASK_IO_ACCOUNTING help Per UID based cpu time statistics exported to /proc/uid_cputime Per UID based io statistics exported to /proc/uid_io diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 86cade73835a..e75a95452d32 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -85,8 +85,7 @@ KCOV_INSTRUMENT_lkdtm_rodata.o := n OBJCOPYFLAGS := OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \ - --set-section-flags .text=alloc,readonly \ - --rename-section .text=.rodata + --rename-section .text=.rodata,alloc,readonly,load targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o $(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE $(call if_changed,objcopy) diff --git a/drivers/misc/cs40l2x.c b/drivers/misc/cs40l2x.c index eba52b957859..9baacbd9968a 100644 --- a/drivers/misc/cs40l2x.c +++ b/drivers/misc/cs40l2x.c @@ -74,6 +74,8 @@ struct cs40l2x_private { struct led_classdev led_dev; }; +static struct cs40l2x_private *cs40l2x_g; + static const char * const cs40l2x_supplies[] = { "VA", "VP", @@ -1565,6 +1567,14 @@ static void cs40l2x_vibe_brightness_set(struct led_classdev *led_cdev, queue_work(cs40l2x->vibe_workqueue, &cs40l2x->vibe_start_work); } +void set_vibrate(void) +{ + struct led_classdev *led_dev = &cs40l2x_g->led_dev; + cs40l2x_vibe_brightness_set(led_dev, LED_FULL); + msleep(1); + cs40l2x_vibe_brightness_set(led_dev, LED_OFF); +} + static void cs40l2x_create_led(struct cs40l2x_private *cs40l2x) { int ret; @@ -2986,6 +2996,8 @@ static int cs40l2x_i2c_probe(struct i2c_client *i2c_client, cs40l2x_create_led(cs40l2x); + cs40l2x_g = cs40l2x; + return 0; err: gpiod_set_value_cansleep(cs40l2x->reset_gpio, 0); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 862111b068a5..b4815b17ea31 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -73,7 +73,7 @@ static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; * performance cost, and for other reasons may not always be desired. * So we allow it it to be disabled. */ -bool use_spi_crc = 1; +bool use_spi_crc = 0; module_param(use_spi_crc, bool, 0); static int mmc_schedule_delayed_work(struct delayed_work *work, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 2f2e60f9b884..44f5605786d0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -591,7 +591,7 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf, mcast.mcast_list_len = mc_num; rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_SET); if (rc) - BNX2X_ERR("Faled to set multicasts\n"); + BNX2X_ERR("Failed to set multicasts\n"); } else { /* clear existing mcasts */ rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL); diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 6e1d38041d0a..4743e60d2b13 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -1377,7 +1377,7 @@ static const char *attn_master_to_str(u8 master) case 9: return "DBU"; case 10: return "DMAE"; default: - return "Unkown"; + return "Unknown"; } } @@ -1555,7 +1555,7 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) DORQ_REG_DB_DROP_DETAILS); DP_INFO(p_hwfn->cdev, - "DORQ db_drop: adress 0x%08x Opaque FID 0x%04x Size [bytes] 0x%08x Reason: 0x%08x\n", + "DORQ db_drop: address 0x%08x Opaque FID 0x%04x Size [bytes] 0x%08x Reason: 0x%08x\n", qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, DORQ_REG_DB_DROP_DETAILS_ADDRESS), (u16)(details & QED_DORQ_ATTENTION_OPAQUE_MASK), diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index ac69ff3f7c5c..187b07e0aa4e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -381,7 +381,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->personality = PERSONALITY_RDMA_AND_ETH; break; default: - DP_NOTICE(p_hwfn, "Unkown personality %d\n", + DP_NOTICE(p_hwfn, "Unknown personality %d\n", p_hwfn->hw_info.personality); p_ramrod->personality = PERSONALITY_ETH; } diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index 92f52a73ec0e..861a19094103 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -96,8 +96,8 @@ static inline void sha_pad_init(struct sha_pad *shapad) * State for an MPPE (de)compressor. */ struct ppp_mppe_state { - struct crypto_skcipher *arc4; - struct crypto_ahash *sha1; + struct crypto_sync_skcipher *arc4; + struct shash_desc *sha1; unsigned char *sha1_digest; unsigned char master_key[MPPE_MAX_KEY_LEN]; unsigned char session_key[MPPE_MAX_KEY_LEN]; @@ -137,25 +137,16 @@ struct ppp_mppe_state { */ static void get_new_key_from_sha(struct ppp_mppe_state * state) { - AHASH_REQUEST_ON_STACK(req, state->sha1); - struct scatterlist sg[4]; - unsigned int nbytes; - - sg_init_table(sg, 4); - - nbytes = setup_sg(&sg[0], state->master_key, state->keylen); - nbytes += setup_sg(&sg[1], sha_pad->sha_pad1, - sizeof(sha_pad->sha_pad1)); - nbytes += setup_sg(&sg[2], state->session_key, state->keylen); - nbytes += setup_sg(&sg[3], sha_pad->sha_pad2, - sizeof(sha_pad->sha_pad2)); - - ahash_request_set_tfm(req, state->sha1); - ahash_request_set_callback(req, 0, NULL, NULL); - ahash_request_set_crypt(req, sg, state->sha1_digest, nbytes); - - crypto_ahash_digest(req); - ahash_request_zero(req); + crypto_shash_init(state->sha1); + crypto_shash_update(state->sha1, state->master_key, + state->keylen); + crypto_shash_update(state->sha1, sha_pad->sha_pad1, + sizeof(sha_pad->sha_pad1)); + crypto_shash_update(state->sha1, state->session_key, + state->keylen); + crypto_shash_update(state->sha1, sha_pad->sha_pad2, + sizeof(sha_pad->sha_pad2)); + crypto_shash_final(state->sha1, state->sha1_digest); } /* @@ -165,15 +156,15 @@ static void get_new_key_from_sha(struct ppp_mppe_state * state) static void mppe_rekey(struct ppp_mppe_state * state, int initial_key) { struct scatterlist sg_in[1], sg_out[1]; - SKCIPHER_REQUEST_ON_STACK(req, state->arc4); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4); - skcipher_request_set_tfm(req, state->arc4); + skcipher_request_set_sync_tfm(req, state->arc4); skcipher_request_set_callback(req, 0, NULL, NULL); get_new_key_from_sha(state); if (!initial_key) { - crypto_skcipher_setkey(state->arc4, state->sha1_digest, - state->keylen); + crypto_sync_skcipher_setkey(state->arc4, state->sha1_digest, + state->keylen); sg_init_table(sg_in, 1); sg_init_table(sg_out, 1); setup_sg(sg_in, state->sha1_digest, state->keylen); @@ -191,7 +182,8 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key) state->session_key[1] = 0x26; state->session_key[2] = 0x9e; } - crypto_skcipher_setkey(state->arc4, state->session_key, state->keylen); + crypto_sync_skcipher_setkey(state->arc4, state->session_key, + state->keylen); skcipher_request_zero(req); } @@ -201,6 +193,7 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key) static void *mppe_alloc(unsigned char *options, int optlen) { struct ppp_mppe_state *state; + struct crypto_shash *shash; unsigned int digestsize; if (optlen != CILEN_MPPE + sizeof(state->master_key) || @@ -212,19 +205,27 @@ static void *mppe_alloc(unsigned char *options, int optlen) goto out; - state->arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + state->arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0); if (IS_ERR(state->arc4)) { state->arc4 = NULL; goto out_free; } - state->sha1 = crypto_alloc_ahash("sha1", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(state->sha1)) { - state->sha1 = NULL; + shash = crypto_alloc_shash("sha1", 0, 0); + if (IS_ERR(shash)) + goto out_free; + + state->sha1 = kmalloc(sizeof(*state->sha1) + + crypto_shash_descsize(shash), + GFP_KERNEL); + if (!state->sha1) { + crypto_free_shash(shash); goto out_free; } + state->sha1->tfm = shash; + state->sha1->flags = 0; - digestsize = crypto_ahash_digestsize(state->sha1); + digestsize = crypto_shash_digestsize(shash); if (digestsize < MPPE_MAX_KEY_LEN) goto out_free; @@ -247,8 +248,11 @@ static void *mppe_alloc(unsigned char *options, int optlen) out_free: kfree(state->sha1_digest); - crypto_free_ahash(state->sha1); - crypto_free_skcipher(state->arc4); + if (state->sha1) { + crypto_free_shash(state->sha1->tfm); + kzfree(state->sha1); + } + crypto_free_sync_skcipher(state->arc4); kfree(state); out: return NULL; @@ -262,8 +266,9 @@ static void mppe_free(void *arg) struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg; if (state) { kfree(state->sha1_digest); - crypto_free_ahash(state->sha1); - crypto_free_skcipher(state->arc4); + crypto_free_shash(state->sha1->tfm); + kzfree(state->sha1); + crypto_free_sync_skcipher(state->arc4); kfree(state); } } @@ -370,7 +375,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf, int isize, int osize) { struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg; - SKCIPHER_REQUEST_ON_STACK(req, state->arc4); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4); int proto; int err; struct scatterlist sg_in[1], sg_out[1]; @@ -430,7 +435,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf, setup_sg(sg_in, ibuf, isize); setup_sg(sg_out, obuf, osize); - skcipher_request_set_tfm(req, state->arc4); + skcipher_request_set_sync_tfm(req, state->arc4); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg_in, sg_out, isize, NULL); err = crypto_skcipher_encrypt(req); @@ -484,7 +489,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, int osize) { struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg; - SKCIPHER_REQUEST_ON_STACK(req, state->arc4); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4); unsigned ccount; int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED; struct scatterlist sg_in[1], sg_out[1]; @@ -619,7 +624,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, setup_sg(sg_in, ibuf, 1); setup_sg(sg_out, obuf, 1); - skcipher_request_set_tfm(req, state->arc4); + skcipher_request_set_sync_tfm(req, state->arc4); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg_in, sg_out, 1, NULL); if (crypto_skcipher_decrypt(req)) { diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 450eb7b31256..cfc69a3dea40 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -2097,7 +2097,7 @@ int ath10k_pci_init_config(struct ath10k *ar) ret = ath10k_pci_diag_read32(ar, ealloc_targ_addr, &ealloc_value); if (ret != 0) { - ath10k_err(ar, "Faile to get early alloc val: %d\n", ret); + ath10k_err(ar, "Failed to get early alloc val: %d\n", ret); return ret; } diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index d0fecd9f8a4c..9f7d6b5b2d7f 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -406,7 +406,7 @@ static void wil_rx_add_radiotap_header(struct wil6210_priv *wil, if (skb_headroom(skb) < rtap_len && pskb_expand_head(skb, rtap_len, 0, GFP_ATOMIC)) { - wil_err(wil, "Unable to expand headrom to %d\n", rtap_len); + wil_err(wil, "Unable to expand headroom to %d\n", rtap_len); return; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c index 1186755e55b8..e5505387260b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c @@ -134,7 +134,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, wait_h2c_limmit--; if (wait_h2c_limmit == 0) { RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD, - "Wating too long for FW read clear HMEBox(%d)!\n", + "Waiting too long for FW read clear HMEBox(%d)!\n", boxnum); break; } diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 84a93ddcd57a..aba3897844c5 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1272,7 +1272,7 @@ ccio_ioc_init(struct ioc *ioc) ** Hot-Plug/Removal of PCI cards. (aka PCI OLARD). */ - iova_space_size = (u32) (totalram_pages / count_parisc_driver(&ccio_driver)); + iova_space_size = (u32) (totalram_pages() / count_parisc_driver(&ccio_driver)); /* limit IOVA space size to 1MB-1GB */ @@ -1311,7 +1311,7 @@ ccio_ioc_init(struct ioc *ioc) DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n", __func__, ioc->ioc_regs, - (unsigned long) totalram_pages >> (20 - PAGE_SHIFT), + (unsigned long) totalram_pages() >> (20 - PAGE_SHIFT), iova_space_size>>20, iov_order + PAGE_SHIFT); diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index c715af1b6c3c..f9ed5b1c9258 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1436,7 +1436,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num) DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n", __func__, ioc->ioc_hpa, - (unsigned long) totalram_pages >> (20 - PAGE_SHIFT), + (unsigned long) totalram_pages() >> (20 - PAGE_SHIFT), iova_space_size>>20, iov_order + PAGE_SHIFT); diff --git a/drivers/pci/host/pci-msm.c b/drivers/pci/host/pci-msm.c index 4ef3d8fd0be4..9ff796605246 100644 --- a/drivers/pci/host/pci-msm.c +++ b/drivers/pci/host/pci-msm.c @@ -3757,7 +3757,7 @@ static int msm_pcie_enable(struct msm_pcie_dev_t *dev, u32 options) /* assert PCIe reset link to keep EP in reset */ - PCIE_INFO(dev, "PCIe: Assert the reset of endpoint of RC%d.\n", + PCIE_DBG(dev, "PCIe: Assert the reset of endpoint of RC%d.\n", dev->rc_idx); gpio_set_value(dev->gpio[MSM_PCIE_GPIO_PERST].num, dev->gpio[MSM_PCIE_GPIO_PERST].on); @@ -3880,7 +3880,7 @@ static int msm_pcie_enable(struct msm_pcie_dev_t *dev, u32 options) dev->rc_idx, retries); if (pcie_phy_is_ready(dev)) - PCIE_INFO(dev, "PCIe RC%d PHY is ready!\n", dev->rc_idx); + PCIE_DBG(dev, "PCIe RC%d PHY is ready!\n", dev->rc_idx); else { PCIE_ERR(dev, "PCIe PHY RC%d failed to come up!\n", dev->rc_idx); @@ -3898,7 +3898,7 @@ static int msm_pcie_enable(struct msm_pcie_dev_t *dev, u32 options) /* de-assert PCIe reset link to bring EP out of reset */ - PCIE_INFO(dev, "PCIe: Release the reset of endpoint of RC%d.\n", + PCIE_DBG(dev, "PCIe: Release the reset of endpoint of RC%d.\n", dev->rc_idx); gpio_set_value(dev->gpio[MSM_PCIE_GPIO_PERST].num, 1 - dev->gpio[MSM_PCIE_GPIO_PERST].on); @@ -3943,9 +3943,9 @@ static int msm_pcie_enable(struct msm_pcie_dev_t *dev, u32 options) msm_pcie_confirm_linkup(dev, false, false, NULL)) { PCIE_DBG(dev, "Link is up after %d checkings\n", link_check_count); - PCIE_INFO(dev, "PCIe RC%d link initialized\n", dev->rc_idx); + PCIE_DBG(dev, "PCIe RC%d link initialized\n", dev->rc_idx); } else { - PCIE_INFO(dev, "PCIe: Assert the reset of endpoint of RC%d.\n", + PCIE_DBG(dev, "PCIe: Assert the reset of endpoint of RC%d.\n", dev->rc_idx); gpio_set_value(dev->gpio[MSM_PCIE_GPIO_PERST].num, dev->gpio[MSM_PCIE_GPIO_PERST].on); @@ -4049,7 +4049,7 @@ static void msm_pcie_disable(struct msm_pcie_dev_t *dev, u32 options) dev->power_on = false; dev->link_turned_off_counter++; - PCIE_INFO(dev, "PCIe: Assert the reset of endpoint of RC%d.\n", + PCIE_DBG(dev, "PCIe: Assert the reset of endpoint of RC%d.\n", dev->rc_idx); gpio_set_value(dev->gpio[MSM_PCIE_GPIO_PERST].num, diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 0ee83e397744..18ee33e83453 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -425,7 +425,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in val = readl(pctrl->regs + g->ctl_reg); val |= BIT(g->oe_bit); - writel(val, pctrl->regs + g->ctl_reg); + writel_relaxed(val, pctrl->regs + g->ctl_reg); spin_unlock_irqrestore(&pctrl->lock, flags); @@ -562,11 +562,11 @@ static void msm_gpio_update_dual_edge_pos(struct msm_pinctrl *pctrl, unsigned pol; do { - val = readl(pctrl->regs + g->io_reg) & BIT(g->in_bit); + val = readl_relaxed(pctrl->regs + g->io_reg) & BIT(g->in_bit); pol = readl(pctrl->regs + g->intr_cfg_reg); pol ^= BIT(g->intr_polarity_bit); - writel(pol, pctrl->regs + g->intr_cfg_reg); + writel_relaxed(pol, pctrl->regs + g->intr_cfg_reg); val2 = readl(pctrl->regs + g->io_reg) & BIT(g->in_bit); intstat = readl(pctrl->regs + g->intr_status_reg); @@ -646,7 +646,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d) val = readl(pctrl->regs + g->intr_cfg_reg); val |= BIT(g->intr_enable_bit); - writel(val, pctrl->regs + g->intr_cfg_reg); + writel_relaxed(val, pctrl->regs + g->intr_cfg_reg); set_bit(d->hwirq, pctrl->enabled_irqs); @@ -760,7 +760,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) } else { BUG(); } - writel(val, pctrl->regs + g->intr_cfg_reg); + writel_relaxed(val, pctrl->regs + g->intr_cfg_reg); if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) msm_gpio_update_dual_edge_pos(pctrl, g, d); diff --git a/drivers/platform/msm/ipa/ipa_clients/rndis_ipa.c b/drivers/platform/msm/ipa/ipa_clients/rndis_ipa.c index 51531afd88f1..c7c423534e39 100644 --- a/drivers/platform/msm/ipa/ipa_clients/rndis_ipa.c +++ b/drivers/platform/msm/ipa/ipa_clients/rndis_ipa.c @@ -1474,7 +1474,7 @@ static void rndis_ipa_xmit_error(struct sk_buff *skb) delay_jiffies = msecs_to_jiffies( rndis_ipa_ctx->error_msec_sleep_time + rand_dealy_msec); - retval = schedule_delayed_work( + retval = queue_delayed_work(system_power_efficient_wq, &rndis_ipa_ctx->xmit_error_delayed_work, delay_jiffies); if (!retval) { RNDIS_IPA_ERROR("fail to schedule delayed work\n"); diff --git a/drivers/platform/msm/ipa/ipa_rm.c b/drivers/platform/msm/ipa/ipa_rm.c index 914028c244b2..68658eb826f4 100644 --- a/drivers/platform/msm/ipa/ipa_rm.c +++ b/drivers/platform/msm/ipa/ipa_rm.c @@ -512,7 +512,7 @@ int ipa_rm_request_resource_with_timer(enum ipa_rm_resource_name resource_name) release_work->needed_bw = 0; release_work->dec_usage_count = false; INIT_DELAYED_WORK(&release_work->work, delayed_release_work_func); - schedule_delayed_work(&release_work->work, + queue_delayed_work(system_power_efficient_wq, &release_work->work, msecs_to_jiffies(IPA_RM_RELEASE_DELAY_IN_MSEC)); result = 0; bail: diff --git a/drivers/platform/msm/ipa/ipa_v2/Makefile b/drivers/platform/msm/ipa/ipa_v2/Makefile index fb039709261e..52e3af2c594a 100644 --- a/drivers/platform/msm/ipa/ipa_v2/Makefile +++ b/drivers/platform/msm/ipa/ipa_v2/Makefile @@ -5,3 +5,5 @@ ipat-y := ipa.o ipa_debugfs.o ipa_hdr.o ipa_flt.o ipa_rt.o ipa_dp.o ipa_client.o ipa_wdi3_i.o obj-$(CONFIG_RMNET_IPA) += rmnet_ipa.o ipa_qmi_service_v01.o ipa_qmi_service.o rmnet_ipa_fd_ioctl.o + +subdir-ccflags-y += $(call cc-disable-warning, enum-conversion) diff --git a/drivers/platform/msm/ipa/ipa_v2/ipa.c b/drivers/platform/msm/ipa/ipa_v2/ipa.c index 10468f34c85b..3b0a52122401 100644 --- a/drivers/platform/msm/ipa/ipa_v2/ipa.c +++ b/drivers/platform/msm/ipa/ipa_v2/ipa.c @@ -5071,6 +5071,7 @@ int ipa2_ap_suspend(struct device *dev) IPADBG("Enter...\n"); +#if 0 /* In case there is a tx/rx handler in polling mode fail to suspend */ for (i = 0; i < ipa_ctx->ipa_num_pipes; i++) { if (ipa_ctx->ep[i].sys && @@ -5080,7 +5081,8 @@ int ipa2_ap_suspend(struct device *dev) return -EAGAIN; } } - +#endif + /* release SPS IPA resource without waiting for inactivity timer */ atomic_set(&ipa_ctx->sps_pm.eot_activity, 0); ipa_sps_release_resource(NULL); diff --git a/drivers/platform/msm/ipa/ipa_v3/Makefile b/drivers/platform/msm/ipa/ipa_v3/Makefile index 8942634f5e8b..7776149a1c49 100644 --- a/drivers/platform/msm/ipa/ipa_v3/Makefile +++ b/drivers/platform/msm/ipa/ipa_v3/Makefile @@ -21,3 +21,5 @@ obj-$(CONFIG_RMNET_IPA3) += rmnet_ipa.o ipa_qmi_service_v01.o ipa_qmi_service.o # TODO: remove me b/62058353 subdir-ccflags-y += \ -Wno-pointer-bool-conversion \ + +subdir-ccflags-y += $(call cc-disable-warning, enum-conversion) diff --git a/drivers/platform/msm/ipa/ipa_v3/ipa.c b/drivers/platform/msm/ipa/ipa_v3/ipa.c index 5f1699af3507..ccf77e28e3b8 100644 --- a/drivers/platform/msm/ipa/ipa_v3/ipa.c +++ b/drivers/platform/msm/ipa/ipa_v3/ipa.c @@ -3783,6 +3783,7 @@ void ipa3_dec_client_disable_clks_no_block( */ void ipa3_inc_acquire_wakelock(void) { +#if 0 unsigned long flags; spin_lock_irqsave(&ipa3_ctx->wakelock_ref_cnt.spinlock, flags); @@ -3792,6 +3793,7 @@ void ipa3_inc_acquire_wakelock(void) IPADBG_LOW("active wakelock ref cnt = %d\n", ipa3_ctx->wakelock_ref_cnt.cnt); spin_unlock_irqrestore(&ipa3_ctx->wakelock_ref_cnt.spinlock, flags); +#endif } /** @@ -3804,6 +3806,7 @@ void ipa3_inc_acquire_wakelock(void) */ void ipa3_dec_release_wakelock(void) { +#if 0 unsigned long flags; spin_lock_irqsave(&ipa3_ctx->wakelock_ref_cnt.spinlock, flags); @@ -3813,6 +3816,7 @@ void ipa3_dec_release_wakelock(void) if (ipa3_ctx->wakelock_ref_cnt.cnt == 0) __pm_relax(&ipa3_ctx->w_lock); spin_unlock_irqrestore(&ipa3_ctx->wakelock_ref_cnt.spinlock, flags); +#endif } int ipa3_set_clock_plan_from_pm(int idx) @@ -5302,9 +5306,11 @@ static int ipa3_pre_init(const struct ipa3_plat_drv_res *resource_p, goto fail_device_create; } +#if 0 /* Create a wakeup source. */ wakeup_source_init(&ipa3_ctx->w_lock, "IPA_WS"); spin_lock_init(&ipa3_ctx->wakelock_ref_cnt.spinlock); +#endif /* Initialize Power Management framework */ if (ipa3_ctx->use_ipa_pm) { diff --git a/drivers/platform/msm/ipa/ipa_v3/ipa_i.h b/drivers/platform/msm/ipa/ipa_v3/ipa_i.h index 3fc4b8ef1b31..f18e2afc792c 100644 --- a/drivers/platform/msm/ipa/ipa_v3/ipa_i.h +++ b/drivers/platform/msm/ipa/ipa_v3/ipa_i.h @@ -1138,10 +1138,12 @@ struct ipa3_active_clients { int bus_vote_idx; }; +#if 0 struct ipa3_wakelock_ref_cnt { spinlock_t spinlock; int cnt; }; +#endif struct ipa3_tag_completion { struct completion comp; @@ -1580,8 +1582,10 @@ struct ipa3_context { bool gsi_ch20_wa; bool s1_bypass_arr[IPA_SMMU_CB_MAX]; u32 wdi_map_cnt; +#if 0 struct wakeup_source w_lock; struct ipa3_wakelock_ref_cnt wakelock_ref_cnt; +#endif /* RMNET_IOCTL_INGRESS_FORMAT_AGG_DATA */ bool ipa_client_apps_wan_cons_agg_gro; /* M-release support to know client pipes */ diff --git a/drivers/platform/msm/ipa/ipa_v3/ipa_nat.c b/drivers/platform/msm/ipa/ipa_v3/ipa_nat.c index be80cd70ba45..5f5b480edfdc 100644 --- a/drivers/platform/msm/ipa/ipa_v3/ipa_nat.c +++ b/drivers/platform/msm/ipa/ipa_v3/ipa_nat.c @@ -242,6 +242,7 @@ static int ipa3_nat_ipv6ct_init_device( dev->smem_offset = smem_offset; dev->is_dev_init = true; + dev->tmp_mem = tmp_mem; mutex_unlock(&dev->lock); IPADBG("ipa dev %s added successful. major:%d minor:%d\n", name, @@ -265,6 +266,9 @@ static void ipa3_nat_ipv6ct_destroy_device( mutex_lock(&dev->lock); + dma_free_coherent(ipa3_ctx->pdev, IPA_NAT_IPV6CT_TEMP_MEM_SIZE, + dev->tmp_mem->vaddr, dev->tmp_mem->dma_handle); + kfree(dev->tmp_mem); device_destroy(dev->class, dev->dev_num); unregister_chrdev_region(dev->dev_num, 1); class_destroy(dev->class); diff --git a/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c b/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c index 258329f23081..eb523a08f1d8 100644 --- a/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c +++ b/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c @@ -460,9 +460,11 @@ int ipa3_copy_ul_filter_rule_to_ipa(struct ipa_install_fltr_rule_req_msg_v01 goto failure; } ipa3_qmi_ctx->q6_ul_filter_rule[i].ip = - rule_req->filter_spec_ex_list[i].ip_type; - ipa3_qmi_ctx->q6_ul_filter_rule[i].action = - rule_req->filter_spec_ex_list[i].filter_action; + (enum ipa_ip_type)rule_req->filter_spec_ex_list[i]. + ip_type; + ipa3_qmi_ctx->q6_ul_filter_rule[i].action = + (enum ipa_flt_action)rule_req->filter_spec_ex_list[i]. + filter_action; if (rule_req->filter_spec_ex_list[i]. is_routing_table_index_valid == true) ipa3_qmi_ctx->q6_ul_filter_rule[i].rt_tbl_idx = @@ -2844,7 +2846,8 @@ static void tethering_stats_poll_queue(struct work_struct *work) /* Schedule again only if there's an active polling interval */ if (ipa3_rmnet_ctx.polling_interval != 0) - schedule_delayed_work(&ipa_tether_stats_poll_wakequeue_work, + queue_delayed_work(system_power_efficient_wq, + &ipa_tether_stats_poll_wakequeue_work, msecs_to_jiffies(ipa3_rmnet_ctx.polling_interval*1000)); } @@ -2938,7 +2941,8 @@ int rmnet_ipa3_poll_tethering_stats(struct wan_ioctl_poll_tethering_stats *data) return 0; } - schedule_delayed_work(&ipa_tether_stats_poll_wakequeue_work, 0); + queue_delayed_work(system_power_efficient_wq, + &ipa_tether_stats_poll_wakequeue_work, 0); return 0; } diff --git a/drivers/platform/msm/msm_ext_display.c b/drivers/platform/msm/msm_ext_display.c index 7a93e0e95476..fbaaa9157d84 100644 --- a/drivers/platform/msm/msm_ext_display.c +++ b/drivers/platform/msm/msm_ext_display.c @@ -249,7 +249,7 @@ static int msm_ext_disp_update_audio_ops(struct msm_ext_disp *ext_disp, ret = msm_ext_disp_get_intf_data(ext_disp, type, &data); if (ret || !data) { - pr_err("interface %s not found\n", msm_ext_disp_name(type)); + pr_debug("interface %s not found\n", msm_ext_disp_name(type)); goto end; } diff --git a/drivers/platform/msm/usb_bam.c b/drivers/platform/msm/usb_bam.c index 06166400966f..f9a08ebfefb5 100644 --- a/drivers/platform/msm/usb_bam.c +++ b/drivers/platform/msm/usb_bam.c @@ -44,7 +44,7 @@ #define ARRAY_INDEX_FROM_ADDR(base, addr) ((addr) - (base)) #define ENABLE_EVENT_LOG 1 -static unsigned int enable_event_log = ENABLE_EVENT_LOG; +static unsigned int enable_event_log; module_param(enable_event_log, uint, 0644); MODULE_PARM_DESC(enable_event_log, "enable event logging in debug buffer"); diff --git a/drivers/power/reset/msm-poweroff.c b/drivers/power/reset/msm-poweroff.c index 1216a131578b..d5425801ca3c 100644 --- a/drivers/power/reset/msm-poweroff.c +++ b/drivers/power/reset/msm-poweroff.c @@ -117,7 +117,7 @@ static bool scandump_test; module_param(scandump_test, bool, 0644); MODULE_PARM_DESC(scandump_test, "Set 1 to enable scandump test"); -static bool warm_reset; +static bool warm_reset = true; module_param(warm_reset, bool, 0644); MODULE_PARM_DESC(warm_reset, "Set 1 to override default cold-reset"); diff --git a/drivers/power/supply/google_charger.c b/drivers/power/supply/google_charger.c index 345c335e89df..3b2cccded933 100644 --- a/drivers/power/supply/google_charger.c +++ b/drivers/power/supply/google_charger.c @@ -807,7 +807,6 @@ static void chg_work(struct work_struct *work) struct chg_drv *chg_drv = container_of(work, struct chg_drv, chg_work.work); struct chg_profile *profile = &chg_drv->chg_profile; - union power_supply_propval val; struct power_supply *chg_psy = chg_drv->chg_psy; struct power_supply *usb_psy = chg_drv->usb_psy; struct power_supply *wlc_psy = chg_drv->wlc_psy; @@ -1341,7 +1340,7 @@ static void chg_work(struct work_struct *work) pr_err("chg_work charging status UNKNOWN\n"); goto error_rerun; default: - pr_err("chg_work invalid charging status %d\n", val.intval); + pr_err("chg_work invalid charging status %d\n", batt_status); goto error_rerun; } diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 7fcef827559d..361d4ece5cdd 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -98,7 +98,7 @@ static ssize_t power_supply_show_property(struct device *dev, dev_dbg(dev, "driver has no data for `%s' property\n", attr->attr.name); else if (ret != -ENODEV && ret != -EAGAIN) - dev_err_ratelimited(dev, + dev_dbg_ratelimited(dev, "driver failed to report `%s' property: %zd\n", attr->attr.name, ret); return ret; @@ -404,29 +404,12 @@ void power_supply_init_attrs(struct device_type *dev_type) __power_supply_attrs[i] = &power_supply_attrs[i].attr; } -static char *kstruprdup(const char *str, gfp_t gfp) -{ - char *ret, *ustr; - - ustr = ret = kmalloc(strlen(str) + 1, gfp); - - if (!ret) - return NULL; - - while (*str) - *ustr++ = toupper(*str++); - - *ustr = 0; - - return ret; -} - int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) { struct power_supply *psy = dev_get_drvdata(dev); int ret = 0, j; char *prop_buf; - char *attrname; + char attrname[64]; if (!psy || !psy->desc) { dev_dbg(dev, "No power supply yet\n"); @@ -443,7 +426,8 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) for (j = 0; j < psy->desc->num_properties; j++) { struct device_attribute *attr; - char *line; + const char *str; + char *line, *ustr; attr = &power_supply_attrs[psy->desc->properties[j]]; @@ -462,14 +446,14 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) if (line) *line = 0; - attrname = kstruprdup(attr->attr.name, GFP_KERNEL); - if (!attrname) { - ret = -ENOMEM; - goto out; - } + str = attr->attr.name; + ustr = attrname; + while (*str) + *ustr++ = toupper(*str++); + + *ustr = 0; ret = add_uevent_var(env, "POWER_SUPPLY_%s=%s", attrname, prop_buf); - kfree(attrname); if (ret) goto out; } diff --git a/drivers/power/supply/qcom/battery.c b/drivers/power/supply/qcom/battery.c index 8a9d6f865efd..f9f79bb17b37 100644 --- a/drivers/power/supply/qcom/battery.c +++ b/drivers/power/supply/qcom/battery.c @@ -1271,7 +1271,7 @@ static int pl_awake_vote_callback(struct votable *votable, struct pl_data *chip = data; if (awake) - __pm_stay_awake(chip->pl_ws); + __pm_wakeup_event(chip->pl_ws, 500); else __pm_relax(chip->pl_ws); diff --git a/drivers/power/supply/qcom/qpnp-fg-gen3.c b/drivers/power/supply/qcom/qpnp-fg-gen3.c index c2842b1673a8..af6dfc6cf434 100644 --- a/drivers/power/supply/qcom/qpnp-fg-gen3.c +++ b/drivers/power/supply/qcom/qpnp-fg-gen3.c @@ -1240,7 +1240,7 @@ static int fg_awake_cb(struct votable *votable, void *data, int awake, struct fg_chip *chip = data; if (awake) - pm_stay_awake(chip->dev); + pm_wakeup_event(chip->dev, 500); else pm_relax(chip->dev); @@ -4049,7 +4049,6 @@ static int fg_psy_get_property(struct power_supply *psy, POWER_SUPPLY_PROP_HEALTH, pval); break; default: - pr_err("unsupported property %d\n", psp); rc = -EINVAL; break; } @@ -5510,7 +5509,7 @@ static int fg_parse_dt(struct fg_chip *chip) rc = of_property_read_u32(node, "qcom,fg-esr-meas-curr-ma", &temp); if (!rc) { /* ESR measurement current range is 60-240 mA */ - if (temp >= 60 || temp <= 240) + if (temp >= 60 && temp <= 240) chip->dt.esr_meas_curr_ma = temp; } diff --git a/drivers/power/supply/qcom/qpnp-fg.c b/drivers/power/supply/qcom/qpnp-fg.c index deccb20fec83..8912e76fc93d 100644 --- a/drivers/power/supply/qcom/qpnp-fg.c +++ b/drivers/power/supply/qcom/qpnp-fg.c @@ -440,7 +440,7 @@ struct fg_wakeup_source { static void fg_stay_awake(struct fg_wakeup_source *source) { if (!__test_and_set_bit(0, &source->enabled)) { - __pm_stay_awake(&source->source); + __pm_wakeup_event(&source->source, 0); pr_debug("enabled source %s\n", source->source.name); } } @@ -2015,9 +2015,11 @@ static void fg_handle_battery_insertion(struct fg_chip *chip) { reinit_completion(&chip->batt_id_avail); reinit_completion(&chip->fg_reset_done); - schedule_delayed_work(&chip->batt_profile_init, 0); + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->batt_profile_init, 0); cancel_delayed_work(&chip->update_sram_data); - schedule_delayed_work(&chip->update_sram_data, msecs_to_jiffies(0)); + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->update_sram_data, msecs_to_jiffies(0)); } @@ -2246,7 +2248,7 @@ static int get_prop_capacity(struct fg_chip *chip) if (chip->last_soc == FULL_SOC_RAW) return FULL_CAPACITY; return DIV_ROUND_CLOSEST((chip->last_soc - 1) * - (FULL_CAPACITY - 2), + (FULL_CAPACITY - 1), FULL_SOC_RAW - 2) + 1; } @@ -2274,7 +2276,7 @@ static int get_prop_capacity(struct fg_chip *chip) if (!vbatt_low_sts) return DIV_ROUND_CLOSEST((chip->last_soc - 1) * - (FULL_CAPACITY - 2), + (FULL_CAPACITY - 1), FULL_SOC_RAW - 2) + 1; else return EMPTY_CAPACITY; @@ -2285,7 +2287,7 @@ static int get_prop_capacity(struct fg_chip *chip) return FULL_CAPACITY; } - return DIV_ROUND_CLOSEST((msoc - 1) * (FULL_CAPACITY - 2), + return DIV_ROUND_CLOSEST((msoc - 1) * (FULL_CAPACITY - 1), FULL_SOC_RAW - 2) + 1; } @@ -2352,7 +2354,7 @@ static int set_prop_jeita_temp(struct fg_chip *chip, cancel_delayed_work_sync( &chip->update_jeita_setting); - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_jeita_setting, 0); return rc; @@ -2691,7 +2693,7 @@ static void check_sanity_work(struct work_struct *work) chip->last_beat_count = beat_count; } resched: - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->check_sanity_work, msecs_to_jiffies(SANITY_CHECK_PERIOD_MS)); out: @@ -2730,7 +2732,7 @@ static void update_sram_data_work(struct work_struct *work) out: if (!rc) - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_sram_data, msecs_to_jiffies(resched_ms)); } @@ -2849,7 +2851,7 @@ static void update_temp_data(struct work_struct *work) fg_relax(&chip->update_temp_wakeup_source); resched: - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_temp_work, msecs_to_jiffies(TEMP_PERIOD_UPDATE_MS)); } @@ -4101,7 +4103,8 @@ static void status_change_work(struct work_struct *work) */ if (chip->last_temp_update_time && chip->soc_slope_limiter_en) { cancel_delayed_work_sync(&chip->update_temp_work); - schedule_delayed_work(&chip->update_temp_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->update_temp_work, msecs_to_jiffies(0)); } @@ -4117,7 +4120,8 @@ static void status_change_work(struct work_struct *work) */ if (chip->last_sram_update_time + 5 < current_time) { cancel_delayed_work(&chip->update_sram_data); - schedule_delayed_work(&chip->update_sram_data, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->update_sram_data, msecs_to_jiffies(0)); } if (chip->cyc_ctr.en) @@ -4475,7 +4479,7 @@ static bool fg_validate_battery_info(struct fg_chip *chip) batt_soc = get_monotonic_soc_raw(chip); if (batt_soc != 0 && batt_soc != FULL_SOC_RAW) batt_soc = DIV_ROUND_CLOSEST((batt_soc - 1) * - (FULL_CAPACITY - 2), FULL_SOC_RAW - 2) + 1; + (FULL_CAPACITY - 1), FULL_SOC_RAW - 2) + 1; if (*chip->batt_range_ocv && chip->batt_max_voltage_uv > 1000) delta_pct = DIV_ROUND_CLOSEST(abs(batt_volt_mv - @@ -5238,7 +5242,8 @@ static irqreturn_t fg_vbatt_low_handler(int irq, void *_chip) disable_irq_nosync(chip->batt_irq[VBATT_LOW].irq); chip->vbat_low_irq_enabled = false; fg_stay_awake(&chip->empty_check_wakeup_source); - schedule_delayed_work(&chip->check_empty_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->check_empty_work, msecs_to_jiffies(FG_EMPTY_DEBOUNCE_MS)); } else { if (fg_debug_mask & FG_IRQS) @@ -5370,7 +5375,8 @@ static irqreturn_t fg_soc_irq_handler(int irq, void *_chip) msoc = get_monotonic_soc_raw(chip); if (msoc == 0 || chip->soc_empty) { fg_stay_awake(&chip->empty_check_wakeup_source); - schedule_delayed_work(&chip->check_empty_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->check_empty_work, msecs_to_jiffies(FG_EMPTY_DEBOUNCE_MS)); } } @@ -5426,7 +5432,8 @@ static irqreturn_t fg_empty_soc_irq_handler(int irq, void *_chip) pr_info("triggered 0x%x\n", soc_rt_sts); if (fg_is_batt_empty(chip)) { fg_stay_awake(&chip->empty_check_wakeup_source); - schedule_delayed_work(&chip->check_empty_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->check_empty_work, msecs_to_jiffies(FG_EMPTY_DEBOUNCE_MS)); } else { chip->soc_empty = false; @@ -6069,7 +6076,8 @@ static int fg_do_restart(struct fg_chip *chip, bool write_profile) if (!tried_once) { cancel_delayed_work(&chip->update_sram_data); - schedule_delayed_work(&chip->update_sram_data, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->update_sram_data, msecs_to_jiffies(0)); msleep(1000); tried_once = true; @@ -6587,11 +6595,11 @@ static int fg_batt_profile_init(struct fg_chip *chip) return rc; update: cancel_delayed_work(&chip->update_sram_data); - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_sram_data, msecs_to_jiffies(0)); reschedule: - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->batt_profile_init, msecs_to_jiffies(BATTERY_PSY_WAIT_MS)); fg_relax(&chip->profile_wakeup_source); @@ -8497,7 +8505,8 @@ static void ima_error_recovery_work(struct work_struct *work) fg_enable_irqs(chip, true); update_sram_data_work(&chip->update_sram_data.work); update_temp_data(&chip->update_temp_work.work); - schedule_delayed_work(&chip->check_sanity_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->check_sanity_work, msecs_to_jiffies(1000)); chip->ima_error_handling = false; mutex_unlock(&chip->ima_recovery_lock); @@ -8633,7 +8642,7 @@ static void delayed_init_work(struct work_struct *work) /* release memory access before update_sram_data is called */ fg_mem_release(chip); - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_jeita_setting, msecs_to_jiffies(INIT_JEITA_DELAY_MS)); @@ -8644,10 +8653,12 @@ static void delayed_init_work(struct work_struct *work) update_temp_data(&chip->update_temp_work.work); if (!chip->use_otp_profile) - schedule_delayed_work(&chip->batt_profile_init, 0); + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->batt_profile_init, 0); if (chip->ima_supported && fg_reset_on_lockup) - schedule_delayed_work(&chip->check_sanity_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->check_sanity_work, msecs_to_jiffies(1000)); if (chip->wa_flag & IADC_GAIN_COMP_WA) { @@ -8734,12 +8745,12 @@ static int fg_probe(struct platform_device *pdev) mutex_init(&chip->rslow_comp.lock); mutex_init(&chip->sysfs_restart_lock); mutex_init(&chip->ima_recovery_lock); - INIT_DELAYED_WORK(&chip->update_jeita_setting, update_jeita_setting); - INIT_DELAYED_WORK(&chip->update_sram_data, update_sram_data_work); - INIT_DELAYED_WORK(&chip->update_temp_work, update_temp_data); - INIT_DELAYED_WORK(&chip->check_empty_work, check_empty_work); - INIT_DELAYED_WORK(&chip->batt_profile_init, batt_profile_init); - INIT_DELAYED_WORK(&chip->check_sanity_work, check_sanity_work); + INIT_DEFERRABLE_WORK(&chip->update_jeita_setting, update_jeita_setting); + INIT_DEFERRABLE_WORK(&chip->update_sram_data, update_sram_data_work); + INIT_DEFERRABLE_WORK(&chip->update_temp_work, update_temp_data); + INIT_DEFERRABLE_WORK(&chip->check_empty_work, check_empty_work); + INIT_DEFERRABLE_WORK(&chip->batt_profile_init, batt_profile_init); + INIT_DEFERRABLE_WORK(&chip->check_sanity_work, check_sanity_work); INIT_WORK(&chip->ima_error_recovery_work, ima_error_recovery_work); INIT_WORK(&chip->rslow_comp_work, rslow_comp_work); INIT_WORK(&chip->fg_cap_learning_work, fg_cap_learning_work); @@ -8959,7 +8970,7 @@ static void check_and_update_sram_data(struct fg_chip *chip) else time_left = 0; - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_temp_work, msecs_to_jiffies(time_left * 1000)); next_update_time = chip->last_sram_update_time @@ -8970,7 +8981,7 @@ static void check_and_update_sram_data(struct fg_chip *chip) else time_left = 0; - schedule_delayed_work( + mod_delayed_work(system_freezable_power_efficient_wq, &chip->update_sram_data, msecs_to_jiffies(time_left * 1000)); } @@ -9080,7 +9091,8 @@ static int fg_reset_lockup_set(const char *val, const struct kernel_param *kp) pr_info("fg_reset_on_lockup set to %d\n", fg_reset_on_lockup); if (fg_reset_on_lockup) - schedule_delayed_work(&chip->check_sanity_work, + mod_delayed_work(system_freezable_power_efficient_wq, + &chip->check_sanity_work, msecs_to_jiffies(1000)); else cancel_delayed_work_sync(&chip->check_sanity_work); diff --git a/drivers/power/supply/qcom/qpnp-smb2.c b/drivers/power/supply/qcom/qpnp-smb2.c index 3310ae71f4fe..892ad3498c0e 100644 --- a/drivers/power/supply/qcom/qpnp-smb2.c +++ b/drivers/power/supply/qcom/qpnp-smb2.c @@ -180,8 +180,7 @@ struct smb2 { bool bad_part; }; -static int __debug_mask = - PR_INTERRUPT | PR_REGISTER | PR_MISC | PR_PARALLEL | PR_OTG; +static int __debug_mask; module_param_named( debug_mask, __debug_mask, int, 0600 ); @@ -629,7 +628,7 @@ static int smb2_usb_set_prop(struct power_supply *psy, rc = smblib_set_prop_otg_fastroleswap(chg, val); break; default: - pr_err("set prop %d is not supported\n", psp); + pr_debug("set prop %d is not supported\n", psp); rc = -EINVAL; break; } @@ -741,7 +740,7 @@ static int smb2_usb_port_set_prop(struct power_supply *psy, switch (psp) { default: - pr_err_ratelimited("Set prop %d is not supported in pc_port\n", + pr_debug("Set prop %d is not supported in pc_port\n", psp); rc = -EINVAL; break; @@ -2691,7 +2690,7 @@ static int smb2_probe(struct platform_device *pdev) device_init_wakeup(chg->dev, true); - pr_info("QPNP SMB2 probed successfully usb:present=%d type=%d batt:present = %d health = %d charge = %d\n", + pr_debug("QPNP SMB2 probed successfully usb:present=%d type=%d batt:present = %d health = %d charge = %d\n", usb_present, chg->real_charger_type, batt_present, batt_health, batt_charge_type); return rc; diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c index 58466b838c88..dd7c43f23732 100644 --- a/drivers/power/supply/qcom/smb5-lib.c +++ b/drivers/power/supply/qcom/smb5-lib.c @@ -889,6 +889,7 @@ int smblib_set_icl_current(struct smb_charger *chg, int icl_ua) smblib_err(chg, "Couldn't set SDP ICL rc=%d\n", rc); goto out; } + goto unsuspend; } else { set_sdp_current(chg, 100000); rc = smblib_set_charge_param(chg, &chg->param.usb_icl, icl_ua); @@ -982,7 +983,7 @@ static int smblib_awake_vote_callback(struct votable *votable, void *data, struct smb_charger *chg = data; if (awake) - pm_stay_awake(chg->dev); + pm_wakeup_event(chg->dev, 500); else pm_relax(chg->dev); diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 75a86202d1d1..415e878c0c5a 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -877,7 +877,7 @@ void pwm_put(struct pwm_device *pwm) mutex_lock(&pwm_lock); if (!test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) { - pr_warn("PWM device already freed\n"); + pr_debug("PWM device already freed\n"); goto out; } diff --git a/drivers/regulator/qpnp-lcdb-regulator.c b/drivers/regulator/qpnp-lcdb-regulator.c index f0d19128fbda..f27fbc17b63e 100644 --- a/drivers/regulator/qpnp-lcdb-regulator.c +++ b/drivers/regulator/qpnp-lcdb-regulator.c @@ -2182,7 +2182,7 @@ static int __init qpnp_lcdb_regulator_init(void) { return platform_driver_register(&qpnp_lcdb_regulator_driver); } -arch_initcall(qpnp_lcdb_regulator_init); +subsys_initcall(qpnp_lcdb_regulator_init); static void __exit qpnp_lcdb_regulator_exit(void) { diff --git a/drivers/rtc/qpnp-rtc.c b/drivers/rtc/qpnp-rtc.c index 4152086d2d6f..186e73cb4691 100644 --- a/drivers/rtc/qpnp-rtc.c +++ b/drivers/rtc/qpnp-rtc.c @@ -267,12 +267,6 @@ qpnp_rtc_read_time(struct device *dev, struct rtc_time *tm) rtc_time_to_tm(secs, tm); - rc = rtc_valid_tm(tm); - if (rc) { - dev_err(dev, "Invalid time read from RTC\n"); - return rc; - } - dev_dbg(dev, "secs = %lu, h:m:s == %d:%d:%d, d/m/y = %d/%d/%d\n", secs, tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_mday, tm->tm_mon, tm->tm_year); diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm.c b/drivers/scsi/aic7xxx/aicasm/aicasm.c index 2e3117aa382f..21ac265280bf 100644 --- a/drivers/scsi/aic7xxx/aicasm/aicasm.c +++ b/drivers/scsi/aic7xxx/aicasm/aicasm.c @@ -254,7 +254,7 @@ main(int argc, char *argv[]) argv += optind; if (argc != 1) { - fprintf(stderr, "%s: No input file specifiled\n", appname); + fprintf(stderr, "%s: No input file specified\n", appname); usage(); /* NOTREACHED */ } diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 8bde30aa162c..47ec954cfa6e 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -37,8 +37,6 @@ #define MAX_PROP_SIZE 32 #define VDDP_REF_CLK_MIN_UV 1200000 #define VDDP_REF_CLK_MAX_UV 1200000 -/* TODO: further tuning for this parameter may be required */ -#define UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US (10000) /* microseconds */ #define UFS_QCOM_DEFAULT_DBG_PRINT_EN \ (UFS_QCOM_DBG_PRINT_REGS_EN | UFS_QCOM_DBG_PRINT_TEST_BUS_EN) @@ -1725,8 +1723,7 @@ static void ufs_qcom_pm_qos_unvote_work(struct work_struct *work) group->state = PM_QOS_UNVOTED; spin_unlock_irqrestore(host->hba->host->host_lock, flags); - pm_qos_update_request_timeout(&group->req, - group->latency_us, UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US); + pm_qos_update_request(&group->req, PM_QOS_DEFAULT_VALUE); } static ssize_t ufs_qcom_pm_qos_enable_show(struct device *dev, @@ -2804,6 +2801,7 @@ static struct platform_driver ufs_qcom_pltform = { .name = "ufshcd-qcom", .pm = &ufs_qcom_pm_ops, .of_match_table = of_match_ptr(ufs_qcom_of_match), + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; module_platform_driver(ufs_qcom_pltform); diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h index 7aad09b65cad..40292b158fe2 100644 --- a/drivers/scsi/ufs/ufshpb.h +++ b/drivers/scsi/ufs/ufshpb.h @@ -390,16 +390,6 @@ struct ufshpb_sysfs_entry { ssize_t (*store)(struct ufshpb_lu *hpb, const char *, size_t); }; -static inline void *kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kzalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} - struct ufshcd_lrb; void ufshcd_init_hpb(struct ufs_hba *hba); diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 7dcd0a22159a..51626813ff25 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -117,3 +117,5 @@ subdir-ccflags-y += \ -Wno-enum-conversion \ -Wno-strlcpy-strlcat-size \ -Wno-gcc-compat \ + +subdir-ccflags-y += $(call cc-disable-warning, enum-conversion) diff --git a/drivers/soc/qcom/glink.c b/drivers/soc/qcom/glink.c index b8deec19ff8b..17cb46769835 100644 --- a/drivers/soc/qcom/glink.c +++ b/drivers/soc/qcom/glink.c @@ -320,7 +320,7 @@ struct channel_ctx { static struct glink_core_if core_impl; static void *log_ctx; -static unsigned int glink_debug_mask = QCOM_GLINK_INFO; +static unsigned int glink_debug_mask; module_param_named(debug_mask, glink_debug_mask, uint, S_IRUGO | S_IWUSR | S_IWGRP); @@ -5805,7 +5805,7 @@ static void glink_pm_qos_unvote(struct glink_core_xprt_ctx *xprt_ptr) xprt_ptr->tx_path_activity = false; if (xprt_ptr->qos_req_active) { GLINK_PERF("%s: qos unvote\n", __func__); - schedule_delayed_work(&xprt_ptr->pm_qos_work, + queue_delayed_work(system_power_efficient_wq, &xprt_ptr->pm_qos_work, msecs_to_jiffies(GLINK_PM_QOS_HOLDOFF_MS)); } } diff --git a/drivers/soc/qcom/glink_smem_native_xprt.c b/drivers/soc/qcom/glink_smem_native_xprt.c index 27c0ca9f30a3..c4a889f6942c 100644 --- a/drivers/soc/qcom/glink_smem_native_xprt.c +++ b/drivers/soc/qcom/glink_smem_native_xprt.c @@ -281,7 +281,7 @@ enum { QCOM_GLINK_DEBUG_DISABLE = 1U << 1, }; -static unsigned int glink_xprt_debug_mask = QCOM_GLINK_DEBUG_ENABLE; +static unsigned int glink_xprt_debug_mask; module_param_named(debug_mask, glink_xprt_debug_mask, uint, 0664); diff --git a/drivers/soc/qcom/icnss.c b/drivers/soc/qcom/icnss.c index 1e2f05aa07b4..19e002a982f5 100644 --- a/drivers/soc/qcom/icnss.c +++ b/drivers/soc/qcom/icnss.c @@ -3365,7 +3365,7 @@ int icnss_wlan_enable(struct device *dev, struct icnss_wlan_enable_cfg *config, if (ret) goto out; skip: - ret = wlfw_wlan_mode_send_sync_msg(mode); + ret = wlfw_wlan_mode_send_sync_msg((enum wlfw_driver_mode_enum_v01)mode); out: if (test_bit(SKIP_QMI, &quirks)) ret = 0; @@ -4890,6 +4890,7 @@ static struct platform_driver icnss_driver = { .pm = &icnss_pm_ops, .owner = THIS_MODULE, .of_match_table = icnss_dt_match, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; diff --git a/drivers/soc/qcom/msm_bus/msm_bus_arb_adhoc.c b/drivers/soc/qcom/msm_bus/msm_bus_arb_adhoc.c index cb215a22791e..b160610d3f44 100644 --- a/drivers/soc/qcom/msm_bus/msm_bus_arb_adhoc.c +++ b/drivers/soc/qcom/msm_bus/msm_bus_arb_adhoc.c @@ -874,7 +874,7 @@ static void unregister_client_adhoc(uint32_t cl) curr = client->curr; if ((curr < 0) || (curr >= pdata->num_usecases)) { - MSM_BUS_ERR("Invalid index Defaulting curr to 0"); + MSM_BUS_DBG("Invalid index Defaulting curr to 0"); curr = 0; } diff --git a/drivers/soc/qcom/msm_bus/msm_bus_arb_rpmh.c b/drivers/soc/qcom/msm_bus/msm_bus_arb_rpmh.c index 8af9b5a95826..c7e8f545d39d 100644 --- a/drivers/soc/qcom/msm_bus/msm_bus_arb_rpmh.c +++ b/drivers/soc/qcom/msm_bus/msm_bus_arb_rpmh.c @@ -1338,8 +1338,7 @@ static uint32_t register_client_adhoc(struct msm_bus_scale_pdata *pdata) return handle; } -static int update_client_paths(struct msm_bus_client *client, bool log_trns, - unsigned int idx) +static int update_client_paths(struct msm_bus_client *client, unsigned int idx) { int lnode, src, dest, cur_idx; uint64_t req_clk, req_bw, curr_clk, curr_bw, slp_clk, slp_bw; @@ -1399,17 +1398,13 @@ static int update_client_paths(struct msm_bus_client *client, bool log_trns, __func__, ret, pdata->active_only); goto exit_update_client_paths; } - - if (log_trns) - getpath_debug(src, lnode, pdata->active_only); } commit_data(); exit_update_client_paths: return ret; } -static int update_client_alc(struct msm_bus_client *client, bool log_trns, - unsigned int idx) +static int update_client_alc(struct msm_bus_client *client, unsigned int idx) { int lnode, cur_idx; uint64_t req_idle_time, req_fal, dual_idle_time, dual_fal, @@ -1588,7 +1583,7 @@ static int update_context(uint32_t cl, bool active_only, pdata->active_only = active_only; msm_bus_dbg_client_data(client->pdata, ctx_idx, cl); - ret = update_client_paths(client, false, ctx_idx); + ret = update_client_paths(client, ctx_idx); if (ret) { pr_err("%s: Err updating path\n", __func__); goto exit_update_context; @@ -1606,8 +1601,6 @@ static int update_request_adhoc(uint32_t cl, unsigned int index) int ret = 0; struct msm_bus_scale_pdata *pdata; struct msm_bus_client *client; - const char *test_cl = "Null"; - bool log_transaction = false; rt_mutex_lock(&msm_bus_adhoc_lock); @@ -1645,17 +1638,14 @@ static int update_request_adhoc(uint32_t cl, unsigned int index) goto exit_update_request; } - if (!strcmp(test_cl, pdata->name)) - log_transaction = true; - MSM_BUS_DBG("%s: cl: %u index: %d curr: %d num_paths: %d\n", __func__, cl, index, client->curr, client->pdata->usecase->num_paths); if (pdata->alc) - ret = update_client_alc(client, log_transaction, index); + ret = update_client_alc(client, index); else { msm_bus_dbg_client_data(client->pdata, index, cl); - ret = update_client_paths(client, log_transaction, index); + ret = update_client_paths(client, index); } if (ret) { pr_err("%s: Err updating path\n", __func__); diff --git a/drivers/soc/qcom/msm_bus/msm_bus_fabric_adhoc.c b/drivers/soc/qcom/msm_bus/msm_bus_fabric_adhoc.c index eb541190563c..299da893a97d 100644 --- a/drivers/soc/qcom/msm_bus/msm_bus_fabric_adhoc.c +++ b/drivers/soc/qcom/msm_bus/msm_bus_fabric_adhoc.c @@ -50,6 +50,7 @@ ssize_t bw_show(struct device *dev, struct device_attribute *attr, bus_node->lnode_list[i].lnode_ab[ACTIVE_CTX], bus_node->lnode_list[i].lnode_ib[DUAL_CTX], bus_node->lnode_list[i].lnode_ab[DUAL_CTX]); +#if defined(CONFIG_TRACING) && defined(DEBUG) trace_printk( "[%d]:%s:Act_IB %llu Act_AB %llu Slp_IB %llu Slp_AB %llu\n", i, bus_node->lnode_list[i].cl_name, @@ -57,6 +58,7 @@ ssize_t bw_show(struct device *dev, struct device_attribute *attr, bus_node->lnode_list[i].lnode_ab[ACTIVE_CTX], bus_node->lnode_list[i].lnode_ib[DUAL_CTX], bus_node->lnode_list[i].lnode_ab[DUAL_CTX]); +#endif } off += scnprintf((buf + off), PAGE_SIZE, "Max_Act_IB %llu Sum_Act_AB %llu Act_Util_fact %d Act_Vrail_comp %d\n", @@ -70,6 +72,7 @@ ssize_t bw_show(struct device *dev, struct device_attribute *attr, bus_node->node_bw[DUAL_CTX].sum_ab, bus_node->node_bw[DUAL_CTX].util_used, bus_node->node_bw[DUAL_CTX].vrail_used); +#if defined(CONFIG_TRACING) && defined(DEBUG) trace_printk( "Max_Act_IB %llu Sum_Act_AB %llu Act_Util_fact %d Act_Vrail_comp %d\n", bus_node->node_bw[ACTIVE_CTX].max_ib, @@ -83,6 +86,7 @@ ssize_t bw_show(struct device *dev, struct device_attribute *attr, bus_node->node_bw[DUAL_CTX].util_used, bus_node->node_bw[DUAL_CTX].vrail_used); return off; +#endif } ssize_t bw_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/soc/qcom/msm_bus/msm_bus_fabric_rpmh.c b/drivers/soc/qcom/msm_bus/msm_bus_fabric_rpmh.c index 5d3bd751d885..eff6bfc33b85 100644 --- a/drivers/soc/qcom/msm_bus/msm_bus_fabric_rpmh.c +++ b/drivers/soc/qcom/msm_bus/msm_bus_fabric_rpmh.c @@ -979,7 +979,7 @@ static int msm_bus_dev_init_qos(struct device *dev, void *data) goto exit_init_qos; } - if (node_dev->ap_owned) { + if (node_dev->ap_owned && node_dev->num_node_qos_clks) { struct msm_bus_node_device_type *bus_node_info; bus_node_info = diff --git a/drivers/soc/qcom/msm_bus/msm_bus_rules.c b/drivers/soc/qcom/msm_bus/msm_bus_rules.c index 3fec29dfe95a..edc0457df931 100644 --- a/drivers/soc/qcom/msm_bus/msm_bus_rules.c +++ b/drivers/soc/qcom/msm_bus/msm_bus_rules.c @@ -16,7 +16,6 @@ #include #include #include -#include struct node_vote_info { int id; @@ -216,11 +215,6 @@ static void match_rule(struct rule_update_path_info *inp_node, continue; if (check_rule(rule, inp_node)) { - trace_bus_rules_matches( - (node->cur_rule ? - node->cur_rule->rule_id : -1), - inp_node->id, inp_node->ab, - inp_node->ib, inp_node->clk); if (rule->state == RULE_STATE_NOT_APPLIED) rule->state_change = true; diff --git a/drivers/soc/qcom/msm_smem.c b/drivers/soc/qcom/msm_smem.c index b73fd52aabc4..18de29704fab 100644 --- a/drivers/soc/qcom/msm_smem.c +++ b/drivers/soc/qcom/msm_smem.c @@ -1414,14 +1414,14 @@ static int smem_init_target_info(phys_addr_t info_addr, resource_size_t size) return 0; } +#define TEMP_STRING_SIZE 11 /* max 3 digit count */ static int msm_smem_probe(struct platform_device *pdev) { char *key; struct resource *r; phys_addr_t aux_mem_base; resource_size_t aux_mem_size; - int temp_string_size = 11; /* max 3 digit count */ - char temp_string[temp_string_size]; + char temp_string[TEMP_STRING_SIZE]; int ret; struct ramdump_segment *ramdump_segments_tmp = NULL; struct smem_area *smem_areas_tmp = NULL; @@ -1504,7 +1504,7 @@ static int msm_smem_probe(struct platform_device *pdev) num_smem_areas = 1; while (1) { - scnprintf(temp_string, temp_string_size, "aux-mem%d", + scnprintf(temp_string, TEMP_STRING_SIZE, "aux-mem%d", num_smem_areas); r = platform_get_resource_byname(pdev, IORESOURCE_MEM, temp_string); @@ -1544,7 +1544,7 @@ static int msm_smem_probe(struct platform_device *pdev) /* Configure auxiliary SMEM regions */ while (1) { - scnprintf(temp_string, temp_string_size, "aux-mem%d", + scnprintf(temp_string, TEMP_STRING_SIZE, "aux-mem%d", smem_idx); r = platform_get_resource_byname(pdev, IORESOURCE_MEM, temp_string); diff --git a/drivers/soc/qcom/msm_smp2p.c b/drivers/soc/qcom/msm_smp2p.c index 1cd47505ea19..7692c2164474 100644 --- a/drivers/soc/qcom/msm_smp2p.c +++ b/drivers/soc/qcom/msm_smp2p.c @@ -72,7 +72,7 @@ struct smp2p_out_list_item { static struct smp2p_out_list_item out_list[SMP2P_NUM_PROCS]; static void *log_ctx; -static int smp2p_debug_mask = MSM_SMP2P_INFO | MSM_SMP2P_DEBUG; +static int smp2p_debug_mask; module_param_named(debug_mask, smp2p_debug_mask, int, 0664); diff --git a/drivers/soc/qcom/system_pm.c b/drivers/soc/qcom/system_pm.c index 1befdcf32aa6..334be4a34864 100644 --- a/drivers/soc/qcom/system_pm.c +++ b/drivers/soc/qcom/system_pm.c @@ -75,9 +75,10 @@ static int system_sleep_enter(struct cpumask *mask) /** * system_sleep_exit() - Activities done when exiting system low power modes */ -static void system_sleep_exit(void) +static void system_sleep_exit(bool success) { - msm_rpmh_master_stats_update(); + if (success) + msm_rpmh_master_stats_update(); gic_v3_dist_restore(); } diff --git a/drivers/soc/qcom/watchdog_v2.c b/drivers/soc/qcom/watchdog_v2.c index 7f23db999604..c0f7cb599152 100644 --- a/drivers/soc/qcom/watchdog_v2.c +++ b/drivers/soc/qcom/watchdog_v2.c @@ -654,6 +654,9 @@ static void configure_bark_dump(struct msm_watchdog_data *wdog_dd) int cpu; void *cpu_buf; + if (!IS_ENABLED(CONFIG_QCOM_MEMORY_DUMP_V2)) + return; + cpu_data = kzalloc(sizeof(struct msm_dump_data) * num_present_cpus(), GFP_KERNEL); if (!cpu_data) diff --git a/drivers/soc/qcom/wcnss/wcnss_wlan.c b/drivers/soc/qcom/wcnss/wcnss_wlan.c index 08bd78b55364..e539db5c5c8c 100644 --- a/drivers/soc/qcom/wcnss/wcnss_wlan.c +++ b/drivers/soc/qcom/wcnss/wcnss_wlan.c @@ -1317,7 +1317,7 @@ static void wcnss_smd_notify_event(void *data, unsigned int event) schedule_work(&penv->wcnssctrl_version_work); schedule_work(&penv->wcnss_pm_config_work); cancel_delayed_work(&penv->wcnss_pm_qos_del_req); - schedule_delayed_work(&penv->wcnss_pm_qos_del_req, 0); + queue_delayed_work(system_power_efficient_wq, &penv->wcnss_pm_qos_del_req, 0); if (penv->wlan_config.is_pronto_vadc && (penv->vadc_dev)) schedule_work(&penv->wcnss_vadc_work); break; @@ -1928,7 +1928,7 @@ static int wcnss_smd_tx(void *data, int len) ret = smd_write_avail(penv->smd_ch); if (ret < len) { - wcnss_log(ERR, "no space available for smd frame\n"); + //wcnss_log(ERR, "no space available for smd frame\n"); return -ENOSPC; } ret = smd_write(penv->smd_ch, data, len); @@ -2000,7 +2000,7 @@ static void wcnss_notify_vbat(enum qpnp_tm_state state, void *ctx) if (rc) wcnss_log(ERR, "%s: tm setup failed: %d\n", __func__, rc); else - schedule_delayed_work(&penv->vbatt_work, + queue_delayed_work(system_power_efficient_wq, &penv->vbatt_work, msecs_to_jiffies(2000)); mutex_unlock(&penv->vbat_monitor_mutex); @@ -3439,7 +3439,7 @@ static int wcnss_notif_cb(struct notifier_block *this, unsigned long code, } else if ((code == SUBSYS_BEFORE_SHUTDOWN && data && data->crashed) || code == SUBSYS_SOC_RESET) { wcnss_disable_pc_add_req(); - schedule_delayed_work(&penv->wcnss_pm_qos_del_req, + queue_delayed_work(system_power_efficient_wq, &penv->wcnss_pm_qos_del_req, msecs_to_jiffies(WCNSS_PM_QOS_TIMEOUT)); penv->is_shutdown = 1; wcnss_log_debug_regs_on_bite(); @@ -3451,7 +3451,7 @@ static int wcnss_notif_cb(struct notifier_block *this, unsigned long code, wcnss_disable_pc_remove_req(); } else if (code == SUBSYS_BEFORE_SHUTDOWN) { wcnss_disable_pc_add_req(); - schedule_delayed_work(&penv->wcnss_pm_qos_del_req, + queue_delayed_work(system_power_efficient_wq, &penv->wcnss_pm_qos_del_req, msecs_to_jiffies(WCNSS_PM_QOS_TIMEOUT)); penv->is_shutdown = 1; } else if (code == SUBSYS_AFTER_POWERUP) { diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index f05cc7405acc..b019dbca0cd9 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -1530,6 +1530,7 @@ static struct platform_driver spi_geni_driver = { .name = "spi_geni", .pm = &spi_geni_pm_ops, .of_match_table = spi_geni_dt_match, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; module_platform_driver(spi_geni_driver); diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index f01a03a805a1..49eb1e735a0b 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -571,11 +571,6 @@ static struct ion_handle *__ion_alloc( struct ion_buffer *buffer = NULL; struct ion_heap *heap; int ret; - const unsigned int MAX_DBG_STR_LEN = 64; - char dbg_str[MAX_DBG_STR_LEN]; - unsigned int dbg_str_idx = 0; - - dbg_str[0] = '\0'; /* * For now, we don't want to fault in pages individually since @@ -614,40 +609,15 @@ static struct ion_handle *__ion_alloc( trace_ion_alloc_buffer_fallback(client->name, heap->name, len, heap_id_mask, flags, PTR_ERR(buffer)); - if (dbg_str_idx < MAX_DBG_STR_LEN) { - unsigned int len_left; - int ret_value; - - len_left = MAX_DBG_STR_LEN - dbg_str_idx - 1; - ret_value = snprintf(&dbg_str[dbg_str_idx], - len_left, "%s ", heap->name); - - if (ret_value >= len_left) { - /* overflow */ - dbg_str[MAX_DBG_STR_LEN - 1] = '\0'; - dbg_str_idx = MAX_DBG_STR_LEN; - } else if (ret_value >= 0) { - dbg_str_idx += ret_value; - } else { - /* error */ - dbg_str[MAX_DBG_STR_LEN - 1] = '\0'; - } - } } up_read(&dev->lock); - if (!buffer) { - trace_ion_alloc_buffer_fail(client->name, dbg_str, len, - heap_id_mask, flags, -ENODEV); + if (!buffer) return ERR_PTR(-ENODEV); - } if (IS_ERR(buffer)) { - trace_ion_alloc_buffer_fail(client->name, dbg_str, len, - heap_id_mask, flags, - PTR_ERR(buffer)); - pr_debug("ION is unable to allocate 0x%zx bytes (alignment: 0x%zx) from heap(s) %sfor client %s\n", - len, align, dbg_str, client->name); + pr_debug("ION is unable to allocate 0x%zx bytes (alignment: 0x%zx) from heap(s) for client %s\n", + len, align, client->name); return ERR_CAST(buffer); } diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c index 55bbaf8ea97b..202430e11c25 100644 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ b/drivers/staging/android/ion/ion_cma_heap.c @@ -103,7 +103,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, DMA_ATTR_FORCE_COHERENT); if (!info->cpu_addr) { - dev_err(dev, "Fail to allocate buffer\n"); + dev_dbg(dev, "Fail to allocate buffer\n"); goto err; } diff --git a/drivers/staging/android/ion/ion_cma_secure_heap.c b/drivers/staging/android/ion/ion_cma_secure_heap.c index 26c13ec79cc6..f290f22d9ac0 100644 --- a/drivers/staging/android/ion/ion_cma_secure_heap.c +++ b/drivers/staging/android/ion/ion_cma_secure_heap.c @@ -484,7 +484,7 @@ static struct ion_secure_cma_buffer_info *__ion_secure_cma_allocate( ret = ion_secure_cma_add_to_pool(sheap, len, false); if (ret) { mutex_unlock(&sheap->alloc_lock); - dev_err(sheap->dev, "Fail to allocate buffer\n"); + dev_dbg(sheap->dev, "Fail to allocate buffer\n"); goto err; } ret = ion_secure_cma_alloc_from_pool(sheap, &info->phys, len); diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index 502ba33e3270..95c9dba9d009 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -379,7 +379,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap, if (align > PAGE_SIZE) return -EINVAL; - if (size / PAGE_SIZE > totalram_pages / 2) + if (size / PAGE_SIZE > totalram_pages() / 2) return -ENOMEM; data.size = 0; diff --git a/drivers/staging/android/ion/ion_system_secure_heap.c b/drivers/staging/android/ion/ion_system_secure_heap.c index f913672f8426..151f8f730ed4 100644 --- a/drivers/staging/android/ion/ion_system_secure_heap.c +++ b/drivers/staging/android/ion/ion_system_secure_heap.c @@ -295,8 +295,8 @@ static int __ion_system_secure_heap_resize(struct ion_heap *heap, void *ptr, goto out_free; } list_splice_init(&items, &secure_heap->prefetch_list); - schedule_delayed_work(&secure_heap->prefetch_work, - shrink ? msecs_to_jiffies(SHRINK_DELAY) : 0); + queue_delayed_work(system_unbound_wq, &secure_heap->prefetch_work, + shrink ? msecs_to_jiffies(SHRINK_DELAY) : 0); spin_unlock_irqrestore(&secure_heap->work_lock, flags); return 0; diff --git a/drivers/staging/android/ion/msm/msm_ion.c b/drivers/staging/android/ion/msm/msm_ion.c index 6f58c7e3fc92..fcc4c7ab8f0b 100644 --- a/drivers/staging/android/ion/msm/msm_ion.c +++ b/drivers/staging/android/ion/msm/msm_ion.c @@ -1256,6 +1256,7 @@ static struct platform_driver msm_ion_driver = { .driver = { .name = "ion-msm", .of_match_table = msm_ion_match_table, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h index 8c75d5075590..a38af2a561ab 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h @@ -42,7 +42,7 @@ #if BITS_PER_LONG == 32 /* limit to lowmem on 32-bit systems */ #define NUM_CACHEPAGES \ - min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4) + min(totalram_pages(), 1UL << (30 - PAGE_SHIFT) * 3 / 4) #else #define NUM_CACHEPAGES totalram_pages #endif diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c index 8b551d2708ba..2d95784b660c 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c @@ -249,7 +249,7 @@ void cfs_print_to_console(struct ptldebug_header *hdr, int mask, int cfs_trace_max_debug_mb(void) { - int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); + int total_mb = (totalram_pages() >> (20 - PAGE_SHIFT)); return max(512, (total_mb * 80) / 100); } diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index 722c33f7eecc..9057f715c0b5 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -1217,8 +1217,8 @@ static inline void client_adjust_max_dirty(struct client_obd *cli) cli->cl_dirty_max_pages = dirty_max; } - if (cli->cl_dirty_max_pages > totalram_pages / 8) - cli->cl_dirty_max_pages = totalram_pages / 8; + if (cli->cl_dirty_max_pages > totalram_pages() / 8) + cli->cl_dirty_max_pages = totalram_pages() / 8; } #endif /* __OBD_H */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c index 153e990c494e..1a2d41d99867 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c @@ -363,11 +363,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) if (!strcmp(name, LUSTRE_MDC_NAME)) { cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; - } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) { + } else if (totalram_pages() >> (20 - PAGE_SHIFT) <= 128 /* MB */) { cli->cl_max_rpcs_in_flight = 2; - } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) { + } else if (totalram_pages() >> (20 - PAGE_SHIFT) <= 256 /* MB */) { cli->cl_max_rpcs_in_flight = 3; - } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) { + } else if (totalram_pages() >> (20 - PAGE_SHIFT) <= 512 /* MB */) { cli->cl_max_rpcs_in_flight = 4; } else { cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 13ec24d44b04..e1da3cf62a61 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -244,9 +244,9 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj, pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ - if (pages_number > totalram_pages / 2) { + if (pages_number > totalram_pages() / 2) { CERROR("can't set file readahead more than %lu MB\n", - totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ + totalram_pages() >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ return -ERANGE; } @@ -411,10 +411,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, return -ERANGE; pages_number = (long)val; - if (pages_number < 0 || pages_number > totalram_pages) { + if (pages_number < 0 || pages_number > totalram_pages()) { CERROR("%s: can't set max cache more than %lu MB\n", ll_get_fsname(sb, NULL, 0), - totalram_pages >> (20 - PAGE_SHIFT)); + totalram_pages() >> (20 - PAGE_SHIFT)); return -ERANGE; } diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c index 76e1ee83a723..ff3f738f01dc 100644 --- a/drivers/staging/lustre/lustre/obdclass/class_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c @@ -477,10 +477,10 @@ static int __init obdclass_init(void) * For clients with less memory, a larger fraction is needed * for other purposes (mostly for BGL). */ - if (totalram_pages <= 512 << (20 - PAGE_SHIFT)) - obd_max_dirty_pages = totalram_pages / 4; + if (totalram_pages() <= 512 << (20 - PAGE_SHIFT)) + obd_max_dirty_pages = totalram_pages() / 4; else - obd_max_dirty_pages = totalram_pages / 2; + obd_max_dirty_pages = totalram_pages() / 2; err = obd_init_caches(); if (err) diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c index e6c785afceba..d7d2ea8c3c40 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c @@ -112,7 +112,7 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr, val *= 1 << (20 - PAGE_SHIFT); /* convert to pages */ - if (val > ((totalram_pages / 10) * 9)) { + if (val > ((totalram_pages() / 10) * 9)) { /* Somebody wants to assign too much memory to dirty pages */ return -EINVAL; } diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index 054e567e6c8d..107f4362109c 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -884,7 +884,7 @@ static unsigned long lu_htable_order(struct lu_device *top) * * Size of lu_object is (arbitrary) taken as 1K (together with inode). */ - cache_size = totalram_pages; + cache_size = totalram_pages(); #if BITS_PER_LONG == 32 /* limit hashtable size for lowmem systems to low RAM */ diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c index f0062d44ee03..fc3fab479e16 100644 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c @@ -163,7 +163,7 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, if (pages_number <= 0 || pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) || - pages_number > totalram_pages / 4) /* 1/4 of RAM */ + pages_number > totalram_pages() / 4) /* 1/4 of RAM */ return -ERANGE; spin_lock(&cli->cl_loi_list_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c index 9bad57d65db4..84845c274a2e 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ -306,7 +306,7 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, * far. */ bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (val > totalram_pages / (2 * bufpages)) + if (val > totalram_pages() / (2 * bufpages)) return -ERANGE; spin_lock(&svc->srv_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index b2cc5ea6cb93..f9095a84fd15 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -140,7 +140,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) "low free mark: %lu\n" "max waitqueue depth: %u\n" "max wait time: %ld/%lu\n", - totalram_pages, + totalram_pages(), PAGES_PER_POOL, page_pools.epp_max_pages, page_pools.epp_max_pools, @@ -378,7 +378,7 @@ int sptlrpc_enc_pool_init(void) * maximum capacity is 1/8 of total physical memory. * is the 1/8 a good number? */ - page_pools.epp_max_pages = totalram_pages / 8; + page_pools.epp_max_pages = totalram_pages() / 8; page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); init_waitqueue_head(&page_pools.epp_waitq); diff --git a/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/i_qdf_defer.h b/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/i_qdf_defer.h index 5a1bcda6218c..ef30f790e84d 100644 --- a/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/i_qdf_defer.h +++ b/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/i_qdf_defer.h @@ -157,7 +157,7 @@ static inline QDF_STATUS __qdf_sched_work(__qdf_work_t *work) static inline QDF_STATUS __qdf_sched_delayed_work(__qdf_delayed_work_t *work, uint32_t delay) { - schedule_delayed_work(work, msecs_to_jiffies(delay)); + queue_delayed_work(system_power_efficient_wq, work, msecs_to_jiffies(delay)); return QDF_STATUS_SUCCESS; } @@ -246,7 +246,7 @@ static inline QDF_STATUS __qdf_sched_work(__qdf_work_t *work) static inline QDF_STATUS __qdf_sched_delayed_work(__qdf_delayed_work_t *work, uint32_t delay) { - schedule_delayed_work(&work->dwork, msecs_to_jiffies(delay)); + queue_delayed_work(system_power_efficient_wq, &work->dwork, msecs_to_jiffies(delay)); return QDF_STATUS_SUCCESS; } diff --git a/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/qdf_lock.c b/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/qdf_lock.c index 85168cdb4b7f..211ed17af801 100644 --- a/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/qdf_lock.c +++ b/drivers/staging/qca-wifi-host-cmn/qdf/linux/src/qdf_lock.c @@ -249,19 +249,10 @@ qdf_export_symbol(qdf_mutex_release); * * Return: Pointer to the name if it is valid or a default string */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) -const char *qdf_wake_lock_name(qdf_wake_lock_t *lock) -{ - if (lock->name) - return lock->name; - return "UNNAMED_WAKELOCK"; -} -#else const char *qdf_wake_lock_name(qdf_wake_lock_t *lock) { return "NO_WAKELOCK_SUPPORT"; } -#endif qdf_export_symbol(qdf_wake_lock_name); /** @@ -273,18 +264,10 @@ qdf_export_symbol(qdf_wake_lock_name); * QDF status success: if wake lock is initialized * QDF status failure: if wake lock was not initialized */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) QDF_STATUS qdf_wake_lock_create(qdf_wake_lock_t *lock, const char *name) { - wakeup_source_init(lock, name); return QDF_STATUS_SUCCESS; } -#else -QDF_STATUS qdf_wake_lock_create(qdf_wake_lock_t *lock, const char *name) -{ - return QDF_STATUS_SUCCESS; -} -#endif qdf_export_symbol(qdf_wake_lock_create); /** @@ -296,23 +279,10 @@ qdf_export_symbol(qdf_wake_lock_create); * QDF status success: if wake lock is acquired * QDF status failure: if wake lock was not acquired */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) -QDF_STATUS qdf_wake_lock_acquire(qdf_wake_lock_t *lock, uint32_t reason) -{ -#ifdef CONFIG_MCL - host_diag_log_wlock(reason, qdf_wake_lock_name(lock), - WIFI_POWER_EVENT_DEFAULT_WAKELOCK_TIMEOUT, - WIFI_POWER_EVENT_WAKELOCK_TAKEN); -#endif - __pm_stay_awake(lock); - return QDF_STATUS_SUCCESS; -} -#else QDF_STATUS qdf_wake_lock_acquire(qdf_wake_lock_t *lock, uint32_t reason) { return QDF_STATUS_SUCCESS; } -#endif qdf_export_symbol(qdf_wake_lock_acquire); /** @@ -324,21 +294,10 @@ qdf_export_symbol(qdf_wake_lock_acquire); * QDF status success: if wake lock is acquired * QDF status failure: if wake lock was not acquired */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) -QDF_STATUS qdf_wake_lock_timeout_acquire(qdf_wake_lock_t *lock, uint32_t msec) -{ - /* Wakelock for Rx is frequent. - * It is reported only during active debug - */ - __pm_wakeup_event(lock, msec); - return QDF_STATUS_SUCCESS; -} -#else QDF_STATUS qdf_wake_lock_timeout_acquire(qdf_wake_lock_t *lock, uint32_t msec) { return QDF_STATUS_SUCCESS; } -#endif qdf_export_symbol(qdf_wake_lock_timeout_acquire); /** @@ -350,23 +309,10 @@ qdf_export_symbol(qdf_wake_lock_timeout_acquire); * QDF status success: if wake lock is acquired * QDF status failure: if wake lock was not acquired */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) QDF_STATUS qdf_wake_lock_release(qdf_wake_lock_t *lock, uint32_t reason) { -#ifdef CONFIG_MCL - host_diag_log_wlock(reason, qdf_wake_lock_name(lock), - WIFI_POWER_EVENT_DEFAULT_WAKELOCK_TIMEOUT, - WIFI_POWER_EVENT_WAKELOCK_RELEASED); -#endif - __pm_relax(lock); return QDF_STATUS_SUCCESS; } -#else -QDF_STATUS qdf_wake_lock_release(qdf_wake_lock_t *lock, uint32_t reason) -{ - return QDF_STATUS_SUCCESS; -} -#endif qdf_export_symbol(qdf_wake_lock_release); /** @@ -377,18 +323,10 @@ qdf_export_symbol(qdf_wake_lock_release); * QDF status success: if wake lock is acquired * QDF status failure: if wake lock was not acquired */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) -QDF_STATUS qdf_wake_lock_destroy(qdf_wake_lock_t *lock) -{ - wakeup_source_trash(lock); - return QDF_STATUS_SUCCESS; -} -#else QDF_STATUS qdf_wake_lock_destroy(qdf_wake_lock_t *lock) { return QDF_STATUS_SUCCESS; } -#endif qdf_export_symbol(qdf_wake_lock_destroy); #ifdef CONFIG_MCL diff --git a/drivers/staging/qca-wifi-host-cmn/wmi/src/wmi_unified_tlv.c b/drivers/staging/qca-wifi-host-cmn/wmi/src/wmi_unified_tlv.c index a0aa50697509..7c00308fb496 100644 --- a/drivers/staging/qca-wifi-host-cmn/wmi/src/wmi_unified_tlv.c +++ b/drivers/staging/qca-wifi-host-cmn/wmi/src/wmi_unified_tlv.c @@ -12288,7 +12288,7 @@ QDF_STATUS send_set_arp_stats_req_cmd_tlv(wmi_unified_t wmi_handle, goto error; } - WMI_LOGI(FL("set arp stats flag=%d, vdev=%d"), + WMI_LOGD(FL("set arp stats flag=%d, vdev=%d"), req_buf->flag, req_buf->vdev_id); return QDF_STATUS_SUCCESS; error: diff --git a/drivers/staging/qcacld-3.0/Kbuild b/drivers/staging/qcacld-3.0/Kbuild index 93a22ffbf3bd..179c1d3f5fd0 100644 --- a/drivers/staging/qcacld-3.0/Kbuild +++ b/drivers/staging/qcacld-3.0/Kbuild @@ -172,7 +172,7 @@ WLAN_OPEN_SOURCE := 1 CONFIG_LINUX_QCMBR :=y endif - CONFIG_MPC_UT_FRAMEWORK := y + CONFIG_MPC_UT_FRAMEWORK := n #Flag to enable offload packets feature CONFIG_WLAN_OFFLOAD_PACKETS := y @@ -258,7 +258,7 @@ CONFIG_WLAN_SPECTRAL_SCAN := y CONFIG_WLAN_FEATURE_APF := y #Enable WLAN/Power debugfs feature only if debug_fs is enabled -ifeq ($(CONFIG_DEBUG_FS), y) +ifeq ($(CONFIG_ANDROID), y) # Flag to enable debugfs. Depends on CONFIG_DEBUG_FS in kernel # configuration. CONFIG_WLAN_DEBUGFS := y @@ -276,6 +276,9 @@ endif # Feature flags which are not (currently) configurable via Kconfig +# Force user build +TARGET_BUILD_VARIANT := user + #Whether to build debug version BUILD_DEBUG_VERSION := 1 @@ -474,6 +477,7 @@ HDD_OBJS += $(HDD_SRC_DIR)/wlan_hdd_debugfs_csr.o HDD_OBJS += $(HDD_SRC_DIR)/wlan_hdd_debugfs_connect.o HDD_OBJS += $(HDD_SRC_DIR)/wlan_hdd_debugfs_offload.o HDD_OBJS += $(HDD_SRC_DIR)/wlan_hdd_debugfs_roam.o +CFLAGS_wlan_hdd_debugfs.o += -DCONFIG_DEBUG_FS endif ifeq ($(CONFIG_WLAN_FEATURE_DSRC), y) @@ -1253,9 +1257,7 @@ CDEFINES := -DANI_LITTLE_BYTE_ENDIAN \ -DWLAN_AP_STA_CONCURRENCY \ -DFEATURE_WLAN_SCAN_PNO \ -DWLAN_FEATURE_PACKET_FILTERING \ - -DWLAN_FEATURE_P2P_DEBUG \ -DWLAN_ENABLE_AGEIE_ON_SCAN_RESULTS \ - -DWLANTL_DEBUG\ -DWLAN_NS_OFFLOAD \ -DWLAN_SOFTAP_VSTA_FEATURE \ -DWLAN_FEATURE_GTK_OFFLOAD \ @@ -1779,10 +1781,6 @@ ifeq (y,$(filter y,$(CONFIG_CNSS_EOS) $(CONFIG_ICNSS))) CONFIG_HELIUMPLUS := y CONFIG_64BIT_PADDR := y CONFIG_FEATURE_TSO := y -CONFIG_FEATURE_TSO_DEBUG := y -ifneq ($(CONFIG_FORCE_ALLOC_FROM_DMA_ZONE), y) -CONFIG_ENABLE_DEBUG_ADDRESS_MARKING := y -endif ifeq ($(CONFIG_HELIUMPLUS),y) CDEFINES += -DHELIUMPLUS CDEFINES += -DAR900B diff --git a/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_cfg.h b/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_cfg.h index dd2dba778f6a..17f98b211209 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_cfg.h +++ b/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_cfg.h @@ -10337,12 +10337,6 @@ enum restart_beaconing_on_ch_avoid_rule { #define CFG_CRASH_FW_TIMEOUT_ENABLE (1) #define CFG_CRASH_FW_TIMEOUT_DEFAULT (1) -/* Hold wakelock for unicast RX packets for the specified duration */ -#define CFG_RX_WAKELOCK_TIMEOUT_NAME "rx_wakelock_timeout" -#define CFG_RX_WAKELOCK_TIMEOUT_DEFAULT (50) -#define CFG_RX_WAKELOCK_TIMEOUT_MIN (0) -#define CFG_RX_WAKELOCK_TIMEOUT_MAX (100) - /* * * enable_5g_band_pref - Enable preference for 5G from INI. @@ -15140,7 +15134,6 @@ struct hdd_config { /* beacon count before channel switch */ uint8_t sap_chanswitch_beacon_cnt; uint8_t sap_chanswitch_mode; - uint32_t rx_wakelock_timeout; #ifdef WLAN_FEATURE_WOW_PULSE bool wow_pulse_support; uint8_t wow_pulse_pin; diff --git a/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_main.h b/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_main.h index 99d044d42a38..b926e27d0ede 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_main.h +++ b/drivers/staging/qcacld-3.0/core/hdd/inc/wlan_hdd_main.h @@ -1945,7 +1945,6 @@ struct hdd_context_s { /** P2P Device MAC Address for the adapter */ struct qdf_mac_addr p2pDeviceAddress; - qdf_wake_lock_t rx_wake_lock; qdf_wake_lock_t sap_wake_lock; #ifdef FEATURE_WLAN_TDLS diff --git a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg.c b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg.c index ab3d0735e31f..77c3793c3b0a 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg.c +++ b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg.c @@ -4505,12 +4505,6 @@ struct reg_table_entry g_registry_table[] = { CFG_CRASH_FW_TIMEOUT_DEFAULT, CFG_CRASH_FW_TIMEOUT_DISABLE, CFG_CRASH_FW_TIMEOUT_ENABLE), - REG_VARIABLE(CFG_RX_WAKELOCK_TIMEOUT_NAME, WLAN_PARAM_Integer, - struct hdd_config, rx_wakelock_timeout, - VAR_FLAGS_OPTIONAL | VAR_FLAGS_RANGE_CHECK_ASSUME_DEFAULT, - CFG_RX_WAKELOCK_TIMEOUT_DEFAULT, - CFG_RX_WAKELOCK_TIMEOUT_MIN, - CFG_RX_WAKELOCK_TIMEOUT_MAX), REG_VARIABLE(CFG_SAP_CH_SWITCH_BEACON_CNT, WLAN_PARAM_Integer, struct hdd_config, sap_chanswitch_beacon_cnt, diff --git a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg80211.c b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg80211.c index ce7b21953c85..1b91b1b91b8f 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg80211.c +++ b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_cfg80211.c @@ -11314,7 +11314,7 @@ static int __wlan_hdd_cfg80211_set_nud_stats(struct wiphy *wiphy, } } - QDF_TRACE(QDF_MODULE_ID_HDD, QDF_TRACE_LEVEL_INFO, + QDF_TRACE(QDF_MODULE_ID_HDD, QDF_TRACE_LEVEL_DEBUG, "%s STATS_SET_START Received flag %d!!", __func__, arp_stats_params.flag); diff --git a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_main.c b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_main.c index e76b17f73e17..2c1ec48782ab 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_main.c +++ b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_main.c @@ -6269,32 +6269,6 @@ static int hdd_init_netlink_services(hdd_context_t *hdd_ctx) return ret; } -/** - * hdd_rx_wake_lock_destroy() - Destroy RX wakelock - * @hdd_ctx: HDD context. - * - * Destroy RX wakelock. - * - * Return: None. - */ -static void hdd_rx_wake_lock_destroy(hdd_context_t *hdd_ctx) -{ - qdf_wake_lock_destroy(&hdd_ctx->rx_wake_lock); -} - -/** - * hdd_rx_wake_lock_create() - Create RX wakelock - * @hdd_ctx: HDD context. - * - * Create RX wakelock. - * - * Return: None. - */ -static void hdd_rx_wake_lock_create(hdd_context_t *hdd_ctx) -{ - qdf_wake_lock_create(&hdd_ctx->rx_wake_lock, "qcom_rx_wakelock"); -} - /** * hdd_roc_context_init() - Init ROC context * @hdd_ctx: HDD context. @@ -6349,8 +6323,6 @@ static int hdd_context_deinit(hdd_context_t *hdd_ctx) hdd_sap_context_destroy(hdd_ctx); - hdd_rx_wake_lock_destroy(hdd_ctx); - hdd_tdls_context_destroy(hdd_ctx); hdd_scan_context_destroy(hdd_ctx); @@ -6685,7 +6657,7 @@ QDF_STATUS hdd_post_cds_enable_config(hdd_context_t *hdd_ctx) return QDF_STATUS_SUCCESS; } -hdd_adapter_t *hdd_get_first_valid_adapter() +hdd_adapter_t *hdd_get_first_valid_adapter(void) { hdd_adapter_list_node_t *adapterNode = NULL, *pNext = NULL; hdd_adapter_t *adapter; @@ -8567,8 +8539,6 @@ static int hdd_context_init(hdd_context_t *hdd_ctx) hdd_tdls_context_init(hdd_ctx, false); - hdd_rx_wake_lock_create(hdd_ctx); - ret = hdd_sap_context_init(hdd_ctx); if (ret) goto scan_destroy; @@ -8599,7 +8569,6 @@ static int hdd_context_init(hdd_context_t *hdd_ctx) scan_destroy: hdd_scan_context_destroy(hdd_ctx); - hdd_rx_wake_lock_destroy(hdd_ctx); hdd_tdls_context_destroy(hdd_ctx); list_destroy: diff --git a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_softap_tx_rx.c b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_softap_tx_rx.c index cbcae60029f3..ca8824ab7c31 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_softap_tx_rx.c +++ b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_softap_tx_rx.c @@ -940,18 +940,6 @@ QDF_STATUS hdd_softap_rx_packet_cbk(void *context, qdf_nbuf_t rxBuf) skb->protocol = eth_type_trans(skb, skb->dev); - /* hold configurable wakelock for unicast traffic */ - if (pHddCtx->config->rx_wakelock_timeout && - skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) { - cds_host_diag_log_work(&pHddCtx->rx_wake_lock, - pHddCtx->config->rx_wakelock_timeout, - WIFI_POWER_EVENT_WAKELOCK_HOLD_RX); - qdf_wake_lock_timeout_acquire(&pHddCtx->rx_wake_lock, - pHddCtx->config-> - rx_wakelock_timeout); - } - /* Remove SKB from internal tracking table before submitting * it to stack */ diff --git a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_tx_rx.c b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_tx_rx.c index 3ff0ad2d8564..9e1b84777796 100644 --- a/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_tx_rx.c +++ b/drivers/staging/qcacld-3.0/core/hdd/src/wlan_hdd_tx_rx.c @@ -1494,66 +1494,6 @@ static bool hdd_is_duplicate_ip_arp(struct sk_buff *skb) return false; } -/** - * hdd_is_arp_local() - check if local or non local arp - * @skb: pointer to sk_buff - * - * Return: true if local arp or false otherwise. - */ -static bool hdd_is_arp_local(struct sk_buff *skb) -{ - struct arphdr *arp; - struct in_ifaddr **ifap = NULL; - struct in_ifaddr *ifa = NULL; - struct in_device *in_dev; - unsigned char *arp_ptr; - __be32 tip; - - arp = (struct arphdr *)skb->data; - if (arp->ar_op == htons(ARPOP_REQUEST)) { - in_dev = __in_dev_get_rtnl(skb->dev); - if (in_dev) { - for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; - ifap = &ifa->ifa_next) { - if (!strcmp(skb->dev->name, ifa->ifa_label)) - break; - } - } - - if (ifa && ifa->ifa_local) { - arp_ptr = (unsigned char *)(arp + 1); - arp_ptr += (skb->dev->addr_len + 4 + - skb->dev->addr_len); - memcpy(&tip, arp_ptr, 4); - hdd_debug("ARP packet: local IP: %x dest IP: %x", - ifa->ifa_local, tip); - if (ifa->ifa_local == tip) - return true; - } - } - - return false; -} - -/** - * hdd_is_rx_wake_lock_needed() - check if wake lock is needed - * @skb: pointer to sk_buff - * - * RX wake lock is needed for: - * 1) Unicast data packet OR - * 2) Local ARP data packet - * - * Return: true if wake lock is needed or false otherwise. - */ -static bool hdd_is_rx_wake_lock_needed(struct sk_buff *skb) -{ - if ((skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) || hdd_is_arp_local(skb)) - return true; - - return false; -} - #ifdef WLAN_FEATURE_TSF_PLUS static inline void hdd_tsf_timestamp_rx(hdd_context_t *hdd_ctx, qdf_nbuf_t netbuf, @@ -1896,7 +1836,6 @@ QDF_STATUS hdd_rx_packet_cbk(void *context, qdf_nbuf_t rxBuf) struct sk_buff *skb = NULL; hdd_station_ctx_t *pHddStaCtx = NULL; unsigned int cpu_index; - bool wake_lock = false; bool is_arp = false; bool track_arp = false; uint8_t pkt_type = 0; @@ -1993,20 +1932,6 @@ QDF_STATUS hdd_rx_packet_cbk(void *context, qdf_nbuf_t rxBuf) return QDF_STATUS_SUCCESS; } - /* hold configurable wakelock for unicast traffic */ - if (pHddCtx->config->rx_wakelock_timeout && - pHddStaCtx->conn_info.uIsAuthenticated) - wake_lock = hdd_is_rx_wake_lock_needed(skb); - - if (wake_lock) { - cds_host_diag_log_work(&pHddCtx->rx_wake_lock, - pHddCtx->config->rx_wakelock_timeout, - WIFI_POWER_EVENT_WAKELOCK_HOLD_RX); - qdf_wake_lock_timeout_acquire(&pHddCtx->rx_wake_lock, - pHddCtx->config-> - rx_wakelock_timeout); - } - /* Remove SKB from internal tracking table before submitting * it to stack */ diff --git a/drivers/staging/qcacld-3.0/core/mac/src/include/parser_api.h b/drivers/staging/qcacld-3.0/core/mac/src/include/parser_api.h index 8affc7789f8c..b75d521e5a5c 100644 --- a/drivers/staging/qcacld-3.0/core/mac/src/include/parser_api.h +++ b/drivers/staging/qcacld-3.0/core/mac/src/include/parser_api.h @@ -1108,7 +1108,7 @@ tSirRetStatus populate_dot11f_rrm_ie(tpAniSirGlobal pMac, tpPESession psessionEntry); void populate_mdie(tpAniSirGlobal pMac, - tDot11fIEMobilityDomain * pDot11f, uint8_t mdie[]); + tDot11fIEMobilityDomain * pDot11f, uint8_t mdie[SIR_MDIE_SIZE]); void populate_ft_info(tpAniSirGlobal pMac, tDot11fIEFTInfo *pDot11f); void populate_dot11f_assoc_rsp_rates(tpAniSirGlobal pMac, diff --git a/drivers/staging/qcacld-3.0/core/sme/src/csr/csr_api_roam.c b/drivers/staging/qcacld-3.0/core/sme/src/csr/csr_api_roam.c index 51ff2f868b1c..ad2d7b9208cf 100644 --- a/drivers/staging/qcacld-3.0/core/sme/src/csr/csr_api_roam.c +++ b/drivers/staging/qcacld-3.0/core/sme/src/csr/csr_api_roam.c @@ -4483,8 +4483,8 @@ QDF_STATUS csr_roam_prepare_bss_config(tpAniSirGlobal pMac, if (((pBssConfig->uCfgDot11Mode == eCSR_CFG_DOT11_MODE_11N) || (pBssConfig->uCfgDot11Mode == eCSR_CFG_DOT11_MODE_11AC)) && ((pBssConfig->qosType != eCSR_MEDIUM_ACCESS_WMM_eDCF_DSCP) - || (pBssConfig->qosType != eCSR_MEDIUM_ACCESS_11e_HCF) - || (pBssConfig->qosType != eCSR_MEDIUM_ACCESS_11e_eDCF))) { + && (pBssConfig->qosType != eCSR_MEDIUM_ACCESS_11e_HCF) + && (pBssConfig->qosType != eCSR_MEDIUM_ACCESS_11e_eDCF))) { /* Joining BSS is 11n capable and WMM is disabled on AP. */ /* Assume all HT AP's are QOS AP's and enable WMM */ pBssConfig->qosType = eCSR_MEDIUM_ACCESS_WMM_eDCF_DSCP; diff --git a/drivers/staging/qcacld-3.0/core/wma/src/wma_utils.c b/drivers/staging/qcacld-3.0/core/wma/src/wma_utils.c index d46b2a34f410..7a33a4081056 100644 --- a/drivers/staging/qcacld-3.0/core/wma/src/wma_utils.c +++ b/drivers/staging/qcacld-3.0/core/wma/src/wma_utils.c @@ -1280,7 +1280,7 @@ static int wma_unified_link_peer_stats_event_handler(void *handle, size_t peer_info_size, peer_stats_size, rate_stats_size; size_t link_stats_results_size; bool excess_data = false; - uint32_t buf_len; + uint32_t buf_len = 0; tpAniSirGlobal pMac = cds_get_context(QDF_MODULE_ID_PE); diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile index 83872854e4a4..39d56b6d9549 100644 --- a/drivers/thermal/qcom/Makefile +++ b/drivers/thermal/qcom/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_QCOM_TSENS) += qcom_tsens.o qcom_tsens-y += tsens.o tsens-common.o tsens-8916.o tsens-8974.o tsens-8960.o tsens-8996.o obj-$(CONFIG_MSM_BCL_PERIPHERAL_CTL) += bcl_peripheral.o -obj-$(CONFIG_QTI_THERMAL_LIMITS_DCVS) += msm_lmh_dcvs.o lmh_dbg.o +obj-y += msm_lmh_dcvs.o lmh_dbg.o obj-$(CONFIG_QTI_VIRTUAL_SENSOR) += qti_virtual_sensor.o obj-$(CONFIG_QTI_AOP_REG_COOLING_DEVICE) += regulator_aop_cdev.o obj-$(CONFIG_REGULATOR_COOLING_DEVICE) += regulator_cdev.o diff --git a/drivers/thermal/qcom/msm_lmh_dcvs.c b/drivers/thermal/qcom/msm_lmh_dcvs.c index 1b00aebe99a9..5a208733b12b 100644 --- a/drivers/thermal/qcom/msm_lmh_dcvs.c +++ b/drivers/thermal/qcom/msm_lmh_dcvs.c @@ -91,6 +91,8 @@ struct __limits_cdev_data { u32 min_freq; }; +static bool lmh_enabled = false; + struct limits_dcvs_hw { char sensor_name[THERMAL_NAME_LENGTH]; uint32_t affinity; @@ -346,6 +348,9 @@ static int enable_lmh(void) int ret = 0; struct scm_desc desc_arg; + if (lmh_enabled) + return 0; + desc_arg.args[0] = 1; desc_arg.arginfo = SCM_ARGS(1, SCM_VAL); ret = scm_call2(SCM_SIP_FNID(SCM_SVC_LMH, LIMITS_PROFILE_CHANGE), @@ -355,6 +360,8 @@ static int enable_lmh(void) return ret; } + lmh_enabled = true; + return ret; } @@ -595,6 +602,13 @@ static int limits_dcvs_probe(struct platform_device *pdev) if (ret) return ret; + if (!IS_ENABLED(CONFIG_QTI_THERMAL_LIMITS_DCVS)) { + limits_isens_vref_ldo_init(pdev, hw); + devm_kfree(&pdev->dev, hw->cdev_data); + devm_kfree(&pdev->dev, hw); + return 0; + } + /* * Setup virtual thermal zones for each LMH-DCVS hardware * The sensor does not do actual thermal temperature readings diff --git a/drivers/thermal/qpnp-temp-alarm.c b/drivers/thermal/qpnp-temp-alarm.c index 1e3d76c0777b..987adfa2af49 100644 --- a/drivers/thermal/qpnp-temp-alarm.c +++ b/drivers/thermal/qpnp-temp-alarm.c @@ -347,7 +347,7 @@ static irqreturn_t qpnp_tm_isr(int irq, void *data) { struct qpnp_tm_chip *chip = data; - schedule_delayed_work(&chip->irq_work, + queue_delayed_work(system_power_efficient_wq, &chip->irq_work, msecs_to_jiffies(STATUS_REGISTER_DELAY_MS) + 1); return IRQ_HANDLED; diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 4bbb47aab15d..4b40e8314f3b 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -53,6 +53,12 @@ static unsigned long get_target_state(struct thermal_instance *instance, unsigned long cur_state; unsigned long next_target; + /* + * If the throttle condition is not reached and there is no + * previous mitigaiton request, then there is nothing to compute. + */ + if (!throttle && instance->target == THERMAL_NO_TARGET) + return THERMAL_NO_TARGET; /* * We keep this instance the way it is by default. * Otherwise, we use the current state of the diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 02cf835132de..40df4792649d 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -405,10 +405,12 @@ static void thermal_zone_device_set_polling(struct workqueue_struct *queue, int delay) { if (delay > 1000) - mod_delayed_work(queue, &tz->poll_queue, + mod_delayed_work(system_freezable_power_efficient_wq, + &tz->poll_queue, round_jiffies(msecs_to_jiffies(delay))); else if (delay) - mod_delayed_work(queue, &tz->poll_queue, + mod_delayed_work(system_freezable_power_efficient_wq, + &tz->poll_queue, msecs_to_jiffies(delay)); else cancel_delayed_work(&tz->poll_queue); @@ -604,7 +606,7 @@ static void update_temperature(struct thermal_zone_device *tz) ret = thermal_zone_get_temp(tz, &temp); if (ret) { if (ret != -EAGAIN) - dev_warn(&tz->device, + dev_dbg(&tz->device, "failed to read out thermal zone (%d)\n", ret); return; @@ -2710,8 +2712,7 @@ static int __init thermal_init(void) int result; thermal_passive_wq = alloc_workqueue("thermal_passive_wq", - WQ_HIGHPRI | WQ_UNBOUND - | WQ_FREEZABLE, + WQ_UNBOUND | WQ_FREEZABLE, THERMAL_MAX_ACTIVE); if (!thermal_passive_wq) { result = -ENOMEM; diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c index d099b32d40cb..1d16e994c210 100644 --- a/drivers/tty/serial/msm_serial_hs.c +++ b/drivers/tty/serial/msm_serial_hs.c @@ -2858,25 +2858,25 @@ struct msm_serial_hs_platform_data pdata->uart_tx_gpio = of_get_named_gpio(node, "qcom,tx-gpio", 0); if (pdata->uart_tx_gpio < 0) - pr_err("uart_tx_gpio is not available\n"); + pr_debug("uart_tx_gpio is not available\n"); /* UART RX GPIO */ pdata->uart_rx_gpio = of_get_named_gpio(node, "qcom,rx-gpio", 0); if (pdata->uart_rx_gpio < 0) - pr_err("uart_rx_gpio is not available\n"); + pr_debug("uart_rx_gpio is not available\n"); /* UART CTS GPIO */ pdata->uart_cts_gpio = of_get_named_gpio(node, "qcom,cts-gpio", 0); if (pdata->uart_cts_gpio < 0) - pr_err("uart_cts_gpio is not available\n"); + pr_debug("uart_cts_gpio is not available\n"); /* UART RFR GPIO */ pdata->uart_rfr_gpio = of_get_named_gpio(node, "qcom,rfr-gpio", 0); if (pdata->uart_rfr_gpio < 0) - pr_err("uart_rfr_gpio is not available\n"); + pr_debug("uart_rfr_gpio is not available\n"); pdata->no_suspend_delay = of_property_read_bool(node, "qcom,no-suspend-delay"); diff --git a/drivers/usb/dwc3/debug.c b/drivers/usb/dwc3/debug.c index fc2644067bbf..487b4e573563 100644 --- a/drivers/usb/dwc3/debug.c +++ b/drivers/usb/dwc3/debug.c @@ -19,9 +19,9 @@ #include -static unsigned int ep_addr_rxdbg_mask = 1; +static unsigned int ep_addr_rxdbg_mask; module_param(ep_addr_rxdbg_mask, uint, 0644); -static unsigned int ep_addr_txdbg_mask = 1; +static unsigned int ep_addr_txdbg_mask; module_param(ep_addr_txdbg_mask, uint, 0644); void dwc3_trace(void (*trace)(struct va_format *), const char *fmt, ...) diff --git a/drivers/usb/dwc3/dwc3-msm.c b/drivers/usb/dwc3/dwc3-msm.c index 3f409c481b00..c3c297a4f2eb 100644 --- a/drivers/usb/dwc3/dwc3-msm.c +++ b/drivers/usb/dwc3/dwc3-msm.c @@ -3669,19 +3669,13 @@ static int dwc3_msm_probe(struct platform_device *pdev) goto uninit_iommu; } - ret = of_platform_populate(node, NULL, NULL, &pdev->dev); - if (ret) { - dev_err(&pdev->dev, - "failed to add create dwc3 core\n"); - of_node_put(dwc3_node); - goto uninit_iommu; - } + of_platform_device_create(dwc3_node, NULL, &pdev->dev); mdwc->dwc3 = of_find_device_by_node(dwc3_node); of_node_put(dwc3_node); if (!mdwc->dwc3) { dev_err(&pdev->dev, "failed to get dwc3 platform device\n"); - goto put_dwc3; + goto uninit_iommu; } mdwc->hs_phy = devm_usb_get_phy_by_phandle(&mdwc->dwc3->dev, @@ -3689,14 +3683,14 @@ static int dwc3_msm_probe(struct platform_device *pdev) if (IS_ERR(mdwc->hs_phy)) { dev_err(&pdev->dev, "unable to get hsphy device\n"); ret = PTR_ERR(mdwc->hs_phy); - goto put_dwc3; + goto uninit_iommu; } mdwc->ss_phy = devm_usb_get_phy_by_phandle(&mdwc->dwc3->dev, "usb-phy", 1); if (IS_ERR(mdwc->ss_phy)) { dev_err(&pdev->dev, "unable to get ssphy device\n"); ret = PTR_ERR(mdwc->ss_phy); - goto put_dwc3; + goto uninit_iommu; } mdwc->bus_scale_table = msm_bus_cl_get_pdata(pdev); @@ -3825,7 +3819,6 @@ static int dwc3_msm_probe(struct platform_device *pdev) arm_iommu_detach_device(mdwc->dev); arm_iommu_release_mapping(mdwc->iommu_map); } - of_platform_depopulate(&pdev->dev); err: destroy_workqueue(mdwc->dwc3_wq); return ret; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f2c80e6379aa..0f1d379b06aa 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3338,7 +3338,7 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) */ WARN_ONCE(dwc->revision < DWC3_REVISION_240A && dwc->has_lpm_erratum, - "LPM Erratum not available on dwc3 revisisions < 2.40a\n"); + "LPM Erratum not available on dwc3 revisions < 2.40a\n"); if (dwc->has_lpm_erratum && dwc->revision >= DWC3_REVISION_240A) reg |= DWC3_DCTL_LPM_ERRATA(dwc->lpm_nyet_threshold); diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index a2bb276a8b24..e6f0c9403f3f 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -260,6 +260,133 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * dst1 += p->fix.line_length; s += spitch; } +} + +/* + * Optimized fast_imageblit for bpp == 16. ppw = 2, bit_mask = 3 folded + * into the code, main loop unrolled. + */ + +static inline void fast_imageblit16(const struct fb_image *image, + struct fb_info *p, u8 __iomem * dst1, + u32 fgcolor, u32 bgcolor) +{ + u32 fgx = fgcolor, bgx = bgcolor; + u32 spitch = (image->width + 7) / 8; + u32 end_mask, eorx; + const char *s = image->data, *src; + u32 __iomem *dst; + const u32 *tab = NULL; + int i, j, k; + + tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le; + + fgx <<= 16; + bgx <<= 16; + fgx |= fgcolor; + bgx |= bgcolor; + + eorx = fgx ^ bgx; + k = image->width / 2; + + for (i = image->height; i--;) { + dst = (u32 __iomem *) dst1; + src = s; + + j = k; + while (j >= 4) { + u8 bits = *src; + end_mask = tab[(bits >> 6) & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 4) & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 2) & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[bits & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + src++; + j -= 4; + } + if (j != 0) { + u8 bits = *src; + end_mask = tab[(bits >> 6) & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + if (j >= 2) { + end_mask = tab[(bits >> 4) & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + if (j == 3) { + end_mask = tab[(bits >> 2) & 3]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst); + } + } + } + dst1 += p->fix.line_length; + s += spitch; + } +} + +/* + * Optimized fast_imageblit for bpp == 32. ppw = 1, bit_mask = 1 folded + * into the code, main loop unrolled. + */ + +static inline void fast_imageblit32(const struct fb_image *image, + struct fb_info *p, u8 __iomem * dst1, + u32 fgcolor, u32 bgcolor) +{ + u32 fgx = fgcolor, bgx = bgcolor; + u32 spitch = (image->width + 7) / 8; + u32 end_mask, eorx; + const char *s = image->data, *src; + u32 __iomem *dst; + const u32 *tab = NULL; + int i, j, k; + + tab = cfb_tab32; + + eorx = fgx ^ bgx; + k = image->width; + + for (i = image->height; i--;) { + dst = (u32 __iomem *) dst1; + src = s; + + j = k; + while (j >= 8) { + u8 bits = *src; + end_mask = tab[(bits >> 7) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 6) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 5) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 4) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 3) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 2) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[(bits >> 1) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + end_mask = tab[bits & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + src++; + j -= 8; + } + if (j != 0) { + u32 bits = (u32) * src; + while (j > 1) { + end_mask = tab[(bits >> 7) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst++); + bits <<= 1; + j--; + } + end_mask = tab[(bits >> 7) & 1]; + FB_WRITEL((end_mask & eorx) ^ bgx, dst); + } + dst1 += p->fix.line_length; + s += spitch; + } } void cfb_imageblit(struct fb_info *p, const struct fb_image *image) diff --git a/drivers/video/fbdev/msm/mdss_fb.c b/drivers/video/fbdev/msm/mdss_fb.c index 72f651e83e4b..7894e31d2baa 100644 --- a/drivers/video/fbdev/msm/mdss_fb.c +++ b/drivers/video/fbdev/msm/mdss_fb.c @@ -1625,6 +1625,7 @@ static struct platform_driver mdss_fb_driver = { .name = "mdss_fb", .of_match_table = mdss_fb_dt_match, .pm = &mdss_fb_pm_ops, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; diff --git a/drivers/video/fbdev/msm/mdss_mdp.c b/drivers/video/fbdev/msm/mdss_mdp.c index a6d43a6cd57b..7c9acd38ce94 100644 --- a/drivers/video/fbdev/msm/mdss_mdp.c +++ b/drivers/video/fbdev/msm/mdss_mdp.c @@ -4508,7 +4508,7 @@ struct mdss_panel_cfg *mdss_panel_intf_type(int intf_val) } EXPORT_SYMBOL(mdss_panel_intf_type); -struct irq_info *mdss_intr_line() +struct irq_info *mdss_intr_line(void) { return mdss_mdp_hw.irq_info; } diff --git a/drivers/video/fbdev/msm/mdss_mdp_overlay.c b/drivers/video/fbdev/msm/mdss_mdp_overlay.c index 672b634c7155..0514d52bfdae 100644 --- a/drivers/video/fbdev/msm/mdss_mdp_overlay.c +++ b/drivers/video/fbdev/msm/mdss_mdp_overlay.c @@ -6420,7 +6420,7 @@ static int __vsync_retire_setup(struct msm_fb_data_type *mfd) { struct mdss_overlay_private *mdp5_data = mfd_to_mdp5_data(mfd); char name[24]; - struct sched_param param = { .sched_priority = 5 }; + struct sched_param param = { .sched_priority = 9 }; snprintf(name, sizeof(name), "mdss_fb%d_retire", mfd->index); mfd->mdp_sync_pt_data.timeline_retire = mdss_create_timeline(name); diff --git a/drivers/video/fbdev/msm/mdss_rotator.c b/drivers/video/fbdev/msm/mdss_rotator.c index 4952e7ec13b9..a55e72272934 100644 --- a/drivers/video/fbdev/msm/mdss_rotator.c +++ b/drivers/video/fbdev/msm/mdss_rotator.c @@ -850,6 +850,7 @@ static int mdss_rotator_init_queue(struct mdss_rot_mgr *mgr) { int i, size, ret = 0; char name[32]; + struct sched_param param = { .sched_priority = 9 }; size = sizeof(struct mdss_rot_queue) * mgr->queue_count; mgr->queues = devm_kzalloc(&mgr->pdev->dev, size, GFP_KERNEL); diff --git a/drivers/video/fbdev/msm/mdss_util.c b/drivers/video/fbdev/msm/mdss_util.c index 30fcf28e25e4..0f2e166bb5c3 100644 --- a/drivers/video/fbdev/msm/mdss_util.c +++ b/drivers/video/fbdev/msm/mdss_util.c @@ -237,7 +237,7 @@ struct mdss_util_intf mdss_util = { .display_disabled = false }; -struct mdss_util_intf *mdss_get_util_intf() +struct mdss_util_intf *mdss_get_util_intf(void) { return &mdss_util; } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 1b76e8a99c40..67a6f62e3313 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -747,7 +747,7 @@ static void __init balloon_add_region(unsigned long start_pfn, for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { page = pfn_to_page(pfn); - /* totalram_pages and totalhigh_pages do not + /* totalram_pages() and totalhigh_pages() do not include the boot-time balloon extension, so don't subtract from it. */ __balloon_append(page); diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 66620713242a..21e96adb00b8 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -188,7 +188,7 @@ static void selfballoon_process(struct work_struct *work) bool reset_timer = false; if (xen_selfballooning_enabled) { - cur_pages = totalram_pages; + cur_pages = totalram_pages(); tgt_pages = cur_pages; /* default is no change */ goal_pages = vm_memory_committed() + totalreserve_pages + @@ -226,7 +226,7 @@ static void selfballoon_process(struct work_struct *work) if (tgt_pages < floor_pages) tgt_pages = floor_pages; balloon_set_new_target(tgt_pages + - balloon_stats.current_pages - totalram_pages); + balloon_stats.current_pages - totalram_pages()); reset_timer = true; } #ifdef CONFIG_FRONTSWAP @@ -568,7 +568,7 @@ int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) * much more reliably and response faster in some cases. */ if (!selfballoon_reserved_mb) { - reserve_pages = totalram_pages / 10; + reserve_pages = totalram_pages() / 10; selfballoon_reserved_mb = PAGES2MB(reserve_pages); } schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); diff --git a/fs/Kconfig b/fs/Kconfig index 90276c9ddd09..5f4245109c2e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -127,6 +127,7 @@ if BLOCK menu "DOS/FAT/NT Filesystems" source "fs/fat/Kconfig" +source "fs/exfat/Kconfig" source "fs/ntfs/Kconfig" endmenu @@ -252,6 +253,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" +source "fs/erofs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 0e97f4b85bee..d3ad13c5b092 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ +obj-$(CONFIG_EXFAT_FS) += exfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ @@ -131,3 +132,4 @@ obj-y += exofs/ # Multiple modules obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ +obj-$(CONFIG_EROFS_FS) += erofs/ diff --git a/fs/aio.c b/fs/aio.c index c3fc80294397..04f38b382446 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1616,7 +1616,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, struct kioctx *ctx; long ret = 0; int i = 0; - struct blk_plug plug; if (unlikely(nr < 0)) return -EINVAL; @@ -1633,8 +1632,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, return -EINVAL; } - blk_start_plug(&plug); - /* * AKPM: should this return a partial result if some of the IOs were * successfully submitted? @@ -1657,7 +1654,6 @@ long do_io_submit(aio_context_t ctx_id, long nr, if (ret) break; } - blk_finish_plug(&plug); percpu_ref_put(&ctx->users); return i ? i : ret; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 73cd7482c1fa..cf09ea3edc93 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -146,6 +146,25 @@ static int padzero(unsigned long elf_bss) #define ELF_BASE_PLATFORM NULL #endif +/* + * Use get_random_int() to implement AT_RANDOM while avoiding depletion + * of the entropy pool. + */ +static void get_atrandom_bytes(unsigned char *buf, size_t nbytes) +{ + unsigned char *p = buf; + + while (nbytes) { + unsigned int random_variable; + size_t chunk = min(nbytes, sizeof(random_variable)); + + random_variable = get_random_int(); + memcpy(p, &random_variable, chunk); + p += chunk; + nbytes -= chunk; + } +} + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -207,7 +226,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, /* * Generate 16 random bytes for userspace PRNG seeding. */ - get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes)); + get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes)); u_rand_bytes = (elf_addr_t __user *) STACK_ALLOC(p, sizeof(k_rand_bytes)); if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes))) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 57401b474ec6..14476074957a 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -167,8 +167,7 @@ static u8 *alloc_bitmap(u32 bitmap_size) if (mem) return mem; - return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + return __vmalloc(bitmap_size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL); } int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 9f18635f78c7..84e243f07a7d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -711,7 +711,7 @@ static inline int default_congestion_kb(void) * This allows larger machines to have larger/more transfers. * Limit the default to 256M */ - congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10); if (congestion_kb > 256*1024) congestion_kb = 256*1024; diff --git a/fs/dcache.c b/fs/dcache.c index 3934c29049c0..e60905757c37 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -82,7 +82,7 @@ * dentry1->d_lock * dentry2->d_lock */ -int sysctl_vfs_cache_pressure __read_mostly = 100; +int sysctl_vfs_cache_pressure __read_mostly = 50; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 44de6b82b16f..a5c3b45a105a 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ @@ -73,3 +75,45 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, } return 0; } + +static void drop_caches_now(struct work_struct *work); +static DECLARE_WORK(drop_caches_now_work, drop_caches_now); + +static void drop_caches_now(struct work_struct *work) +{ + /* sleep for 200ms */ + msleep(200); + /* sync */ + emergency_sync(); + /* echo "1" > /proc/sys/vm/drop_caches */ + iterate_supers(drop_pagecache_sb, NULL); +} + +static int fb_notifier(struct notifier_block *self, + unsigned long event, void *data) +{ + struct fb_event *evdata = (struct fb_event *)data; + + if ((event == FB_EVENT_BLANK) && evdata && evdata->data) { + int blank = *(int *)evdata->data; + + if (blank == FB_BLANK_POWERDOWN) { + schedule_work_on(0, &drop_caches_now_work); + return NOTIFY_OK; + } + return NOTIFY_OK; + } + return NOTIFY_DONE; +} + +static struct notifier_block fb_notifier_block = { + .notifier_call = fb_notifier, + .priority = -1, +}; + +static int __init drop_caches_init(void) +{ + fb_register_client(&fb_notifier_block); + return 0; +} +late_initcall(drop_caches_init); diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig new file mode 100644 index 000000000000..858b3339f381 --- /dev/null +++ b/fs/erofs/Kconfig @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config EROFS_FS + tristate "EROFS filesystem support" + depends on BLOCK + select LIBCRC32C + help + EROFS (Enhanced Read-Only File System) is a lightweight + read-only file system with modern designs (eg. page-sized + blocks, inline xattrs/data, etc.) for scenarios which need + high-performance read-only requirements, e.g. Android OS + for mobile phones and LIVECDs. + + It also provides fixed-sized output compression support, + which improves storage density, keeps relatively higher + compression ratios, which is more useful to achieve high + performance for embedded devices with limited memory. + + If unsure, say N. + +config EROFS_FS_DEBUG + bool "EROFS debugging feature" + depends on EROFS_FS + help + Print debugging messages and enable more BUG_ONs which check + filesystem consistency and find potential issues aggressively, + which can be used for Android eng build, for example. + + For daily use, say N. + +config EROFS_FS_XATTR + bool "EROFS extended attributes" + depends on EROFS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + for details). + + If unsure, say N. + +config EROFS_FS_POSIX_ACL + bool "EROFS Access Control Lists" + depends on EROFS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website . + + If you don't know what Access Control Lists are, say N. + +config EROFS_FS_SECURITY + bool "EROFS Security Labels" + depends on EROFS_FS_XATTR + default y + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the erofs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. + +config EROFS_FS_ZIP + bool "EROFS Data Compression Support" + depends on EROFS_FS + select LZ4_DECOMPRESS + default y + help + Enable fixed-sized output compression for EROFS. + + If you don't want to enable compression feature, say N. + diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile new file mode 100644 index 000000000000..1f9aced49070 --- /dev/null +++ b/fs/erofs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_EROFS_FS) += erofs.o +erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o +erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o +erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o diff --git a/fs/erofs/TODO b/fs/erofs/TODO new file mode 100644 index 000000000000..f99ddb842f99 --- /dev/null +++ b/fs/erofs/TODO @@ -0,0 +1,45 @@ + +EROFS is still working in progress, thus it is not suitable +for all productive uses. play at your own risk :) + +TODO List: + - add the missing error handling code + (mainly existed in xattr and decompression submodules); + + - finalize erofs ondisk format design (which means that + minor on-disk revisions could happen later); + + - documentation and detailed technical analysis; + + - general code review and clean up + (including confusing variable names and code snippets); + + - support larger compressed clustersizes for selection + (currently erofs only works as expected with the page-sized + compressed cluster configuration, usually 4KB); + + - support more lossless data compression algorithms + in addition to LZ4 algorithms in VLE approach; + + - data deduplication and other useful features. + +erofs-mkfs (preview version) binaries for i386 / x86_64 are available at: + + https://github.com/hsiangkao/erofs_mkfs_binary + +It is still in progress opening mkfs source code to public, +in any case an open-source mkfs will be released in the near future. + + +Code, suggestions, etc, are welcome. Please feel free to +ask and send patches, + +To: + linux-erofs mailing list + Gao Xiang + Chao Yu + +Cc: (for linux-kernel upstream patches) + Greg Kroah-Hartman + linux-staging mailing list + diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h new file mode 100644 index 000000000000..3701c72bacb2 --- /dev/null +++ b/fs/erofs/compress.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#ifndef __EROFS_FS_COMPRESS_H +#define __EROFS_FS_COMPRESS_H + +#include "internal.h" + +enum { + Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, + Z_EROFS_COMPRESSION_RUNTIME_MAX +}; + +struct z_erofs_decompress_req { + struct super_block *sb; + struct page **in, **out; + + unsigned short pageofs_out; + unsigned int inputsize, outputsize; + + /* indicate the algorithm will be used for decompression */ + unsigned int alg; + bool inplace_io, partial_decoding; +}; + +/* some special page->private (unsigned long, see below) */ +#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2) +#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2) + +/* + * For all pages in a pcluster, page->private should be one of + * Type Last 2bits page->private + * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE + * preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE + * cached/managed page 00 pointer to z_erofs_pcluster + * online page (file-backed, 01/10/11 sub-index << 2 | count + * some pages can be used for inplace I/O) + * + * page->mapping should be one of + * Type page->mapping + * short-lived page NULL + * preallocated page NULL + * cached/managed page non-NULL or NULL (invalidated/truncated page) + * online page non-NULL + * + * For all managed pages, PG_private should be set with 1 extra refcount, + * which is used for page reclaim / migration. + */ + +/* + * short-lived pages are pages directly from buddy system with specific + * page->private (no need to set PagePrivate since these are non-LRU / + * non-movable pages and bypass reclaim / migration code). + */ +static inline bool z_erofs_is_shortlived_page(struct page *page) +{ + if (page->private != Z_EROFS_SHORTLIVED_PAGE) + return false; + + DBG_BUGON(page->mapping); + return true; +} + +static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool, + struct page *page) +{ + if (!z_erofs_is_shortlived_page(page)) + return false; + + /* short-lived pages should not be used by others at the same time */ + if (page_ref_count(page) > 1) { + put_page(page); + } else { + /* follow the pcluster rule above. */ + set_page_private(page, 0); + list_add(&page->lru, pagepool); + } + return true; +} + +int z_erofs_decompress(struct z_erofs_decompress_req *rq, + struct list_head *pagepool); + +#endif diff --git a/fs/erofs/data.c b/fs/erofs/data.c new file mode 100644 index 000000000000..8704816c8db2 --- /dev/null +++ b/fs/erofs/data.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "internal.h" +#include + +#include + +static void erofs_readendio(struct bio *bio) +{ + int i; + struct bio_vec *bvec; + + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + + /* page is already locked */ + DBG_BUGON(PageUptodate(page)); + + if (bio->bi_error) + SetPageError(page); + else + SetPageUptodate(page); + + unlock_page(page); + /* page could be reclaimed now */ + } + bio_put(bio); +} + +struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) +{ + struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; + struct page *page; + + page = read_cache_page_gfp(mapping, blkaddr, + mapping_gfp_constraint(mapping, ~__GFP_FS)); + /* should already be PageUptodate */ + if (!IS_ERR(page)) + lock_page(page); + return page; +} + +static int erofs_map_blocks_flatmode(struct inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + int err = 0; + erofs_blk_t nblocks, lastblk; + u64 offset = map->m_la; + struct erofs_inode *vi = EROFS_I(inode); + bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); + + trace_erofs_map_blocks_flatmode_enter(inode, map, flags); + + nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); + lastblk = nblocks - tailendpacking; + + if (offset >= inode->i_size) { + /* leave out-of-bound access unmapped */ + map->m_flags = 0; + map->m_plen = 0; + goto out; + } + + /* there is no hole in flatmode */ + map->m_flags = EROFS_MAP_MAPPED; + + if (offset < blknr_to_addr(lastblk)) { + map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; + map->m_plen = blknr_to_addr(lastblk) - offset; + } else if (tailendpacking) { + /* 2 - inode inline B: inode, [xattrs], inline last blk... */ + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); + + map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize + + vi->xattr_isize + erofs_blkoff(map->m_la); + map->m_plen = inode->i_size - offset; + + /* inline data should be located in one meta block */ + if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { + erofs_err(inode->i_sb, + "inline data cross block boundary @ nid %llu", + vi->nid); + DBG_BUGON(1); + err = -EFSCORRUPTED; + goto err_out; + } + + map->m_flags |= EROFS_MAP_META; + } else { + erofs_err(inode->i_sb, + "internal error @ nid: %llu (size %llu), m_la 0x%llx", + vi->nid, inode->i_size, map->m_la); + DBG_BUGON(1); + err = -EIO; + goto err_out; + } + +out: + map->m_llen = map->m_plen; + +err_out: + trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0); + return err; +} + +static inline struct bio *erofs_read_raw_page(struct bio *bio, + struct address_space *mapping, + struct page *page, + erofs_off_t *last_block, + unsigned int nblocks, + bool ra) +{ + struct inode *const inode = mapping->host; + struct super_block *const sb = inode->i_sb; + erofs_off_t current_block = (erofs_off_t)page->index; + int err; + + DBG_BUGON(!nblocks); + + if (PageUptodate(page)) { + err = 0; + goto has_updated; + } + + /* note that for readpage case, bio also equals to NULL */ + if (bio && + /* not continuous */ + *last_block + 1 != current_block) { +submit_bio_retry: + submit_bio(bio); + bio = NULL; + } + + if (!bio) { + struct erofs_map_blocks map = { + .m_la = blknr_to_addr(current_block), + }; + erofs_blk_t blknr; + unsigned int blkoff; + + err = erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW); + if (err) + goto err_out; + + /* zero out the holed page */ + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + zero_user_segment(page, 0, PAGE_SIZE); + SetPageUptodate(page); + + /* imply err = 0, see erofs_map_blocks */ + goto has_updated; + } + + /* for RAW access mode, m_plen must be equal to m_llen */ + DBG_BUGON(map.m_plen != map.m_llen); + + blknr = erofs_blknr(map.m_pa); + blkoff = erofs_blkoff(map.m_pa); + + /* deal with inline page */ + if (map.m_flags & EROFS_MAP_META) { + void *vsrc, *vto; + struct page *ipage; + + DBG_BUGON(map.m_plen > PAGE_SIZE); + + ipage = erofs_get_meta_page(inode->i_sb, blknr); + + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto err_out; + } + + vsrc = kmap_atomic(ipage); + vto = kmap_atomic(page); + memcpy(vto, vsrc + blkoff, map.m_plen); + memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen); + kunmap_atomic(vto); + kunmap_atomic(vsrc); + flush_dcache_page(page); + + SetPageUptodate(page); + /* TODO: could we unlock the page earlier? */ + unlock_page(ipage); + put_page(ipage); + + /* imply err = 0, see erofs_map_blocks */ + goto has_updated; + } + + /* pa must be block-aligned for raw reading */ + DBG_BUGON(erofs_blkoff(map.m_pa)); + + /* max # of continuous pages */ + if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE)) + nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE); + if (nblocks > BIO_MAX_PAGES) + nblocks = BIO_MAX_PAGES; + + bio = bio_alloc(GFP_NOIO, nblocks); + + bio->bi_end_io = erofs_readendio; + bio->bi_bdev = sb->s_bdev; + bio->bi_iter.bi_sector = (sector_t)blknr << + LOG_SECTORS_PER_BLOCK; + bio->bi_opf = REQ_OP_READ | (ra ? REQ_RAHEAD : 0); + } + + err = bio_add_page(bio, page, PAGE_SIZE, 0); + /* out of the extent or bio is full */ + if (err < PAGE_SIZE) + goto submit_bio_retry; + + *last_block = current_block; + + /* shift in advance in case of it followed by too many gaps */ + if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { + /* err should reassign to 0 after submitting */ + err = 0; + goto submit_bio_out; + } + + return bio; + +err_out: + /* for sync reading, set page error immediately */ + if (!ra) { + SetPageError(page); + ClearPageUptodate(page); + } +has_updated: + unlock_page(page); + + /* if updated manually, continuous pages has a gap */ + if (bio) +submit_bio_out: + submit_bio(bio); + return err ? ERR_PTR(err) : NULL; +} + +/* + * since we dont have write or truncate flows, so no inode + * locking needs to be held at the moment. + */ +static int erofs_raw_access_readpage(struct file *file, struct page *page) +{ + erofs_off_t last_block; + struct bio *bio; + + trace_erofs_readpage(page, true); + + bio = erofs_read_raw_page(NULL, page->mapping, + page, &last_block, 1, false); + + if (IS_ERR(bio)) + return PTR_ERR(bio); + + DBG_BUGON(bio); /* since we have only one bio -- must be NULL */ + return 0; +} + +static int erofs_raw_access_readpages(struct file *filp, + struct address_space *mapping, + struct list_head *pages, + unsigned int nr_pages) +{ + erofs_off_t last_block; + struct bio *bio = NULL; + gfp_t gfp = readahead_gfp_mask(mapping); + struct page *page = list_last_entry(pages, struct page, lru); + + trace_erofs_readpages(mapping->host, page, nr_pages, true); + + for (; nr_pages; --nr_pages) { + page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + + if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) { + bio = erofs_read_raw_page(bio, mapping, page, + &last_block, nr_pages, true); + + /* all the page errors are ignored when readahead */ + if (IS_ERR(bio)) { + pr_err("%s, readahead error at page %lu of nid %llu\n", + __func__, page->index, + EROFS_I(mapping->host)->nid); + + bio = NULL; + } + } + + /* pages could still be locked */ + put_page(page); + } + DBG_BUGON(!list_empty(pages)); + + /* the rare case (end in gaps) */ + if (bio) + submit_bio(bio); + return 0; +} + +static sector_t erofs_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + struct erofs_map_blocks map = { + .m_la = blknr_to_addr(block), + }; + + if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) { + erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE; + + if (block >> LOG_SECTORS_PER_BLOCK >= blks) + return 0; + } + + if (!erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW)) + return erofs_blknr(map.m_pa); + + return 0; +} + +/* for uncompressed (aligned) files and raw access for other files */ +const struct address_space_operations erofs_raw_access_aops = { + .readpage = erofs_raw_access_readpage, + .readpages = erofs_raw_access_readpages, + .bmap = erofs_bmap, +}; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c new file mode 100644 index 000000000000..a5bc4b1b7813 --- /dev/null +++ b/fs/erofs/decompressor.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "compress.h" +#include +#include + +#ifndef LZ4_DISTANCE_MAX /* history window size */ +#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1) +#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN +#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) +#endif + +struct z_erofs_decompressor { + /* + * if destpages have sparsed pages, fill them with bounce pages. + * it also check whether destpages indicate continuous physical memory. + */ + int (*prepare_destpages)(struct z_erofs_decompress_req *rq, + struct list_head *pagepool); + int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out); + char *name; +}; + +int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, + struct z_erofs_lz4_cfgs *lz4, int size) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + u16 distance; + + if (lz4) { + if (size < sizeof(struct z_erofs_lz4_cfgs)) { + erofs_err(sb, "invalid lz4 cfgs, size=%u", size); + return -EINVAL; + } + distance = le16_to_cpu(lz4->max_distance); + + sbi->lz4.max_pclusterblks = le16_to_cpu(lz4->max_pclusterblks); + if (!sbi->lz4.max_pclusterblks) { + sbi->lz4.max_pclusterblks = 1; /* reserved case */ + } else if (sbi->lz4.max_pclusterblks > + Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) { + erofs_err(sb, "too large lz4 pclusterblks %u", + sbi->lz4.max_pclusterblks); + return -EINVAL; + } else if (sbi->lz4.max_pclusterblks >= 2) { + erofs_info(sb, "EXPERIMENTAL big pcluster feature in use. Use at your own risk!"); + } + } else { + distance = le16_to_cpu(dsb->u1.lz4_max_distance); + sbi->lz4.max_pclusterblks = 1; + } + + sbi->lz4.max_distance_pages = distance ? + DIV_ROUND_UP(distance, PAGE_SIZE) + 1 : + LZ4_MAX_DISTANCE_PAGES; + return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks); +} + +static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + const unsigned int nr = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; + unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, + BITS_PER_LONG)] = { 0 }; + unsigned int lz4_max_distance_pages = + EROFS_SB(rq->sb)->lz4.max_distance_pages; + void *kaddr = NULL; + unsigned int i, j, top; + + top = 0; + for (i = j = 0; i < nr; ++i, ++j) { + struct page *const page = rq->out[i]; + struct page *victim; + + if (j >= lz4_max_distance_pages) + j = 0; + + /* 'valid' bounced can only be tested after a complete round */ + if (test_bit(j, bounced)) { + DBG_BUGON(i < lz4_max_distance_pages); + DBG_BUGON(top >= lz4_max_distance_pages); + availables[top++] = rq->out[i - lz4_max_distance_pages]; + } + + if (page) { + __clear_bit(j, bounced); + if (kaddr) { + if (kaddr + PAGE_SIZE == page_address(page)) + kaddr += PAGE_SIZE; + else + kaddr = NULL; + } else if (!i) { + kaddr = page_address(page); + } + continue; + } + kaddr = NULL; + __set_bit(j, bounced); + + if (top) { + victim = availables[--top]; + get_page(victim); + } else { + victim = erofs_allocpage(pagepool, + GFP_KERNEL | __GFP_NOFAIL); + set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); + } + rq->out[i] = victim; + } + return kaddr ? 1 : 0; +} + +static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq, + void *inpage, unsigned int *inputmargin, int *maptype, + bool support_0padding) +{ + unsigned int nrpages_in, nrpages_out; + unsigned int ofull, oend, inputsize, total, i, j; + struct page **in; + void *src, *tmp; + + inputsize = rq->inputsize; + nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT; + oend = rq->pageofs_out + rq->outputsize; + ofull = PAGE_ALIGN(oend); + nrpages_out = ofull >> PAGE_SHIFT; + + if (rq->inplace_io) { + if (rq->partial_decoding || !support_0padding || + ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize)) + goto docopy; + + for (i = 0; i < nrpages_in; ++i) { + DBG_BUGON(rq->in[i] == NULL); + for (j = 0; j < nrpages_out - nrpages_in + i; ++j) + if (rq->out[j] == rq->in[i]) + goto docopy; + } + } + + if (nrpages_in <= 1) { + *maptype = 0; + return inpage; + } + kunmap_atomic(inpage); + might_sleep(); + src = erofs_vm_map_ram(rq->in, nrpages_in); + if (!src) + return ERR_PTR(-ENOMEM); + *maptype = 1; + return src; + +docopy: + /* Or copy compressed data which can be overlapped to per-CPU buffer */ + in = rq->in; + src = erofs_get_pcpubuf(nrpages_in); + if (!src) { + DBG_BUGON(1); + kunmap_atomic(inpage); + return ERR_PTR(-EFAULT); + } + + tmp = src; + total = rq->inputsize; + while (total) { + unsigned int page_copycnt = + min_t(unsigned int, total, PAGE_SIZE - *inputmargin); + + if (!inpage) + inpage = kmap_atomic(*in); + memcpy(tmp, inpage + *inputmargin, page_copycnt); + kunmap_atomic(inpage); + inpage = NULL; + tmp += page_copycnt; + total -= page_copycnt; + ++in; + *inputmargin = 0; + } + *maptype = 2; + return src; +} + +static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) +{ + unsigned int inputmargin; + u8 *headpage, *src; + bool support_0padding; + int ret, maptype; + + DBG_BUGON(*rq->in == NULL); + headpage = kmap_atomic(*rq->in); + inputmargin = 0; + support_0padding = false; + + /* decompression inplace is only safe when 0padding is enabled */ + if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) { + support_0padding = true; + + while (!headpage[inputmargin & ~PAGE_MASK]) + if (!(++inputmargin & ~PAGE_MASK)) + break; + + if (inputmargin >= rq->inputsize) { + kunmap_atomic(headpage); + return -EIO; + } + } + + rq->inputsize -= inputmargin; + src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype, + support_0padding); + if (IS_ERR(src)) + return PTR_ERR(src); + + /* legacy format could compress extra data in a pcluster. */ + if (rq->partial_decoding || !support_0padding) + ret = LZ4_decompress_safe_partial(src + inputmargin, out, + rq->inputsize, rq->outputsize, rq->outputsize); + else + ret = LZ4_decompress_safe(src + inputmargin, out, + rq->inputsize, rq->outputsize); + + if (ret != rq->outputsize) { + erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]", + ret, rq->inputsize, inputmargin, rq->outputsize); + + WARN_ON(1); + print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, + 16, 1, src + inputmargin, rq->inputsize, true); + print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, + 16, 1, out, rq->outputsize, true); + + if (ret >= 0) + memset(out + ret, 0, rq->outputsize - ret); + ret = -EIO; + } + + if (maptype == 0) { + kunmap_atomic(src); + } else if (maptype == 1) { + vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT); + } else if (maptype == 2) { + erofs_put_pcpubuf(src); + } else { + DBG_BUGON(1); + return -EFAULT; + } + return ret; +} + +static struct z_erofs_decompressor decompressors[] = { + [Z_EROFS_COMPRESSION_SHIFTED] = { + .name = "shifted" + }, + [Z_EROFS_COMPRESSION_LZ4] = { + .prepare_destpages = z_erofs_lz4_prepare_destpages, + .decompress = z_erofs_lz4_decompress, + .name = "lz4" + }, +}; + +static void copy_from_pcpubuf(struct page **out, const char *dst, + unsigned short pageofs_out, + unsigned int outputsize) +{ + const char *end = dst + outputsize; + const unsigned int righthalf = PAGE_SIZE - pageofs_out; + const char *cur = dst - pageofs_out; + + while (cur < end) { + struct page *const page = *out++; + + if (page) { + char *buf = kmap_atomic(page); + + if (cur >= dst) { + memcpy(buf, cur, min_t(uint, PAGE_SIZE, + end - cur)); + } else { + memcpy(buf + pageofs_out, cur + pageofs_out, + min_t(uint, righthalf, end - cur)); + } + kunmap_atomic(buf); + } + cur += PAGE_SIZE; + } +} + +static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + const unsigned int nrpages_out = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + const struct z_erofs_decompressor *alg = decompressors + rq->alg; + unsigned int dst_maptype; + void *dst; + int ret; + + /* two optimized fast paths only for non bigpcluster cases yet */ + if (rq->inputsize <= PAGE_SIZE) { + if (nrpages_out == 1 && !rq->inplace_io) { + DBG_BUGON(!*rq->out); + dst = kmap_atomic(*rq->out); + dst_maptype = 0; + goto dstmap_out; + } + + /* + * For the case of small output size (especially much less + * than PAGE_SIZE), memcpy the decompressed data rather than + * compressed data is preferred. + */ + if (rq->outputsize <= PAGE_SIZE * 7 / 8) { + dst = erofs_get_pcpubuf(1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + rq->inplace_io = false; + ret = alg->decompress(rq, dst); + if (!ret) + copy_from_pcpubuf(rq->out, dst, rq->pageofs_out, + rq->outputsize); + + erofs_put_pcpubuf(dst); + return ret; + } + } + + /* general decoding path which can be used for all cases */ + ret = alg->prepare_destpages(rq, pagepool); + if (ret < 0) + return ret; + if (ret) { + dst = page_address(*rq->out); + dst_maptype = 1; + goto dstmap_out; + } + + dst = erofs_vm_map_ram(rq->out, nrpages_out); + if (!dst) + return -ENOMEM; + dst_maptype = 2; + +dstmap_out: + ret = alg->decompress(rq, dst + rq->pageofs_out); + + if (!dst_maptype) + kunmap_atomic(dst); + else if (dst_maptype == 2) + vm_unmap_ram(dst, nrpages_out); + return ret; +} + +static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + const unsigned int nrpages_out = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out; + unsigned char *src, *dst; + + if (nrpages_out > 2) { + DBG_BUGON(1); + return -EIO; + } + + if (rq->out[0] == *rq->in) { + DBG_BUGON(nrpages_out != 1); + return 0; + } + + src = kmap_atomic(*rq->in); + if (rq->out[0]) { + dst = kmap_atomic(rq->out[0]); + memcpy(dst + rq->pageofs_out, src, righthalf); + kunmap_atomic(dst); + } + + if (nrpages_out == 2) { + DBG_BUGON(!rq->out[1]); + if (rq->out[1] == *rq->in) { + memmove(src, src + righthalf, rq->pageofs_out); + } else { + dst = kmap_atomic(rq->out[1]); + memcpy(dst, src + righthalf, rq->pageofs_out); + kunmap_atomic(dst); + } + } + kunmap_atomic(src); + return 0; +} + +int z_erofs_decompress(struct z_erofs_decompress_req *rq, + struct list_head *pagepool) +{ + if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) + return z_erofs_shifted_transform(rq, pagepool); + return z_erofs_decompress_generic(rq, pagepool); +} diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c new file mode 100644 index 000000000000..722b653c7a77 --- /dev/null +++ b/fs/erofs/dir.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "internal.h" + +static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = { + [EROFS_FT_UNKNOWN] = DT_UNKNOWN, + [EROFS_FT_REG_FILE] = DT_REG, + [EROFS_FT_DIR] = DT_DIR, + [EROFS_FT_CHRDEV] = DT_CHR, + [EROFS_FT_BLKDEV] = DT_BLK, + [EROFS_FT_FIFO] = DT_FIFO, + [EROFS_FT_SOCK] = DT_SOCK, + [EROFS_FT_SYMLINK] = DT_LNK, +}; + +static void debug_one_dentry(unsigned char d_type, const char *de_name, + unsigned int de_namelen) +{ +#ifdef CONFIG_EROFS_FS_DEBUG + /* since the on-disk name could not have the trailing '\0' */ + unsigned char dbg_namebuf[EROFS_NAME_LEN + 1]; + + memcpy(dbg_namebuf, de_name, de_namelen); + dbg_namebuf[de_namelen] = '\0'; + + erofs_dbg("found dirent %s de_len %u d_type %d", dbg_namebuf, + de_namelen, d_type); +#endif +} + +static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, + void *dentry_blk, unsigned int *ofs, + unsigned int nameoff, unsigned int maxsize) +{ + struct erofs_dirent *de = dentry_blk + *ofs; + const struct erofs_dirent *end = dentry_blk + nameoff; + + while (de < end) { + const char *de_name; + unsigned int de_namelen; + unsigned char d_type; + + if (de->file_type < EROFS_FT_MAX) + d_type = erofs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + + nameoff = le16_to_cpu(de->nameoff); + de_name = (char *)dentry_blk + nameoff; + + /* the last dirent in the block? */ + if (de + 1 >= end) + de_namelen = strnlen(de_name, maxsize - nameoff); + else + de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; + + /* a corrupted entry is found */ + if (nameoff + de_namelen > maxsize || + de_namelen > EROFS_NAME_LEN) { + erofs_err(dir->i_sb, "bogus dirent @ nid %llu", + EROFS_I(dir)->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + + debug_one_dentry(d_type, de_name, de_namelen); + if (!dir_emit(ctx, de_name, de_namelen, + le64_to_cpu(de->nid), d_type)) + /* stopped by some reason */ + return 1; + ++de; + *ofs += sizeof(struct erofs_dirent); + } + *ofs = maxsize; + return 0; +} + +static int erofs_readdir(struct file *f, struct dir_context *ctx) +{ + struct inode *dir = file_inode(f); + struct address_space *mapping = dir->i_mapping; + const size_t dirsize = i_size_read(dir); + unsigned int i = ctx->pos / EROFS_BLKSIZ; + unsigned int ofs = ctx->pos % EROFS_BLKSIZ; + int err = 0; + bool initial = true; + + while (ctx->pos < dirsize) { + struct page *dentry_page; + struct erofs_dirent *de; + unsigned int nameoff, maxsize; + + dentry_page = read_mapping_page(mapping, i, NULL); + if (dentry_page == ERR_PTR(-ENOMEM)) { + err = -ENOMEM; + break; + } else if (IS_ERR(dentry_page)) { + erofs_err(dir->i_sb, + "fail to readdir of logical block %u of nid %llu", + i, EROFS_I(dir)->nid); + err = -EFSCORRUPTED; + break; + } + + de = (struct erofs_dirent *)kmap(dentry_page); + + nameoff = le16_to_cpu(de->nameoff); + + if (nameoff < sizeof(struct erofs_dirent) || + nameoff >= PAGE_SIZE) { + erofs_err(dir->i_sb, + "invalid de[0].nameoff %u @ nid %llu", + nameoff, EROFS_I(dir)->nid); + err = -EFSCORRUPTED; + goto skip_this; + } + + maxsize = min_t(unsigned int, + dirsize - ctx->pos + ofs, PAGE_SIZE); + + /* search dirents at the arbitrary position */ + if (initial) { + initial = false; + + ofs = roundup(ofs, sizeof(struct erofs_dirent)); + if (ofs >= nameoff) + goto skip_this; + } + + err = erofs_fill_dentries(dir, ctx, de, &ofs, + nameoff, maxsize); +skip_this: + kunmap(dentry_page); + + put_page(dentry_page); + + ctx->pos = blknr_to_addr(i) + ofs; + + if (err) + break; + ++i; + ofs = 0; + } + return err < 0 ? err : 0; +} + +const struct file_operations erofs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate_shared = erofs_readdir, +}; diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h new file mode 100644 index 000000000000..32f79b14ef50 --- /dev/null +++ b/fs/erofs/erofs_fs.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */ +/* + * EROFS (Enhanced ROM File System) on-disk format definition + * + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#ifndef __EROFS_FS_H +#define __EROFS_FS_H + +#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 +#define EROFS_SUPER_OFFSET 1024 + +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 + +/* + * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should + * be incompatible with this kernel version. + */ +#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 +#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 +#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 +#define EROFS_ALL_FEATURE_INCOMPAT \ + (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ + EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ + EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER) + +#define EROFS_SB_EXTSLOT_SIZE 16 + +/* erofs on-disk super block (currently 128 bytes) */ +struct erofs_super_block { + __le32 magic; /* file system magic number */ + __le32 checksum; /* crc32c(super_block) */ + __le32 feature_compat; + __u8 blkszbits; /* support block_size == PAGE_SIZE only */ + __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ + + __le16 root_nid; /* nid of root directory */ + __le64 inos; /* total valid ino # (== f_files - f_favail) */ + + __le64 build_time; /* inode v1 time derivation */ + __le32 build_time_nsec; /* inode v1 time derivation in nano scale */ + __le32 blocks; /* used for statfs */ + __le32 meta_blkaddr; /* start block address of metadata area */ + __le32 xattr_blkaddr; /* start block address of shared xattr area */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __u8 volume_name[16]; /* volume name */ + __le32 feature_incompat; + union { + /* bitmap for available compression algorithms */ + __le16 available_compr_algs; + /* customized sliding window size instead of 64k by default */ + __le16 lz4_max_distance; + } __packed u1; + __u8 reserved2[42]; +}; + +/* + * erofs inode datalayout (i_format in on-disk inode): + * 0 - inode plain without inline data A: + * inode, [xattrs], ... | ... | no-holed data + * 1 - inode VLE compression B (legacy): + * inode, [xattrs], extents ... | ... + * 2 - inode plain with inline data C: + * inode, [xattrs], last_inline_data, ... | ... | no-holed data + * 3 - inode compression D: + * inode, [xattrs], map_header, extents ... | ... + * 4~7 - reserved + */ +enum { + EROFS_INODE_FLAT_PLAIN = 0, + EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, + EROFS_INODE_FLAT_INLINE = 2, + EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_DATALAYOUT_MAX +}; + +static inline bool erofs_inode_is_data_compressed(unsigned int datamode) +{ + return datamode == EROFS_INODE_FLAT_COMPRESSION || + datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; +} + +/* bit definitions of inode i_advise */ +#define EROFS_I_VERSION_BITS 1 +#define EROFS_I_DATALAYOUT_BITS 3 + +#define EROFS_I_VERSION_BIT 0 +#define EROFS_I_DATALAYOUT_BIT 1 + +#define EROFS_I_ALL \ + ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) + +/* 32-byte reduced form of an ondisk inode */ +struct erofs_inode_compact { + __le16 i_format; /* inode format hints */ + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_nlink; + __le32 i_size; + __le32 i_reserved; + union { + /* file total compressed blocks for data mapping 1 */ + __le32 compressed_blocks; + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + } i_u; + __le32 i_ino; /* only used for 32-bit stat compatibility */ + __le16 i_uid; + __le16 i_gid; + __le32 i_reserved2; +}; + +/* 32 bytes on-disk inode */ +#define EROFS_INODE_LAYOUT_COMPACT 0 +/* 64 bytes on-disk inode */ +#define EROFS_INODE_LAYOUT_EXTENDED 1 + +/* 64-byte complete form of an ondisk inode */ +struct erofs_inode_extended { + __le16 i_format; /* inode format hints */ + +/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ + __le16 i_xattr_icount; + __le16 i_mode; + __le16 i_reserved; + __le64 i_size; + union { + /* file total compressed blocks for data mapping 1 */ + __le32 compressed_blocks; + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + } i_u; + + /* only used for 32-bit stat compatibility */ + __le32 i_ino; + + __le32 i_uid; + __le32 i_gid; + __le64 i_ctime; + __le32 i_ctime_nsec; + __le32 i_nlink; + __u8 i_reserved2[16]; +}; + +#define EROFS_MAX_SHARED_XATTRS (128) +/* h_shared_count between 129 ... 255 are special # */ +#define EROFS_SHARED_XATTR_EXTENT (255) + +/* + * inline xattrs (n == i_xattr_icount): + * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes + * 12 bytes / \ + * / \ + * /-----------------------\ + * | erofs_xattr_entries+ | + * +-----------------------+ + * inline xattrs must starts in erofs_xattr_ibody_header, + * for read-only fs, no need to introduce h_refcount + */ +struct erofs_xattr_ibody_header { + __le32 h_reserved; + __u8 h_shared_count; + __u8 h_reserved2[7]; + __le32 h_shared_xattrs[0]; /* shared xattr id array */ +}; + +/* Name indexes */ +#define EROFS_XATTR_INDEX_USER 1 +#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EROFS_XATTR_INDEX_TRUSTED 4 +#define EROFS_XATTR_INDEX_LUSTRE 5 +#define EROFS_XATTR_INDEX_SECURITY 6 + +/* xattr entry (for both inline & shared xattrs) */ +struct erofs_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_size; /* size of attribute value */ + /* followed by e_name and e_value */ + char e_name[0]; /* attribute name */ +}; + +static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) +{ + if (!i_xattr_icount) + return 0; + + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1); +} + +#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry)) + +static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) +{ + return EROFS_XATTR_ALIGN(sizeof(struct erofs_xattr_entry) + + e->e_name_len + le16_to_cpu(e->e_value_size)); +} + +/* maximum supported size of a physical compression cluster */ +#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) + +/* available compression algorithm types (for h_algorithmtype) */ +enum { + Z_EROFS_COMPRESSION_LZ4 = 0, + Z_EROFS_COMPRESSION_MAX +}; +#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1)) + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lz4_cfgs { + __le16 max_distance; + __le16 max_pclusterblks; + u8 reserved[10]; +} __packed; + +/* + * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) + * e.g. for 4k logical cluster size, 4B if compacted 2B is off; + * (4B) + 2B + (4B) if compacted 2B is on. + * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) + * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) + */ +#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 + +struct z_erofs_map_header { + __le32 h_reserved1; + __le16 h_advise; + /* + * bit 0-3 : algorithm type of head 1 (logical cluster type 01); + * bit 4-7 : algorithm type of head 2 (logical cluster type 11). + */ + __u8 h_algorithmtype; + /* + * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; + * bit 3-7 : reserved. + */ + __u8 h_clusterbits; +}; + +#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 + +/* + * Fixed-sized output compression ondisk Logical Extent cluster type: + * 0 - literal (uncompressed) cluster + * 1 - compressed cluster (for the head logical cluster) + * 2 - compressed cluster (for the other logical clusters) + * + * In detail, + * 0 - literal (uncompressed) cluster, + * di_advise = 0 + * di_clusterofs = the literal data offset of the cluster + * di_blkaddr = the blkaddr of the literal cluster + * + * 1 - compressed cluster (for the head logical cluster) + * di_advise = 1 + * di_clusterofs = the decompressed data offset of the cluster + * di_blkaddr = the blkaddr of the compressed cluster + * + * 2 - compressed cluster (for the other logical clusters) + * di_advise = 2 + * di_clusterofs = + * the decompressed data offset in its own head cluster + * di_u.delta[0] = distance to its corresponding head cluster + * di_u.delta[1] = distance to its corresponding tail cluster + * (di_advise could be 0, 1 or 2) + */ +enum { + Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, + Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1, + Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, + Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3, + Z_EROFS_VLE_CLUSTER_TYPE_MAX +}; + +#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 +#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 + +/* + * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the + * compressed block count of a compressed extent (in logical clusters, aka. + * block count of a pcluster). + */ +#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11) + +struct z_erofs_vle_decompressed_index { + __le16 di_advise; + /* where to decompress in the head cluster */ + __le16 di_clusterofs; + + union { + /* for the head cluster */ + __le32 blkaddr; + /* + * for the rest clusters + * eg. for 4k page-sized cluster, maximum 4K*64k = 256M) + * [0] - pointing to the head cluster + * [1] - pointing to the tail cluster + */ + __le16 delta[2]; + } di_u; +}; + +#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \ + (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \ + sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) + +/* dirent sorts in alphabet order, thus we can do binary search */ +struct erofs_dirent { + __le64 nid; /* node number */ + __le16 nameoff; /* start offset of file name */ + __u8 file_type; /* file type */ + __u8 reserved; /* reserved */ +} __packed; + +/* file types used in inode_info->flags */ +enum { + EROFS_FT_UNKNOWN, + EROFS_FT_REG_FILE, + EROFS_FT_DIR, + EROFS_FT_CHRDEV, + EROFS_FT_BLKDEV, + EROFS_FT_FIFO, + EROFS_FT_SOCK, + EROFS_FT_SYMLINK, + EROFS_FT_MAX +}; + +#define EROFS_NAME_LEN 255 + +/* check the EROFS on-disk layout strictly at compile time */ +static inline void erofs_check_ondisk_layout_definitions(void) +{ + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); + BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); + BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); + BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); + BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); + BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); + BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); + + BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < + Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); +} + +#endif diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c new file mode 100644 index 000000000000..c7fd150add27 --- /dev/null +++ b/fs/erofs/inode.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "xattr.h" + +#include + +/* + * if inode is successfully read, return its inode page (or sometimes + * the inode payload page if it's an extended inode) in order to fill + * inline data if possible. + */ +static struct page *erofs_read_inode(struct inode *inode, + unsigned int *ofs) +{ + struct super_block *sb = inode->i_sb; + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_inode *vi = EROFS_I(inode); + const erofs_off_t inode_loc = iloc(sbi, vi->nid); + + erofs_blk_t blkaddr, nblks = 0; + struct page *page; + struct erofs_inode_compact *dic; + struct erofs_inode_extended *die, *copied = NULL; + unsigned int ifmt; + int err; + + blkaddr = erofs_blknr(inode_loc); + *ofs = erofs_blkoff(inode_loc); + + erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", + __func__, vi->nid, *ofs, blkaddr); + + page = erofs_get_meta_page(sb, blkaddr); + if (IS_ERR(page)) { + erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", + vi->nid, PTR_ERR(page)); + return page; + } + + dic = page_address(page) + *ofs; + ifmt = le16_to_cpu(dic->i_format); + + if (ifmt & ~EROFS_I_ALL) { + erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu", + ifmt, vi->nid); + err = -EOPNOTSUPP; + goto err_out; + } + + vi->datalayout = erofs_inode_datalayout(ifmt); + if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { + erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu", + vi->datalayout, vi->nid); + err = -EOPNOTSUPP; + goto err_out; + } + + switch (erofs_inode_version(ifmt)) { + case EROFS_INODE_LAYOUT_EXTENDED: + vi->inode_isize = sizeof(struct erofs_inode_extended); + /* check if the inode acrosses page boundary */ + if (*ofs + vi->inode_isize <= PAGE_SIZE) { + *ofs += vi->inode_isize; + die = (struct erofs_inode_extended *)dic; + } else { + const unsigned int gotten = PAGE_SIZE - *ofs; + + copied = kmalloc(vi->inode_isize, GFP_NOFS); + if (!copied) { + err = -ENOMEM; + goto err_out; + } + memcpy(copied, dic, gotten); + unlock_page(page); + put_page(page); + + page = erofs_get_meta_page(sb, blkaddr + 1); + if (IS_ERR(page)) { + erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld", + vi->nid, PTR_ERR(page)); + kfree(copied); + return page; + } + *ofs = vi->inode_isize - gotten; + memcpy((u8 *)copied + gotten, page_address(page), *ofs); + die = copied; + } + vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); + + inode->i_mode = le16_to_cpu(die->i_mode); + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: + inode->i_rdev = + new_decode_dev(le32_to_cpu(die->i_u.rdev)); + break; + case S_IFIFO: + case S_IFSOCK: + inode->i_rdev = 0; + break; + default: + goto bogusimode; + } + i_uid_write(inode, le32_to_cpu(die->i_uid)); + i_gid_write(inode, le32_to_cpu(die->i_gid)); + set_nlink(inode, le32_to_cpu(die->i_nlink)); + + /* extended inode has its own timestamp */ + inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); + + inode->i_size = le64_to_cpu(die->i_size); + + /* total blocks for compressed files */ + if (erofs_inode_is_data_compressed(vi->datalayout)) + nblks = le32_to_cpu(die->i_u.compressed_blocks); + + kfree(copied); + break; + case EROFS_INODE_LAYOUT_COMPACT: + vi->inode_isize = sizeof(struct erofs_inode_compact); + *ofs += vi->inode_isize; + vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); + + inode->i_mode = le16_to_cpu(dic->i_mode); + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: + inode->i_rdev = + new_decode_dev(le32_to_cpu(dic->i_u.rdev)); + break; + case S_IFIFO: + case S_IFSOCK: + inode->i_rdev = 0; + break; + default: + goto bogusimode; + } + i_uid_write(inode, le16_to_cpu(dic->i_uid)); + i_gid_write(inode, le16_to_cpu(dic->i_gid)); + set_nlink(inode, le16_to_cpu(dic->i_nlink)); + + /* use build time for compact inodes */ + inode->i_ctime.tv_sec = sbi->build_time; + inode->i_ctime.tv_nsec = sbi->build_time_nsec; + + inode->i_size = le32_to_cpu(dic->i_size); + if (erofs_inode_is_data_compressed(vi->datalayout)) + nblks = le32_to_cpu(dic->i_u.compressed_blocks); + break; + default: + erofs_err(inode->i_sb, + "unsupported on-disk inode version %u of nid %llu", + erofs_inode_version(ifmt), vi->nid); + err = -EOPNOTSUPP; + goto err_out; + } + + inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; + inode->i_atime.tv_sec = inode->i_ctime.tv_sec; + inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; + inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; + + if (!nblks) + /* measure inode.i_blocks as generic filesystems */ + inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; + else + inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; + return page; + +bogusimode: + erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", + inode->i_mode, vi->nid); + err = -EFSCORRUPTED; +err_out: + DBG_BUGON(1); + kfree(copied); + unlock_page(page); + put_page(page); + return ERR_PTR(err); +} + +static int erofs_fill_symlink(struct inode *inode, void *data, + unsigned int m_pofs) +{ + struct erofs_inode *vi = EROFS_I(inode); + char *lnk; + + /* if it cannot be handled with fast symlink scheme */ + if (vi->datalayout != EROFS_INODE_FLAT_INLINE || + inode->i_size >= PAGE_SIZE) { + inode->i_op = &erofs_symlink_iops; + return 0; + } + + lnk = kmalloc(inode->i_size + 1, GFP_KERNEL); + if (!lnk) + return -ENOMEM; + + m_pofs += vi->xattr_isize; + /* inline symlink data shouldn't cross page boundary as well */ + if (m_pofs + inode->i_size > PAGE_SIZE) { + kfree(lnk); + erofs_err(inode->i_sb, + "inline data cross block boundary @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + + memcpy(lnk, data + m_pofs, inode->i_size); + lnk[inode->i_size] = '\0'; + + inode->i_link = lnk; + inode->i_op = &erofs_fast_symlink_iops; + return 0; +} + +static int erofs_fill_inode(struct inode *inode, int isdir) +{ + struct erofs_inode *vi = EROFS_I(inode); + struct page *page; + unsigned int ofs; + int err = 0; + + trace_erofs_fill_inode(inode, isdir); + + /* read inode base data from disk */ + page = erofs_read_inode(inode, &ofs); + if (IS_ERR(page)) + return PTR_ERR(page); + + /* setup the new inode */ + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &erofs_generic_iops; + inode->i_fop = &generic_ro_fops; + break; + case S_IFDIR: + inode->i_op = &erofs_dir_iops; + inode->i_fop = &erofs_dir_fops; + break; + case S_IFLNK: + err = erofs_fill_symlink(inode, page_address(page), ofs); + if (err) + goto out_unlock; + inode_nohighmem(inode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + inode->i_op = &erofs_generic_iops; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + goto out_unlock; + default: + err = -EFSCORRUPTED; + goto out_unlock; + } + + if (erofs_inode_is_data_compressed(vi->datalayout)) { + err = z_erofs_fill_inode(inode); + goto out_unlock; + } + inode->i_mapping->a_ops = &erofs_raw_access_aops; + +out_unlock: + unlock_page(page); + put_page(page); + return err; +} + +/* + * erofs nid is 64bits, but i_ino is 'unsigned long', therefore + * we should do more for 32-bit platform to find the right inode. + */ +static int erofs_ilookup_test_actor(struct inode *inode, void *opaque) +{ + const erofs_nid_t nid = *(erofs_nid_t *)opaque; + + return EROFS_I(inode)->nid == nid; +} + +static int erofs_iget_set_actor(struct inode *inode, void *opaque) +{ + const erofs_nid_t nid = *(erofs_nid_t *)opaque; + + inode->i_ino = erofs_inode_hash(nid); + return 0; +} + +static inline struct inode *erofs_iget_locked(struct super_block *sb, + erofs_nid_t nid) +{ + const unsigned long hashval = erofs_inode_hash(nid); + + return iget5_locked(sb, hashval, erofs_ilookup_test_actor, + erofs_iget_set_actor, &nid); +} + +struct inode *erofs_iget(struct super_block *sb, + erofs_nid_t nid, + bool isdir) +{ + struct inode *inode = erofs_iget_locked(sb, nid); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) { + int err; + struct erofs_inode *vi = EROFS_I(inode); + + vi->nid = nid; + + err = erofs_fill_inode(inode, isdir); + if (!err) + unlock_new_inode(inode); + else { + iget_failed(inode); + inode = ERR_PTR(err); + } + } + return inode; +} + +int erofs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *const inode = d_inode(dentry); + +#if 0 + if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) + stat->attributes |= STATX_ATTR_COMPRESSED; + + stat->attributes |= STATX_ATTR_IMMUTABLE; + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | + STATX_ATTR_IMMUTABLE); +#endif + + generic_fillattr(inode, stat); + return 0; +} + +const struct inode_operations erofs_generic_iops = { + .getattr = erofs_getattr, + .listxattr = erofs_listxattr, + .get_acl = erofs_get_acl, +}; + +const struct inode_operations erofs_symlink_iops = { + .get_link = page_get_link, + .getattr = erofs_getattr, + .listxattr = erofs_listxattr, + .get_acl = erofs_get_acl, +}; + +const struct inode_operations erofs_fast_symlink_iops = { + .get_link = simple_get_link, + .getattr = erofs_getattr, + .listxattr = erofs_listxattr, + .get_acl = erofs_get_acl, +}; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h new file mode 100644 index 000000000000..c52e77109675 --- /dev/null +++ b/fs/erofs/internal.h @@ -0,0 +1,467 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#ifndef __EROFS_INTERNAL_H +#define __EROFS_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "erofs_fs.h" + +/* redefine pr_fmt "erofs: " */ +#undef pr_fmt +#define pr_fmt(fmt) "erofs: " fmt + +__printf(3, 4) void _erofs_err(struct super_block *sb, + const char *function, const char *fmt, ...); +#define erofs_err(sb, fmt, ...) \ + _erofs_err(sb, __func__, fmt "\n", ##__VA_ARGS__) +__printf(3, 4) void _erofs_info(struct super_block *sb, + const char *function, const char *fmt, ...); +#define erofs_info(sb, fmt, ...) \ + _erofs_info(sb, __func__, fmt "\n", ##__VA_ARGS__) +#ifdef CONFIG_EROFS_FS_DEBUG +#define erofs_dbg(x, ...) pr_debug(x "\n", ##__VA_ARGS__) +#define DBG_BUGON BUG_ON +#else +#define erofs_dbg(x, ...) ((void)0) +#define DBG_BUGON(x) ((void)(x)) +#endif /* !CONFIG_EROFS_FS_DEBUG */ + +/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */ +#define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1 + +typedef u64 erofs_nid_t; +typedef u64 erofs_off_t; +/* data type for filesystem-wide blocks number */ +typedef u32 erofs_blk_t; + +/* all filesystem-wide lz4 configurations */ +struct erofs_sb_lz4_info { + /* # of pages needed for EROFS lz4 rolling decompression */ + u16 max_distance_pages; + /* maximum possible blocks for pclusters in the filesystem */ + u16 max_pclusterblks; +}; + +struct erofs_sb_info { +#ifdef CONFIG_EROFS_FS_ZIP + /* list for all registered superblocks, mainly for shrinker */ + struct list_head list; + struct mutex umount_mutex; + + /* the dedicated workstation for compression */ + struct radix_tree_root workstn_tree; + spinlock_t tree_lock; + + /* threshold for decompression synchronously */ + unsigned int max_sync_decompress_pages; + + unsigned int shrinker_run_no; + u16 available_compr_algs; + + /* current strategy of how to use managed cache */ + unsigned char cache_strategy; + + /* pseudo inode to manage cached pages */ + struct inode *managed_cache; + + struct erofs_sb_lz4_info lz4; +#endif /* CONFIG_EROFS_FS_ZIP */ + u32 blocks; + u32 meta_blkaddr; +#ifdef CONFIG_EROFS_FS_XATTR + u32 xattr_blkaddr; +#endif + + /* inode slot unit size in bit shift */ + unsigned char islotbits; + + u32 sb_size; /* total superblock size */ + u32 build_time_nsec; + u64 build_time; + + /* what we really care is nid, rather than ino.. */ + erofs_nid_t root_nid; + /* used for statfs, f_files - f_favail */ + u64 inos; + + u8 uuid[16]; /* 128-bit uuid for volume */ + u8 volume_name[16]; /* volume name */ + u32 feature_compat; + u32 feature_incompat; + + unsigned int mount_opt; +}; + +#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) +#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info) + +/* Mount flags set via mount options or defaults */ +#define EROFS_MOUNT_XATTR_USER 0x00000010 +#define EROFS_MOUNT_POSIX_ACL 0x00000020 + +#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option) +#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option) +#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option) + +#ifdef CONFIG_EROFS_FS_ZIP +enum { + EROFS_ZIP_CACHE_DISABLED, + EROFS_ZIP_CACHE_READAHEAD, + EROFS_ZIP_CACHE_READAROUND +}; + +#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) + +/* basic unit of the workstation of a super_block */ +struct erofs_workgroup { + /* the workgroup index in the workstation */ + pgoff_t index; + + /* overall workgroup reference count */ + atomic_t refcount; +}; + +#if defined(CONFIG_SMP) +static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, + int val) +{ + preempt_disable(); + if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) { + preempt_enable(); + return false; + } + return true; +} + +static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp, + int orig_val) +{ + /* + * other observers should notice all modifications + * in the freezing period. + */ + smp_mb(); + atomic_set(&grp->refcount, orig_val); + preempt_enable(); +} + +static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) +{ + return atomic_cond_read_relaxed(&grp->refcount, + VAL != EROFS_LOCKED_MAGIC); +} +#else +static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, + int val) +{ + preempt_disable(); + /* no need to spin on UP platforms, let's just disable preemption. */ + if (val != atomic_read(&grp->refcount)) { + preempt_enable(); + return false; + } + return true; +} + +static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp, + int orig_val) +{ + preempt_enable(); +} + +static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) +{ + int v = atomic_read(&grp->refcount); + + /* workgroup is never freezed on uniprocessor systems */ + DBG_BUGON(v == EROFS_LOCKED_MAGIC); + return v; +} +#endif /* !CONFIG_SMP */ +#endif /* !CONFIG_EROFS_FS_ZIP */ + +/* we strictly follow PAGE_SIZE and no buffer head yet */ +#define LOG_BLOCK_SIZE PAGE_SHIFT + +#undef LOG_SECTORS_PER_BLOCK +#define LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) + +#undef SECTORS_PER_BLOCK +#define SECTORS_PER_BLOCK (1 << SECTORS_PER_BLOCK) + +#define EROFS_BLKSIZ (1 << LOG_BLOCK_SIZE) + +#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ) +#error erofs cannot be used in this platform +#endif + +#define ROOT_NID(sb) ((sb)->root_nid) + +#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) +#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ) +#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ) + +static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) +{ + return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); +} + +#define EROFS_FEATURE_FUNCS(name, compat, feature) \ +static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ +{ \ + return sbi->feature_##compat & EROFS_FEATURE_##feature; \ +} + +EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) +EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) +EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) +EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) + +/* atomic flag definitions */ +#define EROFS_I_EA_INITED_BIT 0 +#define EROFS_I_Z_INITED_BIT 1 + +/* bitlock definitions (arranged in reverse order) */ +#define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1) +#define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2) + +struct erofs_inode { + erofs_nid_t nid; + + /* atomic flags (including bitlocks) */ + unsigned long flags; + + unsigned char datalayout; + unsigned char inode_isize; + unsigned short xattr_isize; + + unsigned int xattr_shared_count; + unsigned int *xattr_shared_xattrs; + + union { + erofs_blk_t raw_blkaddr; +#ifdef CONFIG_EROFS_FS_ZIP + struct { + unsigned short z_advise; + unsigned char z_algorithmtype[2]; + unsigned char z_logical_clusterbits; + }; +#endif /* CONFIG_EROFS_FS_ZIP */ + }; + /* the corresponding vfs inode */ + struct inode vfs_inode; +}; + +#define EROFS_I(ptr) \ + container_of(ptr, struct erofs_inode, vfs_inode) + +static inline unsigned long erofs_inode_datablocks(struct inode *inode) +{ + /* since i_size cannot be changed */ + return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); +} + +static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit, + unsigned int bits) +{ + + return (value >> bit) & ((1 << bits) - 1); +} + + +static inline unsigned int erofs_inode_version(unsigned int value) +{ + return erofs_bitrange(value, EROFS_I_VERSION_BIT, + EROFS_I_VERSION_BITS); +} + +static inline unsigned int erofs_inode_datalayout(unsigned int value) +{ + return erofs_bitrange(value, EROFS_I_DATALAYOUT_BIT, + EROFS_I_DATALAYOUT_BITS); +} + +extern const struct super_operations erofs_sops; + +extern const struct address_space_operations erofs_raw_access_aops; +extern const struct address_space_operations z_erofs_aops; + +/* + * Logical to physical block mapping + * + * Different with other file systems, it is used for 2 access modes: + * + * 1) RAW access mode: + * + * Users pass a valid (m_lblk, m_lofs -- usually 0) pair, + * and get the valid m_pblk, m_pofs and the longest m_len(in bytes). + * + * Note that m_lblk in the RAW access mode refers to the number of + * the compressed ondisk block rather than the uncompressed + * in-memory block for the compressed file. + * + * m_pofs equals to m_lofs except for the inline data page. + * + * 2) Normal access mode: + * + * If the inode is not compressed, it has no difference with + * the RAW access mode. However, if the inode is compressed, + * users should pass a valid (m_lblk, m_lofs) pair, and get + * the needed m_pblk, m_pofs, m_len to get the compressed data + * and the updated m_lblk, m_lofs which indicates the start + * of the corresponding uncompressed data in the file. + */ +enum { + BH_Zipped = BH_PrivateStart, + BH_FullMapped, +}; + +/* Has a disk mapping */ +#define EROFS_MAP_MAPPED (1 << BH_Mapped) +/* Located in metadata (could be copied from bd_inode) */ +#define EROFS_MAP_META (1 << BH_Meta) +/* The extent has been compressed */ +#define EROFS_MAP_ZIPPED (1 << BH_Zipped) +/* The length of extent is full */ +#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) + +struct erofs_map_blocks { + erofs_off_t m_pa, m_la; + u64 m_plen, m_llen; + + unsigned int m_flags; + + struct page *mpage; +}; + +/* Flags used by erofs_map_blocks_flatmode() */ +#define EROFS_GET_BLOCKS_RAW 0x0001 + +/* zmap.c */ +#ifdef CONFIG_EROFS_FS_ZIP +int z_erofs_fill_inode(struct inode *inode); +int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags); +#else +static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; } +static inline int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + return -EOPNOTSUPP; +} +#endif /* !CONFIG_EROFS_FS_ZIP */ + +/* data.c */ +struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); + +/* inode.c */ +static inline unsigned long erofs_inode_hash(erofs_nid_t nid) +{ +#if BITS_PER_LONG == 32 + return (nid >> 32) ^ (nid & 0xffffffff); +#else + return nid; +#endif +} + +extern const struct inode_operations erofs_generic_iops; +extern const struct inode_operations erofs_symlink_iops; +extern const struct inode_operations erofs_fast_symlink_iops; + +struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); +int erofs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* namei.c */ +extern const struct inode_operations erofs_dir_iops; + +int erofs_namei(struct inode *dir, struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type); + +/* dir.c */ +extern const struct file_operations erofs_dir_fops; + +static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) +{ + int retried = 0; + + while (1) { + void *p = vm_map_ram(pages, count, -1, PAGE_KERNEL); + + /* retry two more times (totally 3 times) */ + if (p || ++retried >= 3) + return p; + vm_unmap_aliases(); + } + return NULL; +} + +/* pcpubuf.c */ +void *erofs_get_pcpubuf(unsigned int requiredpages); +void erofs_put_pcpubuf(void *ptr); +int erofs_pcpubuf_growsize(unsigned int nrpages); +void erofs_pcpubuf_init(void); +void erofs_pcpubuf_exit(void); + +/* utils.c / zdata.c */ +struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); + +#ifdef CONFIG_EROFS_FS_ZIP +int erofs_workgroup_put(struct erofs_workgroup *grp); +struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, + pgoff_t index); +int erofs_register_workgroup(struct super_block *sb, + struct erofs_workgroup *grp); +void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); +void erofs_shrinker_register(struct super_block *sb); +void erofs_shrinker_unregister(struct super_block *sb); +int __init erofs_init_shrinker(void); +void erofs_exit_shrinker(void); +int __init z_erofs_init_zip_subsystem(void); +void z_erofs_exit_zip_subsystem(void); +int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp); +int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page); +int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, + struct z_erofs_lz4_cfgs *lz4, int len); +#else +static inline void erofs_shrinker_register(struct super_block *sb) {} +static inline void erofs_shrinker_unregister(struct super_block *sb) {} +static inline int erofs_init_shrinker(void) { return 0; } +static inline void erofs_exit_shrinker(void) {} +static inline int z_erofs_init_zip_subsystem(void) { return 0; } +static inline void z_erofs_exit_zip_subsystem(void) {} +static inline int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, + struct z_erofs_lz4_cfgs *lz4, int len) +{ + if (lz4 || dsb->u1.lz4_max_distance) { + erofs_err(sb, "lz4 algorithm isn't enabled"); + return -EINVAL; + } + return 0; +} +#endif /* !CONFIG_EROFS_FS_ZIP */ + +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + +#ifndef lru_to_page +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#endif + +#endif /* __EROFS_INTERNAL_H */ diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c new file mode 100644 index 000000000000..c5c0dd9d9033 --- /dev/null +++ b/fs/erofs/namei.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "xattr.h" + +#include + +struct erofs_qstr { + const unsigned char *name; + const unsigned char *end; +}; + +/* based on the end of qn is accurate and it must have the trailing '\0' */ +static inline int erofs_dirnamecmp(const struct erofs_qstr *qn, + const struct erofs_qstr *qd, + unsigned int *matched) +{ + unsigned int i = *matched; + + /* + * on-disk error, let's only BUG_ON in the debugging mode. + * otherwise, it will return 1 to just skip the invalid name + * and go on (in consideration of the lookup performance). + */ + DBG_BUGON(qd->name > qd->end); + + /* qd could not have trailing '\0' */ + /* However it is absolutely safe if < qd->end */ + while (qd->name + i < qd->end && qd->name[i] != '\0') { + if (qn->name[i] != qd->name[i]) { + *matched = i; + return qn->name[i] > qd->name[i] ? 1 : -1; + } + ++i; + } + *matched = i; + /* See comments in __d_alloc on the terminating NUL character */ + return qn->name[i] == '\0' ? 0 : 1; +} + +#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1)) + +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, + u8 *data, + unsigned int dirblksize, + const int ndirents) +{ + int head, back; + unsigned int startprfx, endprfx; + struct erofs_dirent *const de = (struct erofs_dirent *)data; + + /* since the 1st dirent has been evaluated previously */ + head = 1; + back = ndirents - 1; + startprfx = endprfx = 0; + + while (head <= back) { + const int mid = head + (back - head) / 2; + const int nameoff = nameoff_from_disk(de[mid].nameoff, + dirblksize); + unsigned int matched = min(startprfx, endprfx); + struct erofs_qstr dname = { + .name = data + nameoff, + .end = mid >= ndirents - 1 ? + data + dirblksize : + data + nameoff_from_disk(de[mid + 1].nameoff, + dirblksize) + }; + + /* string comparison without already matched prefix */ + int ret = erofs_dirnamecmp(name, &dname, &matched); + + if (!ret) { + return de + mid; + } else if (ret > 0) { + head = mid + 1; + startprfx = matched; + } else { + back = mid - 1; + endprfx = matched; + } + } + + return ERR_PTR(-ENOENT); +} + +static struct page *find_target_block_classic(struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) +{ + unsigned int startprfx, endprfx; + int head, back; + struct address_space *const mapping = dir->i_mapping; + struct page *candidate = ERR_PTR(-ENOENT); + + startprfx = endprfx = 0; + head = 0; + back = erofs_inode_datablocks(dir) - 1; + + while (head <= back) { + const int mid = head + (back - head) / 2; + struct page *page = read_mapping_page(mapping, mid, NULL); + + if (!IS_ERR(page)) { + struct erofs_dirent *de = kmap_atomic(page); + const int nameoff = nameoff_from_disk(de->nameoff, + EROFS_BLKSIZ); + const int ndirents = nameoff / sizeof(*de); + int diff; + unsigned int matched; + struct erofs_qstr dname; + + if (!ndirents) { + kunmap_atomic(de); + put_page(page); + erofs_err(dir->i_sb, + "corrupted dir block %d @ nid %llu", + mid, EROFS_I(dir)->nid); + DBG_BUGON(1); + page = ERR_PTR(-EFSCORRUPTED); + goto out; + } + + matched = min(startprfx, endprfx); + + dname.name = (u8 *)de + nameoff; + if (ndirents == 1) + dname.end = (u8 *)de + EROFS_BLKSIZ; + else + dname.end = (u8 *)de + + nameoff_from_disk(de[1].nameoff, + EROFS_BLKSIZ); + + /* string comparison without already matched prefix */ + diff = erofs_dirnamecmp(name, &dname, &matched); + kunmap_atomic(de); + + if (!diff) { + *_ndirents = 0; + goto out; + } else if (diff > 0) { + head = mid + 1; + startprfx = matched; + + if (!IS_ERR(candidate)) + put_page(candidate); + candidate = page; + *_ndirents = ndirents; + } else { + put_page(page); + + back = mid - 1; + endprfx = matched; + } + continue; + } +out: /* free if the candidate is valid */ + if (!IS_ERR(candidate)) + put_page(candidate); + return page; + } + return candidate; +} + +int erofs_namei(struct inode *dir, + struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type) +{ + int ndirents; + struct page *page; + void *data; + struct erofs_dirent *de; + struct erofs_qstr qn; + + if (!dir->i_size) + return -ENOENT; + + qn.name = name->name; + qn.end = name->name + name->len; + + ndirents = 0; + page = find_target_block_classic(dir, &qn, &ndirents); + + if (IS_ERR(page)) + return PTR_ERR(page); + + data = kmap_atomic(page); + /* the target page has been mapped */ + if (ndirents) + de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); + else + de = (struct erofs_dirent *)data; + + if (!IS_ERR(de)) { + *nid = le64_to_cpu(de->nid); + *d_type = de->file_type; + } + + kunmap_atomic(data); + put_page(page); + + return PTR_ERR_OR_ZERO(de); +} + +/* NOTE: i_mutex is already held by vfs */ +static struct dentry *erofs_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + int err; + erofs_nid_t nid; + unsigned int d_type; + struct inode *inode; + + DBG_BUGON(!d_really_is_negative(dentry)); + /* dentry must be unhashed in lookup, no need to worry about */ + DBG_BUGON(!d_unhashed(dentry)); + + trace_erofs_lookup(dir, dentry, flags); + + /* file name exceeds fs limit */ + if (dentry->d_name.len > EROFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + /* false uninitialized warnings on gcc 4.8.x */ + err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); + + if (err == -ENOENT) { + /* negative dentry */ + inode = NULL; + } else if (err) { + inode = ERR_PTR(err); + } else { + erofs_dbg("%s, %s (nid %llu) found, d_type %u", __func__, + dentry->d_name.name, nid, d_type); + inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR); + } + return d_splice_alias(inode, dentry); +} + +const struct inode_operations erofs_dir_iops = { + .lookup = erofs_lookup, + .getattr = erofs_getattr, + .listxattr = erofs_listxattr, + .get_acl = erofs_get_acl, +}; diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c new file mode 100644 index 000000000000..6c885575128a --- /dev/null +++ b/fs/erofs/pcpubuf.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) Gao Xiang + * + * For low-latency decompression algorithms (e.g. lz4), reserve consecutive + * per-CPU virtual memory (in pages) in advance to store such inplace I/O + * data if inplace decompression is failed (due to unmet inplace margin for + * example). + */ +#include "internal.h" + +struct erofs_pcpubuf { + raw_spinlock_t lock; + void *ptr; + struct page **pages; + unsigned int nrpages; +}; + +static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb); + +void *erofs_get_pcpubuf(unsigned int requiredpages) + __acquires(pcb->lock) +{ + struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb); + + raw_spin_lock(&pcb->lock); + /* check if the per-CPU buffer is too small */ + if (requiredpages > pcb->nrpages) { + raw_spin_unlock(&pcb->lock); + put_cpu_var(erofs_pcb); + /* (for sparse checker) pretend pcb->lock is still taken */ + __acquire(pcb->lock); + return NULL; + } + return pcb->ptr; +} + +void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock) +{ + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id()); + + DBG_BUGON(pcb->ptr != ptr); + raw_spin_unlock(&pcb->lock); + put_cpu_var(erofs_pcb); +} + +/* the next step: support per-CPU page buffers hotplug */ +int erofs_pcpubuf_growsize(unsigned int nrpages) +{ + static DEFINE_MUTEX(pcb_resize_mutex); + static unsigned int pcb_nrpages; + LIST_HEAD(pagepool); + int delta, cpu, ret, i; + + mutex_lock(&pcb_resize_mutex); + delta = nrpages - pcb_nrpages; + ret = 0; + /* avoid shrinking pcpubuf, since no idea how many fses rely on */ + if (delta <= 0) + goto out; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + struct page **pages, **oldpages; + void *ptr, *old_ptr; + + pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + break; + } + + for (i = 0; i < nrpages; ++i) { + pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL); + if (!pages[i]) { + ret = -ENOMEM; + oldpages = pages; + goto free_pagearray; + } + } + ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL); + if (!ptr) { + ret = -ENOMEM; + oldpages = pages; + goto free_pagearray; + } + raw_spin_lock(&pcb->lock); + old_ptr = pcb->ptr; + pcb->ptr = ptr; + oldpages = pcb->pages; + pcb->pages = pages; + i = pcb->nrpages; + pcb->nrpages = nrpages; + raw_spin_unlock(&pcb->lock); + + if (!oldpages) { + DBG_BUGON(old_ptr); + continue; + } + + if (old_ptr) + vunmap(old_ptr); +free_pagearray: + while (i) + list_add(&oldpages[--i]->lru, &pagepool); + kfree(oldpages); + if (ret) + break; + } + pcb_nrpages = nrpages; + put_pages_list(&pagepool); +out: + mutex_unlock(&pcb_resize_mutex); + return ret; +} + +void erofs_pcpubuf_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + + raw_spin_lock_init(&pcb->lock); + } +} + +void erofs_pcpubuf_exit(void) +{ + int cpu, i; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + + if (pcb->ptr) { + vunmap(pcb->ptr); + pcb->ptr = NULL; + } + if (!pcb->pages) + continue; + + for (i = 0; i < pcb->nrpages; ++i) + if (pcb->pages[i]) + put_page(pcb->pages[i]); + kfree(pcb->pages); + pcb->pages = NULL; + } +} diff --git a/fs/erofs/super.c b/fs/erofs/super.c new file mode 100644 index 000000000000..ab21efb5e4ce --- /dev/null +++ b/fs/erofs/super.c @@ -0,0 +1,791 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include +#include +#include +#include +#include +#include +#include "xattr.h" + +#define CREATE_TRACE_POINTS +#include + +static struct kmem_cache *erofs_inode_cachep __read_mostly; + +void _erofs_err(struct super_block *sb, const char *function, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf); + va_end(args); +} + +void _erofs_info(struct super_block *sb, const char *function, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_info("(device %s): %pV", sb->s_id, &vaf); + va_end(args); +} + +static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) +{ + struct erofs_super_block *dsb; + u32 expected_crc, crc; + + dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, + EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL); + if (!dsb) + return -ENOMEM; + + expected_crc = le32_to_cpu(dsb->checksum); + dsb->checksum = 0; + /* to allow for x86 boot sectors and other oddities. */ + crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); + kfree(dsb); + + if (crc != expected_crc) { + erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", + crc, expected_crc); + return -EBADMSG; + } + return 0; +} + +static void erofs_inode_init_once(void *ptr) +{ + struct erofs_inode *vi = ptr; + + inode_init_once(&vi->vfs_inode); +} + +static struct inode *erofs_alloc_inode(struct super_block *sb) +{ + struct erofs_inode *vi = + kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL); + + if (!vi) + return NULL; + + /* zero out everything except vfs_inode */ + memset(vi, 0, offsetof(struct erofs_inode, vfs_inode)); + return &vi->vfs_inode; +} + +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct erofs_inode *vi = EROFS_I(inode); + + /* be careful of RCU symlink path */ + if (inode->i_op == &erofs_fast_symlink_iops) + kfree(inode->i_link); + kfree(vi->xattr_shared_xattrs); + + kmem_cache_free(erofs_inode_cachep, vi); +} + +static void erofs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, i_callback); +} + +static bool check_layout_compatibility(struct super_block *sb, + struct erofs_super_block *dsb) +{ + const unsigned int feature = le32_to_cpu(dsb->feature_incompat); + + EROFS_SB(sb)->feature_incompat = feature; + + /* check if current kernel meets all mandatory requirements */ + if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { + erofs_err(sb, + "unidentified incompatible feature %x, please upgrade kernel version", + feature & ~EROFS_ALL_FEATURE_INCOMPAT); + return false; + } + return true; +} + +#ifdef CONFIG_EROFS_FS_ZIP +/* read variable-sized metadata, offset will be aligned by 4-byte */ +static void *erofs_read_metadata(struct super_block *sb, struct page **pagep, + erofs_off_t *offset, int *lengthp) +{ + struct page *page = *pagep; + u8 *buffer, *ptr; + int len, i, cnt; + erofs_blk_t blk; + + *offset = round_up(*offset, 4); + blk = erofs_blknr(*offset); + + if (!page || page->index != blk) { + if (page) { + unlock_page(page); + put_page(page); + } + page = erofs_get_meta_page(sb, blk); + if (IS_ERR(page)) + goto err_nullpage; + } + + ptr = kmap(page); + len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]); + if (!len) + len = U16_MAX + 1; + buffer = kmalloc(len, GFP_KERNEL); + if (!buffer) { + buffer = ERR_PTR(-ENOMEM); + goto out; + } + *offset += sizeof(__le16); + *lengthp = len; + + for (i = 0; i < len; i += cnt) { + cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i); + blk = erofs_blknr(*offset); + + if (!page || page->index != blk) { + if (page) { + kunmap(page); + unlock_page(page); + put_page(page); + } + page = erofs_get_meta_page(sb, blk); + if (IS_ERR(page)) { + kfree(buffer); + goto err_nullpage; + } + ptr = kmap(page); + } + memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt); + *offset += cnt; + } +out: + kunmap(page); + *pagep = page; + return buffer; +err_nullpage: + *pagep = NULL; + return page; +} + +static int erofs_load_compr_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi; + struct page *page; + unsigned int algs, alg; + erofs_off_t offset; + int size, ret; + + sbi = EROFS_SB(sb); + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + + if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { + erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", + sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); + return -EINVAL; + } + + offset = EROFS_SUPER_OFFSET + sbi->sb_size; + page = NULL; + alg = 0; + ret = 0; + + for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + void *data; + + if (!(algs & 1)) + continue; + + data = erofs_read_metadata(sb, &page, &offset, &size); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto err; + } + + switch (alg) { + case Z_EROFS_COMPRESSION_LZ4: + ret = z_erofs_load_lz4_config(sb, dsb, data, size); + break; + default: + DBG_BUGON(1); + ret = -EFAULT; + } + kfree(data); + if (ret) + goto err; + } +err: + if (page) { + unlock_page(page); + put_page(page); + } + return ret; +} +#else +static int erofs_load_compr_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) +{ + if (dsb->u1.available_compr_algs) { + erofs_err(sb, "try to load compressed fs when compression is disabled"); + return -EINVAL; + } + return 0; +} +#endif + +static int erofs_read_superblock(struct super_block *sb) +{ + struct erofs_sb_info *sbi; + struct page *page; + struct erofs_super_block *dsb; + unsigned int blkszbits; + void *data; + int ret; + + page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); + if (IS_ERR(page)) { + erofs_err(sb, "cannot read erofs superblock"); + return PTR_ERR(page); + } + + sbi = EROFS_SB(sb); + + data = kmap(page); + dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); + + ret = -EINVAL; + if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { + erofs_err(sb, "cannot find valid erofs superblock"); + goto out; + } + + sbi->feature_compat = le32_to_cpu(dsb->feature_compat); + if (erofs_sb_has_sb_chksum(sbi)) { + ret = erofs_superblock_csum_verify(sb, data); + if (ret) + goto out; + } + + ret = -EINVAL; + blkszbits = dsb->blkszbits; + /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ + if (blkszbits != LOG_BLOCK_SIZE) { + erofs_err(sb, "blkszbits %u isn't supported on this platform", + blkszbits); + goto out; + } + + if (!check_layout_compatibility(sb, dsb)) + goto out; + + sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; + if (sbi->sb_size > EROFS_BLKSIZ) { + erofs_err(sb, "invalid sb_extslots %u (more than a fs block)", + sbi->sb_size); + goto out; + } + sbi->blocks = le32_to_cpu(dsb->blocks); + sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); +#ifdef CONFIG_EROFS_FS_XATTR + sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); +#endif + sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); + sbi->root_nid = le16_to_cpu(dsb->root_nid); + sbi->inos = le64_to_cpu(dsb->inos); + + sbi->build_time = le64_to_cpu(dsb->build_time); + sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); + + memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid)); + + ret = strscpy(sbi->volume_name, dsb->volume_name, + sizeof(dsb->volume_name)); + if (ret < 0) { /* -E2BIG */ + erofs_err(sb, "bad volume name without NIL terminator"); + ret = -EFSCORRUPTED; + goto out; + } + + /* parse on-disk compression configurations */ + if (erofs_sb_has_compr_cfgs(sbi)) + ret = erofs_load_compr_cfgs(sb, dsb); + else + ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); +out: + kunmap(page); + put_page(page); + return ret; +} + +#ifdef CONFIG_EROFS_FS_ZIP +static int erofs_build_cache_strategy(struct super_block *sb, + substring_t *args) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + const char *cs = match_strdup(args); + int err = 0; + + if (!cs) { + erofs_err(sb, "Not enough memory to store cache strategy"); + return -ENOMEM; + } + + if (!strcmp(cs, "disabled")) { + sbi->cache_strategy = EROFS_ZIP_CACHE_DISABLED; + } else if (!strcmp(cs, "readahead")) { + sbi->cache_strategy = EROFS_ZIP_CACHE_READAHEAD; + } else if (!strcmp(cs, "readaround")) { + sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND; + } else { + erofs_err(sb, "Unrecognized cache strategy \"%s\"", cs); + err = -EINVAL; + } + kfree(cs); + return err; +} +#else +static int erofs_build_cache_strategy(struct super_block *sb, + substring_t *args) +{ + erofs_info(sb, "EROFS compression is disabled, so cache strategy is ignored"); + return 0; +} +#endif + +/* set up default EROFS parameters */ +static void erofs_default_options(struct erofs_sb_info *sbi) +{ +#ifdef CONFIG_EROFS_FS_ZIP + sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND; + sbi->max_sync_decompress_pages = 3; +#endif +#ifdef CONFIG_EROFS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_EROFS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif +} + +enum { + Opt_user_xattr, + Opt_nouser_xattr, + Opt_acl, + Opt_noacl, + Opt_cache_strategy, + Opt_err +}; + +static match_table_t erofs_tokens = { + {Opt_user_xattr, "user_xattr"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_cache_strategy, "cache_strategy=%s"}, + {Opt_err, NULL} +}; + +static int erofs_parse_options(struct super_block *sb, char *options) +{ + substring_t args[MAX_OPT_ARGS]; + char *p; + int err; + + if (!options) + return 0; + + while ((p = strsep(&options, ","))) { + int token; + + if (!*p) + continue; + + args[0].to = args[0].from = NULL; + token = match_token(p, erofs_tokens, args); + + switch (token) { +#ifdef CONFIG_EROFS_FS_XATTR + case Opt_user_xattr: + set_opt(EROFS_SB(sb), XATTR_USER); + break; + case Opt_nouser_xattr: + clear_opt(EROFS_SB(sb), XATTR_USER); + break; +#else + case Opt_user_xattr: + erofs_info(sb, "user_xattr options not supported"); + break; + case Opt_nouser_xattr: + erofs_info(sb, "nouser_xattr options not supported"); + break; +#endif +#ifdef CONFIG_EROFS_FS_POSIX_ACL + case Opt_acl: + set_opt(EROFS_SB(sb), POSIX_ACL); + break; + case Opt_noacl: + clear_opt(EROFS_SB(sb), POSIX_ACL); + break; +#else + case Opt_acl: + erofs_info(sb, "acl options not supported"); + break; + case Opt_noacl: + erofs_info(sb, "noacl options not supported"); + break; +#endif + case Opt_cache_strategy: + err = erofs_build_cache_strategy(sb, args); + if (err) + return err; + break; + default: + erofs_err(sb, "Unrecognized mount option \"%s\" or missing value", p); + return -EINVAL; + } + } + return 0; +} + +#ifdef CONFIG_EROFS_FS_ZIP +static const struct address_space_operations managed_cache_aops; + +static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) +{ + int ret = 1; /* 0 - busy */ + struct address_space *const mapping = page->mapping; + + DBG_BUGON(!PageLocked(page)); + DBG_BUGON(mapping->a_ops != &managed_cache_aops); + + if (PagePrivate(page)) + ret = erofs_try_to_free_cached_page(mapping, page); + + return ret; +} + +static void erofs_managed_cache_invalidatepage(struct page *page, + unsigned int offset, + unsigned int length) +{ + const unsigned int stop = length + offset; + + DBG_BUGON(!PageLocked(page)); + + /* Check for potential overflow in debug mode */ + DBG_BUGON(stop > PAGE_SIZE || stop < length); + + if (offset == 0 && stop == PAGE_SIZE) + while (!erofs_managed_cache_releasepage(page, GFP_NOFS)) + cond_resched(); +} + +static const struct address_space_operations managed_cache_aops = { + .releasepage = erofs_managed_cache_releasepage, + .invalidatepage = erofs_managed_cache_invalidatepage, +}; + +static int erofs_init_managed_cache(struct super_block *sb) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + struct inode *const inode = new_inode(sb); + + if (!inode) + return -ENOMEM; + + set_nlink(inode, 1); + inode->i_size = OFFSET_MAX; + + inode->i_mapping->a_ops = &managed_cache_aops; + mapping_set_gfp_mask(inode->i_mapping, + GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); + sbi->managed_cache = inode; + return 0; +} +#else +static int erofs_init_managed_cache(struct super_block *sb) { return 0; } +#endif + +static int erofs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode; + struct erofs_sb_info *sbi; + int err; + + sb->s_magic = EROFS_SUPER_MAGIC; + + if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { + erofs_err(sb, "failed to set erofs blksize"); + return -EINVAL; + } + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sb->s_fs_info = sbi; + err = erofs_read_superblock(sb); + if (err) + return err; + + sb->s_flags |= MS_RDONLY | MS_NOATIME; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_time_gran = 1; + + sb->s_op = &erofs_sops; + sb->s_xattr = erofs_xattr_handlers; + + /* set erofs default mount options */ + erofs_default_options(sbi); + + err = erofs_parse_options(sb, data); + if (err) + return err; + + if (test_opt(sbi, POSIX_ACL)) + sb->s_flags |= MS_POSIXACL; + else + sb->s_flags &= ~MS_POSIXACL; + +#ifdef CONFIG_EROFS_FS_ZIP + INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC); + spin_lock_init(&sbi->tree_lock); +#endif + + /* get the root inode */ + inode = erofs_iget(sb, ROOT_NID(sbi), true); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (!S_ISDIR(inode->i_mode)) { + erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)", + ROOT_NID(sbi), inode->i_mode); + iput(inode); + return -EINVAL; + } + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + erofs_shrinker_register(sb); + /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */ + err = erofs_init_managed_cache(sb); + if (err) + return err; + + erofs_info(sb, "mounted with opts: %s, root inode @ nid %llu.", + (char *)data, ROOT_NID(sbi)); + return 0; +} + +static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super); +} + +/* + * could be triggered after deactivate_locked_super() + * is called, thus including umount and failed to initialize. + */ +static void erofs_kill_sb(struct super_block *sb) +{ + struct erofs_sb_info *sbi; + + WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); + + kill_block_super(sb); + + sbi = EROFS_SB(sb); + if (!sbi) + return; + kfree(sbi); + sb->s_fs_info = NULL; +} + +/* called when ->s_root is non-NULL */ +static void erofs_put_super(struct super_block *sb) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + + DBG_BUGON(!sbi); + + erofs_shrinker_unregister(sb); +#ifdef CONFIG_EROFS_FS_ZIP + iput(sbi->managed_cache); + sbi->managed_cache = NULL; +#endif +} + +static struct file_system_type erofs_fs_type = { + .owner = THIS_MODULE, + .name = "erofs", + .mount = erofs_mount, + .kill_sb = erofs_kill_sb, + .fs_flags = FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("erofs"); + +static int __init erofs_module_init(void) +{ + int err; + + erofs_check_ondisk_layout_definitions(); + + erofs_inode_cachep = kmem_cache_create("erofs_inode", + sizeof(struct erofs_inode), 0, + SLAB_RECLAIM_ACCOUNT, + erofs_inode_init_once); + if (!erofs_inode_cachep) { + err = -ENOMEM; + goto icache_err; + } + + err = erofs_init_shrinker(); + if (err) + goto shrinker_err; + + erofs_pcpubuf_init(); + err = z_erofs_init_zip_subsystem(); + if (err) + goto zip_err; + + err = register_filesystem(&erofs_fs_type); + if (err) + goto fs_err; + + return 0; + +fs_err: + z_erofs_exit_zip_subsystem(); +zip_err: + erofs_exit_shrinker(); +shrinker_err: + kmem_cache_destroy(erofs_inode_cachep); +icache_err: + return err; +} + +static void __exit erofs_module_exit(void) +{ + unregister_filesystem(&erofs_fs_type); + z_erofs_exit_zip_subsystem(); + erofs_exit_shrinker(); + + /* Ensure all RCU free inodes are safe before cache is destroyed. */ + rcu_barrier(); + kmem_cache_destroy(erofs_inode_cachep); + erofs_pcpubuf_exit(); +} + +/* get filesystem statistics */ +static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct erofs_sb_info *sbi = EROFS_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + + buf->f_type = sb->s_magic; + buf->f_bsize = EROFS_BLKSIZ; + buf->f_blocks = sbi->blocks; + buf->f_bfree = buf->f_bavail = 0; + + buf->f_files = ULLONG_MAX; + buf->f_ffree = ULLONG_MAX - sbi->inos; + + buf->f_namelen = EROFS_NAME_LEN; + + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + return 0; +} + +static int erofs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb); + +#ifdef CONFIG_EROFS_FS_XATTR + if (test_opt(sbi, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + else + seq_puts(seq, ",nouser_xattr"); +#endif +#ifdef CONFIG_EROFS_FS_POSIX_ACL + if (test_opt(sbi, POSIX_ACL)) + seq_puts(seq, ",acl"); + else + seq_puts(seq, ",noacl"); +#endif +#ifdef CONFIG_EROFS_FS_ZIP + if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED) { + seq_puts(seq, ",cache_strategy=disabled"); + } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) { + seq_puts(seq, ",cache_strategy=readahead"); + } else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND) { + seq_puts(seq, ",cache_strategy=readaround"); + } +#endif + return 0; +} + +static int erofs_remount(struct super_block *sb, int *flags, char *data) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + unsigned int org_mnt_opt = sbi->mount_opt; + int err; + + DBG_BUGON(!(sb->s_flags & MS_RDONLY)); + err = erofs_parse_options(sb, data); + if (err) + goto out; + + if (test_opt(sbi, POSIX_ACL)) + sb->s_flags |= MS_POSIXACL; + else + sb->s_flags &= ~MS_POSIXACL; + + *flags |= MS_RDONLY; + return 0; +out: + sbi->mount_opt = org_mnt_opt; + return err; +} + +const struct super_operations erofs_sops = { + .put_super = erofs_put_super, + .alloc_inode = erofs_alloc_inode, + .destroy_inode = erofs_destroy_inode, + .statfs = erofs_statfs, + .show_options = erofs_show_options, + .remount_fs = erofs_remount, +}; + +module_init(erofs_module_init); +module_exit(erofs_module_exit); + +MODULE_DESCRIPTION("Enhanced ROM File System"); +MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc."); +MODULE_LICENSE("GPL"); diff --git a/fs/erofs/tagptr.h b/fs/erofs/tagptr.h new file mode 100644 index 000000000000..64ceb7270b5c --- /dev/null +++ b/fs/erofs/tagptr.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * A tagged pointer implementation + */ +#ifndef __EROFS_FS_TAGPTR_H +#define __EROFS_FS_TAGPTR_H + +#include +#include + +/* + * the name of tagged pointer types are tagptr{1, 2, 3...}_t + * avoid directly using the internal structs __tagptr{1, 2, 3...} + */ +#define __MAKE_TAGPTR(n) \ +typedef struct __tagptr##n { \ + uintptr_t v; \ +} tagptr##n##_t; + +__MAKE_TAGPTR(1) +__MAKE_TAGPTR(2) +__MAKE_TAGPTR(3) +__MAKE_TAGPTR(4) + +#undef __MAKE_TAGPTR + +extern void __compiletime_error("bad tagptr tags") + __bad_tagptr_tags(void); + +extern void __compiletime_error("bad tagptr type") + __bad_tagptr_type(void); + +/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */ +#define __tagptr_mask_1(ptr, n) \ + __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \ + (1UL << (n)) - 1 : + +#define __tagptr_mask(ptr) (\ + __tagptr_mask_1(ptr, 1) ( \ + __tagptr_mask_1(ptr, 2) ( \ + __tagptr_mask_1(ptr, 3) ( \ + __tagptr_mask_1(ptr, 4) ( \ + __bad_tagptr_type(), 0))))) + +/* generate a tagged pointer from a raw value */ +#define tagptr_init(type, val) \ + ((typeof(type)){ .v = (uintptr_t)(val) }) + +/* + * directly cast a tagged pointer to the native pointer type, which + * could be used for backward compatibility of existing code. + */ +#define tagptr_cast_ptr(tptr) ((void *)(tptr).v) + +/* encode tagged pointers */ +#define tagptr_fold(type, ptr, _tags) ({ \ + const typeof(_tags) tags = (_tags); \ + if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \ + __bad_tagptr_tags(); \ +tagptr_init(type, (uintptr_t)(ptr) | tags); }) + +/* decode tagged pointers */ +#define tagptr_unfold_ptr(tptr) \ + ((void *)((tptr).v & ~__tagptr_mask(tptr))) + +#define tagptr_unfold_tags(tptr) \ + ((tptr).v & __tagptr_mask(tptr)) + +/* operations for the tagger pointer */ +#define tagptr_eq(_tptr1, _tptr2) ({ \ + typeof(_tptr1) tptr1 = (_tptr1); \ + typeof(_tptr2) tptr2 = (_tptr2); \ + (void)(&tptr1 == &tptr2); \ +(tptr1).v == (tptr2).v; }) + +/* lock-free CAS operation */ +#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + typeof(_o) o = (_o); \ + typeof(_n) n = (_n); \ + (void)(&o == &n); \ + (void)(&o == ptptr); \ +tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); }) + +/* wrap WRITE_ONCE if atomic update is needed */ +#define tagptr_replace_tags(_ptptr, tags) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \ +*ptptr; }) + +#define tagptr_set_tags(_ptptr, _tags) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + const typeof(_tags) tags = (_tags); \ + if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \ + __bad_tagptr_tags(); \ + ptptr->v |= tags; \ +*ptptr; }) + +#define tagptr_clear_tags(_ptptr, _tags) ({ \ + typeof(_ptptr) ptptr = (_ptptr); \ + const typeof(_tags) tags = (_tags); \ + if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \ + __bad_tagptr_tags(); \ + ptptr->v &= ~tags; \ +*ptptr; }) + +#endif /* __EROFS_FS_TAGPTR_H */ diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c new file mode 100644 index 000000000000..6c6e6d3df619 --- /dev/null +++ b/fs/erofs/utils.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "internal.h" +#include + +struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) +{ + struct page *page; + + if (!list_empty(pool)) { + page = lru_to_page(pool); + DBG_BUGON(page_ref_count(page) != 1); + list_del(&page->lru); + } else { + page = alloc_page(gfp); + } + return page; +} + +#ifdef CONFIG_EROFS_FS_ZIP +/* global shrink count (for all mounted EROFS instances) */ +static atomic_long_t erofs_global_shrink_cnt; + +#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) +#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) + +static int erofs_workgroup_get(struct erofs_workgroup *grp) +{ + int o; + +repeat: + o = erofs_wait_on_workgroup_freezed(grp); + if (o <= 0) + return -1; + + if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o) + goto repeat; + + /* decrease refcount paired by erofs_workgroup_put */ + if (o == 1) + atomic_long_dec(&erofs_global_shrink_cnt); + return 0; +} + +struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, + pgoff_t index) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_workgroup *grp; + +repeat: + rcu_read_lock(); + grp = radix_tree_lookup(&sbi->workstn_tree, index); + if (grp) { + if (erofs_workgroup_get(grp)) { + /* prefer to relax rcu read side */ + rcu_read_unlock(); + goto repeat; + } + + DBG_BUGON(index != grp->index); + } + rcu_read_unlock(); + return grp; +} + +int erofs_register_workgroup(struct super_block *sb, + struct erofs_workgroup *grp) +{ + struct erofs_sb_info *sbi; + int err; + + /* grp shouldn't be broken or used before */ + if (atomic_read(&grp->refcount) != 1) { + DBG_BUGON(1); + return -EINVAL; + } + + err = radix_tree_preload(GFP_NOFS); + if (err) + return err; + + sbi = EROFS_SB(sb); + spin_lock(&sbi->tree_lock); + + /* + * Bump up reference count before making this workgroup + * visible to other users in order to avoid potential UAF + * without serialized by workstn_lock. + */ + __erofs_workgroup_get(grp); + + err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp); + if (err) + /* + * it's safe to decrease since the workgroup isn't visible + * and refcount >= 2 (cannot be freezed). + */ + __erofs_workgroup_put(grp); + + spin_unlock(&sbi->tree_lock); + radix_tree_preload_end(); + return err; +} + +static void __erofs_workgroup_free(struct erofs_workgroup *grp) +{ + atomic_long_dec(&erofs_global_shrink_cnt); + erofs_workgroup_free_rcu(grp); +} + +int erofs_workgroup_put(struct erofs_workgroup *grp) +{ + int count = atomic_dec_return(&grp->refcount); + + if (count == 1) + atomic_long_inc(&erofs_global_shrink_cnt); + else if (!count) + __erofs_workgroup_free(grp); + return count; +} + +static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) +{ + /* + * If managed cache is on, refcount of workgroups + * themselves could be < 0 (freezed). In other words, + * there is no guarantee that all refcounts > 0. + */ + if (!erofs_workgroup_try_to_freeze(grp, 1)) + return false; + + /* + * Note that all cached pages should be unattached + * before deleted from the radix tree. Otherwise some + * cached pages could be still attached to the orphan + * old workgroup when the new one is available in the tree. + */ + if (erofs_try_to_free_all_cached_pages(sbi, grp)) { + erofs_workgroup_unfreeze(grp, 1); + return false; + } + + /* + * It's impossible to fail after the workgroup is freezed, + * however in order to avoid some race conditions, add a + * DBG_BUGON to observe this in advance. + */ + DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp); + + /* last refcount should be connected with its managed pslot. */ + erofs_workgroup_unfreeze(grp, 0); + __erofs_workgroup_free(grp); + return true; +} + +static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, + unsigned long nr_shrink) +{ + pgoff_t first_index = 0; + void *batch[PAGEVEC_SIZE]; + unsigned int freed = 0; + + int i, found; +repeat: + spin_lock(&sbi->tree_lock); + + found = radix_tree_gang_lookup(&sbi->workstn_tree, + batch, first_index, PAGEVEC_SIZE); + + for (i = 0; i < found; ++i) { + struct erofs_workgroup *grp = batch[i]; + + first_index = grp->index + 1; + + /* try to shrink each valid workgroup */ + if (!erofs_try_to_release_workgroup(sbi, grp)) + continue; + + ++freed; + if (!--nr_shrink) + break; + } + spin_unlock(&sbi->tree_lock); + + if (i && nr_shrink) + goto repeat; + return freed; +} + +/* protected by 'erofs_sb_list_lock' */ +static unsigned int shrinker_run_no; + +/* protects the mounted 'erofs_sb_list' */ +static DEFINE_SPINLOCK(erofs_sb_list_lock); +static LIST_HEAD(erofs_sb_list); + +void erofs_shrinker_register(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + mutex_init(&sbi->umount_mutex); + + spin_lock(&erofs_sb_list_lock); + list_add(&sbi->list, &erofs_sb_list); + spin_unlock(&erofs_sb_list_lock); +} + +void erofs_shrinker_unregister(struct super_block *sb) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + + mutex_lock(&sbi->umount_mutex); + /* clean up all remaining workgroups in memory */ + erofs_shrink_workstation(sbi, ~0UL); + + spin_lock(&erofs_sb_list_lock); + list_del(&sbi->list); + spin_unlock(&erofs_sb_list_lock); + mutex_unlock(&sbi->umount_mutex); +} + +static unsigned long erofs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return atomic_long_read(&erofs_global_shrink_cnt); +} + +static unsigned long erofs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct erofs_sb_info *sbi; + struct list_head *p; + + unsigned long nr = sc->nr_to_scan; + unsigned int run_no; + unsigned long freed = 0; + + spin_lock(&erofs_sb_list_lock); + do { + run_no = ++shrinker_run_no; + } while (run_no == 0); + + /* Iterate over all mounted superblocks and try to shrink them */ + p = erofs_sb_list.next; + while (p != &erofs_sb_list) { + sbi = list_entry(p, struct erofs_sb_info, list); + + /* + * We move the ones we do to the end of the list, so we stop + * when we see one we have already done. + */ + if (sbi->shrinker_run_no == run_no) + break; + + if (!mutex_trylock(&sbi->umount_mutex)) { + p = p->next; + continue; + } + + spin_unlock(&erofs_sb_list_lock); + sbi->shrinker_run_no = run_no; + + freed += erofs_shrink_workstation(sbi, nr - freed); + + spin_lock(&erofs_sb_list_lock); + /* Get the next list element before we move this one */ + p = p->next; + + /* + * Move this one to the end of the list to provide some + * fairness. + */ + list_move_tail(&sbi->list, &erofs_sb_list); + mutex_unlock(&sbi->umount_mutex); + + if (freed >= nr) + break; + } + spin_unlock(&erofs_sb_list_lock); + return freed; +} + +static struct shrinker erofs_shrinker_info = { + .scan_objects = erofs_shrink_scan, + .count_objects = erofs_shrink_count, + .seeks = DEFAULT_SEEKS, +}; + +int __init erofs_init_shrinker(void) +{ + return register_shrinker(&erofs_shrinker_info); +} + +void erofs_exit_shrinker(void) +{ + unregister_shrinker(&erofs_shrinker_info); +} +#endif /* !CONFIG_EROFS_FS_ZIP */ diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c new file mode 100644 index 000000000000..869c7e184304 --- /dev/null +++ b/fs/erofs/xattr.c @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include +#include "xattr.h" + +struct xattr_iter { + struct super_block *sb; + struct page *page; + void *kaddr; + + erofs_blk_t blkaddr; + unsigned int ofs; +}; + +static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) +{ + /* the only user of kunmap() is 'init_inode_xattrs' */ + if (!atomic) + kunmap(it->page); + else + kunmap_atomic(it->kaddr); + + unlock_page(it->page); + put_page(it->page); +} + +static inline void xattr_iter_end_final(struct xattr_iter *it) +{ + if (!it->page) + return; + + xattr_iter_end(it, true); +} + +static int init_inode_xattrs(struct inode *inode) +{ + struct erofs_inode *const vi = EROFS_I(inode); + struct xattr_iter it; + unsigned int i; + struct erofs_xattr_ibody_header *ih; + struct super_block *sb; + struct erofs_sb_info *sbi; + bool atomic_map; + int ret = 0; + + /* the most case is that xattrs of this inode are initialized. */ + if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) { + /* + * paired with smp_mb() at the end of the function to ensure + * fields will only be observed after the bit is set. + */ + smp_mb(); + return 0; + } + + if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_XATTR_BIT, TASK_KILLABLE)) + return -ERESTARTSYS; + + /* someone has initialized xattrs for us? */ + if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) + goto out_unlock; + + /* + * bypass all xattr operations if ->xattr_isize is not greater than + * sizeof(struct erofs_xattr_ibody_header), in detail: + * 1) it is not enough to contain erofs_xattr_ibody_header then + * ->xattr_isize should be 0 (it means no xattr); + * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk + * undefined right now (maybe use later with some new sb feature). + */ + if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) { + erofs_err(inode->i_sb, + "xattr_isize %d of nid %llu is not supported yet", + vi->xattr_isize, vi->nid); + ret = -EOPNOTSUPP; + goto out_unlock; + } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) { + if (vi->xattr_isize) { + erofs_err(inode->i_sb, + "bogus xattr ibody @ nid %llu", vi->nid); + DBG_BUGON(1); + ret = -EFSCORRUPTED; + goto out_unlock; /* xattr ondisk layout error */ + } + ret = -ENOATTR; + goto out_unlock; + } + + sb = inode->i_sb; + sbi = EROFS_SB(sb); + it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); + it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); + + it.page = erofs_get_meta_page(sb, it.blkaddr); + if (IS_ERR(it.page)) { + ret = PTR_ERR(it.page); + goto out_unlock; + } + + /* read in shared xattr array (non-atomic, see kmalloc below) */ + it.kaddr = kmap(it.page); + atomic_map = false; + + ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); + + vi->xattr_shared_count = ih->h_shared_count; + vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count, + sizeof(uint), GFP_KERNEL); + if (!vi->xattr_shared_xattrs) { + xattr_iter_end(&it, atomic_map); + ret = -ENOMEM; + goto out_unlock; + } + + /* let's skip ibody header */ + it.ofs += sizeof(struct erofs_xattr_ibody_header); + + for (i = 0; i < vi->xattr_shared_count; ++i) { + if (it.ofs >= EROFS_BLKSIZ) { + /* cannot be unaligned */ + DBG_BUGON(it.ofs != EROFS_BLKSIZ); + xattr_iter_end(&it, atomic_map); + + it.page = erofs_get_meta_page(sb, ++it.blkaddr); + if (IS_ERR(it.page)) { + kfree(vi->xattr_shared_xattrs); + vi->xattr_shared_xattrs = NULL; + ret = PTR_ERR(it.page); + goto out_unlock; + } + + it.kaddr = kmap_atomic(it.page); + atomic_map = true; + it.ofs = 0; + } + vi->xattr_shared_xattrs[i] = + le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs)); + it.ofs += sizeof(__le32); + } + xattr_iter_end(&it, atomic_map); + + /* paired with smp_mb() at the beginning of the function. */ + smp_mb(); + set_bit(EROFS_I_EA_INITED_BIT, &vi->flags); + +out_unlock: + clear_bit_unlock(EROFS_I_BL_XATTR_BIT, &vi->flags); + /* See wake_up_bit() for which memory barrier you need to use. */ + smp_mb__after_atomic(); + wake_up_bit(&vi->flags, EROFS_I_BL_XATTR_BIT); + return ret; +} + +/* + * the general idea for these return values is + * if 0 is returned, go on processing the current xattr; + * 1 (> 0) is returned, skip this round to process the next xattr; + * -err (< 0) is returned, an error (maybe ENOXATTR) occurred + * and need to be handled + */ +struct xattr_iter_handlers { + int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry); + int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf, + unsigned int len); + int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz); + void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf, + unsigned int len); +}; + +static inline int xattr_iter_fixup(struct xattr_iter *it) +{ + if (it->ofs < EROFS_BLKSIZ) + return 0; + + xattr_iter_end(it, true); + + it->blkaddr += erofs_blknr(it->ofs); + + it->page = erofs_get_meta_page(it->sb, it->blkaddr); + if (IS_ERR(it->page)) { + int err = PTR_ERR(it->page); + + it->page = NULL; + return err; + } + + it->kaddr = kmap_atomic(it->page); + it->ofs = erofs_blkoff(it->ofs); + return 0; +} + +static int inline_xattr_iter_begin(struct xattr_iter *it, + struct inode *inode) +{ + struct erofs_inode *const vi = EROFS_I(inode); + struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb); + unsigned int xattr_header_sz, inline_xattr_ofs; + + xattr_header_sz = inlinexattr_header_size(inode); + if (xattr_header_sz >= vi->xattr_isize) { + DBG_BUGON(xattr_header_sz > vi->xattr_isize); + return -ENOATTR; + } + + inline_xattr_ofs = vi->inode_isize + xattr_header_sz; + + it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); + it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); + + it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr); + if (IS_ERR(it->page)) + return PTR_ERR(it->page); + + it->kaddr = kmap_atomic(it->page); + return vi->xattr_isize - xattr_header_sz; +} + +/* + * Regardless of success or failure, `xattr_foreach' will end up with + * `ofs' pointing to the next xattr item rather than an arbitrary position. + */ +static int xattr_foreach(struct xattr_iter *it, + const struct xattr_iter_handlers *op, + unsigned int *tlimit) +{ + struct erofs_xattr_entry entry; + unsigned int value_sz, processed, slice; + int err; + + /* 0. fixup blkaddr, ofs, ipage */ + err = xattr_iter_fixup(it); + if (err) + return err; + + /* + * 1. read xattr entry to the memory, + * since we do EROFS_XATTR_ALIGN + * therefore entry should be in the page + */ + entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs); + if (tlimit) { + unsigned int entry_sz = erofs_xattr_entry_size(&entry); + + /* xattr on-disk corruption: xattr entry beyond xattr_isize */ + if (*tlimit < entry_sz) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + *tlimit -= entry_sz; + } + + it->ofs += sizeof(struct erofs_xattr_entry); + value_sz = le16_to_cpu(entry.e_value_size); + + /* handle entry */ + err = op->entry(it, &entry); + if (err) { + it->ofs += entry.e_name_len + value_sz; + goto out; + } + + /* 2. handle xattr name (ofs will finally be at the end of name) */ + processed = 0; + + while (processed < entry.e_name_len) { + if (it->ofs >= EROFS_BLKSIZ) { + DBG_BUGON(it->ofs > EROFS_BLKSIZ); + + err = xattr_iter_fixup(it); + if (err) + goto out; + it->ofs = 0; + } + + slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + entry.e_name_len - processed); + + /* handle name */ + err = op->name(it, processed, it->kaddr + it->ofs, slice); + if (err) { + it->ofs += entry.e_name_len - processed + value_sz; + goto out; + } + + it->ofs += slice; + processed += slice; + } + + /* 3. handle xattr value */ + processed = 0; + + if (op->alloc_buffer) { + err = op->alloc_buffer(it, value_sz); + if (err) { + it->ofs += value_sz; + goto out; + } + } + + while (processed < value_sz) { + if (it->ofs >= EROFS_BLKSIZ) { + DBG_BUGON(it->ofs > EROFS_BLKSIZ); + + err = xattr_iter_fixup(it); + if (err) + goto out; + it->ofs = 0; + } + + slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + value_sz - processed); + op->value(it, processed, it->kaddr + it->ofs, slice); + it->ofs += slice; + processed += slice; + } + +out: + /* xattrs should be 4-byte aligned (on-disk constraint) */ + it->ofs = EROFS_XATTR_ALIGN(it->ofs); + return err < 0 ? err : 0; +} + +struct getxattr_iter { + struct xattr_iter it; + + char *buffer; + int buffer_size, index; + struct qstr name; +}; + +static int xattr_entrymatch(struct xattr_iter *_it, + struct erofs_xattr_entry *entry) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + + return (it->index != entry->e_name_index || + it->name.len != entry->e_name_len) ? -ENOATTR : 0; +} + +static int xattr_namematch(struct xattr_iter *_it, + unsigned int processed, char *buf, unsigned int len) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + + return memcmp(buf, it->name.name + processed, len) ? -ENOATTR : 0; +} + +static int xattr_checkbuffer(struct xattr_iter *_it, + unsigned int value_sz) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + int err = it->buffer_size < value_sz ? -ERANGE : 0; + + it->buffer_size = value_sz; + return !it->buffer ? 1 : err; +} + +static void xattr_copyvalue(struct xattr_iter *_it, + unsigned int processed, + char *buf, unsigned int len) +{ + struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); + + memcpy(it->buffer + processed, buf, len); +} + +static const struct xattr_iter_handlers find_xattr_handlers = { + .entry = xattr_entrymatch, + .name = xattr_namematch, + .alloc_buffer = xattr_checkbuffer, + .value = xattr_copyvalue +}; + +static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) +{ + int ret; + unsigned int remaining; + + ret = inline_xattr_iter_begin(&it->it, inode); + if (ret < 0) + return ret; + + remaining = ret; + while (remaining) { + ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining); + if (ret != -ENOATTR) + break; + } + xattr_iter_end_final(&it->it); + + return ret ? ret : it->buffer_size; +} + +static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) +{ + struct erofs_inode *const vi = EROFS_I(inode); + struct super_block *const sb = inode->i_sb; + struct erofs_sb_info *const sbi = EROFS_SB(sb); + unsigned int i; + int ret = -ENOATTR; + + for (i = 0; i < vi->xattr_shared_count; ++i) { + erofs_blk_t blkaddr = + xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); + + it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); + + if (!i || blkaddr != it->it.blkaddr) { + if (i) + xattr_iter_end(&it->it, true); + + it->it.page = erofs_get_meta_page(sb, blkaddr); + if (IS_ERR(it->it.page)) + return PTR_ERR(it->it.page); + + it->it.kaddr = kmap_atomic(it->it.page); + it->it.blkaddr = blkaddr; + } + + ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); + if (ret != -ENOATTR) + break; + } + if (vi->xattr_shared_count) + xattr_iter_end_final(&it->it); + + return ret ? ret : it->buffer_size; +} + +static bool erofs_xattr_user_list(struct dentry *dentry) +{ + return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER); +} + +static bool erofs_xattr_trusted_list(struct dentry *dentry) +{ + return capable(CAP_SYS_ADMIN); +} + +int erofs_getxattr(struct inode *inode, int index, + const char *name, + void *buffer, size_t buffer_size) +{ + int ret; + struct getxattr_iter it; + + if (!name) + return -EINVAL; + + ret = init_inode_xattrs(inode); + if (ret) + return ret; + + it.index = index; + + it.name.len = strlen(name); + if (it.name.len > EROFS_NAME_LEN) + return -ERANGE; + it.name.name = name; + + it.buffer = buffer; + it.buffer_size = buffer_size; + + it.it.sb = inode->i_sb; + ret = inline_getxattr(inode, &it); + if (ret == -ENOATTR) + ret = shared_getxattr(inode, &it); + return ret; +} + +static int erofs_xattr_generic_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + + switch (handler->flags) { + case EROFS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case EROFS_XATTR_INDEX_TRUSTED: + break; + case EROFS_XATTR_INDEX_SECURITY: + break; + default: + return -EINVAL; + } + + return erofs_getxattr(inode, handler->flags, name, buffer, size); +} + +const struct xattr_handler erofs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = EROFS_XATTR_INDEX_USER, + .list = erofs_xattr_user_list, + .get = erofs_xattr_generic_get, +}; + +const struct xattr_handler erofs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = EROFS_XATTR_INDEX_TRUSTED, + .list = erofs_xattr_trusted_list, + .get = erofs_xattr_generic_get, +}; + +#ifdef CONFIG_EROFS_FS_SECURITY +const struct xattr_handler __maybe_unused erofs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = EROFS_XATTR_INDEX_SECURITY, + .get = erofs_xattr_generic_get, +}; +#endif + +const struct xattr_handler *erofs_xattr_handlers[] = { + &erofs_xattr_user_handler, +#ifdef CONFIG_EROFS_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif + &erofs_xattr_trusted_handler, +#ifdef CONFIG_EROFS_FS_SECURITY + &erofs_xattr_security_handler, +#endif + NULL, +}; + +struct listxattr_iter { + struct xattr_iter it; + + struct dentry *dentry; + char *buffer; + int buffer_size, buffer_ofs; +}; + +static int xattr_entrylist(struct xattr_iter *_it, + struct erofs_xattr_entry *entry) +{ + struct listxattr_iter *it = + container_of(_it, struct listxattr_iter, it); + unsigned int prefix_len; + const char *prefix; + + const struct xattr_handler *h = + erofs_xattr_handler(entry->e_name_index); + + if (!h || (h->list && !h->list(it->dentry))) + return 1; + + prefix = xattr_prefix(h); + prefix_len = strlen(prefix); + + if (!it->buffer) { + it->buffer_ofs += prefix_len + entry->e_name_len + 1; + return 1; + } + + if (it->buffer_ofs + prefix_len + + entry->e_name_len + 1 > it->buffer_size) + return -ERANGE; + + memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len); + it->buffer_ofs += prefix_len; + return 0; +} + +static int xattr_namelist(struct xattr_iter *_it, + unsigned int processed, char *buf, unsigned int len) +{ + struct listxattr_iter *it = + container_of(_it, struct listxattr_iter, it); + + memcpy(it->buffer + it->buffer_ofs, buf, len); + it->buffer_ofs += len; + return 0; +} + +static int xattr_skipvalue(struct xattr_iter *_it, + unsigned int value_sz) +{ + struct listxattr_iter *it = + container_of(_it, struct listxattr_iter, it); + + it->buffer[it->buffer_ofs++] = '\0'; + return 1; +} + +static const struct xattr_iter_handlers list_xattr_handlers = { + .entry = xattr_entrylist, + .name = xattr_namelist, + .alloc_buffer = xattr_skipvalue, + .value = NULL +}; + +static int inline_listxattr(struct listxattr_iter *it) +{ + int ret; + unsigned int remaining; + + ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry)); + if (ret < 0) + return ret; + + remaining = ret; + while (remaining) { + ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining); + if (ret) + break; + } + xattr_iter_end_final(&it->it); + return ret ? ret : it->buffer_ofs; +} + +static int shared_listxattr(struct listxattr_iter *it) +{ + struct inode *const inode = d_inode(it->dentry); + struct erofs_inode *const vi = EROFS_I(inode); + struct super_block *const sb = inode->i_sb; + struct erofs_sb_info *const sbi = EROFS_SB(sb); + unsigned int i; + int ret = 0; + + for (i = 0; i < vi->xattr_shared_count; ++i) { + erofs_blk_t blkaddr = + xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); + + it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); + if (!i || blkaddr != it->it.blkaddr) { + if (i) + xattr_iter_end(&it->it, true); + + it->it.page = erofs_get_meta_page(sb, blkaddr); + if (IS_ERR(it->it.page)) + return PTR_ERR(it->it.page); + + it->it.kaddr = kmap_atomic(it->it.page); + it->it.blkaddr = blkaddr; + } + + ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); + if (ret) + break; + } + if (vi->xattr_shared_count) + xattr_iter_end_final(&it->it); + + return ret ? ret : it->buffer_ofs; +} + +ssize_t erofs_listxattr(struct dentry *dentry, + char *buffer, size_t buffer_size) +{ + int ret; + struct listxattr_iter it; + + ret = init_inode_xattrs(d_inode(dentry)); + if (ret == -ENOATTR) + return 0; + if (ret) + return ret; + + it.dentry = dentry; + it.buffer = buffer; + it.buffer_size = buffer_size; + it.buffer_ofs = 0; + + it.it.sb = dentry->d_sb; + + ret = inline_listxattr(&it); + if (ret < 0 && ret != -ENOATTR) + return ret; + return shared_listxattr(&it); +} + +#ifdef CONFIG_EROFS_FS_POSIX_ACL +struct posix_acl *erofs_get_acl(struct inode *inode, int type) +{ + struct posix_acl *acl; + int prefix, rc; + char *value = NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + rc = erofs_getxattr(inode, prefix, "", NULL, 0); + if (rc > 0) { + value = kmalloc(rc, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + rc = erofs_getxattr(inode, prefix, "", value, rc); + } + + if (rc == -ENOATTR) + acl = NULL; + else if (rc < 0) + acl = ERR_PTR(rc); + else + acl = posix_acl_from_xattr(&init_user_ns, value, rc); + kfree(value); + return acl; +} +#endif diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h new file mode 100644 index 000000000000..366dcb400525 --- /dev/null +++ b/fs/erofs/xattr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017-2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#ifndef __EROFS_XATTR_H +#define __EROFS_XATTR_H + +#include "internal.h" +#include +#include + +/* Attribute not found */ +#define ENOATTR ENODATA + +static inline unsigned int inlinexattr_header_size(struct inode *inode) +{ + return sizeof(struct erofs_xattr_ibody_header) + + sizeof(u32) * EROFS_I(inode)->xattr_shared_count; +} + +static inline erofs_blk_t xattrblock_addr(struct erofs_sb_info *sbi, + unsigned int xattr_id) +{ +#ifdef CONFIG_EROFS_FS_XATTR + return sbi->xattr_blkaddr + + xattr_id * sizeof(__u32) / EROFS_BLKSIZ; +#else + return 0; +#endif +} + +static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi, + unsigned int xattr_id) +{ + return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ; +} + +#ifdef CONFIG_EROFS_FS_XATTR +extern const struct xattr_handler erofs_xattr_user_handler; +extern const struct xattr_handler erofs_xattr_trusted_handler; +#ifdef CONFIG_EROFS_FS_SECURITY +extern const struct xattr_handler erofs_xattr_security_handler; +#endif + +static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx) +{ + static const struct xattr_handler *xattr_handler_map[] = { + [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler, +#ifdef CONFIG_EROFS_FS_POSIX_ACL + [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = + &posix_acl_access_xattr_handler, + [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = + &posix_acl_default_xattr_handler, +#endif + [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler, +#ifdef CONFIG_EROFS_FS_SECURITY + [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler, +#endif + }; + + return idx && idx < ARRAY_SIZE(xattr_handler_map) ? + xattr_handler_map[idx] : NULL; +} + +extern const struct xattr_handler *erofs_xattr_handlers[]; + +int erofs_getxattr(struct inode *, int, const char *, void *, size_t); +ssize_t erofs_listxattr(struct dentry *, char *, size_t); +#else +static inline int erofs_getxattr(struct inode *inode, int index, + const char *name, void *buffer, + size_t buffer_size) +{ + return -EOPNOTSUPP; +} + +#define erofs_listxattr (NULL) +#define erofs_xattr_handlers (NULL) +#endif /* !CONFIG_EROFS_FS_XATTR */ + +#ifdef CONFIG_EROFS_FS_POSIX_ACL +struct posix_acl *erofs_get_acl(struct inode *inode, int type); +#else +#define erofs_get_acl (NULL) +#endif + +#endif + diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c new file mode 100644 index 000000000000..f1c0e0ae3c33 --- /dev/null +++ b/fs/erofs/zdata.c @@ -0,0 +1,1464 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "zdata.h" +#include "compress.h" +#include +#include +#include + +#include + +/* + * since pclustersize is variable for big pcluster feature, introduce slab + * pools implementation for different pcluster sizes. + */ +struct z_erofs_pcluster_slab { + struct kmem_cache *slab; + unsigned int maxpages; + char name[48]; +}; + +#define _PCLP(n) { .maxpages = n } + +static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = { + _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128), + _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES) +}; + +static void z_erofs_destroy_pcluster_pool(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + if (!pcluster_pool[i].slab) + continue; + kmem_cache_destroy(pcluster_pool[i].slab); + pcluster_pool[i].slab = NULL; + } +} + +static int z_erofs_create_pcluster_pool(void) +{ + struct z_erofs_pcluster_slab *pcs; + struct z_erofs_pcluster *a; + unsigned int size; + + for (pcs = pcluster_pool; + pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) { + size = struct_size(a, compressed_pages, pcs->maxpages); + + sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages); + pcs->slab = kmem_cache_create(pcs->name, size, 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (pcs->slab) + continue; + + z_erofs_destroy_pcluster_pool(); + return -ENOMEM; + } + return 0; +} + +static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; + struct z_erofs_pcluster *pcl; + + if (nrpages > pcs->maxpages) + continue; + + pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + if (!pcl) + return ERR_PTR(-ENOMEM); + pcl->pclusterpages = nrpages; + return pcl; + } + return ERR_PTR(-EINVAL); +} + +static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; + + if (pcl->pclusterpages > pcs->maxpages) + continue; + + kmem_cache_free(pcs->slab, pcl); + return; + } + DBG_BUGON(1); +} + +/* how to allocate cached pages for a pcluster */ +enum z_erofs_cache_alloctype { + DONTALLOC, /* don't allocate any cached pages */ + /* + * try to use cached I/O if page allocation succeeds or fallback + * to in-place I/O instead to avoid any direct reclaim. + */ + TRYALLOC, +}; + +/* + * tagged pointer with 1-bit tag for all compressed pages + * tag 0 - the page is just found with an extra page reference + */ +typedef tagptr1_t compressed_page_t; + +#define tag_compressed_page_justfound(page) \ + tagptr_fold(compressed_page_t, page, 1) + +static struct workqueue_struct *z_erofs_workqueue __read_mostly; + +void z_erofs_exit_zip_subsystem(void) +{ + destroy_workqueue(z_erofs_workqueue); + z_erofs_destroy_pcluster_pool(); +} + +static inline int z_erofs_init_workqueue(void) +{ + const unsigned int onlinecpus = num_possible_cpus(); + + /* + * no need to spawn too many threads, limiting threads could minimum + * scheduling overhead, perhaps per-CPU threads should be better? + */ + z_erofs_workqueue = alloc_workqueue("erofs_unzipd", + WQ_UNBOUND | WQ_HIGHPRI, + onlinecpus + onlinecpus / 4); + return z_erofs_workqueue ? 0 : -ENOMEM; +} + +static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl) +{ + struct z_erofs_collection *cl = z_erofs_primarycollection(pcl); + + atomic_set(&pcl->obj.refcount, 1); + + DBG_BUGON(cl->nr_pages); + DBG_BUGON(cl->vcnt); +} + +int __init z_erofs_init_zip_subsystem(void) +{ + int err = z_erofs_create_pcluster_pool(); + + if (err) + return err; + err = z_erofs_init_workqueue(); + if (err) + z_erofs_destroy_pcluster_pool(); + return err; +} + +enum z_erofs_collectmode { + COLLECT_SECONDARY, + COLLECT_PRIMARY, + /* + * The current collection was the tail of an exist chain, in addition + * that the previous processed chained collections are all decided to + * be hooked up to it. + * A new chain will be created for the remaining collections which are + * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED, + * the next collection cannot reuse the whole page safely in + * the following scenario: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (belongs to the next cl) | (belongs to the current cl) | + * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| + */ + COLLECT_PRIMARY_HOOKED, + /* + * a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it + * could be dispatched into bypass queue later due to uptodated managed + * pages. All related online pages cannot be reused for inplace I/O (or + * pagevec) since it can be directly decoded without I/O submission. + */ + COLLECT_PRIMARY_FOLLOWED_NOINPLACE, + /* + * The current collection has been linked with the owned chain, and + * could also be linked with the remaining collections, which means + * if the processing page is the tail page of the collection, thus + * the current collection can safely use the whole page (since + * the previous collection is under control) for in-place I/O, as + * illustrated below: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (of the current cl) | (of the previous collection) | + * | PRIMARY_FOLLOWED or | | + * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________| + * + * [ (*) the above page can be used as inplace I/O. ] + */ + COLLECT_PRIMARY_FOLLOWED, +}; + +struct z_erofs_collector { + struct z_erofs_pagevec_ctor vector; + + struct z_erofs_pcluster *pcl, *tailpcl; + struct z_erofs_collection *cl; + /* a pointer used to pick up inplace I/O pages */ + struct page **icpage_ptr; + z_erofs_next_pcluster_t owned_head; + + enum z_erofs_collectmode mode; +}; + +struct z_erofs_decompress_frontend { + struct inode *const inode; + + struct z_erofs_collector clt; + struct erofs_map_blocks map; + + bool readahead; + /* used for applying cache strategy on the fly */ + bool backmost; + erofs_off_t headoffset; +}; + +#define COLLECTOR_INIT() { \ + .owned_head = Z_EROFS_PCLUSTER_TAIL, \ + .mode = COLLECT_PRIMARY_FOLLOWED } + +#define DECOMPRESS_FRONTEND_INIT(__i) { \ + .inode = __i, .clt = COLLECTOR_INIT(), \ + .backmost = true, } + +static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; +static DEFINE_MUTEX(z_pagemap_global_lock); + +static void preload_compressed_pages(struct z_erofs_collector *clt, + struct address_space *mc, + enum z_erofs_cache_alloctype type, + struct list_head *pagepool) +{ + struct z_erofs_pcluster *pcl = clt->pcl; + bool standalone = true; + gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; + struct page **pages; + pgoff_t index; + + if (clt->mode < COLLECT_PRIMARY_FOLLOWED) + return; + + pages = pcl->compressed_pages; + index = pcl->obj.index; + for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) { + struct page *page; + compressed_page_t t; + struct page *newpage = NULL; + + /* the compressed page was loaded before */ + if (READ_ONCE(*pages)) + continue; + + page = find_get_page(mc, index); + + if (page) { + t = tag_compressed_page_justfound(page); + } else { + /* I/O is needed, no possible to decompress directly */ + standalone = false; + switch (type) { + case TRYALLOC: + newpage = erofs_allocpage(pagepool, gfp); + if (!newpage) + continue; + set_page_private(newpage, + Z_EROFS_PREALLOCATED_PAGE); + t = tag_compressed_page_justfound(newpage); + break; + default: /* DONTALLOC */ + continue; + } + } + + if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t))) + continue; + + if (page) { + put_page(page); + } else if (newpage) { + set_page_private(newpage, 0); + list_add(&newpage->lru, pagepool); + } + } + + /* + * don't do inplace I/O if all compressed pages are available in + * managed cache since it can be moved to the bypass queue instead. + */ + if (standalone) + clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; +} + +/* called by erofs_shrinker to get rid of all compressed_pages */ +int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) +{ + struct z_erofs_pcluster *const pcl = + container_of(grp, struct z_erofs_pcluster, obj); + struct address_space *const mapping = MNGD_MAPPING(sbi); + int i; + + /* + * refcount of workgroup is now freezed as 1, + * therefore no need to worry about available decompression users. + */ + for (i = 0; i < pcl->pclusterpages; ++i) { + struct page *page = pcl->compressed_pages[i]; + + if (!page) + continue; + + /* block other users from reclaiming or migrating the page */ + if (!trylock_page(page)) + return -EBUSY; + + if (page->mapping != mapping) + continue; + + /* barrier is implied in the following 'unlock_page' */ + WRITE_ONCE(pcl->compressed_pages[i], NULL); + + set_page_private(page, 0); + ClearPagePrivate(page); + unlock_page(page); + put_page(page); + } + return 0; +} + +int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page) +{ + struct z_erofs_pcluster *const pcl = (void *)page_private(page); + int ret = 0; /* 0 - busy */ + + if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) { + unsigned int i; + + for (i = 0; i < pcl->pclusterpages; ++i) { + if (pcl->compressed_pages[i] == page) { + WRITE_ONCE(pcl->compressed_pages[i], NULL); + ret = 1; + break; + } + } + erofs_workgroup_unfreeze(&pcl->obj, 1); + + if (ret) { + set_page_private(page, 0); + ClearPagePrivate(page); + put_page(page); + } + } + return ret; +} + +/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ +static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, + struct page *page) +{ + struct z_erofs_pcluster *const pcl = clt->pcl; + + while (clt->icpage_ptr > pcl->compressed_pages) + if (!cmpxchg(--clt->icpage_ptr, NULL, page)) + return true; + return false; +} + +/* callers must be with collection lock held */ +static int z_erofs_attach_page(struct z_erofs_collector *clt, + struct page *page, enum z_erofs_page_type type, + bool pvec_safereuse) +{ + int ret; + + /* give priority for inplaceio */ + if (clt->mode >= COLLECT_PRIMARY && + type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && + z_erofs_try_inplace_io(clt, page)) + return 0; + + ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + pvec_safereuse); + clt->cl->vcnt += (unsigned int)ret; + return ret ? 0 : -EAGAIN; +} + +static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) +{ + struct z_erofs_pcluster *pcl = clt->pcl; + z_erofs_next_pcluster_t *owned_head = &clt->owned_head; + + /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ + if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, + *owned_head) == Z_EROFS_PCLUSTER_NIL) { + *owned_head = &pcl->next; + /* so we can attach this pcluster to our submission chain. */ + clt->mode = COLLECT_PRIMARY_FOLLOWED; + return; + } + + /* + * type 2, link to the end of an existing open chain, be careful + * that its submission is controlled by the original attached chain. + */ + if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, + *owned_head) == Z_EROFS_PCLUSTER_TAIL) { + *owned_head = Z_EROFS_PCLUSTER_TAIL; + clt->mode = COLLECT_PRIMARY_HOOKED; + clt->tailpcl = NULL; + return; + } + /* type 3, it belongs to a chain, but it isn't the end of the chain */ + clt->mode = COLLECT_PRIMARY; +} + +static int z_erofs_lookup_collection(struct z_erofs_collector *clt, + struct inode *inode, + struct erofs_map_blocks *map) +{ + struct erofs_workgroup *grp; + struct z_erofs_pcluster *pcl; + struct z_erofs_collection *cl; + unsigned int length; + + grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT); + if (!grp) + return -ENOENT; + + pcl = container_of(grp, struct z_erofs_pcluster, obj); + if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) { + DBG_BUGON(1); + erofs_workgroup_put(grp); + return -EFSCORRUPTED; + } + + cl = z_erofs_primarycollection(pcl); + if (cl->pageofs != (map->m_la & ~PAGE_MASK)) { + DBG_BUGON(1); + erofs_workgroup_put(grp); + return -EFSCORRUPTED; + } + + length = READ_ONCE(pcl->length); + if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) { + if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) { + DBG_BUGON(1); + erofs_workgroup_put(grp); + return -EFSCORRUPTED; + } + } else { + unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT; + + if (map->m_flags & EROFS_MAP_FULL_MAPPED) + llen |= Z_EROFS_PCLUSTER_FULL_LENGTH; + + while (llen > length && + length != cmpxchg_relaxed(&pcl->length, length, llen)) { + cpu_relax(); + length = READ_ONCE(pcl->length); + } + } + mutex_lock(&cl->lock); + /* used to check tail merging loop due to corrupted images */ + if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) + clt->tailpcl = pcl; + clt->pcl = pcl; + z_erofs_try_to_claim_pcluster(clt); + clt->pcl = pcl; + clt->cl = cl; + return 0; +} + +static int z_erofs_register_collection(struct z_erofs_collector *clt, + struct inode *inode, + struct erofs_map_blocks *map) +{ + struct z_erofs_pcluster *pcl; + struct z_erofs_collection *cl; + int err; + + /* no available pcluster, let's allocate one */ + pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT); + if (IS_ERR(pcl)) + return PTR_ERR(pcl); + + z_erofs_pcluster_init_always(pcl); + pcl->obj.index = map->m_pa >> PAGE_SHIFT; + + pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) | + (map->m_flags & EROFS_MAP_FULL_MAPPED ? + Z_EROFS_PCLUSTER_FULL_LENGTH : 0); + + if (map->m_flags & EROFS_MAP_ZIPPED) + pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4; + else + pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; + + /* new pclusters should be claimed as type 1, primary and followed */ + pcl->next = clt->owned_head; + clt->mode = COLLECT_PRIMARY_FOLLOWED; + + cl = z_erofs_primarycollection(pcl); + cl->pageofs = map->m_la & ~PAGE_MASK; + + /* + * lock all primary followed works before visible to others + * and mutex_trylock *never* fails for a new pcluster. + */ + mutex_init(&cl->lock); + mutex_trylock(&cl->lock); + + err = erofs_register_workgroup(inode->i_sb, &pcl->obj); + if (err) { + mutex_unlock(&cl->lock); + z_erofs_free_pcluster(pcl); + return -EAGAIN; + } + /* used to check tail merging loop due to corrupted images */ + if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) + clt->tailpcl = pcl; + clt->owned_head = &pcl->next; + clt->pcl = pcl; + clt->cl = cl; + return 0; +} + +static int z_erofs_collector_begin(struct z_erofs_collector *clt, + struct inode *inode, + struct erofs_map_blocks *map) +{ + int ret; + + DBG_BUGON(clt->cl); + + /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */ + DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL); + DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + + if (!PAGE_ALIGNED(map->m_pa)) { + DBG_BUGON(1); + return -EINVAL; + } + +repeat: + ret = z_erofs_lookup_collection(clt, inode, map); + if (ret == -ENOENT) { + ret = z_erofs_register_collection(clt, inode, map); + + /* someone registered at the same time, give another try */ + if (ret == -EAGAIN) { + cond_resched(); + goto repeat; + } + } + + if (ret) + return ret; + + z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, + clt->cl->pagevec, clt->cl->vcnt); + + /* since file-backed online pages are traversed in reverse order */ + clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages; + return 0; +} + +/* + * keep in mind that no referenced pclusters will be freed + * only after a RCU grace period. + */ +static void z_erofs_rcu_callback(struct rcu_head *head) +{ + struct z_erofs_collection *const cl = + container_of(head, struct z_erofs_collection, rcu); + + z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster, + primary_collection)); +} + +void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) +{ + struct z_erofs_pcluster *const pcl = + container_of(grp, struct z_erofs_pcluster, obj); + struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl); + + call_rcu(&cl->rcu, z_erofs_rcu_callback); +} + +static void z_erofs_collection_put(struct z_erofs_collection *cl) +{ + struct z_erofs_pcluster *const pcl = + container_of(cl, struct z_erofs_pcluster, primary_collection); + + erofs_workgroup_put(&pcl->obj); +} + +static bool z_erofs_collector_end(struct z_erofs_collector *clt) +{ + struct z_erofs_collection *cl = clt->cl; + + if (!cl) + return false; + + z_erofs_pagevec_ctor_exit(&clt->vector, false); + mutex_unlock(&cl->lock); + + /* + * if all pending pages are added, don't hold its reference + * any longer if the pcluster isn't hosted by ourselves. + */ + if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) + z_erofs_collection_put(cl); + + clt->cl = NULL; + return true; +} + +static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe, + unsigned int cachestrategy, + erofs_off_t la) +{ + if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED) + return false; + + if (fe->backmost) + return true; + + return cachestrategy >= EROFS_ZIP_CACHE_READAROUND && + la < fe->headoffset; +} + +static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, + struct page *page, struct list_head *pagepool) +{ + struct inode *const inode = fe->inode; + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + struct erofs_map_blocks *const map = &fe->map; + struct z_erofs_collector *const clt = &fe->clt; + const loff_t offset = page_offset(page); + bool tight = true; + + enum z_erofs_cache_alloctype cache_strategy; + enum z_erofs_page_type page_type; + unsigned int cur, end, spiltted, index; + int err = 0; + + /* register locked file pages as online pages in pack */ + z_erofs_onlinepage_init(page); + + spiltted = 0; + end = PAGE_SIZE; +repeat: + cur = end - 1; + + /* lucky, within the range of the current map_blocks */ + if (offset + cur >= map->m_la && + offset + cur < map->m_la + map->m_llen) { + /* didn't get a valid collection previously (very rare) */ + if (!clt->cl) + goto restart_now; + goto hitted; + } + + /* go ahead the next map_blocks */ + erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur); + + if (z_erofs_collector_end(clt)) + fe->backmost = false; + + map->m_la = offset + cur; + map->m_llen = 0; + err = z_erofs_map_blocks_iter(inode, map, 0); + if (err) + goto err_out; + +restart_now: + if (!(map->m_flags & EROFS_MAP_MAPPED)) + goto hitted; + + err = z_erofs_collector_begin(clt, inode, map); + if (err) + goto err_out; + + /* preload all compressed pages (maybe downgrade role if necessary) */ + if (should_alloc_managed_pages(fe, sbi->cache_strategy, map->m_la)) + cache_strategy = TRYALLOC; + else + cache_strategy = DONTALLOC; + + preload_compressed_pages(clt, MNGD_MAPPING(sbi), + cache_strategy, pagepool); + +hitted: + /* + * Ensure the current partial page belongs to this submit chain rather + * than other concurrent submit chains or the noio(bypass) chain since + * those chains are handled asynchronously thus the page cannot be used + * for inplace I/O or pagevec (should be processed in strict order.) + */ + tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && + clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + + cur = end - min_t(unsigned int, offset + end - map->m_la, end); + if (!(map->m_flags & EROFS_MAP_MAPPED)) { + zero_user_segment(page, cur, end); + goto next_part; + } + + /* let's derive page type */ + page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD : + (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : + (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : + Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); + + if (cur) + tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); + +retry: + err = z_erofs_attach_page(clt, page, page_type, + clt->mode >= COLLECT_PRIMARY_FOLLOWED); + /* should allocate an additional short-lived page for pagevec */ + if (err == -EAGAIN) { + struct page *const newpage = + alloc_page(GFP_NOFS | __GFP_NOFAIL); + + set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); + err = z_erofs_attach_page(clt, newpage, + Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); + if (!err) + goto retry; + } + + if (err) + goto err_out; + + index = page->index - (map->m_la >> PAGE_SHIFT); + + z_erofs_onlinepage_fixup(page, index, true); + + /* bump up the number of spiltted parts of a page */ + ++spiltted; + /* also update nr_pages */ + clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1); +next_part: + /* can be used for verification */ + map->m_llen = offset + cur - map->m_la; + + end = cur; + if (end > 0) + goto repeat; + +out: + z_erofs_onlinepage_endio(page); + + erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu", + __func__, page, spiltted, map->m_llen); + return err; + + /* if some error occurred while processing this page */ +err_out: + SetPageError(page); + goto out; +} + +static void z_erofs_decompressqueue_work(struct work_struct *work); +static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, + bool sync, int bios) +{ + /* wake up the caller thread for sync decompression */ + if (sync) { + if (!atomic_add_return(bios, &io->pending_bios)) + complete(&io->u.done); + + return; + } + + if (atomic_add_return(bios, &io->pending_bios)) + return; + /* Use workqueue decompression for atomic contexts only */ + if (in_atomic() || irqs_disabled()) { + queue_work(z_erofs_workqueue, &io->u.work); + return; + } + z_erofs_decompressqueue_work(&io->u.work); +} + +static bool z_erofs_page_is_invalidated(struct page *page) +{ + return !page->mapping && !z_erofs_is_shortlived_page(page); +} + +static void z_erofs_decompressqueue_endio(struct bio *bio) +{ + tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); + struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); + struct bio_vec *bvec; + unsigned int i; + + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + + DBG_BUGON(PageUptodate(page)); + DBG_BUGON(z_erofs_page_is_invalidated(page)); + + if (bio->bi_error) + SetPageError(page); + + if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { + if (!bio->bi_error) + SetPageUptodate(page); + unlock_page(page); + } + } + z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); + bio_put(bio); +} + +static int z_erofs_decompress_pcluster(struct super_block *sb, + struct z_erofs_pcluster *pcl, + struct list_head *pagepool) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + struct z_erofs_pagevec_ctor ctor; + unsigned int i, inputsize, outputsize, llen, nr_pages; + struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES]; + struct page **pages, **compressed_pages, *page; + + enum z_erofs_page_type page_type; + bool overlapped, partial; + struct z_erofs_collection *cl; + int err; + + might_sleep(); + cl = z_erofs_primarycollection(pcl); + DBG_BUGON(!READ_ONCE(cl->nr_pages)); + + mutex_lock(&cl->lock); + nr_pages = cl->nr_pages; + + if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) { + pages = pages_onstack; + } else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES && + mutex_trylock(&z_pagemap_global_lock)) { + pages = z_pagemap_global; + } else { + pages = kmalloc_array(nr_pages, sizeof(struct page *), + GFP_KERNEL); + + if (!pages) + pages = vmalloc(nr_pages * sizeof(struct page *)); + + if (!pages && nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES) + pages = kmalloc_array(nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); + + /* fallback to global pagemap for the lowmem scenario */ + if (!pages) { + mutex_lock(&z_pagemap_global_lock); + pages = z_pagemap_global; + } + } + + for (i = 0; i < nr_pages; ++i) + pages[i] = NULL; + + err = 0; + z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS, + cl->pagevec, 0); + + for (i = 0; i < cl->vcnt; ++i) { + unsigned int pagenr; + + page = z_erofs_pagevec_dequeue(&ctor, &page_type); + + /* all pages in pagevec ought to be valid */ + DBG_BUGON(!page); + DBG_BUGON(z_erofs_page_is_invalidated(page)); + + if (z_erofs_put_shortlivedpage(pagepool, page)) + continue; + + if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD) + pagenr = 0; + else + pagenr = z_erofs_onlinepage_index(page); + + DBG_BUGON(pagenr >= nr_pages); + + /* + * currently EROFS doesn't support multiref(dedup), + * so here erroring out one multiref page. + */ + if (pages[pagenr]) { + DBG_BUGON(1); + SetPageError(pages[pagenr]); + z_erofs_onlinepage_endio(pages[pagenr]); + err = -EFSCORRUPTED; + } + pages[pagenr] = page; + } + z_erofs_pagevec_ctor_exit(&ctor, true); + + overlapped = false; + compressed_pages = pcl->compressed_pages; + + for (i = 0; i < pcl->pclusterpages; ++i) { + unsigned int pagenr; + + page = compressed_pages[i]; + + /* all compressed pages ought to be valid */ + DBG_BUGON(!page); + DBG_BUGON(z_erofs_page_is_invalidated(page)); + + if (!z_erofs_is_shortlived_page(page)) { + if (erofs_page_is_managed(sbi, page)) { + if (!PageUptodate(page)) + err = -EIO; + continue; + } + + /* + * only if non-head page can be selected + * for inplace decompression + */ + pagenr = z_erofs_onlinepage_index(page); + + DBG_BUGON(pagenr >= nr_pages); + if (pages[pagenr]) { + DBG_BUGON(1); + SetPageError(pages[pagenr]); + z_erofs_onlinepage_endio(pages[pagenr]); + err = -EFSCORRUPTED; + } + pages[pagenr] = page; + + overlapped = true; + } + + /* PG_error needs checking for all non-managed pages */ + if (PageError(page)) { + DBG_BUGON(PageUptodate(page)); + err = -EIO; + } + } + + if (err) + goto out; + + llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT; + if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) { + outputsize = llen; + partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH); + } else { + outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs; + partial = true; + } + + inputsize = pcl->pclusterpages * PAGE_SIZE; + err = z_erofs_decompress(&(struct z_erofs_decompress_req) { + .sb = sb, + .in = compressed_pages, + .out = pages, + .pageofs_out = cl->pageofs, + .inputsize = inputsize, + .outputsize = outputsize, + .alg = pcl->algorithmformat, + .inplace_io = overlapped, + .partial_decoding = partial + }, pagepool); + +out: + /* must handle all compressed pages before ending pages */ + for (i = 0; i < pcl->pclusterpages; ++i) { + page = compressed_pages[i]; + + if (erofs_page_is_managed(sbi, page)) + continue; + + /* recycle all individual short-lived pages */ + (void)z_erofs_put_shortlivedpage(pagepool, page); + + WRITE_ONCE(compressed_pages[i], NULL); + } + + for (i = 0; i < nr_pages; ++i) { + page = pages[i]; + if (!page) + continue; + + DBG_BUGON(z_erofs_page_is_invalidated(page)); + + /* recycle all individual short-lived pages */ + if (z_erofs_put_shortlivedpage(pagepool, page)) + continue; + + if (err < 0) + SetPageError(page); + + z_erofs_onlinepage_endio(page); + } + + if (pages == z_pagemap_global) + mutex_unlock(&z_pagemap_global_lock); + else if (pages != pages_onstack) + kvfree(pages); + + cl->nr_pages = 0; + cl->vcnt = 0; + + /* all cl locks MUST be taken before the following line */ + WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL); + + /* all cl locks SHOULD be released right now */ + mutex_unlock(&cl->lock); + + z_erofs_collection_put(cl); + return err; +} + +static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, + struct list_head *pagepool) +{ + z_erofs_next_pcluster_t owned = io->head; + + while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) { + struct z_erofs_pcluster *pcl; + + /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */ + DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL); + + /* no possible that 'owned' equals NULL */ + DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL); + + pcl = container_of(owned, struct z_erofs_pcluster, next); + owned = READ_ONCE(pcl->next); + + z_erofs_decompress_pcluster(io->sb, pcl, pagepool); + } +} + +static void z_erofs_decompressqueue_work(struct work_struct *work) +{ + struct z_erofs_decompressqueue *bgq = + container_of(work, struct z_erofs_decompressqueue, u.work); + LIST_HEAD(pagepool); + + DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + z_erofs_decompress_queue(bgq, &pagepool); + + put_pages_list(&pagepool); + kvfree(bgq); +} + +static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, + unsigned int nr, + struct list_head *pagepool, + struct address_space *mc, + gfp_t gfp) +{ + const pgoff_t index = pcl->obj.index; + bool tocache = false; + + struct address_space *mapping; + struct page *oldpage, *page; + + compressed_page_t t; + int justfound; + +repeat: + page = READ_ONCE(pcl->compressed_pages[nr]); + oldpage = page; + + if (!page) + goto out_allocpage; + + /* process the target tagged pointer */ + t = tagptr_init(compressed_page_t, page); + justfound = tagptr_unfold_tags(t); + page = tagptr_unfold_ptr(t); + + /* + * preallocated cached pages, which is used to avoid direct reclaim + * otherwise, it will go inplace I/O path instead. + */ + if (page->private == Z_EROFS_PREALLOCATED_PAGE) { + WRITE_ONCE(pcl->compressed_pages[nr], page); + set_page_private(page, 0); + tocache = true; + goto out_tocache; + } + mapping = READ_ONCE(page->mapping); + + /* + * file-backed online pages in plcuster are all locked steady, + * therefore it is impossible for `mapping' to be NULL. + */ + if (mapping && mapping != mc) + /* ought to be unmanaged pages */ + goto out; + + /* directly return for shortlived page as well */ + if (z_erofs_is_shortlived_page(page)) + goto out; + + lock_page(page); + + /* only true if page reclaim goes wrong, should never happen */ + DBG_BUGON(justfound && PagePrivate(page)); + + /* the page is still in manage cache */ + if (page->mapping == mc) { + WRITE_ONCE(pcl->compressed_pages[nr], page); + + ClearPageError(page); + if (!PagePrivate(page)) { + /* + * impossible to be !PagePrivate(page) for + * the current restriction as well if + * the page is already in compressed_pages[]. + */ + DBG_BUGON(!justfound); + + justfound = 0; + set_page_private(page, (unsigned long)pcl); + SetPagePrivate(page); + } + + /* no need to submit io if it is already up-to-date */ + if (PageUptodate(page)) { + unlock_page(page); + page = NULL; + } + goto out; + } + + /* + * the managed page has been truncated, it's unsafe to + * reuse this one, let's allocate a new cache-managed page. + */ + DBG_BUGON(page->mapping); + DBG_BUGON(!justfound); + + tocache = true; + unlock_page(page); + put_page(page); +out_allocpage: + page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); + if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) { + list_add(&page->lru, pagepool); + cond_resched(); + goto repeat; + } +out_tocache: + if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) { + /* turn into temporary page if fails (1 ref) */ + set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); + goto out; + } + set_page_private(page, (unsigned long)pcl); + SetPagePrivate(page); + +out: /* the only exit (for tracing and debugging) */ + return page; +} + +static struct z_erofs_decompressqueue * +jobqueue_init(struct super_block *sb, + struct z_erofs_decompressqueue *fgq, bool *fg) +{ + struct z_erofs_decompressqueue *q; + + if (fg && !*fg) { + q = kzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN); + if (!q) { + *fg = true; + goto fg_out; + } + INIT_WORK(&q->u.work, z_erofs_decompressqueue_work); + } else { +fg_out: + q = fgq; + init_completion(&fgq->u.done); + atomic_set(&fgq->pending_bios, 0); + } + q->sb = sb; + q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED; + return q; +} + +/* define decompression jobqueue types */ +enum { + JQ_BYPASS, + JQ_SUBMIT, + NR_JOBQUEUES, +}; + +static void *jobqueueset_init(struct super_block *sb, + struct z_erofs_decompressqueue *q[], + struct z_erofs_decompressqueue *fgq, bool *fg) +{ + /* + * if managed cache is enabled, bypass jobqueue is needed, + * no need to read from device for all pclusters in this queue. + */ + q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL); + q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg); + + return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg)); +} + +static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, + z_erofs_next_pcluster_t qtail[], + z_erofs_next_pcluster_t owned_head) +{ + z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT]; + z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS]; + + DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + if (owned_head == Z_EROFS_PCLUSTER_TAIL) + owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED; + + WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED); + + WRITE_ONCE(*submit_qtail, owned_head); + WRITE_ONCE(*bypass_qtail, &pcl->next); + + qtail[JQ_BYPASS] = &pcl->next; +} + +static void z_erofs_submit_queue(struct super_block *sb, + struct z_erofs_decompress_frontend *f, + struct list_head *pagepool, + struct z_erofs_decompressqueue *fgq, + bool *force_fg) +{ + struct erofs_sb_info *const sbi = EROFS_SB(sb); + z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; + struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; + void *bi_private; + z_erofs_next_pcluster_t owned_head = f->clt.owned_head; + /* since bio will be NULL, no need to initialize last_index */ + pgoff_t last_index; + unsigned int nr_bios = 0; + struct bio *bio = NULL; + + bi_private = jobqueueset_init(sb, q, fgq, force_fg); + qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; + qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; + + /* by default, all need io submission */ + q[JQ_SUBMIT]->head = owned_head; + + do { + struct z_erofs_pcluster *pcl; + pgoff_t cur, end; + unsigned int i = 0; + bool bypass = true; + + /* no possible 'owned_head' equals the following */ + DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL); + + pcl = container_of(owned_head, struct z_erofs_pcluster, next); + + cur = pcl->obj.index; + end = cur + pcl->pclusterpages; + + /* close the main owned chain at first */ + owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, + Z_EROFS_PCLUSTER_TAIL_CLOSED); + + do { + struct page *page; + + page = pickup_page_for_submission(pcl, i++, pagepool, + MNGD_MAPPING(sbi), + GFP_NOFS); + if (!page) + continue; + + if (bio && cur != last_index + 1) { +submit_bio_retry: + submit_bio(bio); + bio = NULL; + } + + if (!bio) { + bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + + bio->bi_end_io = z_erofs_decompressqueue_endio; + bio->bi_bdev = sb->s_bdev; + bio->bi_iter.bi_sector = (sector_t)cur << + LOG_SECTORS_PER_BLOCK; + bio->bi_private = bi_private; + bio->bi_opf = REQ_OP_READ; + if (f->readahead) + bio->bi_opf |= REQ_RAHEAD; + ++nr_bios; + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + goto submit_bio_retry; + + last_index = cur; + bypass = false; + } while (++cur < end); + + if (!bypass) + qtail[JQ_SUBMIT] = &pcl->next; + else + move_to_bypass_jobqueue(pcl, qtail, owned_head); + } while (owned_head != Z_EROFS_PCLUSTER_TAIL); + + if (bio) + submit_bio(bio); + + /* + * although background is preferred, no one is pending for submission. + * don't issue workqueue for decompression but drop it directly instead. + */ + if (!*force_fg && !nr_bios) { + kvfree(q[JQ_SUBMIT]); + return; + } + z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios); +} + +static void z_erofs_runqueue(struct super_block *sb, + struct z_erofs_decompress_frontend *f, + struct list_head *pagepool, bool force_fg) +{ + struct z_erofs_decompressqueue io[NR_JOBQUEUES]; + + if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL) + return; + z_erofs_submit_queue(sb, f, pagepool, io, &force_fg); + + /* handle bypass queue (no i/o pclusters) immediately */ + z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool); + + if (!force_fg) + return; + + /* wait until all bios are completed */ + wait_for_completion_io(&io[JQ_SUBMIT].u.done); + + /* handle synchronous decompress queue in the caller context */ + z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool); +} + +static int z_erofs_readpage(struct file *file, struct page *page) +{ + struct inode *const inode = page->mapping->host; + struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + int err; + LIST_HEAD(pagepool); + + trace_erofs_readpage(page, false); + + f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT; + + err = z_erofs_do_read_page(&f, page, &pagepool); + (void)z_erofs_collector_end(&f.clt); + + /* if some compressed cluster ready, need submit them anyway */ + z_erofs_runqueue(inode->i_sb, &f, &pagepool, true); + + if (err) + erofs_err(inode->i_sb, "failed to read, err [%d]", err); + + if (f.map.mpage) + put_page(f.map.mpage); + + /* clean up the remaining free pages */ + put_pages_list(&pagepool); + return err; +} + +static int z_erofs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + struct inode *const inode = mapping->host; + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); + + bool sync = (nr_pages <= sbi->max_sync_decompress_pages); + struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); + struct page *head = NULL; + LIST_HEAD(pagepool); + + trace_erofs_readpages(mapping->host, lru_to_page(pages), + nr_pages, false); + + f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT; + + for (; nr_pages; --nr_pages) { + struct page *page = lru_to_page(pages); + + prefetchw(&page->flags); + list_del(&page->lru); + + /* + * A pure asynchronous readahead is indicated if + * a PG_readahead marked page is hitted at first. + * Let's also do asynchronous decompression for this case. + */ + sync &= !(PageReadahead(page) && !head); + + if (add_to_page_cache_lru(page, mapping, page->index, gfp)) { + list_add(&page->lru, &pagepool); + continue; + } + + set_page_private(page, (unsigned long)head); + head = page; + } + + while (head) { + struct page *page = head; + int err; + + /* traversal in reverse order */ + head = (void *)page_private(page); + + err = z_erofs_do_read_page(&f, page, &pagepool); + if (err) + erofs_err(inode->i_sb, + "readahead error at page %lu @ nid %llu", + page->index, EROFS_I(inode)->nid); + put_page(page); + } + + (void)z_erofs_collector_end(&f.clt); + + z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync); + + if (f.map.mpage) + put_page(f.map.mpage); + + /* clean up the remaining free pages */ + put_pages_list(&pagepool); + return 0; +} + +const struct address_space_operations z_erofs_aops = { + .readpage = z_erofs_readpage, + .readpages = z_erofs_readpages, +}; diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h new file mode 100644 index 000000000000..33ed799600c9 --- /dev/null +++ b/fs/erofs/zdata.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#ifndef __EROFS_FS_ZDATA_H +#define __EROFS_FS_ZDATA_H + +#include "internal.h" +#include "zpvec.h" + +#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE) +#define Z_EROFS_NR_INLINE_PAGEVECS 3 + +/* + * Structure fields follow one of the following exclusion rules. + * + * I: Modifiable by initialization/destruction paths and read-only + * for everyone else; + * + * L: Field should be protected by pageset lock; + * + * A: Field should be accessed / updated in atomic for parallelized code. + */ +struct z_erofs_collection { + struct mutex lock; + + /* I: page offset of start position of decompression */ + unsigned short pageofs; + + /* L: maximum relative page index in pagevec[] */ + unsigned short nr_pages; + + /* L: total number of pages in pagevec[] */ + unsigned int vcnt; + + union { + /* L: inline a certain number of pagevecs for bootstrap */ + erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS]; + + /* I: can be used to free the pcluster by RCU. */ + struct rcu_head rcu; + }; +}; + +#define Z_EROFS_PCLUSTER_FULL_LENGTH 0x00000001 +#define Z_EROFS_PCLUSTER_LENGTH_BIT 1 + +/* + * let's leave a type here in case of introducing + * another tagged pointer later. + */ +typedef void *z_erofs_next_pcluster_t; + +struct z_erofs_pcluster { + struct erofs_workgroup obj; + struct z_erofs_collection primary_collection; + + /* A: point to next chained pcluster or TAILs */ + z_erofs_next_pcluster_t next; + + /* A: lower limit of decompressed length and if full length or not */ + unsigned int length; + + /* I: physical cluster size in pages */ + unsigned short pclusterpages; + + /* I: compression algorithm format */ + unsigned char algorithmformat; + + /* A: compressed pages (can be cached or inplaced pages) */ + struct page *compressed_pages[]; +}; + +#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection) + +/* let's avoid the valid 32-bit kernel addresses */ + +/* the chained workgroup has't submitted io (still open) */ +#define Z_EROFS_PCLUSTER_TAIL ((void *)0x5F0ECAFE) +/* the chained workgroup has already submitted io */ +#define Z_EROFS_PCLUSTER_TAIL_CLOSED ((void *)0x5F0EDEAD) + +#define Z_EROFS_PCLUSTER_NIL (NULL) + +struct z_erofs_decompressqueue { + struct super_block *sb; + atomic_t pending_bios; + z_erofs_next_pcluster_t head; + + union { + struct completion done; + struct work_struct work; + } u; +}; + +#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) +static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, + struct page *page) +{ + return page->mapping == MNGD_MAPPING(sbi); +} + +#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 +#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) +#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS) + +/* + * waiters (aka. ongoing_packs): # to unlock the page + * sub-index: 0 - for partial page, >= 1 full page sub-index + */ +typedef atomic_t z_erofs_onlinepage_t; + +/* type punning */ +union z_erofs_onlinepage_converter { + z_erofs_onlinepage_t *o; + unsigned long *v; +}; + +static inline unsigned int z_erofs_onlinepage_index(struct page *page) +{ + union z_erofs_onlinepage_converter u; + + DBG_BUGON(!PagePrivate(page)); + u.v = &page_private(page); + + return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; +} + +static inline void z_erofs_onlinepage_init(struct page *page) +{ + union { + z_erofs_onlinepage_t o; + unsigned long v; + /* keep from being unlocked in advance */ + } u = { .o = ATOMIC_INIT(1) }; + + set_page_private(page, u.v); + smp_wmb(); + SetPagePrivate(page); +} + +static inline void z_erofs_onlinepage_fixup(struct page *page, + uintptr_t index, bool down) +{ + union z_erofs_onlinepage_converter u = { .v = &page_private(page) }; + int orig, orig_index, val; + +repeat: + orig = atomic_read(u.o); + orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; + if (orig_index) { + if (!index) + return; + + DBG_BUGON(orig_index != index); + } + + val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | + ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); + if (atomic_cmpxchg(u.o, orig, val) != orig) + goto repeat; +} + +static inline void z_erofs_onlinepage_endio(struct page *page) +{ + union z_erofs_onlinepage_converter u; + unsigned int v; + + DBG_BUGON(!PagePrivate(page)); + u.v = &page_private(page); + + v = atomic_dec_return(u.o); + if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) { + set_page_private(page, 0); + ClearPagePrivate(page); + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o)); +} + +#define Z_EROFS_VMAP_ONSTACK_PAGES \ + min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U) +#define Z_EROFS_VMAP_GLOBAL_PAGES 2048 + +#endif diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c new file mode 100644 index 000000000000..6381281a7dca --- /dev/null +++ b/fs/erofs/zmap.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2018-2019 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#include "internal.h" +#include +#include + +int z_erofs_fill_inode(struct inode *inode) +{ + struct erofs_inode *const vi = EROFS_I(inode); + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); + + if (!erofs_sb_has_big_pcluster(sbi) && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { + vi->z_advise = 0; + vi->z_algorithmtype[0] = 0; + vi->z_algorithmtype[1] = 0; + vi->z_logical_clusterbits = LOG_BLOCK_SIZE; + set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); + } + inode->i_mapping->a_ops = &z_erofs_aops; + return 0; +} + +static int z_erofs_fill_inode_lazy(struct inode *inode) +{ + struct erofs_inode *const vi = EROFS_I(inode); + struct super_block *const sb = inode->i_sb; + int err; + erofs_off_t pos; + struct page *page; + void *kaddr; + struct z_erofs_map_header *h; + + if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { + /* + * paired with smp_mb() at the end of the function to ensure + * fields will only be observed after the bit is set. + */ + smp_mb(); + return 0; + } + + if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) + return -ERESTARTSYS; + + err = 0; + if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) + goto out_unlock; + + DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); + + pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + + vi->xattr_isize, 8); + page = erofs_get_meta_page(sb, erofs_blknr(pos)); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out_unlock; + } + + kaddr = kmap_atomic(page); + + h = kaddr + erofs_blkoff(pos); + vi->z_advise = le16_to_cpu(h->h_advise); + vi->z_algorithmtype[0] = h->h_algorithmtype & 15; + vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; + + if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) { + erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel", + vi->z_algorithmtype[0], vi->nid); + err = -EOPNOTSUPP; + goto unmap_done; + } + + vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); + if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && + vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | + Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { + erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", + vi->nid); + err = -EFSCORRUPTED; + goto unmap_done; + } + if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { + erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", + vi->nid); + err = -EFSCORRUPTED; + goto unmap_done; + } + /* paired with smp_mb() at the beginning of the function */ + smp_mb(); + set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); +unmap_done: + kunmap_atomic(kaddr); + unlock_page(page); + put_page(page); +out_unlock: + clear_bit_unlock(EROFS_I_BL_Z_BIT, &vi->flags); + /* See wake_up_bit() for which memory barrier you need to use. */ + smp_mb__after_atomic(); + wake_up_bit(&vi->flags, EROFS_I_BL_Z_BIT); + return err; +} + +struct z_erofs_maprecorder { + struct inode *inode; + struct erofs_map_blocks *map; + void *kaddr; + + unsigned long lcn; + /* compression extent information gathered */ + u8 type; + u16 clusterofs; + u16 delta[2]; + erofs_blk_t pblk, compressedlcs; +}; + +static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, + erofs_blk_t eblk) +{ + struct super_block *const sb = m->inode->i_sb; + struct erofs_map_blocks *const map = m->map; + struct page *mpage = map->mpage; + + if (mpage) { + if (mpage->index == eblk) { + if (!m->kaddr) + m->kaddr = kmap_atomic(mpage); + return 0; + } + + if (m->kaddr) { + kunmap_atomic(m->kaddr); + m->kaddr = NULL; + } + put_page(mpage); + } + + mpage = erofs_get_meta_page(sb, eblk); + if (IS_ERR(mpage)) { + map->mpage = NULL; + return PTR_ERR(mpage); + } + m->kaddr = kmap_atomic(mpage); + unlock_page(mpage); + map->mpage = mpage; + return 0; +} + +static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned long lcn) +{ + struct inode *const inode = m->inode; + struct erofs_inode *const vi = EROFS_I(inode); + const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid); + const erofs_off_t pos = + Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize + + vi->xattr_isize) + + lcn * sizeof(struct z_erofs_vle_decompressed_index); + struct z_erofs_vle_decompressed_index *di; + unsigned int advise, type; + int err; + + err = z_erofs_reload_indexes(m, erofs_blknr(pos)); + if (err) + return err; + + m->lcn = lcn; + di = m->kaddr + erofs_blkoff(pos); + + advise = le16_to_cpu(di->di_advise); + type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & + ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1); + switch (type) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + m->clusterofs = 1 << vi->z_logical_clusterbits; + m->delta[0] = le16_to_cpu(di->di_u.delta[0]); + if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + m->compressedlcs = m->delta[0] & + ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->delta[0] = 1; + } + m->delta[1] = le16_to_cpu(di->di_u.delta[1]); + break; + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + m->clusterofs = le16_to_cpu(di->di_clusterofs); + m->pblk = le32_to_cpu(di->di_u.blkaddr); + break; + default: + DBG_BUGON(1); + return -EOPNOTSUPP; + } + m->type = type; + return 0; +} + +static unsigned int decode_compactedbits(unsigned int lobits, + unsigned int lomask, + u8 *in, unsigned int pos, u8 *type) +{ + const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); + const unsigned int lo = v & lomask; + + *type = (v >> lobits) & 3; + return lo; +} + +static int unpack_compacted_index(struct z_erofs_maprecorder *m, + unsigned int amortizedshift, + unsigned int eofs) +{ + struct erofs_inode *const vi = EROFS_I(m->inode); + const unsigned int lclusterbits = vi->z_logical_clusterbits; + const unsigned int lomask = (1 << lclusterbits) - 1; + unsigned int vcnt, base, lo, encodebits, nblk; + int i; + u8 *in, type; + bool big_pcluster; + + if (1 << amortizedshift == 4) + vcnt = 2; + else if (1 << amortizedshift == 2 && lclusterbits == 12) + vcnt = 16; + else + return -EOPNOTSUPP; + + big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; + encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; + base = round_down(eofs, vcnt << amortizedshift); + in = m->kaddr + base; + + i = (eofs - base) >> amortizedshift; + + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + m->type = type; + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + m->clusterofs = 1 << lclusterbits; + if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (!big_pcluster) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->delta[0] = 1; + return 0; + } else if (i + 1 != (int)vcnt) { + m->delta[0] = lo; + return 0; + } + /* + * since the last lcluster in the pack is special, + * of which lo saves delta[1] rather than delta[0]. + * Hence, get delta[0] by the previous lcluster indirectly. + */ + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * (i - 1), &type); + if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + lo = 0; + else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) + lo = 1; + m->delta[0] = lo + 1; + return 0; + } + m->clusterofs = lo; + m->delta[0] = 0; + /* figout out blkaddr (pblk) for HEAD lclusters */ + if (!big_pcluster) { + nblk = 1; + while (i > 0) { + --i; + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + i -= lo; + + if (i >= 0) + ++nblk; + } + } else { + nblk = 0; + while (i > 0) { + --i; + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + --i; + nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + continue; + } + /* bigpcluster shouldn't have plain d0 == 1 */ + if (lo <= 1) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + i -= lo - 2; + continue; + } + ++nblk; + } + } + in += (vcnt << amortizedshift) - sizeof(__le32); + m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; + return 0; +} + +static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned long lcn) +{ + struct inode *const inode = m->inode; + struct erofs_inode *const vi = EROFS_I(inode); + const unsigned int lclusterbits = vi->z_logical_clusterbits; + const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) + + vi->inode_isize + vi->xattr_isize, 8) + + sizeof(struct z_erofs_map_header); + const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); + unsigned int compacted_4b_initial, compacted_2b; + unsigned int amortizedshift; + erofs_off_t pos; + int err; + + if (lclusterbits != 12) + return -EOPNOTSUPP; + + if (lcn >= totalidx) + return -EINVAL; + + m->lcn = lcn; + /* used to align to 32-byte (compacted_2b) alignment */ + compacted_4b_initial = (32 - ebase % 32) / 4; + if (compacted_4b_initial == 32 / 4) + compacted_4b_initial = 0; + + if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) + compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); + else + compacted_2b = 0; + + pos = ebase; + if (lcn < compacted_4b_initial) { + amortizedshift = 2; + goto out; + } + pos += compacted_4b_initial * 4; + lcn -= compacted_4b_initial; + + if (lcn < compacted_2b) { + amortizedshift = 1; + goto out; + } + pos += compacted_2b * 2; + lcn -= compacted_2b; + amortizedshift = 2; +out: + pos += lcn * (1 << amortizedshift); + err = z_erofs_reload_indexes(m, erofs_blknr(pos)); + if (err) + return err; + return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos)); +} + +static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, + unsigned int lcn) +{ + const unsigned int datamode = EROFS_I(m->inode)->datalayout; + + if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) + return legacy_load_cluster_from_disk(m, lcn); + + if (datamode == EROFS_INODE_FLAT_COMPRESSION) + return compacted_load_cluster_from_disk(m, lcn); + + return -EINVAL; +} + +static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, + unsigned int lookback_distance) +{ + struct erofs_inode *const vi = EROFS_I(m->inode); + struct erofs_map_blocks *const map = m->map; + const unsigned int lclusterbits = vi->z_logical_clusterbits; + unsigned long lcn = m->lcn; + int err; + + if (lcn < lookback_distance) { + erofs_err(m->inode->i_sb, + "bogus lookback distance @ nid %llu", vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + + /* load extent head logical cluster if needed */ + lcn -= lookback_distance; + err = z_erofs_load_cluster_from_disk(m, lcn); + if (err) + return err; + + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + if (!m->delta[0]) { + erofs_err(m->inode->i_sb, + "invalid lookback distance 0 @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + return z_erofs_extent_lookback(m, m->delta[0]); + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + map->m_flags &= ~EROFS_MAP_ZIPPED; + /* fallthrough */ + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + map->m_la = (lcn << lclusterbits) | m->clusterofs; + break; + default: + erofs_err(m->inode->i_sb, + "unknown type %u @ lcn %lu of nid %llu", + m->type, lcn, vi->nid); + DBG_BUGON(1); + return -EOPNOTSUPP; + } + return 0; +} + +static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, + unsigned int initial_lcn) +{ + struct erofs_inode *const vi = EROFS_I(m->inode); + struct erofs_map_blocks *const map = m->map; + const unsigned int lclusterbits = vi->z_logical_clusterbits; + unsigned long lcn; + int err; + + DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN && + m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD); + if (!(map->m_flags & EROFS_MAP_ZIPPED) || + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { + map->m_plen = 1 << lclusterbits; + return 0; + } + + lcn = m->lcn + 1; + if (m->compressedlcs) + goto out; + + err = z_erofs_load_cluster_from_disk(m, lcn); + if (err) + return err; + + /* + * If the 1st NONHEAD lcluster has already been handled initially w/o + * valid compressedlcs, which means at least it mustn't be CBLKCNT, or + * an internal implemenatation error is detected. + * + * The following code can also handle it properly anyway, but let's + * BUG_ON in the debugging mode only for developers to notice that. + */ + DBG_BUGON(lcn == initial_lcn && + m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD); + + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + /* + * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type + * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. + */ + m->compressedlcs = 1; + break; + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + if (m->delta[0] != 1) + goto err_bonus_cblkcnt; + if (m->compressedlcs) + break; + /* fallth/rough */ + default: + erofs_err(m->inode->i_sb, + "cannot found CBLKCNT @ lcn %lu of nid %llu", + lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } +out: + map->m_plen = m->compressedlcs << lclusterbits; + return 0; +err_bonus_cblkcnt: + erofs_err(m->inode->i_sb, + "bogus CBLKCNT @ lcn %lu of nid %llu", + lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; +} + +int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + struct erofs_inode *const vi = EROFS_I(inode); + struct z_erofs_maprecorder m = { + .inode = inode, + .map = map, + }; + int err = 0; + unsigned int lclusterbits, endoff; + unsigned long initial_lcn; + unsigned long long ofs, end; + + trace_z_erofs_map_blocks_iter_enter(inode, map, flags); + + /* when trying to read beyond EOF, leave it unmapped */ + if (map->m_la >= inode->i_size) { + map->m_llen = map->m_la + 1 - inode->i_size; + map->m_la = inode->i_size; + map->m_flags = 0; + goto out; + } + + err = z_erofs_fill_inode_lazy(inode); + if (err) + goto out; + + lclusterbits = vi->z_logical_clusterbits; + ofs = map->m_la; + initial_lcn = ofs >> lclusterbits; + endoff = ofs & ((1 << lclusterbits) - 1); + + err = z_erofs_load_cluster_from_disk(&m, initial_lcn); + if (err) + goto unmap_out; + + map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */ + end = (m.lcn + 1ULL) << lclusterbits; + + switch (m.type) { + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + if (endoff >= m.clusterofs) + map->m_flags &= ~EROFS_MAP_ZIPPED; + /* fallthrough */ + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + if (endoff >= m.clusterofs) { + map->m_la = (m.lcn << lclusterbits) | m.clusterofs; + break; + } + /* m.lcn should be >= 1 if endoff < m.clusterofs */ + if (!m.lcn) { + erofs_err(inode->i_sb, + "invalid logical cluster 0 at nid %llu", + vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } + end = (m.lcn << lclusterbits) | m.clusterofs; + map->m_flags |= EROFS_MAP_FULL_MAPPED; + m.delta[0] = 1; + /* fallthrough */ + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + /* get the corresponding first chunk */ + err = z_erofs_extent_lookback(&m, m.delta[0]); + if (err) + goto unmap_out; + break; + default: + erofs_err(inode->i_sb, + "unknown type %u @ offset %llu of nid %llu", + m.type, ofs, vi->nid); + err = -EOPNOTSUPP; + goto unmap_out; + } + + map->m_llen = end - map->m_la; + map->m_pa = blknr_to_addr(m.pblk); + map->m_flags |= EROFS_MAP_MAPPED; + + err = z_erofs_get_extent_compressedlen(&m, initial_lcn); + if (err) + goto out; +unmap_out: + if (m.kaddr) + kunmap_atomic(m.kaddr); + +out: + erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", + __func__, map->m_la, map->m_pa, + map->m_llen, map->m_plen, map->m_flags); + + trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); + + /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ + DBG_BUGON(err < 0 && err != -ENOMEM); + return err; +} diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h new file mode 100644 index 000000000000..b05464f4a808 --- /dev/null +++ b/fs/erofs/zpvec.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2018 HUAWEI, Inc. + * https://www.huawei.com/ + */ +#ifndef __EROFS_FS_ZPVEC_H +#define __EROFS_FS_ZPVEC_H + +#include "tagptr.h" + +/* page type in pagevec for decompress subsystem */ +enum z_erofs_page_type { + /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */ + Z_EROFS_PAGE_TYPE_EXCLUSIVE, + + Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED, + + Z_EROFS_VLE_PAGE_TYPE_HEAD, + Z_EROFS_VLE_PAGE_TYPE_MAX +}; + +extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0") + __bad_page_type_exclusive(void); + +/* pagevec tagged pointer */ +typedef tagptr2_t erofs_vtptr_t; + +/* pagevec collector */ +struct z_erofs_pagevec_ctor { + struct page *curr, *next; + erofs_vtptr_t *pages; + + unsigned int nr, index; +}; + +static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor, + bool atomic) +{ + if (!ctor->curr) + return; + + if (atomic) + kunmap_atomic(ctor->pages); + else + kunmap(ctor->curr); +} + +static inline struct page * +z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor, + unsigned int nr) +{ + unsigned int index; + + /* keep away from occupied pages */ + if (ctor->next) + return ctor->next; + + for (index = 0; index < nr; ++index) { + const erofs_vtptr_t t = ctor->pages[index]; + const unsigned int tags = tagptr_unfold_tags(t); + + if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE) + return tagptr_unfold_ptr(t); + } + DBG_BUGON(nr >= ctor->nr); + return NULL; +} + +static inline void +z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor, + bool atomic) +{ + struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr); + + z_erofs_pagevec_ctor_exit(ctor, atomic); + + ctor->curr = next; + ctor->next = NULL; + ctor->pages = atomic ? + kmap_atomic(ctor->curr) : kmap(ctor->curr); + + ctor->nr = PAGE_SIZE / sizeof(struct page *); + ctor->index = 0; +} + +static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, + unsigned int nr, + erofs_vtptr_t *pages, + unsigned int i) +{ + ctor->nr = nr; + ctor->curr = ctor->next = NULL; + ctor->pages = pages; + + if (i >= nr) { + i -= nr; + z_erofs_pagevec_ctor_pagedown(ctor, false); + while (i > ctor->nr) { + i -= ctor->nr; + z_erofs_pagevec_ctor_pagedown(ctor, false); + } + } + ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i); + ctor->index = i; +} + +static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor, + struct page *page, + enum z_erofs_page_type type, + bool pvec_safereuse) +{ + if (!ctor->next) { + /* some pages cannot be reused as pvec safely without I/O */ + if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse) + type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED; + + if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE && + ctor->index + 1 == ctor->nr) + return false; + } + + if (ctor->index >= ctor->nr) + z_erofs_pagevec_ctor_pagedown(ctor, false); + + /* exclusive page type must be 0 */ + if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL) + __bad_page_type_exclusive(); + + /* should remind that collector->next never equal to 1, 2 */ + if (type == (uintptr_t)ctor->next) { + ctor->next = page; + } + ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type); + return true; +} + +static inline struct page * +z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor, + enum z_erofs_page_type *type) +{ + erofs_vtptr_t t; + + if (ctor->index >= ctor->nr) { + DBG_BUGON(!ctor->next); + z_erofs_pagevec_ctor_pagedown(ctor, true); + } + + t = ctor->pages[ctor->index]; + + *type = tagptr_unfold_tags(t); + + /* should remind that collector->next never equal to 1, 2 */ + if (*type == (uintptr_t)ctor->next) + ctor->next = tagptr_unfold_ptr(t); + + ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0); + return tagptr_unfold_ptr(t); +} +#endif diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3e5cc68a7b39..4d410240c2ee 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1888,8 +1888,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if (ep_op_has_event(op)) - ep_take_care_of_epollwakeup(&epds); + epds.events &= ~EPOLLWAKEUP; /* * We have to check that the file structure underneath the file descriptor diff --git a/fs/exfat/.travis.yml b/fs/exfat/.travis.yml new file mode 100644 index 000000000000..04cc4a413626 --- /dev/null +++ b/fs/exfat/.travis.yml @@ -0,0 +1,96 @@ +dist: bionic + +language: c + +notifications: + - email: true + +before_script: + # Download the kernel + - sudo apt-get install libelf-dev wget tar gzip python + - wget https://mirrors.edge.kernel.org/pub/linux/kernel/v4.x/linux-4.1.36.tar.gz + - tar xf linux-4.1.36.tar.gz + - mv linux-4.1.36 linux-stable + - ./.travis_get_mainline_kernel + - cp ./.travis_cmd_wrapper.pl ~/travis_cmd_wrapper.pl + # Prerequisite for xfstests testing + - sudo apt-get install linux-headers-$(uname -r) + - sudo apt-get install autoconf libtool pkg-config libnl-3-dev libnl-genl-3-dev + - sudo apt-get install xfslibs-dev uuid-dev libtool-bin xfsprogs libgdbm-dev gawk fio attr libattr1-dev libacl1-dev libaio-dev + - git clone --branch=exfat-next https://github.com/exfat-utils/exfat-utils + - git clone https://github.com/namjaejeon/exfat-testsuites + - export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + - export PATH=/usr/local/lib:$PATH + - sudo useradd fsgqa + - sudo useradd 123456-fsgqa + +script: + # Copy ksmbd source to kernel + - mv linux-stable ../ + - mv linux ../ + - mkdir ../linux-stable/fs/exfat + - cp -ar * ../linux-stable/fs/exfat/ + - cp -ar * ../linux/fs/exfat/ + + # Compile with 4.1 kernel + - cd ../linux-stable + - yes "" | make oldconfig > /dev/null + - echo 'obj-$(CONFIG_EXFAT_FS) += exfat/' >> fs/Makefile + - echo 'source "fs/exfat/Kconfig"' >> fs/Kconfig + - echo 'CONFIG_EXFAT_FS=m' >> .config + - echo 'CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8"' >> .config + - make -j$((`nproc`+1)) fs/exfat/exfat.ko + + # Compile with latest Torvalds' kernel +# - cd ../linux +# - yes "" | make oldconfig > /dev/null +# - echo 'obj-$(CONFIG_EXFAT) += exfat/' >> fs/Makefile +# - echo 'source "fs/exfat/Kconfig"' >> fs/Kconfig +# - echo 'CONFIG_EXFAT_FS=m' >> .config +# - echo 'CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8"' >> .config +# - make -j$((`nproc`+1)) fs/exfat/exfat.ko + + # Run xfstests testsuite + - cd ../linux-exfat-oot + - make > /dev/null + - sudo make install > /dev/null + - sudo modprobe exfat + - cd exfat-utils + - ./autogen.sh > /dev/null + - ./configure > /dev/null + - make -j$((`nproc`+1)) > /dev/null + - sudo make install > /dev/null + - sudo mkdir -p /mnt/scratch + - sudo mkdir -p /mnt/test + - sudo mkdir -p /mnt/full_test + # create file/director test + - truncate -s 10G full_test.img + - sudo losetup /dev/loop22 full_test.img + - sudo mkfs.exfat /dev/loop22 + - sudo mount -t exfat /dev/loop22 /mnt/full_test/ + - cd /mnt/full_test/ + - i=1;while [ $i -le 10000 ];do sudo touch file$i;if [ $? != 0 ]; then exit 1; fi; i=$(($i + 1));done + - sync + - sudo fsck.exfat /dev/loop22 + - sudo rm -rf * + - i=1;while [ $i -le 10000 ];do sudo mkdir file$i;if [ $? != 0 ]; then exit 1; fi; i=$(($i + 1));done + - sync + - sudo rm -rf * + - sudo fsck.exfat /dev/loop22 + - cd - + - sudo umount /mnt/full_test/ + - sudo fsck.exfat /dev/loop22 + # run xfstests test + - truncate -s 100G test.img + - truncate -s 100G scratch.img + - sudo losetup /dev/loop20 test.img + - sudo losetup /dev/loop21 scratch.img + - sudo mkfs.exfat /dev/loop20 + - sudo mkfs.exfat /dev/loop21 + - cd .. + - cd exfat-testsuites/ + - tar xzvf xfstests-exfat.tgz > /dev/null + - cd xfstests-exfat + - make -j$((`nproc`+1)) > /dev/null + - sudo ./check generic/001 + - sudo ./check generic/006 diff --git a/fs/exfat/.travis_cmd_wrapper.pl b/fs/exfat/.travis_cmd_wrapper.pl new file mode 100755 index 000000000000..0cbaea440eb9 --- /dev/null +++ b/fs/exfat/.travis_cmd_wrapper.pl @@ -0,0 +1,65 @@ +#!/usr/bin/perl + +# +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Copyright (C) 2019 Samsung Electronics Co., Ltd. +# + +use strict; + +sub tweak_sysctl() +{ + `sudo sysctl kernel.hardlockup_panic=0`; + `sudo sysctl kernel.hung_task_panic=0`; + `sudo sysctl kernel.panic=128`; + `sudo sysctl kernel.panic_on_io_nmi=0`; + `sudo sysctl kernel.panic_on_oops=0`; + `sudo sysctl kernel.panic_on_rcu_stall=0`; + `sudo sysctl kernel.panic_on_unrecovered_nmi=0`; + `sudo sysctl kernel.panic_on_warn=0`; + `sudo sysctl kernel.softlockup_panic=0`; + `sudo sysctl kernel.unknown_nmi_panic=0`; +} + +sub execute($$) +{ + my $cmd = shift; + my $timeout = shift; + my $output = "Timeout"; + my $status = 1; + + $timeout = 8 * 60 if (!defined $timeout); + + tweak_sysctl(); + + eval { + local $SIG{ALRM} = sub { + print "TIMEOUT:\n"; + system("top -n 1"), print "top\n"; + system("free"), print "free\n"; + system("dmesg"), print "dmesg\n"; + die "Timeout\n"; + }; + + print "Executing $cmd with timeout $timeout\n"; + + alarm $timeout; + $output = `$cmd`; + $status = $?; + alarm 0; + print $output."\n"; + print "Finished: status $status\n"; + }; + + if ($@) { + die unless $@ eq "Timeout\n"; + } +} + +if (! defined $ARGV[0]) { + print "Usage:\n\t./.travis_cmd_wrapper.pl command [timeout seconds]\n"; + exit 1; +} + +execute($ARGV[0], $ARGV[1]); diff --git a/fs/exfat/.travis_get_mainline_kernel b/fs/exfat/.travis_get_mainline_kernel new file mode 100755 index 000000000000..3356d15da772 --- /dev/null +++ b/fs/exfat/.travis_get_mainline_kernel @@ -0,0 +1,37 @@ +#!/bin/sh + +# +# A simple script we are using to get the latest mainline kernel +# tar ball +# + +wget https://www.kernel.org/releases.json +if [ $? -ne 0 ]; then + echo "Could not download kernel.org/releases.json" + exit 1 +fi + +VER=$(cat releases.json | python2.7 -c "import sys, json; print json.load(sys.stdin)['latest_stable']['version']") +if [ $? -ne 0 ]; then + echo "Could not parse release.json" + exit 1 +fi + +if [ "z$VER" = "z" ]; then + echo "Could not determine latest release version" + exit 1 +fi + +wget https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-"$VER".tar.gz +if [ $? -ne 0 ]; then + echo "Could not download $VER kernel version" + exit 1 +fi + +tar xf linux-"$VER".tar.gz +if [ $? -ne 0 ]; then + echo "Could not untar kernel tar ball" + exit 1 +fi + +mv linux-"$VER" linux diff --git a/fs/exfat/Kconfig b/fs/exfat/Kconfig new file mode 100644 index 000000000000..2d3636dc5b8c --- /dev/null +++ b/fs/exfat/Kconfig @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +config EXFAT_FS + tristate "exFAT filesystem support" + select NLS + help + This allows you to mount devices formatted with the exFAT file system. + exFAT is typically used on SD-Cards or USB sticks. + + To compile this as a module, choose M here: the module will be called + exfat. + +config EXFAT_DEFAULT_IOCHARSET + string "Default iocharset for exFAT" + default "utf8" + depends on EXFAT_FS + help + Set this to the default input/output character set to use for + converting between the encoding is used for user visible filename and + UTF-16 character that exfat filesystem use, and can be overridden with + the "iocharset" mount option for exFAT filesystems. diff --git a/fs/exfat/Makefile b/fs/exfat/Makefile new file mode 100644 index 000000000000..b1acf144466a --- /dev/null +++ b/fs/exfat/Makefile @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Makefile for the linux exFAT filesystem support. +# +ifneq ($(KERNELRELEASE),) +obj-$(CONFIG_EXFAT_FS) += exfat.o + +exfat-y := inode.o namei.o dir.o super.o fatent.o cache.o nls.o misc.o \ + file.o balloc.o +else +# Called from external kernel module build + +KERNELRELEASE ?= $(shell uname -r) +KDIR ?= /lib/modules/${KERNELRELEASE}/build +MDIR ?= /lib/modules/${KERNELRELEASE} +PWD := $(shell pwd) + +export CONFIG_EXFAT_FS := m + +all: + $(MAKE) -C $(KDIR) M=$(PWD) modules + +clean: + $(MAKE) -C $(KDIR) M=$(PWD) clean + +help: + $(MAKE) -C $(KDIR) M=$(PWD) help + +install: exfat.ko + rm -f ${MDIR}/kernel/fs/exfat/exfat.ko + install -m644 -b -D exfat.ko ${MDIR}/kernel/fs/exfat/exfat.ko + depmod -aq + +uninstall: + rm -rf ${MDIR}/kernel/fs/exfat + depmod -aq + +endif + +.PHONY : all clean install uninstall diff --git a/fs/exfat/README.md b/fs/exfat/README.md new file mode 100644 index 000000000000..cee3699ed9be --- /dev/null +++ b/fs/exfat/README.md @@ -0,0 +1,53 @@ +## exFAT filesystem +This is the exfat filesystem for support from the linux 4.1 kernel +to the latest kernel. + +## Installing as a stand-alone module + +Install prerequisite package for Fedora, RHEL: +``` + yum install kernel-devel-$(uname -r) +``` + +Build step: +``` + make + sudo make install +``` + +To load the driver manually, run this as root: +``` + modprobe exfat +``` + + +## Installing as a part of the kernel + +1. Let's take [linux] as the path to your kernel source dir. +``` + cd [linux] + cp -ar exfat [linux]/fs/ +``` + +2. edit [linux]/fs/Kconfig +``` + source "fs/fat/Kconfig" + +source "fs/exfat/Kconfig" + source "fs/ntfs/Kconfig" +``` + +3. edit [linux]/fs/Makefile +``` + obj-$(CONFIG_FAT_FS) += fat/ + +obj-$(CONFIG_EXFAT_FS) += exfat/ + obj-$(CONFIG_BFS_FS) += bfs/ +``` +4. make menuconfig and set exfat +``` + File systems ---> + DOS/FAT/NT Filesystems ---> + exFAT filesystem support + (utf8) Default iocharset for exFAT +``` + +build your kernel diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c new file mode 100644 index 000000000000..30c2414d859c --- /dev/null +++ b/fs/exfat/balloc.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#else +#include +#endif + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static const unsigned char free_bit[] = { + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/ + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/ + 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/ + 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/ + 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/ + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/ + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/ + 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/ + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/ +}; + +static const unsigned char used_bit[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3,/* 0 ~ 19*/ + 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4,/* 20 ~ 39*/ + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5,/* 40 ~ 59*/ + 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,/* 60 ~ 79*/ + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,/* 80 ~ 99*/ + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,/*100 ~ 119*/ + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,/*120 ~ 139*/ + 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,/*140 ~ 159*/ + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,/*160 ~ 179*/ + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,/*180 ~ 199*/ + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6,/*200 ~ 219*/ + 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,/*220 ~ 239*/ + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 /*240 ~ 255*/ +}; + +/* + * Allocation Bitmap Management Functions + */ +static int exfat_allocate_bitmap(struct super_block *sb, + struct exfat_dentry *ep) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + long long map_size; + unsigned int i, need_map_size; + sector_t sector; + + sbi->map_clu = le32_to_cpu(ep->dentry.bitmap.start_clu); + map_size = le64_to_cpu(ep->dentry.bitmap.size); + need_map_size = ((EXFAT_DATA_CLUSTER_COUNT(sbi) - 1) / BITS_PER_BYTE) + + 1; + if (need_map_size != map_size) { + exfat_err(sb, "bogus allocation bitmap size(need : %u, cur : %lld)", + need_map_size, map_size); + /* + * Only allowed when bogus allocation + * bitmap size is large + */ + if (need_map_size > map_size) + return -EIO; + } + sbi->map_sectors = ((need_map_size - 1) >> + (sb->s_blocksize_bits)) + 1; + sbi->vol_amap = kmalloc_array(sbi->map_sectors, + sizeof(struct buffer_head *), GFP_KERNEL); + if (!sbi->vol_amap) + return -ENOMEM; + + sector = exfat_cluster_to_sector(sbi, sbi->map_clu); + for (i = 0; i < sbi->map_sectors; i++) { + sbi->vol_amap[i] = sb_bread(sb, sector + i); + if (!sbi->vol_amap[i]) { + /* release all buffers and free vol_amap */ + int j = 0; + + while (j < i) + brelse(sbi->vol_amap[j++]); + + kfree(sbi->vol_amap); + sbi->vol_amap = NULL; + return -EIO; + } + } + + return 0; +} + +int exfat_load_bitmap(struct super_block *sb) +{ + unsigned int i, type; + struct exfat_chain clu; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + exfat_chain_set(&clu, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < sbi->dentries_per_clu; i++) { + struct exfat_dentry *ep; + struct buffer_head *bh; + + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + + type = exfat_get_entry_type(ep); + if (type == TYPE_UNUSED) + break; + if (type != TYPE_BITMAP) + continue; + if (ep->dentry.bitmap.flags == 0x0) { + int err; + + err = exfat_allocate_bitmap(sb, ep); + brelse(bh); + return err; + } + brelse(bh); + } + + if (exfat_get_next_cluster(sb, &clu.dir)) + return -EIO; + } + + return -EINVAL; +} + +void exfat_free_bitmap(struct exfat_sb_info *sbi) +{ + int i; + + for (i = 0; i < sbi->map_sectors; i++) + __brelse(sbi->vol_amap[i]); + + kfree(sbi->vol_amap); +} + +int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) +{ + int i, b; + unsigned int ent_idx; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + if (!is_valid_cluster(sbi, clu)) + return -EINVAL; + + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + + set_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sbi->vol_amap[i], sync); + return 0; +} + +void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) +{ + int i, b; + unsigned int ent_idx; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_mount_options *opts = &sbi->options; + + if (!is_valid_cluster(sbi, clu)) + return; + + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); + + clear_bit_le(b, sbi->vol_amap[i]->b_data); + exfat_update_bh(sbi->vol_amap[i], sync); + + if (opts->discard) { + int ret_discard; + + ret_discard = sb_issue_discard(sb, + exfat_cluster_to_sector(sbi, clu), + (1 << sbi->sect_per_clus_bits), GFP_NOFS, 0); + + if (ret_discard == -EOPNOTSUPP) { + exfat_err(sb, "discard not supported by device, disabling"); + opts->discard = 0; + } + } +} + +/* + * If the value of "clu" is 0, it means cluster 2 which is the first cluster of + * the cluster heap. + */ +unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu) +{ + unsigned int i, map_i, map_b, ent_idx; + unsigned int clu_base, clu_free; + unsigned char k, clu_mask; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + WARN_ON(clu < EXFAT_FIRST_CLUSTER); + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); + clu_mask = IGNORED_BITS_REMAINED(clu, clu_base); + + map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx); + + for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; + i += BITS_PER_BYTE) { + k = *(sbi->vol_amap[map_i]->b_data + map_b); + if (clu_mask > 0) { + k |= clu_mask; + clu_mask = 0; + } + if (k < 0xFF) { + clu_free = clu_base + free_bit[k]; + if (clu_free < sbi->num_clusters) + return clu_free; + } + clu_base += BITS_PER_BYTE; + + if (++map_b >= sb->s_blocksize || + clu_base >= sbi->num_clusters) { + if (++map_i >= sbi->map_sectors) { + clu_base = EXFAT_FIRST_CLUSTER; + map_i = 0; + } + map_b = 0; + } + } + + return EXFAT_EOF_CLUSTER; +} + +int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int count = 0; + unsigned int i, map_i = 0, map_b = 0; + unsigned int total_clus = EXFAT_DATA_CLUSTER_COUNT(sbi); + unsigned int last_mask = total_clus & BITS_PER_BYTE_MASK; + unsigned char clu_bits; + const unsigned char last_bit_mask[] = {0, 0b00000001, 0b00000011, + 0b00000111, 0b00001111, 0b00011111, 0b00111111, 0b01111111}; + + total_clus &= ~last_mask; + for (i = 0; i < total_clus; i += BITS_PER_BYTE) { + clu_bits = *(sbi->vol_amap[map_i]->b_data + map_b); + count += used_bit[clu_bits]; + if (++map_b >= (unsigned int)sb->s_blocksize) { + map_i++; + map_b = 0; + } + } + + if (last_mask) { + clu_bits = *(sbi->vol_amap[map_i]->b_data + map_b); + clu_bits &= last_bit_mask[last_mask]; + count += used_bit[clu_bits]; + } + + *ret_count = count; + return 0; +} + +int exfat_trim_fs(struct inode *inode, struct fstrim_range *range) +{ + unsigned int trim_begin, trim_end, count, next_free_clu; + u64 clu_start, clu_end, trim_minlen, trimmed_total = 0; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int err = 0; + + clu_start = max_t(u64, range->start >> sbi->cluster_size_bits, + EXFAT_FIRST_CLUSTER); + clu_end = clu_start + (range->len >> sbi->cluster_size_bits) - 1; + trim_minlen = range->minlen >> sbi->cluster_size_bits; + + if (clu_start >= sbi->num_clusters || range->len < sbi->cluster_size) + return -EINVAL; + + if (clu_end >= sbi->num_clusters) + clu_end = sbi->num_clusters - 1; + + mutex_lock(&sbi->bitmap_lock); + + trim_begin = trim_end = exfat_find_free_bitmap(sb, clu_start); + if (trim_begin == EXFAT_EOF_CLUSTER) + goto unlock; + + next_free_clu = exfat_find_free_bitmap(sb, trim_end + 1); + if (next_free_clu == EXFAT_EOF_CLUSTER) + goto unlock; + + do { + if (next_free_clu == trim_end + 1) { + /* extend trim range for continuous free cluster */ + trim_end++; + } else { + /* trim current range if it's larger than trim_minlen */ + count = trim_end - trim_begin + 1; + if (count >= trim_minlen) { + err = sb_issue_discard(sb, + exfat_cluster_to_sector(sbi, trim_begin), + count * sbi->sect_per_clus, GFP_NOFS, 0); + if (err) + goto unlock; + + trimmed_total += count; + } + + /* set next start point of the free hole */ + trim_begin = trim_end = next_free_clu; + } + + if (next_free_clu >= clu_end) + break; + + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto unlock; + } + + next_free_clu = exfat_find_free_bitmap(sb, next_free_clu + 1); + } while (next_free_clu != EXFAT_EOF_CLUSTER && + next_free_clu > trim_end); + + /* try to trim remainder */ + count = trim_end - trim_begin + 1; + if (count >= trim_minlen) { + err = sb_issue_discard(sb, exfat_cluster_to_sector(sbi, trim_begin), + count * sbi->sect_per_clus, GFP_NOFS, 0); + if (err) + goto unlock; + + trimmed_total += count; + } + +unlock: + mutex_unlock(&sbi->bitmap_lock); + range->len = trimmed_total << sbi->cluster_size_bits; + + return err; +} diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c new file mode 100644 index 000000000000..5a2f119b7e8c --- /dev/null +++ b/fs/exfat/cache.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/fs/fat/cache.c + * + * Written 1992,1993 by Werner Almesberger + * + * Mar 1999. AV. Changed cache, so that it uses the starting cluster instead + * of inode number. + * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers. + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +#define EXFAT_MAX_CACHE 16 + +struct exfat_cache { + struct list_head cache_list; + unsigned int nr_contig; /* number of contiguous clusters */ + unsigned int fcluster; /* cluster number in the file. */ + unsigned int dcluster; /* cluster number on disk. */ +}; + +struct exfat_cache_id { + unsigned int id; + unsigned int nr_contig; + unsigned int fcluster; + unsigned int dcluster; +}; + +static struct kmem_cache *exfat_cachep; + +static void exfat_cache_init_once(void *c) +{ + struct exfat_cache *cache = (struct exfat_cache *)c; + + INIT_LIST_HEAD(&cache->cache_list); +} + +int exfat_cache_init(void) +{ + exfat_cachep = kmem_cache_create("exfat_cache", + sizeof(struct exfat_cache), + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + exfat_cache_init_once); + if (!exfat_cachep) + return -ENOMEM; + return 0; +} + +void exfat_cache_shutdown(void) +{ + if (!exfat_cachep) + return; + kmem_cache_destroy(exfat_cachep); +} + +static inline struct exfat_cache *exfat_cache_alloc(void) +{ + return kmem_cache_alloc(exfat_cachep, GFP_NOFS); +} + +static inline void exfat_cache_free(struct exfat_cache *cache) +{ + WARN_ON(!list_empty(&cache->cache_list)); + kmem_cache_free(exfat_cachep, cache); +} + +static inline void exfat_cache_update_lru(struct inode *inode, + struct exfat_cache *cache) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + + if (ei->cache_lru.next != &cache->cache_list) + list_move(&cache->cache_list, &ei->cache_lru); +} + +static unsigned int exfat_cache_lookup(struct inode *inode, + unsigned int fclus, struct exfat_cache_id *cid, + unsigned int *cached_fclus, unsigned int *cached_dclus) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + static struct exfat_cache nohit = { .fcluster = 0, }; + struct exfat_cache *hit = &nohit, *p; + unsigned int offset = EXFAT_EOF_CLUSTER; + + spin_lock(&ei->cache_lru_lock); + list_for_each_entry(p, &ei->cache_lru, cache_list) { + /* Find the cache of "fclus" or nearest cache. */ + if (p->fcluster <= fclus && hit->fcluster < p->fcluster) { + hit = p; + if (hit->fcluster + hit->nr_contig < fclus) { + offset = hit->nr_contig; + } else { + offset = fclus - hit->fcluster; + break; + } + } + } + if (hit != &nohit) { + exfat_cache_update_lru(inode, hit); + + cid->id = ei->cache_valid_id; + cid->nr_contig = hit->nr_contig; + cid->fcluster = hit->fcluster; + cid->dcluster = hit->dcluster; + *cached_fclus = cid->fcluster + offset; + *cached_dclus = cid->dcluster + offset; + } + spin_unlock(&ei->cache_lru_lock); + + return offset; +} + +static struct exfat_cache *exfat_cache_merge(struct inode *inode, + struct exfat_cache_id *new) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache *p; + + list_for_each_entry(p, &ei->cache_lru, cache_list) { + /* Find the same part as "new" in cluster-chain. */ + if (p->fcluster == new->fcluster) { + if (new->nr_contig > p->nr_contig) + p->nr_contig = new->nr_contig; + return p; + } + } + return NULL; +} + +static void exfat_cache_add(struct inode *inode, + struct exfat_cache_id *new) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache *cache, *tmp; + + if (new->fcluster == EXFAT_EOF_CLUSTER) /* dummy cache */ + return; + + spin_lock(&ei->cache_lru_lock); + if (new->id != EXFAT_CACHE_VALID && + new->id != ei->cache_valid_id) + goto unlock; /* this cache was invalidated */ + + cache = exfat_cache_merge(inode, new); + if (cache == NULL) { + if (ei->nr_caches < EXFAT_MAX_CACHE) { + ei->nr_caches++; + spin_unlock(&ei->cache_lru_lock); + + tmp = exfat_cache_alloc(); + if (!tmp) { + spin_lock(&ei->cache_lru_lock); + ei->nr_caches--; + spin_unlock(&ei->cache_lru_lock); + return; + } + + spin_lock(&ei->cache_lru_lock); + cache = exfat_cache_merge(inode, new); + if (cache != NULL) { + ei->nr_caches--; + exfat_cache_free(tmp); + goto out_update_lru; + } + cache = tmp; + } else { + struct list_head *p = ei->cache_lru.prev; + + cache = list_entry(p, + struct exfat_cache, cache_list); + } + cache->fcluster = new->fcluster; + cache->dcluster = new->dcluster; + cache->nr_contig = new->nr_contig; + } +out_update_lru: + exfat_cache_update_lru(inode, cache); +unlock: + spin_unlock(&ei->cache_lru_lock); +} + +/* + * Cache invalidation occurs rarely, thus the LRU chain is not updated. It + * fixes itself after a while. + */ +static void __exfat_cache_inval_inode(struct inode *inode) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache *cache; + + while (!list_empty(&ei->cache_lru)) { + cache = list_entry(ei->cache_lru.next, + struct exfat_cache, cache_list); + list_del_init(&cache->cache_list); + ei->nr_caches--; + exfat_cache_free(cache); + } + /* Update. The copy of caches before this id is discarded. */ + ei->cache_valid_id++; + if (ei->cache_valid_id == EXFAT_CACHE_VALID) + ei->cache_valid_id++; +} + +void exfat_cache_inval_inode(struct inode *inode) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + + spin_lock(&ei->cache_lru_lock); + __exfat_cache_inval_inode(inode); + spin_unlock(&ei->cache_lru_lock); +} + +static inline int cache_contiguous(struct exfat_cache_id *cid, + unsigned int dclus) +{ + cid->nr_contig++; + return cid->dcluster + cid->nr_contig == dclus; +} + +static inline void cache_init(struct exfat_cache_id *cid, + unsigned int fclus, unsigned int dclus) +{ + cid->id = EXFAT_CACHE_VALID; + cid->fcluster = fclus; + cid->dcluster = dclus; + cid->nr_contig = 0; +} + +int exfat_get_cluster(struct inode *inode, unsigned int cluster, + unsigned int *fclus, unsigned int *dclus, + unsigned int *last_dclus, int allow_eof) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int limit = sbi->num_clusters; + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_cache_id cid; + unsigned int content; + + if (ei->start_clu == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "invalid access to exfat cache (entry 0x%08x)", + ei->start_clu); + return -EIO; + } + + *fclus = 0; + *dclus = ei->start_clu; + *last_dclus = *dclus; + + /* + * Don`t use exfat_cache if zero offset or non-cluster allocation + */ + if (cluster == 0 || *dclus == EXFAT_EOF_CLUSTER) + return 0; + + cache_init(&cid, EXFAT_EOF_CLUSTER, EXFAT_EOF_CLUSTER); + + if (exfat_cache_lookup(inode, cluster, &cid, fclus, dclus) == + EXFAT_EOF_CLUSTER) { + /* + * dummy, always not contiguous + * This is reinitialized by cache_init(), later. + */ + WARN_ON(cid.id != EXFAT_CACHE_VALID || + cid.fcluster != EXFAT_EOF_CLUSTER || + cid.dcluster != EXFAT_EOF_CLUSTER || + cid.nr_contig != 0); + } + + if (*fclus == cluster) + return 0; + + while (*fclus < cluster) { + /* prevent the infinite loop of cluster chain */ + if (*fclus > limit) { + exfat_fs_error(sb, + "detected the cluster chain loop (i_pos %u)", + (*fclus)); + return -EIO; + } + + if (exfat_ent_get(sb, *dclus, &content)) + return -EIO; + + *last_dclus = *dclus; + *dclus = content; + (*fclus)++; + + if (content == EXFAT_EOF_CLUSTER) { + if (!allow_eof) { + exfat_fs_error(sb, + "invalid cluster chain (i_pos %u, last_clus 0x%08x is EOF)", + *fclus, (*last_dclus)); + return -EIO; + } + + break; + } + + if (!cache_contiguous(&cid, *dclus)) + cache_init(&cid, *fclus, *dclus); + } + + exfat_cache_add(inode, &cid); + return 0; +} diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c new file mode 100644 index 000000000000..07765a74251a --- /dev/null +++ b/fs/exfat/dir.c @@ -0,0 +1,1206 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#include +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int exfat_extract_uni_name(struct exfat_dentry *ep, + unsigned short *uniname) +{ + int i, len = 0; + + for (i = 0; i < EXFAT_FILE_NAME_LEN; i++) { + *uniname = le16_to_cpu(ep->dentry.name.unicode_0_14[i]); + if (*uniname == 0x0) + return len; + uniname++; + len++; + } + + *uniname = 0x0; + return len; + +} + +static void exfat_get_uniname_from_ext_entry(struct super_block *sb, + struct exfat_chain *p_dir, int entry, unsigned short *uniname) +{ + int i; + struct exfat_entry_set_cache *es; + + es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES); + if (!es) + return; + + /* + * First entry : file entry + * Second entry : stream-extension entry + * Third entry : first file-name entry + * So, the index of first file-name dentry should start from 2. + */ + for (i = 2; i < es->num_entries; i++) { + struct exfat_dentry *ep = exfat_get_dentry_cached(es, i); + + /* end of name entry */ + if (exfat_get_entry_type(ep) != TYPE_EXTEND) + break; + + exfat_extract_uni_name(ep, uniname); + uniname += EXFAT_FILE_NAME_LEN; + } + + exfat_free_dentry_set(es, false); +} + +/* read a directory entry from the opened directory */ +static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_entry *dir_entry) +{ + int i, dentries_per_clu, dentries_per_clu_bits = 0, num_ext; + unsigned int type, clu_offset, max_dentries; + struct exfat_chain dir, clu; + struct exfat_uni_name uni_name; + struct exfat_dentry *ep; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned int dentry = EXFAT_B_TO_DEN(*cpos) & 0xFFFFFFFF; + struct buffer_head *bh; + + /* check if the given file ID is opened */ + if (ei->type != TYPE_DIR) + return -EPERM; + + if (ei->entry == -1) + exfat_chain_set(&dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + else + exfat_chain_set(&dir, ei->start_clu, + EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags); + + dentries_per_clu = sbi->dentries_per_clu; + dentries_per_clu_bits = ilog2(dentries_per_clu); + max_dentries = (unsigned int)min_t(u64, MAX_EXFAT_DENTRIES, + (u64)sbi->num_clusters << dentries_per_clu_bits); + + clu_offset = dentry >> dentries_per_clu_bits; + exfat_chain_dup(&clu, &dir); + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + clu.dir += clu_offset; + clu.size -= clu_offset; + } else { + /* hint_information */ + if (clu_offset > 0 && ei->hint_bmap.off != EXFAT_EOF_CLUSTER && + ei->hint_bmap.off > 0 && clu_offset >= ei->hint_bmap.off) { + clu_offset -= ei->hint_bmap.off; + clu.dir = ei->hint_bmap.clu; + } + + while (clu_offset > 0) { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + + clu_offset--; + } + } + + while (clu.dir != EXFAT_EOF_CLUSTER && dentry < max_dentries) { + i = dentry & (dentries_per_clu - 1); + + for ( ; i < dentries_per_clu; i++, dentry++) { + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + + type = exfat_get_entry_type(ep); + if (type == TYPE_UNUSED) { + brelse(bh); + break; + } + + if (type != TYPE_FILE && type != TYPE_DIR) { + brelse(bh); + continue; + } + + num_ext = ep->dentry.file.num_ext; + dir_entry->attr = le16_to_cpu(ep->dentry.file.attr); + exfat_get_entry_time(sbi, &dir_entry->crtime, + ep->dentry.file.create_tz, + ep->dentry.file.create_time, + ep->dentry.file.create_date, + ep->dentry.file.create_time_cs); + exfat_get_entry_time(sbi, &dir_entry->mtime, + ep->dentry.file.modify_tz, + ep->dentry.file.modify_time, + ep->dentry.file.modify_date, + ep->dentry.file.modify_time_cs); + exfat_get_entry_time(sbi, &dir_entry->atime, + ep->dentry.file.access_tz, + ep->dentry.file.access_time, + ep->dentry.file.access_date, + 0); + + *uni_name.name = 0x0; + exfat_get_uniname_from_ext_entry(sb, &clu, i, + uni_name.name); + exfat_utf16_to_nls(sb, &uni_name, + dir_entry->namebuf.lfn, + dir_entry->namebuf.lfnbuf_len); + brelse(bh); + + ep = exfat_get_dentry(sb, &clu, i + 1, &bh); + if (!ep) + return -EIO; + dir_entry->size = + le64_to_cpu(ep->dentry.stream.valid_size); + dir_entry->entry = dentry; + brelse(bh); + + ei->hint_bmap.off = dentry >> dentries_per_clu_bits; + ei->hint_bmap.clu = clu.dir; + + *cpos = EXFAT_DEN_TO_B(dentry + 1 + num_ext); + return 0; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + } + + dir_entry->namebuf.lfn[0] = '\0'; + *cpos = EXFAT_DEN_TO_B(dentry); + return 0; +} + +static void exfat_init_namebuf(struct exfat_dentry_namebuf *nb) +{ + nb->lfn = NULL; + nb->lfnbuf_len = 0; +} + +static int exfat_alloc_namebuf(struct exfat_dentry_namebuf *nb) +{ + nb->lfn = __getname(); + if (!nb->lfn) + return -ENOMEM; + nb->lfnbuf_len = MAX_VFSNAME_BUF_SIZE; + return 0; +} + +static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb) +{ + if (!nb->lfn) + return; + + __putname(nb->lfn); + exfat_init_namebuf(nb); +} + +/* skip iterating emit_dots when dir is empty */ +#define ITER_POS_FILLED_DOTS (2) +static int exfat_iterate(struct file *filp, struct dir_context *ctx) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct inode *tmp; + struct exfat_dir_entry de; + struct exfat_dentry_namebuf *nb = &(de.namebuf); + struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned long inum; + loff_t cpos, i_pos; + int err = 0, fake_offset = 0; + + exfat_init_namebuf(nb); + mutex_lock(&EXFAT_SB(sb)->s_lock); + + cpos = ctx->pos; + if (!dir_emit_dots(filp, ctx)) + goto unlock; + + if (ctx->pos == ITER_POS_FILLED_DOTS) { + cpos = 0; + fake_offset = 1; + } + + if (cpos & (DENTRY_SIZE - 1)) { + err = -ENOENT; + goto unlock; + } + + /* name buffer should be allocated before use */ + err = exfat_alloc_namebuf(nb); + if (err) + goto unlock; +get_new: + if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode)) + goto end_of_dir; + + err = exfat_readdir(inode, &cpos, &de); + if (err) { + /* + * At least we tried to read a sector. Move cpos to next sector + * position (should be aligned). + */ + if (err == -EIO) { + cpos += 1 << (sb->s_blocksize_bits); + cpos &= ~(sb->s_blocksize - 1); + } + + err = -EIO; + goto end_of_dir; + } + + if (!nb->lfn[0]) + goto end_of_dir; + + i_pos = ((loff_t)ei->start_clu << 32) | (de.entry & 0xffffffff); + tmp = exfat_iget(sb, i_pos); + if (tmp) { + inum = tmp->i_ino; + iput(tmp); + } else { + inum = iunique(sb, EXFAT_ROOT_INO); + } + + /* + * Before calling dir_emit(), sb_lock should be released. + * Because page fault can occur in dir_emit() when the size + * of buffer given from user is larger than one page size. + */ + mutex_unlock(&EXFAT_SB(sb)->s_lock); + if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum, + (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG)) + goto out_unlocked; + mutex_lock(&EXFAT_SB(sb)->s_lock); + ctx->pos = cpos; + goto get_new; + +end_of_dir: + if (!cpos && fake_offset) + cpos = ITER_POS_FILLED_DOTS; + ctx->pos = cpos; +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); +out_unlocked: + /* + * To improve performance, free namebuf after unlock sb_lock. + * If namebuf is not allocated, this function do nothing + */ + exfat_free_namebuf(nb); + return err; +} + +const struct file_operations exfat_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = exfat_iterate, + .unlocked_ioctl = exfat_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = exfat_compat_ioctl, +#endif + .fsync = exfat_file_fsync, +}; + +int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu) +{ + int ret; + + exfat_chain_set(clu, EXFAT_EOF_CLUSTER, 0, ALLOC_NO_FAT_CHAIN); + + ret = exfat_alloc_cluster(inode, 1, clu, IS_DIRSYNC(inode)); + if (ret) + return ret; + + return exfat_zeroed_cluster(inode, clu->dir); +} + +int exfat_calc_num_entries(struct exfat_uni_name *p_uniname) +{ + int len; + + len = p_uniname->name_len; + if (len == 0) + return -EINVAL; + + /* 1 file entry + 1 stream entry + name entries */ + return ((len - 1) / EXFAT_FILE_NAME_LEN + 3); +} + +unsigned int exfat_get_entry_type(struct exfat_dentry *ep) +{ + if (ep->type == EXFAT_UNUSED) + return TYPE_UNUSED; + if (IS_EXFAT_DELETED(ep->type)) + return TYPE_DELETED; + if (ep->type == EXFAT_INVAL) + return TYPE_INVALID; + if (IS_EXFAT_CRITICAL_PRI(ep->type)) { + if (ep->type == EXFAT_BITMAP) + return TYPE_BITMAP; + if (ep->type == EXFAT_UPCASE) + return TYPE_UPCASE; + if (ep->type == EXFAT_VOLUME) + return TYPE_VOLUME; + if (ep->type == EXFAT_FILE) { + if (le16_to_cpu(ep->dentry.file.attr) & ATTR_SUBDIR) + return TYPE_DIR; + return TYPE_FILE; + } + return TYPE_CRITICAL_PRI; + } + if (IS_EXFAT_BENIGN_PRI(ep->type)) { + if (ep->type == EXFAT_GUID) + return TYPE_GUID; + if (ep->type == EXFAT_PADDING) + return TYPE_PADDING; + if (ep->type == EXFAT_ACLTAB) + return TYPE_ACLTAB; + return TYPE_BENIGN_PRI; + } + if (IS_EXFAT_CRITICAL_SEC(ep->type)) { + if (ep->type == EXFAT_STREAM) + return TYPE_STREAM; + if (ep->type == EXFAT_NAME) + return TYPE_EXTEND; + if (ep->type == EXFAT_ACL) + return TYPE_ACL; + return TYPE_CRITICAL_SEC; + } + return TYPE_BENIGN_SEC; +} + +static void exfat_set_entry_type(struct exfat_dentry *ep, unsigned int type) +{ + if (type == TYPE_UNUSED) { + ep->type = EXFAT_UNUSED; + } else if (type == TYPE_DELETED) { + ep->type &= EXFAT_DELETE; + } else if (type == TYPE_STREAM) { + ep->type = EXFAT_STREAM; + } else if (type == TYPE_EXTEND) { + ep->type = EXFAT_NAME; + } else if (type == TYPE_BITMAP) { + ep->type = EXFAT_BITMAP; + } else if (type == TYPE_UPCASE) { + ep->type = EXFAT_UPCASE; + } else if (type == TYPE_VOLUME) { + ep->type = EXFAT_VOLUME; + } else if (type == TYPE_DIR) { + ep->type = EXFAT_FILE; + ep->dentry.file.attr = cpu_to_le16(ATTR_SUBDIR); + } else if (type == TYPE_FILE) { + ep->type = EXFAT_FILE; + ep->dentry.file.attr = cpu_to_le16(ATTR_ARCHIVE); + } +} + +static void exfat_init_stream_entry(struct exfat_dentry *ep, + unsigned char flags, unsigned int start_clu, + unsigned long long size) +{ + exfat_set_entry_type(ep, TYPE_STREAM); + ep->dentry.stream.flags = flags; + ep->dentry.stream.start_clu = cpu_to_le32(start_clu); + ep->dentry.stream.valid_size = cpu_to_le64(size); + ep->dentry.stream.size = cpu_to_le64(size); +} + +static void exfat_init_name_entry(struct exfat_dentry *ep, + unsigned short *uniname) +{ + int i; + + exfat_set_entry_type(ep, TYPE_EXTEND); + ep->dentry.name.flags = 0x0; + + for (i = 0; i < EXFAT_FILE_NAME_LEN; i++) { + if (*uniname != 0x0) { + ep->dentry.name.unicode_0_14[i] = cpu_to_le16(*uniname); + uniname++; + } else { + ep->dentry.name.unicode_0_14[i] = 0x0; + } + } +} + +int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, unsigned int type, unsigned int start_clu, + unsigned long long size) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) + struct timespec64 ts; +#else + struct timespec ts; +#endif + struct exfat_dentry *ep; + struct buffer_head *bh; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + ts = current_time(inode); +#else + ts = CURRENT_TIME_SEC; +#endif + + /* + * We cannot use exfat_get_dentry_set here because file ep is not + * initialized yet. + */ + ep = exfat_get_dentry(sb, p_dir, entry, &bh); + if (!ep) + return -EIO; + + exfat_set_entry_type(ep, type); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.create_tz, + &ep->dentry.file.create_time, + &ep->dentry.file.create_date, + &ep->dentry.file.create_time_cs); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.modify_tz, + &ep->dentry.file.modify_time, + &ep->dentry.file.modify_date, + &ep->dentry.file.modify_time_cs); + exfat_set_entry_time(sbi, &ts, + &ep->dentry.file.access_tz, + &ep->dentry.file.access_time, + &ep->dentry.file.access_date, + NULL); + + exfat_update_bh(bh, IS_DIRSYNC(inode)); + brelse(bh); + + ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh); + if (!ep) + return -EIO; + + exfat_init_stream_entry(ep, + (type == TYPE_FILE) ? ALLOC_FAT_CHAIN : ALLOC_NO_FAT_CHAIN, + start_clu, size); + exfat_update_bh(bh, IS_DIRSYNC(inode)); + brelse(bh); + + return 0; +} + +int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, + int entry) +{ + struct super_block *sb = inode->i_sb; + int ret = 0; + int i, num_entries; + u16 chksum; + struct exfat_dentry *ep, *fep; + struct buffer_head *fbh, *bh; + + fep = exfat_get_dentry(sb, p_dir, entry, &fbh); + if (!fep) + return -EIO; + + num_entries = fep->dentry.file.num_ext + 1; + chksum = exfat_calc_chksum16(fep, DENTRY_SIZE, 0, CS_DIR_ENTRY); + + for (i = 1; i < num_entries; i++) { + ep = exfat_get_dentry(sb, p_dir, entry + i, &bh); + if (!ep) { + ret = -EIO; + goto release_fbh; + } + chksum = exfat_calc_chksum16(ep, DENTRY_SIZE, chksum, + CS_DEFAULT); + brelse(bh); + } + + fep->dentry.file.checksum = cpu_to_le16(chksum); + exfat_update_bh(fbh, IS_DIRSYNC(inode)); +release_fbh: + brelse(fbh); + return ret; +} + +int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, int num_entries, struct exfat_uni_name *p_uniname) +{ + struct super_block *sb = inode->i_sb; + int i; + unsigned short *uniname = p_uniname->name; + struct exfat_dentry *ep; + struct buffer_head *bh; + int sync = IS_DIRSYNC(inode); + + ep = exfat_get_dentry(sb, p_dir, entry, &bh); + if (!ep) + return -EIO; + + ep->dentry.file.num_ext = (unsigned char)(num_entries - 1); + exfat_update_bh(bh, sync); + brelse(bh); + + ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh); + if (!ep) + return -EIO; + + ep->dentry.stream.name_len = p_uniname->name_len; + ep->dentry.stream.name_hash = cpu_to_le16(p_uniname->name_hash); + exfat_update_bh(bh, sync); + brelse(bh); + + for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) { + ep = exfat_get_dentry(sb, p_dir, entry + i, &bh); + if (!ep) + return -EIO; + + exfat_init_name_entry(ep, uniname); + exfat_update_bh(bh, sync); + brelse(bh); + uniname += EXFAT_FILE_NAME_LEN; + } + + exfat_update_dir_chksum(inode, p_dir, entry); + return 0; +} + +int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, + int entry, int order, int num_entries) +{ + struct super_block *sb = inode->i_sb; + int i; + struct exfat_dentry *ep; + struct buffer_head *bh; + + for (i = order; i < num_entries; i++) { + ep = exfat_get_dentry(sb, p_dir, entry + i, &bh); + if (!ep) + return -EIO; + + exfat_set_entry_type(ep, TYPE_DELETED); + exfat_update_bh(bh, IS_DIRSYNC(inode)); + brelse(bh); + } + + return 0; +} + +void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) +{ + int chksum_type = CS_DIR_ENTRY, i; + unsigned short chksum = 0; + struct exfat_dentry *ep; + + for (i = 0; i < es->num_entries; i++) { + ep = exfat_get_dentry_cached(es, i); + chksum = exfat_calc_chksum16(ep, DENTRY_SIZE, chksum, + chksum_type); + chksum_type = CS_DEFAULT; + } + ep = exfat_get_dentry_cached(es, 0); + ep->dentry.file.checksum = cpu_to_le16(chksum); + es->modified = true; +} + +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) +{ + int i, err = 0; + + if (es->modified) + err = exfat_update_bhs(es->bh, es->num_bh, sync); + + for (i = 0; i < es->num_bh; i++) + if (err) + bforget(es->bh[i]); + else + brelse(es->bh[i]); + kfree(es); + return err; +} + +static int exfat_walk_fat_chain(struct super_block *sb, + struct exfat_chain *p_dir, unsigned int byte_offset, + unsigned int *clu) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int clu_offset; + unsigned int cur_clu; + + clu_offset = EXFAT_B_TO_CLU(byte_offset, sbi); + cur_clu = p_dir->dir; + + if (p_dir->flags == ALLOC_NO_FAT_CHAIN) { + cur_clu += clu_offset; + } else { + while (clu_offset > 0) { + if (exfat_get_next_cluster(sb, &cur_clu)) + return -EIO; + if (cur_clu == EXFAT_EOF_CLUSTER) { + exfat_fs_error(sb, + "invalid dentry access beyond EOF (clu : %u, eidx : %d)", + p_dir->dir, + EXFAT_B_TO_DEN(byte_offset)); + return -EIO; + } + clu_offset--; + } + } + + *clu = cur_clu; + return 0; +} + +static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir, + int entry, sector_t *sector, int *offset) +{ + int ret; + unsigned int off, clu = 0; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + off = EXFAT_DEN_TO_B(entry); + + ret = exfat_walk_fat_chain(sb, p_dir, off, &clu); + if (ret) + return ret; + + /* byte offset in cluster */ + off = EXFAT_CLU_OFFSET(off, sbi); + + /* byte offset in sector */ + *offset = EXFAT_BLK_OFFSET(off, sb); + + /* sector offset in cluster */ + *sector = EXFAT_B_TO_BLK(off, sb); + *sector += exfat_cluster_to_sector(sbi, clu); + return 0; +} + +#define EXFAT_MAX_RA_SIZE (128*1024) +static int exfat_dir_readahead(struct super_block *sb, sector_t sec) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + unsigned int max_ra_count = EXFAT_MAX_RA_SIZE >> sb->s_blocksize_bits; + unsigned int page_ra_count = PAGE_SIZE >> sb->s_blocksize_bits; + unsigned int adj_ra_count = max(sbi->sect_per_clus, page_ra_count); + unsigned int ra_count = min(adj_ra_count, max_ra_count); + + /* Read-ahead is not required */ + if (sbi->sect_per_clus == 1) + return 0; + + if (sec < sbi->data_start_sector) { + exfat_err(sb, "requested sector is invalid(sect:%llu, root:%llu)", + (unsigned long long)sec, sbi->data_start_sector); + return -EIO; + } + + /* Not sector aligned with ra_count, resize ra_count to page size */ + if ((sec - sbi->data_start_sector) & (ra_count - 1)) + ra_count = page_ra_count; + + bh = sb_find_get_block(sb, sec); + if (!bh || !buffer_uptodate(bh)) { + unsigned int i; + + for (i = 0; i < ra_count; i++) + sb_breadahead(sb, (sector_t)(sec + i)); + } + brelse(bh); + return 0; +} + +struct exfat_dentry *exfat_get_dentry(struct super_block *sb, + struct exfat_chain *p_dir, int entry, struct buffer_head **bh) +{ + unsigned int dentries_per_page = EXFAT_B_TO_DEN(PAGE_SIZE); + int off; + sector_t sec; + + if (p_dir->dir == DIR_DELETED) { + exfat_err(sb, "abnormal access to deleted dentry"); + return NULL; + } + + if (exfat_find_location(sb, p_dir, entry, &sec, &off)) + return NULL; + + if (p_dir->dir != EXFAT_FREE_CLUSTER && + !(entry & (dentries_per_page - 1))) + exfat_dir_readahead(sb, sec); + + *bh = sb_bread(sb, sec); + if (!*bh) + return NULL; + + return (struct exfat_dentry *)((*bh)->b_data + off); +} + +enum exfat_validate_dentry_mode { + ES_MODE_STARTED, + ES_MODE_GET_FILE_ENTRY, + ES_MODE_GET_STRM_ENTRY, + ES_MODE_GET_NAME_ENTRY, + ES_MODE_GET_CRITICAL_SEC_ENTRY, +}; + +static bool exfat_validate_entry(unsigned int type, + enum exfat_validate_dentry_mode *mode) +{ + if (type == TYPE_UNUSED || type == TYPE_DELETED) + return false; + + switch (*mode) { + case ES_MODE_STARTED: + if (type != TYPE_FILE && type != TYPE_DIR) + return false; + *mode = ES_MODE_GET_FILE_ENTRY; + return true; + case ES_MODE_GET_FILE_ENTRY: + if (type != TYPE_STREAM) + return false; + *mode = ES_MODE_GET_STRM_ENTRY; + return true; + case ES_MODE_GET_STRM_ENTRY: + if (type != TYPE_EXTEND) + return false; + *mode = ES_MODE_GET_NAME_ENTRY; + return true; + case ES_MODE_GET_NAME_ENTRY: + if (type == TYPE_STREAM) + return false; + if (type != TYPE_EXTEND) { + if (!(type & TYPE_CRITICAL_SEC)) + return false; + *mode = ES_MODE_GET_CRITICAL_SEC_ENTRY; + } + return true; + case ES_MODE_GET_CRITICAL_SEC_ENTRY: + if (type == TYPE_EXTEND || type == TYPE_STREAM) + return false; + if ((type & TYPE_CRITICAL_SEC) != TYPE_CRITICAL_SEC) + return false; + return true; + default: + WARN_ON_ONCE(1); + return false; + } +} + +struct exfat_dentry *exfat_get_dentry_cached( + struct exfat_entry_set_cache *es, int num) +{ + int off = es->start_off + num * DENTRY_SIZE; + struct buffer_head *bh = es->bh[EXFAT_B_TO_BLK(off, es->sb)]; + char *p = bh->b_data + EXFAT_BLK_OFFSET(off, es->sb); + + return (struct exfat_dentry *)p; +} + +/* + * Returns a set of dentries for a file or dir. + * + * Note It provides a direct pointer to bh->data via exfat_get_dentry_cached(). + * User should call exfat_get_dentry_set() after setting 'modified' to apply + * changes made in this entry set to the real device. + * + * in: + * sb+p_dir+entry: indicates a file/dir + * type: specifies how many dentries should be included. + * return: + * pointer of entry set on success, + * NULL on failure. + */ +struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, + struct exfat_chain *p_dir, int entry, unsigned int type) +{ + int ret, i, num_bh; + unsigned int off, byte_offset, clu = 0; + sector_t sec; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_entry_set_cache *es; + struct exfat_dentry *ep; + int num_entries; + enum exfat_validate_dentry_mode mode = ES_MODE_STARTED; + struct buffer_head *bh; + + if (p_dir->dir == DIR_DELETED) { + exfat_err(sb, "access to deleted dentry"); + return NULL; + } + + byte_offset = EXFAT_DEN_TO_B(entry); + ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu); + if (ret) + return NULL; + + es = kzalloc(sizeof(*es), GFP_KERNEL); + if (!es) + return NULL; + es->sb = sb; + es->modified = false; + + /* byte offset in cluster */ + byte_offset = EXFAT_CLU_OFFSET(byte_offset, sbi); + + /* byte offset in sector */ + off = EXFAT_BLK_OFFSET(byte_offset, sb); + es->start_off = off; + + /* sector offset in cluster */ + sec = EXFAT_B_TO_BLK(byte_offset, sb); + sec += exfat_cluster_to_sector(sbi, clu); + + bh = sb_bread(sb, sec); + if (!bh) + goto free_es; + es->bh[es->num_bh++] = bh; + + ep = exfat_get_dentry_cached(es, 0); + if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) + goto free_es; + + num_entries = type == ES_ALL_ENTRIES ? + ep->dentry.file.num_ext + 1 : type; + es->num_entries = num_entries; + + num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb); + for (i = 1; i < num_bh; i++) { + /* get the next sector */ + if (exfat_is_last_sector_in_cluster(sbi, sec)) { + if (p_dir->flags == ALLOC_NO_FAT_CHAIN) + clu++; + else if (exfat_get_next_cluster(sb, &clu)) + goto free_es; + sec = exfat_cluster_to_sector(sbi, clu); + } else { + sec++; + } + + bh = sb_bread(sb, sec); + if (!bh) + goto free_es; + es->bh[es->num_bh++] = bh; + } + + /* validate cached dentries */ + for (i = 1; i < num_entries; i++) { + ep = exfat_get_dentry_cached(es, i); + if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode)) + goto free_es; + } + return es; + +free_es: + exfat_free_dentry_set(es, false); + return NULL; +} + +enum { + DIRENT_STEP_FILE, + DIRENT_STEP_STRM, + DIRENT_STEP_NAME, + DIRENT_STEP_SECD, +}; + +/* + * @ei: inode info of parent directory + * @p_dir: directory structure of parent directory + * @num_entries:entry size of p_uniname + * @hint_opt: If p_uniname is found, filled with optimized dir/entry + * for traversing cluster chain. + * @return: + * >= 0: file directory entry position where the name exists + * -ENOENT: entry with the name does not exist + * -EIO: I/O error + */ +int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, + struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, + int num_entries, unsigned int type, struct exfat_hint *hint_opt) +{ + int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; + int order, step, name_len = 0; + int dentries_per_clu, num_empty = 0; + unsigned int entry_type; + unsigned short *uniname = NULL; + struct exfat_chain clu; + struct exfat_hint *hint_stat = &ei->hint_stat; + struct exfat_hint_femp candi_empty; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + dentries_per_clu = sbi->dentries_per_clu; + + exfat_chain_dup(&clu, p_dir); + + if (hint_stat->eidx) { + clu.dir = hint_stat->clu; + dentry = hint_stat->eidx; + end_eidx = dentry; + } + + candi_empty.eidx = EXFAT_HINT_NONE; +rewind: + order = 0; + step = DIRENT_STEP_FILE; + while (clu.dir != EXFAT_EOF_CLUSTER) { + i = dentry & (dentries_per_clu - 1); + for (; i < dentries_per_clu; i++, dentry++) { + struct exfat_dentry *ep; + struct buffer_head *bh; + + if (rewind && dentry == end_eidx) + goto not_found; + + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + + entry_type = exfat_get_entry_type(ep); + + if (entry_type == TYPE_UNUSED || + entry_type == TYPE_DELETED) { + step = DIRENT_STEP_FILE; + + num_empty++; + if (candi_empty.eidx == EXFAT_HINT_NONE && + num_empty == 1) { + exfat_chain_set(&candi_empty.cur, + clu.dir, clu.size, clu.flags); + } + + if (candi_empty.eidx == EXFAT_HINT_NONE && + num_empty >= num_entries) { + candi_empty.eidx = + dentry - (num_empty - 1); + WARN_ON(candi_empty.eidx < 0); + candi_empty.count = num_empty; + + if (ei->hint_femp.eidx == + EXFAT_HINT_NONE || + candi_empty.eidx <= + ei->hint_femp.eidx) + ei->hint_femp = candi_empty; + } + + brelse(bh); + if (entry_type == TYPE_UNUSED) + goto not_found; + continue; + } + + num_empty = 0; + candi_empty.eidx = EXFAT_HINT_NONE; + + if (entry_type == TYPE_FILE || entry_type == TYPE_DIR) { + step = DIRENT_STEP_FILE; + hint_opt->clu = clu.dir; + hint_opt->eidx = i; + if (type == TYPE_ALL || type == entry_type) { + num_ext = ep->dentry.file.num_ext; + step = DIRENT_STEP_STRM; + } + brelse(bh); + continue; + } + + if (entry_type == TYPE_STREAM) { + u16 name_hash; + + if (step != DIRENT_STEP_STRM) { + step = DIRENT_STEP_FILE; + brelse(bh); + continue; + } + step = DIRENT_STEP_FILE; + name_hash = le16_to_cpu( + ep->dentry.stream.name_hash); + if (p_uniname->name_hash == name_hash && + p_uniname->name_len == + ep->dentry.stream.name_len) { + step = DIRENT_STEP_NAME; + order = 1; + name_len = 0; + } + brelse(bh); + continue; + } + + brelse(bh); + if (entry_type == TYPE_EXTEND) { + unsigned short entry_uniname[16], unichar; + + if (step != DIRENT_STEP_NAME) { + step = DIRENT_STEP_FILE; + continue; + } + + if (++order == 2) + uniname = p_uniname->name; + else + uniname += EXFAT_FILE_NAME_LEN; + + len = exfat_extract_uni_name(ep, entry_uniname); + name_len += len; + + unichar = *(uniname+len); + *(uniname+len) = 0x0; + + if (exfat_uniname_ncmp(sb, uniname, + entry_uniname, len)) { + step = DIRENT_STEP_FILE; + } else if (p_uniname->name_len == name_len) { + if (order == num_ext) + goto found; + step = DIRENT_STEP_SECD; + } + + *(uniname+len) = unichar; + continue; + } + + if (entry_type & + (TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) { + if (step == DIRENT_STEP_SECD) { + if (++order == num_ext) + goto found; + continue; + } + } + step = DIRENT_STEP_FILE; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &clu.dir)) + return -EIO; + } + } + +not_found: + /* + * We started at not 0 index,so we should try to find target + * from 0 index to the index we started at. + */ + if (!rewind && end_eidx) { + rewind = 1; + dentry = 0; + clu.dir = p_dir->dir; + /* reset empty hint */ + num_empty = 0; + candi_empty.eidx = EXFAT_HINT_NONE; + goto rewind; + } + + /* initialized hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return -ENOENT; + +found: + /* next dentry we'll find is out of this cluster */ + if (!((dentry + 1) & (dentries_per_clu - 1))) { + int ret = 0; + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + ret = exfat_get_next_cluster(sb, &clu.dir); + } + + if (ret || clu.dir == EXFAT_EOF_CLUSTER) { + /* just initialized hint_stat */ + hint_stat->clu = p_dir->dir; + hint_stat->eidx = 0; + return (dentry - num_ext); + } + } + + hint_stat->clu = clu.dir; + hint_stat->eidx = dentry + 1; + return dentry - num_ext; +} + +int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir, + int entry, struct exfat_dentry *ep) +{ + int i, count = 0; + unsigned int type; + struct exfat_dentry *ext_ep; + struct buffer_head *bh; + + for (i = 0, entry++; i < ep->dentry.file.num_ext; i++, entry++) { + ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh); + if (!ext_ep) + return -EIO; + + type = exfat_get_entry_type(ext_ep); + brelse(bh); + if (type == TYPE_EXTEND || type == TYPE_STREAM) + count++; + else + break; + } + return count; +} + +int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir) +{ + int i, count = 0; + int dentries_per_clu; + unsigned int entry_type; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + dentries_per_clu = sbi->dentries_per_clu; + + exfat_chain_dup(&clu, p_dir); + + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < dentries_per_clu; i++) { + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + entry_type = exfat_get_entry_type(ep); + brelse(bh); + + if (entry_type == TYPE_UNUSED) + return count; + if (entry_type != TYPE_DIR) + continue; + count++; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + } + + return count; +} diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h new file mode 100644 index 000000000000..0db1f254212f --- /dev/null +++ b/fs/exfat/exfat_fs.h @@ -0,0 +1,576 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#ifndef _EXFAT_FS_H +#define _EXFAT_FS_H + +#include +#include +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) +#include +#else +#define EXFAT_SUPER_MAGIC 0x2011BAB0UL +#endif + +#define EXFAT_VERSION "6.0.0" + +#define EXFAT_ROOT_INO 1 + +#define EXFAT_CLUSTERS_UNTRACKED (~0u) + +/* + * exfat error flags + */ +enum exfat_error_mode { + EXFAT_ERRORS_CONT, /* ignore error and continue */ + EXFAT_ERRORS_PANIC, /* panic on error */ + EXFAT_ERRORS_RO, /* remount r/o on error */ +}; + +/* + * exfat nls lossy flag + */ +enum { + NLS_NAME_NO_LOSSY = 0, /* no lossy */ + NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */ + NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */ +}; + +#define EXFAT_HASH_BITS 8 +#define EXFAT_HASH_SIZE (1UL << EXFAT_HASH_BITS) + +/* + * Type Definitions + */ +#define ES_2_ENTRIES 2 +#define ES_ALL_ENTRIES 0 + +#define DIR_DELETED 0xFFFF0321 + +/* type values */ +#define TYPE_UNUSED 0x0000 +#define TYPE_DELETED 0x0001 +#define TYPE_INVALID 0x0002 +#define TYPE_CRITICAL_PRI 0x0100 +#define TYPE_BITMAP 0x0101 +#define TYPE_UPCASE 0x0102 +#define TYPE_VOLUME 0x0103 +#define TYPE_DIR 0x0104 +#define TYPE_FILE 0x011F +#define TYPE_CRITICAL_SEC 0x0200 +#define TYPE_STREAM 0x0201 +#define TYPE_EXTEND 0x0202 +#define TYPE_ACL 0x0203 +#define TYPE_BENIGN_PRI 0x0400 +#define TYPE_GUID 0x0401 +#define TYPE_PADDING 0x0402 +#define TYPE_ACLTAB 0x0403 +#define TYPE_BENIGN_SEC 0x0800 +#define TYPE_ALL 0x0FFF + +#define MAX_CHARSET_SIZE 6 /* max size of multi-byte character */ +#define MAX_NAME_LENGTH 255 /* max len of file name excluding NULL */ +#define MAX_VFSNAME_BUF_SIZE ((MAX_NAME_LENGTH + 1) * MAX_CHARSET_SIZE) + +/* Enough size to hold 256 dentry (even 512 Byte sector) */ +#define DIR_CACHE_SIZE (256*sizeof(struct exfat_dentry)/512+1) + +#define EXFAT_HINT_NONE -1 +#define EXFAT_MIN_SUBDIR 2 + +/* + * helpers for cluster size to byte conversion. + */ +#define EXFAT_CLU_TO_B(b, sbi) ((b) << (sbi)->cluster_size_bits) +#define EXFAT_B_TO_CLU(b, sbi) ((b) >> (sbi)->cluster_size_bits) +#define EXFAT_B_TO_CLU_ROUND_UP(b, sbi) \ + (((b - 1) >> (sbi)->cluster_size_bits) + 1) +#define EXFAT_CLU_OFFSET(off, sbi) ((off) & ((sbi)->cluster_size - 1)) + +/* + * helpers for block size to byte conversion. + */ +#define EXFAT_BLK_TO_B(b, sb) ((b) << (sb)->s_blocksize_bits) +#define EXFAT_B_TO_BLK(b, sb) ((b) >> (sb)->s_blocksize_bits) +#define EXFAT_B_TO_BLK_ROUND_UP(b, sb) \ + (((b - 1) >> (sb)->s_blocksize_bits) + 1) +#define EXFAT_BLK_OFFSET(off, sb) ((off) & ((sb)->s_blocksize - 1)) + +/* + * helpers for block size to dentry size conversion. + */ +#define EXFAT_B_TO_DEN_IDX(b, sbi) \ + ((b) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS)) +#define EXFAT_B_TO_DEN(b) ((b) >> DENTRY_SIZE_BITS) +#define EXFAT_DEN_TO_B(b) ((b) << DENTRY_SIZE_BITS) + +/* + * helpers for fat entry. + */ +#define FAT_ENT_SIZE (4) +#define FAT_ENT_SIZE_BITS (2) +#define FAT_ENT_OFFSET_SECTOR(sb, loc) (EXFAT_SB(sb)->FAT1_start_sector + \ + (((u64)loc << FAT_ENT_SIZE_BITS) >> sb->s_blocksize_bits)) +#define FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc) \ + ((loc << FAT_ENT_SIZE_BITS) & (sb->s_blocksize - 1)) + +/* + * helpers for bitmap. + */ +#define CLUSTER_TO_BITMAP_ENT(clu) ((clu) - EXFAT_RESERVED_CLUSTERS) +#define BITMAP_ENT_TO_CLUSTER(ent) ((ent) + EXFAT_RESERVED_CLUSTERS) +#define BITS_PER_SECTOR(sb) ((sb)->s_blocksize * BITS_PER_BYTE) +#define BITS_PER_SECTOR_MASK(sb) (BITS_PER_SECTOR(sb) - 1) +#define BITMAP_OFFSET_SECTOR_INDEX(sb, ent) \ + ((ent / BITS_PER_BYTE) >> (sb)->s_blocksize_bits) +#define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb)) +#define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \ + ((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1)) +#define BITS_PER_BYTE_MASK 0x7 +#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1) + +struct exfat_dentry_namebuf { + char *lfn; + int lfnbuf_len; /* usually MAX_UNINAME_BUF_SIZE */ +}; + +/* unicode name structure */ +struct exfat_uni_name { + /* +3 for null and for converting */ + unsigned short name[MAX_NAME_LENGTH + 3]; + u16 name_hash; + unsigned char name_len; +}; + +/* directory structure */ +struct exfat_chain { + unsigned int dir; + unsigned int size; + unsigned char flags; +}; + +/* first empty entry hint information */ +struct exfat_hint_femp { + /* entry index of a directory */ + int eidx; + /* count of continuous empty entry */ + int count; + /* the cluster that first empty slot exists in */ + struct exfat_chain cur; +}; + +/* hint structure */ +struct exfat_hint { + unsigned int clu; + union { + unsigned int off; /* cluster offset */ + int eidx; /* entry index */ + }; +}; + +struct exfat_entry_set_cache { + struct super_block *sb; + bool modified; + unsigned int start_off; + int num_bh; + struct buffer_head *bh[DIR_CACHE_SIZE]; + unsigned int num_entries; +}; + +struct exfat_dir_entry { + struct exfat_chain dir; + int entry; + unsigned int type; + unsigned int start_clu; + unsigned char flags; + unsigned short attr; + loff_t size; + unsigned int num_subdirs; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) + struct timespec64 atime; + struct timespec64 mtime; + struct timespec64 crtime; +#else + struct timespec atime; + struct timespec mtime; + struct timespec crtime; +#endif + struct exfat_dentry_namebuf namebuf; +}; + +/* + * exfat mount in-memory data + */ +struct exfat_mount_options { + kuid_t fs_uid; + kgid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + /* permission for setting the [am]time */ + unsigned short allow_utime; + /* charset for filename input/display */ + char *iocharset; + /* on error: continue, panic, remount-ro */ + enum exfat_error_mode errors; + unsigned utf8:1, /* Use of UTF-8 character set */ + sys_tz:1, /* Use local timezone */ + discard:1, /* Issue discard requests on deletions */ + keep_last_dots:1; /* Keep trailing periods in paths */ + int time_offset; /* Offset of timestamps from UTC (in minutes) */ +}; + +/* + * EXFAT file system superblock in-memory data + */ +struct exfat_sb_info { + unsigned long long num_sectors; /* num of sectors in volume */ + unsigned int num_clusters; /* num of clusters in volume */ + unsigned int cluster_size; /* cluster size in bytes */ + unsigned int cluster_size_bits; + unsigned int sect_per_clus; /* cluster size in sectors */ + unsigned int sect_per_clus_bits; + unsigned long long FAT1_start_sector; /* FAT1 start sector */ + unsigned long long FAT2_start_sector; /* FAT2 start sector */ + unsigned long long data_start_sector; /* data area start sector */ + unsigned int num_FAT_sectors; /* num of FAT sectors */ + unsigned int root_dir; /* root dir cluster */ + unsigned int dentries_per_clu; /* num of dentries per cluster */ + unsigned int vol_flags; /* volume flags */ + unsigned int vol_flags_persistent; /* volume flags to retain */ + struct buffer_head *boot_bh; /* buffer_head of BOOT sector */ + + unsigned int map_clu; /* allocation bitmap start cluster */ + unsigned int map_sectors; /* num of allocation bitmap sectors */ + struct buffer_head **vol_amap; /* allocation bitmap */ + + unsigned short *vol_utbl; /* upcase table */ + + unsigned int clu_srch_ptr; /* cluster search pointer */ + unsigned int used_clusters; /* number of used clusters */ + + struct mutex s_lock; /* superblock lock */ + struct mutex bitmap_lock; /* bitmap lock */ + struct exfat_mount_options options; + struct nls_table *nls_io; /* Charset used for input and display */ + struct ratelimit_state ratelimit; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; + + struct rcu_head rcu; +}; + +#define EXFAT_CACHE_VALID 0 + +/* + * EXFAT file system inode in-memory data + */ +struct exfat_inode_info { + struct exfat_chain dir; + int entry; + unsigned int type; + unsigned short attr; + unsigned int start_clu; + unsigned char flags; + /* + * the copy of low 32bit of i_version to check + * the validation of hint_stat. + */ + unsigned int version; + + /* hint for cluster last accessed */ + struct exfat_hint hint_bmap; + /* hint for entry index we try to lookup next time */ + struct exfat_hint hint_stat; + /* hint for first empty entry */ + struct exfat_hint_femp hint_femp; + + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between alloc and free */ + unsigned int cache_valid_id; + + /* + * NOTE: i_size_ondisk is 64bits, so must hold ->inode_lock to access. + * physically allocated size. + */ + loff_t i_size_ondisk; + /* block-aligned i_size (used in cont_write_begin) */ + loff_t i_size_aligned; + /* on-disk position of directory entry or 0 */ + loff_t i_pos; + /* hash by i_location */ + struct hlist_node i_hash_fat; + /* protect bmap against truncate */ + struct rw_semaphore truncate_lock; + struct inode vfs_inode; + /* File creation time */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) + struct timespec64 i_crtime; +#else + struct timespec i_crtime; +#endif +}; + +static inline struct exfat_sb_info *EXFAT_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct exfat_inode_info *EXFAT_I(struct inode *inode) +{ + return container_of(inode, struct exfat_inode_info, vfs_inode); +} + +/* + * If ->i_mode can't hold 0222 (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. + */ +static inline int exfat_mode_can_hold_ro(struct inode *inode) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + + if (S_ISDIR(inode->i_mode)) + return 0; + + if ((~sbi->options.fs_fmask) & 0222) + return 1; + return 0; +} + +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t exfat_make_mode(struct exfat_sb_info *sbi, + unsigned short attr, mode_t mode) +{ + if ((attr & ATTR_READONLY) && !(attr & ATTR_SUBDIR)) + mode &= ~0222; + + if (attr & ATTR_SUBDIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + +/* Return the FAT attribute byte for this inode */ +static inline unsigned short exfat_make_attr(struct inode *inode) +{ + unsigned short attr = EXFAT_I(inode)->attr; + + if (S_ISDIR(inode->i_mode)) + attr |= ATTR_SUBDIR; + if (exfat_mode_can_hold_ro(inode) && !(inode->i_mode & 0222)) + attr |= ATTR_READONLY; + return attr; +} + +static inline void exfat_save_attr(struct inode *inode, unsigned short attr) +{ + if (exfat_mode_can_hold_ro(inode)) + EXFAT_I(inode)->attr = attr & (ATTR_RWMASK | ATTR_READONLY); + else + EXFAT_I(inode)->attr = attr & ATTR_RWMASK; +} + +static inline bool exfat_is_last_sector_in_cluster(struct exfat_sb_info *sbi, + sector_t sec) +{ + return ((sec - sbi->data_start_sector + 1) & + ((1 << sbi->sect_per_clus_bits) - 1)) == 0; +} + +static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi, + unsigned int clus) +{ + return ((sector_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) + + sbi->data_start_sector; +} + +static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, + sector_t sec) +{ + return ((sec - sbi->data_start_sector) >> sbi->sect_per_clus_bits) + + EXFAT_RESERVED_CLUSTERS; +} + +static inline bool is_valid_cluster(struct exfat_sb_info *sbi, + unsigned int clus) +{ + return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; +} + +/* super.c */ +int exfat_set_volume_dirty(struct super_block *sb); +int exfat_clear_volume_dirty(struct super_block *sb); + +/* fatent.c */ +#define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu) + +int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, + struct exfat_chain *p_chain, bool sync_bmap); +int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain); +int exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content); +int exfat_ent_set(struct super_block *sb, unsigned int loc, + unsigned int content); +int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir, + int entry, struct exfat_dentry *p_entry); +int exfat_chain_cont_cluster(struct super_block *sb, unsigned int chain, + unsigned int len); +int exfat_zeroed_cluster(struct inode *dir, unsigned int clu); +int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, + unsigned int *ret_clu); +int exfat_count_num_clusters(struct super_block *sb, + struct exfat_chain *p_chain, unsigned int *ret_count); + +/* balloc.c */ +int exfat_load_bitmap(struct super_block *sb); +void exfat_free_bitmap(struct exfat_sb_info *sbi); +int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync); +void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync); +unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu); +int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count); +int exfat_trim_fs(struct inode *inode, struct fstrim_range *range); + +/* file.c */ +extern const struct file_operations exfat_file_operations; +int __exfat_truncate(struct inode *inode, loff_t new_size); +void exfat_truncate(struct inode *inode, loff_t size); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr); +int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, unsigned int request_mask, + unsigned int query_flags); +#else +int exfat_setattr(struct dentry *dentry, struct iattr *attr); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +int exfat_getattr(const struct path *path, struct kstat *stat, + unsigned int request_mask, unsigned int query_flags); +#else +int exfat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); +#endif +#endif +int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); +long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long exfat_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg); + + +/* namei.c */ +extern const struct dentry_operations exfat_dentry_ops; +extern const struct dentry_operations exfat_utf8_dentry_ops; + +/* cache.c */ +int exfat_cache_init(void); +void exfat_cache_shutdown(void); +void exfat_cache_inval_inode(struct inode *inode); +int exfat_get_cluster(struct inode *inode, unsigned int cluster, + unsigned int *fclus, unsigned int *dclus, + unsigned int *last_dclus, int allow_eof); + +/* dir.c */ +extern const struct inode_operations exfat_dir_inode_operations; +extern const struct file_operations exfat_dir_operations; +unsigned int exfat_get_entry_type(struct exfat_dentry *p_entry); +int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, unsigned int type, unsigned int start_clu, + unsigned long long size); +int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, + int entry, int num_entries, struct exfat_uni_name *p_uniname); +int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, + int entry, int order, int num_entries); +int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, + int entry); +void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es); +int exfat_calc_num_entries(struct exfat_uni_name *p_uniname); +int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, + struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, + int num_entries, unsigned int type, struct exfat_hint *hint_opt); +int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu); +struct exfat_dentry *exfat_get_dentry(struct super_block *sb, + struct exfat_chain *p_dir, int entry, struct buffer_head **bh); +struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, + int num); +struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, + struct exfat_chain *p_dir, int entry, unsigned int type); +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); +int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); + +/* inode.c */ +extern const struct inode_operations exfat_file_inode_operations; +void exfat_sync_inode(struct inode *inode); +struct inode *exfat_build_inode(struct super_block *sb, + struct exfat_dir_entry *info, loff_t i_pos); +void exfat_hash_inode(struct inode *inode, loff_t i_pos); +void exfat_unhash_inode(struct inode *inode); +struct inode *exfat_iget(struct super_block *sb, loff_t i_pos); +int __exfat_write_inode(struct inode *inode, int sync); +int exfat_write_inode(struct inode *inode, struct writeback_control *wbc); +void exfat_evict_inode(struct inode *inode); +int exfat_block_truncate_page(struct inode *inode, loff_t from); + +/* exfat/nls.c */ +unsigned short exfat_toupper(struct super_block *sb, unsigned short a); +int exfat_uniname_ncmp(struct super_block *sb, unsigned short *a, + unsigned short *b, unsigned int len); +int exfat_utf16_to_nls(struct super_block *sb, + struct exfat_uni_name *uniname, unsigned char *p_cstring, + int len); +int exfat_nls_to_utf16(struct super_block *sb, + const unsigned char *p_cstring, const int len, + struct exfat_uni_name *uniname, int *p_lossy); +int exfat_create_upcase_table(struct super_block *sb); +void exfat_free_upcase_table(struct exfat_sb_info *sbi); + +/* exfat/misc.c */ +void __exfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) + __printf(3, 4) __cold; +#define exfat_fs_error(sb, fmt, args...) \ + __exfat_fs_error(sb, 1, fmt, ## args) +#define exfat_fs_error_ratelimit(sb, fmt, args...) \ + __exfat_fs_error(sb, __ratelimit(&EXFAT_SB(sb)->ratelimit), \ + fmt, ## args) + +/* expand to pr_*() with prefix */ +#define exfat_err(sb, fmt, ...) \ + pr_err("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) +#define exfat_warn(sb, fmt, ...) \ + pr_warn("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) +#define exfat_info(sb, fmt, ...) \ + pr_info("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) +#define exfat_debug(sb, fmt, ...) \ + pr_debug("exFAT-fs (%s): " fmt "\n", (sb)->s_id, ##__VA_ARGS__) + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) +void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 tz, __le16 time, __le16 date, u8 time_cs); +void exfat_truncate_atime(struct timespec64 *ts); +void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 *tz, __le16 *time, __le16 *date, u8 *time_cs); +#else +void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec *ts, + u8 tz, __le16 time, __le16 date, u8 time_cs); +void exfat_truncate_atime(struct timespec *ts); +void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec *ts, + u8 *tz, __le16 *time, __le16 *date, u8 *time_cs); +#endif +u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); +u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); +void exfat_update_bh(struct buffer_head *bh, int sync); +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync); +void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, + unsigned int size, unsigned char flags); +void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); + +#endif /* !_EXFAT_FS_H */ diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h new file mode 100644 index 000000000000..7f39b1c6469c --- /dev/null +++ b/fs/exfat/exfat_raw.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#ifndef _EXFAT_RAW_H +#define _EXFAT_RAW_H + +#include + +#define BOOT_SIGNATURE 0xAA55 +#define EXBOOT_SIGNATURE 0xAA550000 +#define STR_EXFAT "EXFAT " /* size should be 8 */ + +#define EXFAT_MAX_FILE_LEN 255 + +#define VOLUME_DIRTY 0x0002 +#define MEDIA_FAILURE 0x0004 + +#define EXFAT_EOF_CLUSTER 0xFFFFFFFFu +#define EXFAT_BAD_CLUSTER 0xFFFFFFF7u +#define EXFAT_FREE_CLUSTER 0 +/* Cluster 0, 1 are reserved, the first cluster is 2 in the cluster heap. */ +#define EXFAT_RESERVED_CLUSTERS 2 +#define EXFAT_FIRST_CLUSTER 2 +#define EXFAT_DATA_CLUSTER_COUNT(sbi) \ + ((sbi)->num_clusters - EXFAT_RESERVED_CLUSTERS) + +/* AllocationPossible and NoFatChain field in GeneralSecondaryFlags Field */ +#define ALLOC_FAT_CHAIN 0x01 +#define ALLOC_NO_FAT_CHAIN 0x03 + +#define DENTRY_SIZE 32 /* directory entry size */ +#define DENTRY_SIZE_BITS 5 +/* exFAT allows 8388608(256MB) directory entries */ +#define MAX_EXFAT_DENTRIES 8388608 + +/* dentry types */ +#define EXFAT_UNUSED 0x00 /* end of directory */ +#define EXFAT_DELETE (~0x80) +#define IS_EXFAT_DELETED(x) ((x) < 0x80) /* deleted file (0x01~0x7F) */ +#define EXFAT_INVAL 0x80 /* invalid value */ +#define EXFAT_BITMAP 0x81 /* allocation bitmap */ +#define EXFAT_UPCASE 0x82 /* upcase table */ +#define EXFAT_VOLUME 0x83 /* volume label */ +#define EXFAT_FILE 0x85 /* file or dir */ +#define EXFAT_GUID 0xA0 +#define EXFAT_PADDING 0xA1 +#define EXFAT_ACLTAB 0xA2 +#define EXFAT_STREAM 0xC0 /* stream entry */ +#define EXFAT_NAME 0xC1 /* file name entry */ +#define EXFAT_ACL 0xC2 /* stream entry */ + +#define IS_EXFAT_CRITICAL_PRI(x) (x < 0xA0) +#define IS_EXFAT_BENIGN_PRI(x) (x < 0xC0) +#define IS_EXFAT_CRITICAL_SEC(x) (x < 0xE0) + +/* checksum types */ +#define CS_DIR_ENTRY 0 +#define CS_BOOT_SECTOR 1 +#define CS_DEFAULT 2 + +/* file attributes */ +#define ATTR_READONLY 0x0001 +#define ATTR_HIDDEN 0x0002 +#define ATTR_SYSTEM 0x0004 +#define ATTR_VOLUME 0x0008 +#define ATTR_SUBDIR 0x0010 +#define ATTR_ARCHIVE 0x0020 + +#define ATTR_RWMASK (ATTR_HIDDEN | ATTR_SYSTEM | ATTR_VOLUME | \ + ATTR_SUBDIR | ATTR_ARCHIVE) + +#define BOOTSEC_JUMP_BOOT_LEN 3 +#define BOOTSEC_FS_NAME_LEN 8 +#define BOOTSEC_OLDBPB_LEN 53 + +#define EXFAT_FILE_NAME_LEN 15 + +#define EXFAT_MIN_SECT_SIZE_BITS 9 +#define EXFAT_MAX_SECT_SIZE_BITS 12 +#define EXFAT_MAX_SECT_PER_CLUS_BITS(x) (25 - (x)->sect_size_bits) + +/* EXFAT: Main and Backup Boot Sector (512 bytes) */ +struct boot_sector { + __u8 jmp_boot[BOOTSEC_JUMP_BOOT_LEN]; + __u8 fs_name[BOOTSEC_FS_NAME_LEN]; + __u8 must_be_zero[BOOTSEC_OLDBPB_LEN]; + __le64 partition_offset; + __le64 vol_length; + __le32 fat_offset; + __le32 fat_length; + __le32 clu_offset; + __le32 clu_count; + __le32 root_cluster; + __le32 vol_serial; + __u8 fs_revision[2]; + __le16 vol_flags; + __u8 sect_size_bits; + __u8 sect_per_clus_bits; + __u8 num_fats; + __u8 drv_sel; + __u8 percent_in_use; + __u8 reserved[7]; + __u8 boot_code[390]; + __le16 signature; +} __packed; + +struct exfat_dentry { + __u8 type; + union { + struct { + __u8 num_ext; + __le16 checksum; + __le16 attr; + __le16 reserved1; + __le16 create_time; + __le16 create_date; + __le16 modify_time; + __le16 modify_date; + __le16 access_time; + __le16 access_date; + __u8 create_time_cs; + __u8 modify_time_cs; + __u8 create_tz; + __u8 modify_tz; + __u8 access_tz; + __u8 reserved2[7]; + } __packed file; /* file directory entry */ + struct { + __u8 flags; + __u8 reserved1; + __u8 name_len; + __le16 name_hash; + __le16 reserved2; + __le64 valid_size; + __le32 reserved3; + __le32 start_clu; + __le64 size; + } __packed stream; /* stream extension directory entry */ + struct { + __u8 flags; + __le16 unicode_0_14[EXFAT_FILE_NAME_LEN]; + } __packed name; /* file name directory entry */ + struct { + __u8 flags; + __u8 reserved[18]; + __le32 start_clu; + __le64 size; + } __packed bitmap; /* allocation bitmap directory entry */ + struct { + __u8 reserved1[3]; + __le32 checksum; + __u8 reserved2[12]; + __le32 start_clu; + __le64 size; + } __packed upcase; /* up-case table directory entry */ + } __packed dentry; +} __packed; + +#define EXFAT_TZ_VALID (1 << 7) + +/* Jan 1 GMT 00:00:00 1980 */ +#define EXFAT_MIN_TIMESTAMP_SECS 315532800LL +/* Dec 31 GMT 23:59:59 2107 */ +#define EXFAT_MAX_TIMESTAMP_SECS 4354819199LL + +#endif /* !_EXFAT_RAW_H */ diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c new file mode 100644 index 000000000000..ded465bcf84b --- /dev/null +++ b/fs/exfat/fatent.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int exfat_mirror_bh(struct super_block *sb, sector_t sec, + struct buffer_head *bh) +{ + struct buffer_head *c_bh; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + sector_t sec2; + int err = 0; + + if (sbi->FAT2_start_sector != sbi->FAT1_start_sector) { + sec2 = sec - sbi->FAT1_start_sector + sbi->FAT2_start_sector; + c_bh = sb_getblk(sb, sec2); + if (!c_bh) + return -ENOMEM; + memcpy(c_bh->b_data, bh->b_data, sb->s_blocksize); + set_buffer_uptodate(c_bh); + mark_buffer_dirty(c_bh); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + if (sb->s_flags & SB_SYNCHRONOUS) +#else + if (sb->s_flags & MS_SYNCHRONOUS) +#endif + err = sync_dirty_buffer(c_bh); + brelse(c_bh); + } + + return err; +} + +static int __exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content) +{ + unsigned int off; + sector_t sec; + struct buffer_head *bh; + + sec = FAT_ENT_OFFSET_SECTOR(sb, loc); + off = FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc); + + bh = sb_bread(sb, sec); + if (!bh) + return -EIO; + + *content = le32_to_cpu(*(__le32 *)(&bh->b_data[off])); + + /* remap reserved clusters to simplify code */ + if (*content > EXFAT_BAD_CLUSTER) + *content = EXFAT_EOF_CLUSTER; + + brelse(bh); + return 0; +} + +int exfat_ent_set(struct super_block *sb, unsigned int loc, + unsigned int content) +{ + unsigned int off; + sector_t sec; + __le32 *fat_entry; + struct buffer_head *bh; + + sec = FAT_ENT_OFFSET_SECTOR(sb, loc); + off = FAT_ENT_OFFSET_BYTE_IN_SECTOR(sb, loc); + + bh = sb_bread(sb, sec); + if (!bh) + return -EIO; + + fat_entry = (__le32 *)&(bh->b_data[off]); + *fat_entry = cpu_to_le32(content); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + exfat_update_bh(bh, sb->s_flags & SB_SYNCHRONOUS); +#else + exfat_update_bh(bh, sb->s_flags & MS_SYNCHRONOUS); +#endif + exfat_mirror_bh(sb, sec, bh); + brelse(bh); + return 0; +} + +int exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int err; + + if (!is_valid_cluster(sbi, loc)) { + exfat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", + loc); + return -EIO; + } + + err = __exfat_ent_get(sb, loc, content); + if (err) { + exfat_fs_error(sb, + "failed to access to FAT (entry 0x%08x, err:%d)", + loc, err); + return err; + } + + if (*content == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "invalid access to FAT free cluster (entry 0x%08x)", + loc); + return -EIO; + } + + if (*content == EXFAT_BAD_CLUSTER) { + exfat_fs_error(sb, + "invalid access to FAT bad cluster (entry 0x%08x)", + loc); + return -EIO; + } + + if (*content != EXFAT_EOF_CLUSTER && !is_valid_cluster(sbi, *content)) { + exfat_fs_error(sb, + "invalid access to FAT (entry 0x%08x) bogus content (0x%08x)", + loc, *content); + return -EIO; + } + + return 0; +} + +int exfat_chain_cont_cluster(struct super_block *sb, unsigned int chain, + unsigned int len) +{ + if (!len) + return 0; + + while (len > 1) { + if (exfat_ent_set(sb, chain, chain + 1)) + return -EIO; + chain++; + len--; + } + + if (exfat_ent_set(sb, chain, EXFAT_EOF_CLUSTER)) + return -EIO; + return 0; +} + +/* This function must be called with bitmap_lock held */ +static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int cur_cmap_i, next_cmap_i; + unsigned int num_clusters = 0; + unsigned int clu; + + /* invalid cluster number */ + if (p_chain->dir == EXFAT_FREE_CLUSTER || + p_chain->dir == EXFAT_EOF_CLUSTER || + p_chain->dir < EXFAT_FIRST_CLUSTER) + return 0; + + /* no cluster to truncate */ + if (p_chain->size == 0) + return 0; + + /* check cluster validation */ + if (!is_valid_cluster(sbi, p_chain->dir)) { + exfat_err(sb, "invalid start cluster (%u)", p_chain->dir); + return -EIO; + } + + clu = p_chain->dir; + + cur_cmap_i = next_cmap_i = + BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu)); + + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + unsigned int last_cluster = p_chain->dir + p_chain->size - 1; + do { + bool sync = false; + + if (clu < last_cluster) + next_cmap_i = + BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu+1)); + + /* flush bitmap only if index would be changed or for last cluster */ + if (clu == last_cluster || cur_cmap_i != next_cmap_i) { + sync = true; + cur_cmap_i = next_cmap_i; + } + + exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + clu++; + num_clusters++; + } while (num_clusters < p_chain->size); + } else { + do { + bool sync = false; + unsigned int n_clu = clu; + int err = exfat_get_next_cluster(sb, &n_clu); + + if (err || n_clu == EXFAT_EOF_CLUSTER) + sync = true; + else + next_cmap_i = + BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(n_clu)); + + if (cur_cmap_i != next_cmap_i) { + sync = true; + cur_cmap_i = next_cmap_i; + } + + exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))); + clu = n_clu; + num_clusters++; + + if (err) + goto dec_used_clus; + } while (clu != EXFAT_EOF_CLUSTER); + } + +dec_used_clus: + sbi->used_clusters -= num_clusters; + return 0; +} + +int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain) +{ + int ret = 0; + + mutex_lock(&EXFAT_SB(inode->i_sb)->bitmap_lock); + ret = __exfat_free_cluster(inode, p_chain); + mutex_unlock(&EXFAT_SB(inode->i_sb)->bitmap_lock); + + return ret; +} + +int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, + unsigned int *ret_clu) +{ + unsigned int clu, next; + unsigned int count = 0; + + next = p_chain->dir; + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + *ret_clu = next + p_chain->size - 1; + return 0; + } + + do { + count++; + clu = next; + if (exfat_ent_get(sb, clu, &next)) + return -EIO; + } while (next != EXFAT_EOF_CLUSTER); + + if (p_chain->size != count) { + exfat_fs_error(sb, + "bogus directory size (clus : ondisk(%d) != counted(%d))", + p_chain->size, count); + return -EIO; + } + + *ret_clu = clu; + return 0; +} + +int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) +{ + struct super_block *sb = dir->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + sector_t blknr, last_blknr, i; + + blknr = exfat_cluster_to_sector(sbi, clu); + last_blknr = blknr + sbi->sect_per_clus; + + if (last_blknr > sbi->num_sectors && sbi->num_sectors > 0) { + exfat_fs_error_ratelimit(sb, + "%s: out of range(sect:%llu len:%u)", + __func__, (unsigned long long)blknr, + sbi->sect_per_clus); + return -EIO; + } + + /* Zeroing the unused blocks on this cluster */ + for (i = blknr; i < last_blknr; i++) { + bh = sb_getblk(sb, i); + if (!bh) + return -ENOMEM; + + memset(bh->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + brelse(bh); + } + + if (IS_DIRSYNC(dir)) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + return sync_blockdev_range(sb->s_bdev, + EXFAT_BLK_TO_B(blknr, sb), + EXFAT_BLK_TO_B(last_blknr, sb) - 1); +#else + return filemap_write_and_wait_range(sb->s_bdev->bd_inode->i_mapping, + EXFAT_BLK_TO_B(blknr, sb), + EXFAT_BLK_TO_B(last_blknr, sb) - 1); +#endif + + + return 0; +} + +int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, + struct exfat_chain *p_chain, bool sync_bmap) +{ + int ret = -ENOSPC; + unsigned int num_clusters = 0, total_cnt; + unsigned int hint_clu, new_clu, last_clu = EXFAT_EOF_CLUSTER; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + total_cnt = EXFAT_DATA_CLUSTER_COUNT(sbi); + + if (unlikely(total_cnt < sbi->used_clusters)) { + exfat_fs_error_ratelimit(sb, + "%s: invalid used clusters(t:%u,u:%u)\n", + __func__, total_cnt, sbi->used_clusters); + return -EIO; + } + + if (num_alloc > total_cnt - sbi->used_clusters) + return -ENOSPC; + + mutex_lock(&sbi->bitmap_lock); + + hint_clu = p_chain->dir; + /* find new cluster */ + if (hint_clu == EXFAT_EOF_CLUSTER) { + if (sbi->clu_srch_ptr < EXFAT_FIRST_CLUSTER) { + exfat_err(sb, "sbi->clu_srch_ptr is invalid (%u)", + sbi->clu_srch_ptr); + sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; + } + + hint_clu = exfat_find_free_bitmap(sb, sbi->clu_srch_ptr); + if (hint_clu == EXFAT_EOF_CLUSTER) { + ret = -ENOSPC; + goto unlock; + } + } + + /* check cluster validation */ + if (!is_valid_cluster(sbi, hint_clu)) { + exfat_err(sb, "hint_cluster is invalid (%u)", + hint_clu); + hint_clu = EXFAT_FIRST_CLUSTER; + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, + num_clusters)) { + ret = -EIO; + goto unlock; + } + p_chain->flags = ALLOC_FAT_CHAIN; + } + } + + p_chain->dir = EXFAT_EOF_CLUSTER; + + while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) != + EXFAT_EOF_CLUSTER) { + if (new_clu != hint_clu && + p_chain->flags == ALLOC_NO_FAT_CHAIN) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, + num_clusters)) { + ret = -EIO; + goto free_cluster; + } + p_chain->flags = ALLOC_FAT_CHAIN; + } + + /* update allocation bitmap */ + if (exfat_set_bitmap(inode, new_clu, sync_bmap)) { + ret = -EIO; + goto free_cluster; + } + + num_clusters++; + + /* update FAT table */ + if (p_chain->flags == ALLOC_FAT_CHAIN) { + if (exfat_ent_set(sb, new_clu, EXFAT_EOF_CLUSTER)) { + ret = -EIO; + goto free_cluster; + } + } + + if (p_chain->dir == EXFAT_EOF_CLUSTER) { + p_chain->dir = new_clu; + } else if (p_chain->flags == ALLOC_FAT_CHAIN) { + if (exfat_ent_set(sb, last_clu, new_clu)) { + ret = -EIO; + goto free_cluster; + } + } + last_clu = new_clu; + + if (--num_alloc == 0) { + sbi->clu_srch_ptr = hint_clu; + sbi->used_clusters += num_clusters; + + p_chain->size += num_clusters; + mutex_unlock(&sbi->bitmap_lock); + return 0; + } + + hint_clu = new_clu + 1; + if (hint_clu >= sbi->num_clusters) { + hint_clu = EXFAT_FIRST_CLUSTER; + + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + if (exfat_chain_cont_cluster(sb, p_chain->dir, + num_clusters)) { + ret = -EIO; + goto free_cluster; + } + p_chain->flags = ALLOC_FAT_CHAIN; + } + } + } +free_cluster: + if (num_clusters) + __exfat_free_cluster(inode, p_chain); +unlock: + mutex_unlock(&sbi->bitmap_lock); + return ret; +} + +int exfat_count_num_clusters(struct super_block *sb, + struct exfat_chain *p_chain, unsigned int *ret_count) +{ + unsigned int i, count; + unsigned int clu; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + if (!p_chain->dir || p_chain->dir == EXFAT_EOF_CLUSTER) { + *ret_count = 0; + return 0; + } + + if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { + *ret_count = p_chain->size; + return 0; + } + + clu = p_chain->dir; + count = 0; + for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; i++) { + count++; + if (exfat_ent_get(sb, clu, &clu)) + return -EIO; + if (clu == EXFAT_EOF_CLUSTER) + break; + } + + *ret_count = count; + return 0; +} diff --git a/fs/exfat/file.c b/fs/exfat/file.c new file mode 100644 index 000000000000..6e37277bbd84 --- /dev/null +++ b/fs/exfat/file.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#endif +#include +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static int exfat_cont_expand(struct inode *inode, loff_t size) +{ + struct address_space *mapping = inode->i_mapping; + loff_t start = i_size_read(inode), count = size - i_size_read(inode); + int err, err2; + + err = generic_cont_expand_simple(inode, size); + if (err) + return err; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_ctime = inode->i_mtime = current_time(inode); +#else + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; +#endif + mark_inode_dirty(inode); + + if (!IS_SYNC(inode)) + return 0; + + err = filemap_fdatawrite_range(mapping, start, start + count - 1); + err2 = sync_mapping_buffers(mapping); + if (!err) + err = err2; + err2 = write_inode_now(inode, 1); + if (!err) + err = err2; + if (err) + return err; + + return filemap_fdatawait_range(mapping, start, start + count - 1); +} + +static bool exfat_allow_set_time(struct exfat_sb_info *sbi, struct inode *inode) +{ + mode_t allow_utime = sbi->options.allow_utime; + + if (!uid_eq(current_fsuid(), inode->i_uid)) { + if (in_group_p(inode->i_gid)) + allow_utime >>= 3; + if (allow_utime & MAY_WRITE) + return true; + } + + /* use a default check */ + return false; +} + +static int exfat_sanitize_mode(const struct exfat_sb_info *sbi, + struct inode *inode, umode_t *mode_ptr) +{ + mode_t i_mode, mask, perm; + + i_mode = inode->i_mode; + + mask = (S_ISREG(i_mode) || S_ISLNK(i_mode)) ? + sbi->options.fs_fmask : sbi->options.fs_dmask; + perm = *mode_ptr & ~(S_IFMT | mask); + + /* Of the r and x bits, all (subject to umask) must be present.*/ + if ((perm & 0555) != (i_mode & 0555)) + return -EPERM; + + if (exfat_mode_can_hold_ro(inode)) { + /* + * Of the w bits, either all (subject to umask) or none must + * be present. + */ + if ((perm & 0222) && ((perm & 0222) != (0222 & ~mask))) + return -EPERM; + } else { + /* + * If exfat_mode_can_hold_ro(inode) is false, can't change + * w bits. + */ + if ((perm & 0222) != (0222 & ~mask)) + return -EPERM; + } + + *mode_ptr &= S_IFMT | perm; + + return 0; +} + +/* resize the file length */ +int __exfat_truncate(struct inode *inode, loff_t new_size) +{ + unsigned int num_clusters_new, num_clusters_phys; + unsigned int last_clu = EXFAT_FREE_CLUSTER; + struct exfat_chain clu; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + + /* check if the given file ID is opened */ + if (ei->type != TYPE_FILE && ei->type != TYPE_DIR) + return -EPERM; + + exfat_set_volume_dirty(sb); + + num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); + num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + + exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); + + if (new_size > 0) { + /* + * Truncate FAT chain num_clusters after the first cluster + * num_clusters = min(new, phys); + */ + unsigned int num_clusters = + min(num_clusters_new, num_clusters_phys); + + /* + * Follow FAT chain + * (defensive coding - works fine even with corrupted FAT table + */ + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + clu.dir += num_clusters; + clu.size -= num_clusters; + } else { + while (num_clusters > 0) { + last_clu = clu.dir; + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + + num_clusters--; + clu.size--; + } + } + } else { + ei->flags = ALLOC_NO_FAT_CHAIN; + ei->start_clu = EXFAT_EOF_CLUSTER; + } + + i_size_write(inode, new_size); + + if (ei->type == TYPE_FILE) + ei->attr |= ATTR_ARCHIVE; + + /* + * update the directory entry + * + * If the directory entry is updated by mark_inode_dirty(), the + * directory entry will be written after a writeback cycle of + * updating the bitmap/FAT, which may result in clusters being + * freed but referenced by the directory entry in the event of a + * sudden power failure. + * __exfat_write_inode() is called for directory entry, bitmap + * and FAT to be written in a same writeback. + */ + if (__exfat_write_inode(inode, inode_needs_sync(inode))) + return -EIO; + + /* cut off from the FAT chain */ + if (ei->flags == ALLOC_FAT_CHAIN && last_clu != EXFAT_FREE_CLUSTER && + last_clu != EXFAT_EOF_CLUSTER) { + if (exfat_ent_set(sb, last_clu, EXFAT_EOF_CLUSTER)) + return -EIO; + } + + /* invalidate cache and free the clusters */ + /* clear exfat cache */ + exfat_cache_inval_inode(inode); + + /* hint information */ + ei->hint_bmap.off = EXFAT_EOF_CLUSTER; + ei->hint_bmap.clu = EXFAT_EOF_CLUSTER; + + /* hint_stat will be used if this is directory. */ + ei->hint_stat.eidx = 0; + ei->hint_stat.clu = ei->start_clu; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + + /* free the clusters */ + if (exfat_free_cluster(inode, &clu)) + return -EIO; + + return 0; +} + +void exfat_truncate(struct inode *inode, loff_t size) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 72) + unsigned int blocksize = i_blocksize(inode); +#else + unsigned int blocksize = 1 << inode->i_blkbits; +#endif + loff_t aligned_size; + int err; + + mutex_lock(&sbi->s_lock); + if (ei->start_clu == 0) { + /* + * Empty start_clu != ~0 (not allocated) + */ + exfat_fs_error(sb, "tried to truncate zeroed cluster."); + goto write_size; + } + + err = __exfat_truncate(inode, i_size_read(inode)); + if (err) + goto write_size; + + inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> + inode->i_blkbits; +write_size: + aligned_size = i_size_read(inode); + if (aligned_size & (blocksize - 1)) { + aligned_size |= (blocksize - 1); + aligned_size++; + } + + if (ei->i_size_ondisk > i_size_read(inode)) + ei->i_size_ondisk = aligned_size; + + if (ei->i_size_aligned > i_size_read(inode)) + ei->i_size_aligned = aligned_size; + mutex_unlock(&sbi->s_lock); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +int exfat_getattr(struct user_namespace *mnt_uerns, const struct path *path, + struct kstat *stat, unsigned int request_mask, + unsigned int query_flags) +#else +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +int exfat_getattr(const struct path *path, struct kstat *stat, + unsigned int request_mask, unsigned int query_flags) +#else +int exfat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +#endif +#endif +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + struct inode *inode = d_backing_inode(path->dentry); + struct exfat_inode_info *ei = EXFAT_I(inode); +#else + struct inode *inode = d_inode(dentry); +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + generic_fillattr(&init_user_ns, inode, stat); +#else + generic_fillattr(inode, stat); +#endif + exfat_truncate_atime(&stat->atime); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ei->i_crtime.tv_sec; + stat->btime.tv_nsec = ei->i_crtime.tv_nsec; +#endif + stat->blksize = EXFAT_SB(inode->i_sb)->cluster_size; + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr) +#else +int exfat_setattr(struct dentry *dentry, struct iattr *attr) +#endif +{ + struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + unsigned int ia_valid; + int error; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size > i_size_read(inode)) { + error = exfat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + return error; + attr->ia_valid &= ~ATTR_SIZE; + } + + /* Check for setting the inode time. */ + ia_valid = attr->ia_valid; + if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) && + exfat_allow_set_time(sbi, inode)) { + attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET | + ATTR_TIMES_SET); + } + +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 37))) || \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + error = setattr_prepare(&init_user_ns, dentry, attr); +#else + error = setattr_prepare(dentry, attr); +#endif +#else + error = inode_change_ok(inode, attr); +#endif + attr->ia_valid = ia_valid; + if (error) + goto out; + + if (((attr->ia_valid & ATTR_UID) && + !uid_eq(attr->ia_uid, sbi->options.fs_uid)) || + ((attr->ia_valid & ATTR_GID) && + !gid_eq(attr->ia_gid, sbi->options.fs_gid)) || + ((attr->ia_valid & ATTR_MODE) && + (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | 0777)))) { + error = -EPERM; + goto out; + } + + /* + * We don't return -EPERM here. Yes, strange, but this is too + * old behavior. + */ + if (attr->ia_valid & ATTR_MODE) { + if (exfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) + attr->ia_valid &= ~ATTR_MODE; + } + + if (attr->ia_valid & ATTR_SIZE) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_ctime = current_time(inode); +#else + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + setattr_copy(&init_user_ns, inode, attr); +#else + setattr_copy(inode, attr); +#endif + exfat_truncate_atime(&inode->i_atime); + + if (attr->ia_valid & ATTR_SIZE) { + error = exfat_block_truncate_page(inode, attr->ia_size); + if (error) + goto out; + + down_write(&EXFAT_I(inode)->truncate_lock); + truncate_setsize(inode, attr->ia_size); + /* + * __exfat_write_inode() is called from exfat_truncate(), inode + * is already written by it, so mark_inode_dirty() is unneeded. + */ + exfat_truncate(inode, attr->ia_size); + up_write(&EXFAT_I(inode)->truncate_lock); + } else + mark_inode_dirty(inode); +out: + return error; +} + +static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 19, 0) + struct request_queue *q = bdev_get_queue(inode->i_sb->s_bdev); +#endif + struct fstrim_range range; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + if (!bdev_max_discard_sectors(inode->i_sb->s_bdev)) +#else + if (!blk_queue_discard(q)) +#endif + return -EOPNOTSUPP; + + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) + return -EFAULT; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + range.minlen = max_t(unsigned int, range.minlen, + bdev_discard_granularity(inode->i_sb->s_bdev)); +#else + range.minlen = max_t(unsigned int, range.minlen, + q->limits.discard_granularity); +#endif + + ret = exfat_trim_fs(inode, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + +long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + switch (cmd) { + case FITRIM: + return exfat_ioctl_fitrim(inode, arg); + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long exfat_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + return exfat_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = filp->f_mapping->host; + int err; + + err = __generic_file_fsync(filp, start, end, datasync); + if (err) + return err; + + err = sync_blockdev(inode->i_sb->s_bdev); + if (err) + return err; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + return blkdev_issue_flush(inode->i_sb->s_bdev); +#else +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) + return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); +#else + return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); +#endif +#endif +} + +const struct file_operations exfat_file_operations = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .unlocked_ioctl = exfat_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = exfat_compat_ioctl, +#endif + .mmap = generic_file_mmap, + .fsync = exfat_file_fsync, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +}; + +const struct inode_operations exfat_file_inode_operations = { + .setattr = exfat_setattr, + .getattr = exfat_getattr, +}; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c new file mode 100644 index 000000000000..f3dbeb300c64 --- /dev/null +++ b/fs/exfat/inode.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) +#include +#endif +#include "exfat_raw.h" +#include "exfat_fs.h" + +int __exfat_write_inode(struct inode *inode, int sync) +{ + unsigned long long on_disk_size; + struct exfat_dentry *ep, *ep2; + struct exfat_entry_set_cache *es = NULL; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + bool is_dir = (ei->type == TYPE_DIR) ? true : false; + + if (inode->i_ino == EXFAT_ROOT_INO) + return 0; + + /* + * If the inode is already unlinked, there is no need for updating it. + */ + if (ei->dir.dir == DIR_DELETED) + return 0; + + if (is_dir && ei->dir.dir == sbi->root_dir && ei->entry == -1) + return 0; + + exfat_set_volume_dirty(sb); + + /* get the directory entry of given file or directory */ + es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); + if (!es) + return -EIO; + ep = exfat_get_dentry_cached(es, 0); + ep2 = exfat_get_dentry_cached(es, 1); + + ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode)); + + /* set FILE_INFO structure using the acquired struct exfat_dentry */ + exfat_set_entry_time(sbi, &ei->i_crtime, + &ep->dentry.file.create_tz, + &ep->dentry.file.create_time, + &ep->dentry.file.create_date, + &ep->dentry.file.create_time_cs); + exfat_set_entry_time(sbi, &inode->i_mtime, + &ep->dentry.file.modify_tz, + &ep->dentry.file.modify_time, + &ep->dentry.file.modify_date, + &ep->dentry.file.modify_time_cs); + exfat_set_entry_time(sbi, &inode->i_atime, + &ep->dentry.file.access_tz, + &ep->dentry.file.access_time, + &ep->dentry.file.access_date, + NULL); + + /* File size should be zero if there is no cluster allocated */ + on_disk_size = i_size_read(inode); + + if (ei->start_clu == EXFAT_EOF_CLUSTER) + on_disk_size = 0; + + ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_size); + ep2->dentry.stream.size = ep2->dentry.stream.valid_size; + if (on_disk_size) { + ep2->dentry.stream.flags = ei->flags; + ep2->dentry.stream.start_clu = cpu_to_le32(ei->start_clu); + } else { + ep2->dentry.stream.flags = ALLOC_FAT_CHAIN; + ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER; + } + + exfat_update_dir_chksum_with_entry_set(es); + return exfat_free_dentry_set(es, sync); +} + +int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret; + + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); + ret = __exfat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); + mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); + + return ret; +} + +void exfat_sync_inode(struct inode *inode) +{ + lockdep_assert_held(&EXFAT_SB(inode->i_sb)->s_lock); + __exfat_write_inode(inode, 1); +} + +/* + * Input: inode, (logical) clu_offset, target allocation area + * Output: errcode, cluster number + * *clu = (~0), if it's unable to allocate a new cluster + */ +static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, + unsigned int *clu, int create) +{ + int ret; + unsigned int last_clu; + struct exfat_chain new_clu; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned int local_clu_offset = clu_offset; + unsigned int num_to_be_allocated = 0, num_clusters = 0; + + if (ei->i_size_ondisk > 0) + num_clusters = + EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + + if (clu_offset >= num_clusters) + num_to_be_allocated = clu_offset - num_clusters + 1; + + if (!create && (num_to_be_allocated > 0)) { + *clu = EXFAT_EOF_CLUSTER; + return 0; + } + + *clu = last_clu = ei->start_clu; + + if (ei->flags == ALLOC_NO_FAT_CHAIN) { + if (clu_offset > 0 && *clu != EXFAT_EOF_CLUSTER) { + last_clu += clu_offset - 1; + + if (clu_offset == num_clusters) + *clu = EXFAT_EOF_CLUSTER; + else + *clu += clu_offset; + } + } else if (ei->type == TYPE_FILE) { + unsigned int fclus = 0; + int err = exfat_get_cluster(inode, clu_offset, + &fclus, clu, &last_clu, 1); + if (err) + return -EIO; + + clu_offset -= fclus; + } else { + /* hint information */ + if (clu_offset > 0 && ei->hint_bmap.off != EXFAT_EOF_CLUSTER && + ei->hint_bmap.off > 0 && clu_offset >= ei->hint_bmap.off) { + clu_offset -= ei->hint_bmap.off; + /* hint_bmap.clu should be valid */ + WARN_ON(ei->hint_bmap.clu < 2); + *clu = ei->hint_bmap.clu; + } + + while (clu_offset > 0 && *clu != EXFAT_EOF_CLUSTER) { + last_clu = *clu; + if (exfat_get_next_cluster(sb, clu)) + return -EIO; + clu_offset--; + } + } + + if (*clu == EXFAT_EOF_CLUSTER) { + exfat_set_volume_dirty(sb); + + new_clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? + EXFAT_EOF_CLUSTER : last_clu + 1; + new_clu.size = 0; + new_clu.flags = ei->flags; + + /* allocate a cluster */ + if (num_to_be_allocated < 1) { + /* Broken FAT (i_sze > allocated FAT) */ + exfat_fs_error(sb, "broken FAT chain."); + return -EIO; + } + + ret = exfat_alloc_cluster(inode, num_to_be_allocated, &new_clu, + inode_needs_sync(inode)); + if (ret) + return ret; + + if (new_clu.dir == EXFAT_EOF_CLUSTER || + new_clu.dir == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "bogus cluster new allocated (last_clu : %u, new_clu : %u)", + last_clu, new_clu.dir); + return -EIO; + } + + /* append to the FAT chain */ + if (last_clu == EXFAT_EOF_CLUSTER) { + if (new_clu.flags == ALLOC_FAT_CHAIN) + ei->flags = ALLOC_FAT_CHAIN; + ei->start_clu = new_clu.dir; + } else { + if (new_clu.flags != ei->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with + * alloc-bitmap + */ + exfat_chain_cont_cluster(sb, ei->start_clu, + num_clusters); + ei->flags = ALLOC_FAT_CHAIN; + } + if (new_clu.flags == ALLOC_FAT_CHAIN) + if (exfat_ent_set(sb, last_clu, new_clu.dir)) + return -EIO; + } + + num_clusters += num_to_be_allocated; + *clu = new_clu.dir; + + inode->i_blocks += + num_to_be_allocated << sbi->sect_per_clus_bits; + + /* + * Move *clu pointer along FAT chains (hole care) because the + * caller of this function expect *clu to be the last cluster. + * This only works when num_to_be_allocated >= 2, + * *clu = (the first cluster of the allocated chain) => + * (the last cluster of ...) + */ + if (ei->flags == ALLOC_NO_FAT_CHAIN) { + *clu += num_to_be_allocated - 1; + } else { + while (num_to_be_allocated > 1) { + if (exfat_get_next_cluster(sb, clu)) + return -EIO; + num_to_be_allocated--; + } + } + + } + + /* hint information */ + ei->hint_bmap.off = local_clu_offset; + ei->hint_bmap.clu = *clu; + + return 0; +} + +static int exfat_map_new_buffer(struct exfat_inode_info *ei, + struct buffer_head *bh, loff_t pos) +{ + if (buffer_delay(bh) && pos > ei->i_size_aligned) + return -EIO; + set_buffer_new(bh); + + /* + * Adjust i_size_aligned if i_size_ondisk is bigger than it. + */ + if (ei->i_size_ondisk > ei->i_size_aligned) + ei->i_size_aligned = ei->i_size_ondisk; + return 0; +} + +static int exfat_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + struct exfat_inode_info *ei = EXFAT_I(inode); + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; + int err = 0; + unsigned long mapped_blocks = 0; + unsigned int cluster, sec_offset; + sector_t last_block; + sector_t phys = 0; + loff_t pos; + + mutex_lock(&sbi->s_lock); + last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb); + if (iblock >= last_block && !create) + goto done; + + /* Is this block already allocated? */ + err = exfat_map_cluster(inode, iblock >> sbi->sect_per_clus_bits, + &cluster, create); + if (err) { + if (err != -ENOSPC) + exfat_fs_error_ratelimit(sb, + "failed to bmap (inode : %p iblock : %llu, err : %d)", + inode, (unsigned long long)iblock, err); + goto unlock_ret; + } + + if (cluster == EXFAT_EOF_CLUSTER) + goto done; + + /* sector offset in cluster */ + sec_offset = iblock & (sbi->sect_per_clus - 1); + + phys = exfat_cluster_to_sector(sbi, cluster) + sec_offset; + mapped_blocks = sbi->sect_per_clus - sec_offset; + max_blocks = min(mapped_blocks, max_blocks); + + /* Treat newly added block / cluster */ + if (iblock < last_block) + create = 0; + + if (create || buffer_delay(bh_result)) { + pos = EXFAT_BLK_TO_B((iblock + 1), sb); + if (ei->i_size_ondisk < pos) + ei->i_size_ondisk = pos; + } + + if (create) { + err = exfat_map_new_buffer(ei, bh_result, pos); + if (err) { + exfat_fs_error(sb, + "requested for bmap out of range(pos : (%llu) > i_size_aligned(%llu)\n", + pos, ei->i_size_aligned); + goto unlock_ret; + } + } + + if (buffer_delay(bh_result)) + clear_buffer_delay(bh_result); + map_bh(bh_result, sb, phys); +done: + bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb); +unlock_ret: + mutex_unlock(&sbi->s_lock); + return err; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) +static int exfat_read_folio(struct file *file, struct folio *folio) +{ + return mpage_read_folio(folio, exfat_get_block); +} +#else +static int exfat_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, exfat_get_block); +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) +static void exfat_readahead(struct readahead_control *rac) +{ + mpage_readahead(rac, exfat_get_block); +} +#else +static int exfat_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, exfat_get_block); +} +#endif + +static int exfat_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, exfat_get_block, wbc); +} + +static int exfat_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, exfat_get_block); +} + +static void exfat_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > i_size_read(inode)) { + truncate_pagecache(inode, i_size_read(inode)); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_ctime = current_time(inode); +#else + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; +#endif + exfat_truncate(inode, EXFAT_I(inode)->i_size_aligned); + } +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) +static int exfat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, + struct page **pagep, void **fsdata) +#else +static int exfat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int flags, + struct page **pagep, void **fsdata) +#endif +{ + int ret; + + *pagep = NULL; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata, + exfat_get_block, + &EXFAT_I(mapping->host)->i_size_ondisk); +#else + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + exfat_get_block, + &EXFAT_I(mapping->host)->i_size_ondisk); +#endif + if (ret < 0) + exfat_write_failed(mapping, pos+len); + + return ret; +} + +static int exfat_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *pagep, void *fsdata) +{ + struct inode *inode = mapping->host; + struct exfat_inode_info *ei = EXFAT_I(inode); + int err; + + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + + if (ei->i_size_aligned < i_size_read(inode)) { + exfat_fs_error(inode->i_sb, + "invalid size(size(%llu) > aligned(%llu)\n", + i_size_read(inode), ei->i_size_aligned); + return -EIO; + } + + if (err < len) + exfat_write_failed(mapping, pos+len); + + if (!(err < 0) && !(ei->attr & ATTR_ARCHIVE)) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_ctime = current_time(inode); +#else + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; +#endif + ei->attr |= ATTR_ARCHIVE; + mark_inode_dirty(inode); + } + + return err; +} + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) +static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +#else +static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) +#endif +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + loff_t size = iocb->ki_pos + iov_iter_count(iter); + int rw = iov_iter_rw(iter); + ssize_t ret; + + if (rw == WRITE) { + /* + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), + * so we need to update the ->i_size_aligned to block boundary. + * + * But we must fill the remaining area or hole by nul for + * updating ->i_size_aligned + * + * Return 0, and fallback to normal buffered write. + */ + if (EXFAT_I(inode)->i_size_aligned < size) + return 0; + } + + /* + * Need to use the DIO_LOCKING for avoiding the race + * condition of exfat_get_block() and ->truncate(). + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); +#else + ret = blockdev_direct_IO(iocb, inode, iter, offset, exfat_get_block); +#endif + if (ret < 0 && (rw & WRITE)) + exfat_write_failed(mapping, size); + return ret; +} + +static sector_t exfat_aop_bmap(struct address_space *mapping, sector_t block) +{ + sector_t blocknr; + + /* exfat_get_cluster() assumes the requested blocknr isn't truncated. */ + down_read(&EXFAT_I(mapping->host)->truncate_lock); + blocknr = generic_block_bmap(mapping, block, exfat_get_block); + up_read(&EXFAT_I(mapping->host)->truncate_lock); + return blocknr; +} + +/* + * exfat_block_truncate_page() zeroes out a mapping from file offset `from' + * up to the end of the block which corresponds to `from'. + * This is required during truncate to physically zeroout the tail end + * of that block so it doesn't yield old data if the file is later grown. + * Also, avoid causing failure from fsx for cases of "data past EOF" + */ +int exfat_block_truncate_page(struct inode *inode, loff_t from) +{ + return block_truncate_page(inode->i_mapping, from, exfat_get_block); +} + +static const struct address_space_operations exfat_aops = { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + .dirty_folio = block_dirty_folio, +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) + .set_page_dirty = __set_page_dirty_buffers, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + .invalidate_folio = block_invalidate_folio, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + .read_folio = exfat_read_folio, +#else + .readpage = exfat_readpage, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) + .readahead = exfat_readahead, +#else + .readpages = exfat_readpages, +#endif + .writepage = exfat_writepage, + .writepages = exfat_writepages, + .write_begin = exfat_write_begin, + .write_end = exfat_write_end, + .direct_IO = exfat_direct_IO, + .bmap = exfat_aop_bmap +}; + +static inline unsigned long exfat_hash(loff_t i_pos) +{ + return hash_32(i_pos, EXFAT_HASH_BITS); +} + +void exfat_hash_inode(struct inode *inode, loff_t i_pos) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); + + spin_lock(&sbi->inode_hash_lock); + EXFAT_I(inode)->i_pos = i_pos; + hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head); + spin_unlock(&sbi->inode_hash_lock); +} + +void exfat_unhash_inode(struct inode *inode) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + + spin_lock(&sbi->inode_hash_lock); + hlist_del_init(&EXFAT_I(inode)->i_hash_fat); + EXFAT_I(inode)->i_pos = 0; + spin_unlock(&sbi->inode_hash_lock); +} + +struct inode *exfat_iget(struct super_block *sb, loff_t i_pos) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *info; + struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); + struct inode *inode = NULL; + + spin_lock(&sbi->inode_hash_lock); + hlist_for_each_entry(info, head, i_hash_fat) { + WARN_ON(info->vfs_inode.i_sb != sb); + + if (i_pos != info->i_pos) + continue; + inode = igrab(&info->vfs_inode); + if (inode) + break; + } + spin_unlock(&sbi->inode_hash_lock); + return inode; +} + +/* doesn't deal with root inode */ +static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) +{ + struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + loff_t size = info->size; + + ei->dir = info->dir; + ei->entry = info->entry; + ei->attr = info->attr; + ei->start_clu = info->start_clu; + ei->flags = info->flags; + ei->type = info->type; + + ei->version = 0; + ei->hint_stat.eidx = 0; + ei->hint_stat.clu = info->start_clu; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + ei->hint_bmap.off = EXFAT_EOF_CLUSTER; + ei->i_pos = 0; + + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(inode); +#else + inode->i_version++; +#endif + inode->i_generation = prandom_u32(); + + if (info->attr & ATTR_SUBDIR) { /* directory */ + inode->i_generation &= ~1; + inode->i_mode = exfat_make_mode(sbi, info->attr, 0777); + inode->i_op = &exfat_dir_inode_operations; + inode->i_fop = &exfat_dir_operations; + set_nlink(inode, info->num_subdirs); + } else { /* regular file */ + inode->i_generation |= 1; + inode->i_mode = exfat_make_mode(sbi, info->attr, 0777); + inode->i_op = &exfat_file_inode_operations; + inode->i_fop = &exfat_file_operations; + inode->i_mapping->a_ops = &exfat_aops; + inode->i_mapping->nrpages = 0; + } + + i_size_write(inode, size); + + /* ondisk and aligned size should be aligned with block size */ + if (size & (inode->i_sb->s_blocksize - 1)) { + size |= (inode->i_sb->s_blocksize - 1); + size++; + } + + ei->i_size_aligned = size; + ei->i_size_ondisk = size; + + exfat_save_attr(inode, info->attr); + + inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> + inode->i_blkbits; + inode->i_mtime = info->mtime; + inode->i_ctime = info->mtime; + ei->i_crtime = info->crtime; + inode->i_atime = info->atime; + + return 0; +} + +struct inode *exfat_build_inode(struct super_block *sb, + struct exfat_dir_entry *info, loff_t i_pos) +{ + struct inode *inode; + int err; + + inode = exfat_iget(sb, i_pos); + if (inode) + goto out; + inode = new_inode(sb); + if (!inode) { + inode = ERR_PTR(-ENOMEM); + goto out; + } + inode->i_ino = iunique(sb, EXFAT_ROOT_INO); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_set_iversion(inode, 1); +#else + inode->i_version = 1; +#endif + err = exfat_fill_inode(inode, info); + if (err) { + iput(inode); + inode = ERR_PTR(err); + goto out; + } + exfat_hash_inode(inode, i_pos); + insert_inode_hash(inode); +out: + return inode; +} + +void exfat_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + + if (!inode->i_nlink) { + i_size_write(inode, 0); + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); + __exfat_truncate(inode, 0); + mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); + } + + invalidate_inode_buffers(inode); + clear_inode(inode); + exfat_cache_inval_inode(inode); + exfat_unhash_inode(inode); +} diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c new file mode 100644 index 000000000000..e7ae6f629d6e --- /dev/null +++ b/fs/exfat/misc.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Written 1992,1993 by Werner Almesberger + * 22/11/2000 - Fixed fat_date_unix2dos for dates earlier than 01/01/1980 + * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru) + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#include +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +/* + * exfat_fs_error reports a file system problem that might indicate fa data + * corruption/inconsistency. Depending on 'errors' mount option the + * panic() is called, or error message is printed FAT and nothing is done, + * or filesystem is remounted read-only (default behavior). + * In case the file system is remounted read-only, it can be made writable + * again by remounting it. + */ +void __exfat_fs_error(struct super_block *sb, int report, const char *fmt, ...) +{ + struct exfat_mount_options *opts = &EXFAT_SB(sb)->options; + va_list args; + struct va_format vaf; + + if (report) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + exfat_err(sb, "error, %pV", &vaf); + va_end(args); + } + + if (opts->errors == EXFAT_ERRORS_PANIC) { + panic("exFAT-fs (%s): fs panic from previous error\n", + sb->s_id); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + } else if (opts->errors == EXFAT_ERRORS_RO && !sb_rdonly(sb)) { + sb->s_flags |= SB_RDONLY; +#else + } else if (opts->errors == EXFAT_ERRORS_RO && + !(sb->s_flags & MS_RDONLY)) { + sb->s_flags |= MS_RDONLY; +#endif + exfat_err(sb, "Filesystem has been set read-only"); + } +} + +#define SECS_PER_MIN (60) +#define TIMEZONE_SEC(x) ((x) * 15 * SECS_PER_MIN) + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) +static void exfat_adjust_tz(struct timespec64 *ts, u8 tz_off) +#else +static void exfat_adjust_tz(struct timespec *ts, u8 tz_off) +#endif +{ + if (tz_off <= 0x3F) + ts->tv_sec -= TIMEZONE_SEC(tz_off); + else /* 0x40 <= (tz_off & 0x7F) <=0x7F */ + ts->tv_sec += TIMEZONE_SEC(0x80 - tz_off); +} + +static inline int exfat_tz_offset(struct exfat_sb_info *sbi) +{ + if (sbi->options.sys_tz) + return -sys_tz.tz_minuteswest; + return sbi->options.time_offset; +} + +/* Convert a EXFAT time/date pair to a UNIX date (seconds since 1 1 70). */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) +void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 tz, __le16 time, __le16 date, u8 time_cs) +#else +void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec *ts, + u8 tz, __le16 time, __le16 date, u8 time_cs) +#endif +{ + u16 t = le16_to_cpu(time); + u16 d = le16_to_cpu(date); + + ts->tv_sec = mktime64(1980 + (d >> 9), d >> 5 & 0x000F, d & 0x001F, + t >> 11, (t >> 5) & 0x003F, (t & 0x001F) << 1); + + + /* time_cs field represent 0 ~ 199cs(1990 ms) */ + if (time_cs) { + ts->tv_sec += time_cs / 100; + ts->tv_nsec = (time_cs % 100) * 10 * NSEC_PER_MSEC; + } else + ts->tv_nsec = 0; + + if (tz & EXFAT_TZ_VALID) + /* Adjust timezone to UTC0. */ + exfat_adjust_tz(ts, tz & ~EXFAT_TZ_VALID); + else + ts->tv_sec -= exfat_tz_offset(sbi) * SECS_PER_MIN; +} + +/* Convert linear UNIX date to a EXFAT time/date pair. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) +void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, + u8 *tz, __le16 *time, __le16 *date, u8 *time_cs) +#else +#undef EXFAT_MAX_TIMESTAMP_SECS +#define EXFAT_MAX_TIMESTAMP_SECS 0xffffffff +void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec *ts, + u8 *tz, __le16 *time, __le16 *date, u8 *time_cs) +#endif +{ + struct tm tm; + u16 t, d; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0) + if (ts->tv_sec < EXFAT_MIN_TIMESTAMP_SECS) { + ts->tv_sec = EXFAT_MIN_TIMESTAMP_SECS; + ts->tv_nsec = 0; + } + else if (ts->tv_sec > EXFAT_MAX_TIMESTAMP_SECS) { + ts->tv_sec = EXFAT_MAX_TIMESTAMP_SECS; + ts->tv_nsec = 0; + } +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + time64_to_tm(ts->tv_sec, 0, &tm); +#else + time_to_tm(ts->tv_sec, 0, &tm); +#endif + t = (tm.tm_hour << 11) | (tm.tm_min << 5) | (tm.tm_sec >> 1); + d = ((tm.tm_year - 80) << 9) | ((tm.tm_mon + 1) << 5) | tm.tm_mday; + + *time = cpu_to_le16(t); + *date = cpu_to_le16(d); + + /* time_cs field represent 0 ~ 199cs(1990 ms) */ + if (time_cs) + *time_cs = (tm.tm_sec & 1) * 100 + + ts->tv_nsec / (10 * NSEC_PER_MSEC); + + /* + * Record 00h value for OffsetFromUtc field and 1 value for OffsetValid + * to indicate that local time and UTC are the same. + */ + *tz = EXFAT_TZ_VALID; +} + +/* atime has only a 2-second resolution */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) +void exfat_truncate_atime(struct timespec64 *ts) +#else +void exfat_truncate_atime(struct timespec *ts) +#endif +{ + ts->tv_sec = round_down(ts->tv_sec, 2); + ts->tv_nsec = 0; +} + +u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type) +{ + int i; + u8 *c = (u8 *)data; + + for (i = 0; i < len; i++, c++) { + if (unlikely(type == CS_DIR_ENTRY && (i == 2 || i == 3))) + continue; + chksum = ((chksum << 15) | (chksum >> 1)) + *c; + } + return chksum; +} + +u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type) +{ + int i; + u8 *c = (u8 *)data; + + for (i = 0; i < len; i++, c++) { + if (unlikely(type == CS_BOOT_SECTOR && + (i == 106 || i == 107 || i == 112))) + continue; + chksum = ((chksum << 31) | (chksum >> 1)) + *c; + } + return chksum; +} + +void exfat_update_bh(struct buffer_head *bh, int sync) +{ + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + + if (sync) + sync_dirty_buffer(bh); +} + +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync) +{ + int i, err = 0; + + for (i = 0; i < nr_bhs; i++) { + set_buffer_uptodate(bhs[i]); + mark_buffer_dirty(bhs[i]); + if (sync) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + write_dirty_buffer(bhs[i], REQ_SYNC); +#else + write_dirty_buffer(bhs[i], WRITE_SYNC); +#endif + } + + for (i = 0; i < nr_bhs && sync; i++) { + wait_on_buffer(bhs[i]); + if (!err && !buffer_uptodate(bhs[i])) + err = -EIO; + } + return err; +} + +void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, + unsigned int size, unsigned char flags) +{ + ec->dir = dir; + ec->size = size; + ec->flags = flags; +} + +void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec) +{ + return exfat_chain_set(dup, ec->dir, ec->size, ec->flags); +} diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c new file mode 100644 index 000000000000..3342f29a7683 --- /dev/null +++ b/fs/exfat/namei.c @@ -0,0 +1,1531 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) +#include +#endif +#include +#include +#include +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +static inline unsigned long exfat_d_version(struct dentry *dentry) +{ + return (unsigned long) dentry->d_fsdata; +} + +static inline void exfat_d_version_set(struct dentry *dentry, + unsigned long version) +{ + dentry->d_fsdata = (void *) version; +} + +/* + * If new entry was created in the parent, it could create the 8.3 alias (the + * shortname of logname). So, the parent may have the negative-dentry which + * matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative anymore. So, + * drop it. + */ +static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + int ret = 1; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, and + * will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (d_really_is_positive(dentry)) + return 1; + + /* + * Drop the negative dentry, in order to make sure to use the case + * sensitive name which is specified by user if this is for creation. + */ + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; + + spin_lock(&dentry->d_lock); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + ret = inode_eq_iversion(d_inode(dentry->d_parent), + exfat_d_version(dentry)); +#else + if (dentry->d_parent->d_inode->i_version != exfat_d_version(dentry)) + ret = 0; +#endif + spin_unlock(&dentry->d_lock); + return ret; +} + +/* returns the length of a struct qstr, ignoring trailing dots if necessary */ +static unsigned int exfat_striptail_len(unsigned int len, const char *name, + bool keep_last_dots) +{ + if (!keep_last_dots) { + while (len && name[len - 1] == '.') + len--; + } + return len; +} + +/* + * Compute the hash for the exfat name corresponding to the dentry. If the name + * is invalid, we leave the hash code unchanged so that the existing dentry can + * be used. The exfat fs routines will return ENOENT or EINVAL as appropriate. + */ +static int exfat_d_hash(const struct dentry *dentry, struct qstr *qstr) +{ + struct super_block *sb = dentry->d_sb; + struct nls_table *t = EXFAT_SB(sb)->nls_io; + const unsigned char *name = qstr->name; + unsigned int len = exfat_striptail_len(qstr->len, qstr->name, + EXFAT_SB(sb)->options.keep_last_dots); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + unsigned long hash = init_name_hash(dentry); +#else + unsigned long hash = init_name_hash(); +#endif + int i, charlen; + wchar_t c; + + for (i = 0; i < len; i += charlen) { + charlen = t->char2uni(&name[i], len - i, &c); + if (charlen < 0) + return charlen; + hash = partial_name_hash(exfat_toupper(sb, c), hash); + } + + qstr->hash = end_name_hash(hash); + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +static int exfat_d_cmp(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +#else +static int exfat_d_cmp(const struct dentry *parent, const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +#endif +{ + struct super_block *sb = dentry->d_sb; + struct nls_table *t = EXFAT_SB(sb)->nls_io; + unsigned int alen = exfat_striptail_len(name->len, name->name, + EXFAT_SB(sb)->options.keep_last_dots); + unsigned int blen = exfat_striptail_len(len, str, + EXFAT_SB(sb)->options.keep_last_dots); + wchar_t c1, c2; + int charlen, i; + + if (alen != blen) + return 1; + + for (i = 0; i < len; i += charlen) { + charlen = t->char2uni(&name->name[i], alen - i, &c1); + if (charlen < 0) + return 1; + if (charlen != t->char2uni(&str[i], blen - i, &c2)) + return 1; + + if (exfat_toupper(sb, c1) != exfat_toupper(sb, c2)) + return 1; + } + + return 0; +} + +const struct dentry_operations exfat_dentry_ops = { + .d_revalidate = exfat_d_revalidate, + .d_hash = exfat_d_hash, + .d_compare = exfat_d_cmp, +}; + +static int exfat_utf8_d_hash(const struct dentry *dentry, struct qstr *qstr) +{ + struct super_block *sb = dentry->d_sb; + const unsigned char *name = qstr->name; + unsigned int len = exfat_striptail_len(qstr->len, qstr->name, + EXFAT_SB(sb)->options.keep_last_dots); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + unsigned long hash = init_name_hash(dentry); +#else + unsigned long hash = init_name_hash(); +#endif + int i, charlen; + unicode_t u; + + for (i = 0; i < len; i += charlen) { + charlen = utf8_to_utf32(&name[i], len - i, &u); + if (charlen < 0) + return charlen; + + /* + * exfat_toupper() works only for code points up to the U+FFFF. + */ + hash = partial_name_hash(u <= 0xFFFF ? exfat_toupper(sb, u) : u, + hash); + } + + qstr->hash = end_name_hash(hash); + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +static int exfat_utf8_d_cmp(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +#else +static int exfat_utf8_d_cmp(const struct dentry *parent, + const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +#endif +{ + struct super_block *sb = dentry->d_sb; + unsigned int alen = exfat_striptail_len(name->len, name->name, + EXFAT_SB(sb)->options.keep_last_dots); + unsigned int blen = exfat_striptail_len(len, str, + EXFAT_SB(sb)->options.keep_last_dots); + + unicode_t u_a, u_b; + int charlen, i; + + if (alen != blen) + return 1; + + for (i = 0; i < alen; i += charlen) { + charlen = utf8_to_utf32(&name->name[i], alen - i, &u_a); + if (charlen < 0) + return 1; + if (charlen != utf8_to_utf32(&str[i], blen - i, &u_b)) + return 1; + + if (u_a <= 0xFFFF && u_b <= 0xFFFF) { + if (exfat_toupper(sb, u_a) != exfat_toupper(sb, u_b)) + return 1; + } else { + if (u_a != u_b) + return 1; + } + } + + return 0; +} + +const struct dentry_operations exfat_utf8_dentry_ops = { + .d_revalidate = exfat_d_revalidate, + .d_hash = exfat_utf8_d_hash, + .d_compare = exfat_utf8_d_cmp, +}; + +/* used only in search empty_slot() */ +#define CNT_UNUSED_NOHIT (-1) +#define CNT_UNUSED_HIT (-2) +/* search EMPTY CONTINUOUS "num_entries" entries */ +static int exfat_search_empty_slot(struct super_block *sb, + struct exfat_hint_femp *hint_femp, struct exfat_chain *p_dir, + int num_entries) +{ + int i, dentry, num_empty = 0; + int dentries_per_clu; + unsigned int type; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + dentries_per_clu = sbi->dentries_per_clu; + + if (hint_femp->eidx != EXFAT_HINT_NONE) { + dentry = hint_femp->eidx; + if (num_entries <= hint_femp->count) { + hint_femp->eidx = EXFAT_HINT_NONE; + return dentry; + } + + exfat_chain_dup(&clu, &hint_femp->cur); + } else { + exfat_chain_dup(&clu, p_dir); + dentry = 0; + } + + while (clu.dir != EXFAT_EOF_CLUSTER) { + i = dentry & (dentries_per_clu - 1); + + for (; i < dentries_per_clu; i++, dentry++) { + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + type = exfat_get_entry_type(ep); + brelse(bh); + + if (type == TYPE_UNUSED || type == TYPE_DELETED) { + num_empty++; + if (hint_femp->eidx == EXFAT_HINT_NONE) { + hint_femp->eidx = dentry; + hint_femp->count = CNT_UNUSED_NOHIT; + exfat_chain_set(&hint_femp->cur, + clu.dir, clu.size, clu.flags); + } + + if (type == TYPE_UNUSED && + hint_femp->count != CNT_UNUSED_HIT) + hint_femp->count = CNT_UNUSED_HIT; + } else { + if (hint_femp->eidx != EXFAT_HINT_NONE && + hint_femp->count == CNT_UNUSED_HIT) { + /* unused empty group means + * an empty group which includes + * unused dentry + */ + exfat_fs_error(sb, + "found bogus dentry(%d) beyond unused empty group(%d) (start_clu : %u, cur_clu : %u)", + dentry, hint_femp->eidx, + p_dir->dir, clu.dir); + return -EIO; + } + + num_empty = 0; + hint_femp->eidx = EXFAT_HINT_NONE; + } + + if (num_empty >= num_entries) { + /* found and invalidate hint_femp */ + hint_femp->eidx = EXFAT_HINT_NONE; + return (dentry - (num_entries - 1)); + } + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &clu.dir)) + return -EIO; + } + } + + return -ENOSPC; +} + +static int exfat_check_max_dentries(struct inode *inode) +{ + if (EXFAT_B_TO_DEN(i_size_read(inode)) >= MAX_EXFAT_DENTRIES) { + /* + * exFAT spec allows a dir to grow up to 8388608(256MB) + * dentries + */ + return -ENOSPC; + } + return 0; +} + +/* find empty directory entry. + * if there isn't any empty slot, expand cluster chain. + */ +static int exfat_find_empty_entry(struct inode *inode, + struct exfat_chain *p_dir, int num_entries) +{ + int dentry; + unsigned int ret, last_clu; + loff_t size = 0; + struct exfat_chain clu; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_hint_femp hint_femp; + + hint_femp.eidx = EXFAT_HINT_NONE; + + if (ei->hint_femp.eidx != EXFAT_HINT_NONE) { + hint_femp = ei->hint_femp; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + } + + while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir, + num_entries)) < 0) { + if (dentry == -EIO) + break; + + if (exfat_check_max_dentries(inode)) + return -ENOSPC; + + /* we trust p_dir->size regardless of FAT type */ + if (exfat_find_last_cluster(sb, p_dir, &last_clu)) + return -EIO; + + /* + * Allocate new cluster to this directory + */ + exfat_chain_set(&clu, last_clu + 1, 0, p_dir->flags); + + /* allocate a cluster */ + ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode)); + if (ret) + return ret; + + if (exfat_zeroed_cluster(inode, clu.dir)) + return -EIO; + + /* append to the FAT chain */ + if (clu.flags != p_dir->flags) { + /* no-fat-chain bit is disabled, + * so fat-chain should be synced with alloc-bitmap + */ + exfat_chain_cont_cluster(sb, p_dir->dir, p_dir->size); + p_dir->flags = ALLOC_FAT_CHAIN; + hint_femp.cur.flags = ALLOC_FAT_CHAIN; + } + + if (clu.flags == ALLOC_FAT_CHAIN) + if (exfat_ent_set(sb, last_clu, clu.dir)) + return -EIO; + + if (hint_femp.eidx == EXFAT_HINT_NONE) { + /* the special case that new dentry + * should be allocated from the start of new cluster + */ + hint_femp.eidx = EXFAT_B_TO_DEN_IDX(p_dir->size, sbi); + hint_femp.count = sbi->dentries_per_clu; + + exfat_chain_set(&hint_femp.cur, clu.dir, 0, clu.flags); + } + hint_femp.cur.size++; + p_dir->size++; + size = EXFAT_CLU_TO_B(p_dir->size, sbi); + + /* directory inode should be updated in here */ + i_size_write(inode, size); + ei->i_size_ondisk += sbi->cluster_size; + ei->i_size_aligned += sbi->cluster_size; + ei->flags = p_dir->flags; + inode->i_blocks += 1 << sbi->sect_per_clus_bits; + } + + return dentry; +} + +/* + * Name Resolution Functions : + * Zero if it was successful; otherwise nonzero. + */ +static int __exfat_resolve_path(struct inode *inode, const unsigned char *path, + struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, + int lookup) +{ + int namelen; + int lossy = NLS_NAME_NO_LOSSY; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + int pathlen = strlen(path); + + /* + * get the length of the pathname excluding + * trailing periods, if any. + */ + namelen = exfat_striptail_len(pathlen, path, false); + if (EXFAT_SB(sb)->options.keep_last_dots) { + /* + * Do not allow the creation of files with names + * ending with period(s). + */ + if (!lookup && (namelen < pathlen)) + return -EINVAL; + namelen = pathlen; + } + if (!namelen) + return -ENOENT; + if (pathlen > (MAX_NAME_LENGTH * MAX_CHARSET_SIZE)) + return -ENAMETOOLONG; + + /* + * strip all leading spaces : + * "MS windows 7" supports leading spaces. + * So we should skip this preprocessing for compatibility. + */ + + /* file name conversion : + * If lookup case, we allow bad-name for compatibility. + */ + namelen = exfat_nls_to_utf16(sb, path, namelen, p_uniname, + &lossy); + if (namelen < 0) + return namelen; /* return error value */ + + if ((lossy && !lookup) || !namelen) + return (lossy & NLS_NAME_OVERLEN) ? -ENAMETOOLONG : -EINVAL; + + exfat_chain_set(p_dir, ei->start_clu, + EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags); + + return 0; +} + +static inline int exfat_resolve_path(struct inode *inode, + const unsigned char *path, struct exfat_chain *dir, + struct exfat_uni_name *uni) +{ + return __exfat_resolve_path(inode, path, dir, uni, 0); +} + +static inline int exfat_resolve_path_for_lookup(struct inode *inode, + const unsigned char *path, struct exfat_chain *dir, + struct exfat_uni_name *uni) +{ + return __exfat_resolve_path(inode, path, dir, uni, 1); +} + +static inline loff_t exfat_make_i_pos(struct exfat_dir_entry *info) +{ + return ((loff_t) info->dir.dir << 32) | (info->entry & 0xffffffff); +} + +static int exfat_add_entry(struct inode *inode, const char *path, + struct exfat_chain *p_dir, unsigned int type, + struct exfat_dir_entry *info) +{ + int ret, dentry, num_entries; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_uni_name uniname; + struct exfat_chain clu; + int clu_size = 0; + unsigned int start_clu = EXFAT_FREE_CLUSTER; + + ret = exfat_resolve_path(inode, path, p_dir, &uniname); + if (ret) + goto out; + + num_entries = exfat_calc_num_entries(&uniname); + if (num_entries < 0) { + ret = num_entries; + goto out; + } + + /* exfat_find_empty_entry must be called before alloc_cluster() */ + dentry = exfat_find_empty_entry(inode, p_dir, num_entries); + if (dentry < 0) { + ret = dentry; /* -EIO or -ENOSPC */ + goto out; + } + + if (type == TYPE_DIR) { + ret = exfat_alloc_new_dir(inode, &clu); + if (ret) + goto out; + start_clu = clu.dir; + clu_size = sbi->cluster_size; + } + + /* update the directory entry */ + /* fill the dos name directory entry information of the created file. + * the first cluster is not determined yet. (0) + */ + ret = exfat_init_dir_entry(inode, p_dir, dentry, type, + start_clu, clu_size); + if (ret) + goto out; + + ret = exfat_init_ext_entry(inode, p_dir, dentry, num_entries, &uniname); + if (ret) + goto out; + + info->dir = *p_dir; + info->entry = dentry; + info->flags = ALLOC_NO_FAT_CHAIN; + info->type = type; + + if (type == TYPE_FILE) { + info->attr = ATTR_ARCHIVE; + info->start_clu = EXFAT_EOF_CLUSTER; + info->size = 0; + info->num_subdirs = 0; + } else { + info->attr = ATTR_SUBDIR; + info->start_clu = start_clu; + info->size = clu_size; + info->num_subdirs = EXFAT_MIN_SUBDIR; + } + memset(&info->crtime, 0, sizeof(info->crtime)); + memset(&info->mtime, 0, sizeof(info->mtime)); + memset(&info->atime, 0, sizeof(info->atime)); +out: + return ret; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int exfat_create(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) +#else +static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +#endif +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct exfat_chain cdir; + struct exfat_dir_entry info; + loff_t i_pos; + int err; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + exfat_set_volume_dirty(sb); + err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_FILE, + &info); + if (err) + goto unlock; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(dir); +#else + dir->i_version++; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + dir->i_ctime = dir->i_mtime = current_time(dir); +#else + dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; +#endif + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + i_pos = exfat_make_i_pos(&info); + inode = exfat_build_inode(sb, &info, i_pos); + err = PTR_ERR_OR_ZERO(inode); + if (err) + goto unlock; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(inode); +#else + inode->i_version++; +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_atime = inode->i_ctime = + EXFAT_I(inode)->i_crtime = current_time(inode); +#else + inode->i_mtime = inode->i_atime = inode->i_ctime = + EXFAT_I(inode)->i_crtime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&inode->i_atime); + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + d_instantiate(dentry, inode); +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +/* lookup a file */ +static int exfat_find(struct inode *dir, struct qstr *qname, + struct exfat_dir_entry *info) +{ + int ret, dentry, num_entries, count; + struct exfat_chain cdir; + struct exfat_uni_name uni_name; + struct super_block *sb = dir->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(dir); + struct exfat_dentry *ep, *ep2; + struct exfat_entry_set_cache *es; + /* for optimized dir & entry to prevent long traverse of cluster chain */ + struct exfat_hint hint_opt; + + if (qname->len == 0) + return -ENOENT; + + /* check the validity of directory name in the given pathname */ + ret = exfat_resolve_path_for_lookup(dir, qname->name, &cdir, &uni_name); + if (ret) + return ret; + + num_entries = exfat_calc_num_entries(&uni_name); + if (num_entries < 0) + return num_entries; + + /* check the validation of hint_stat and initialize it if required */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) { +#else + if (ei->version != (dir->i_version & 0xffffffff)) { +#endif + ei->hint_stat.clu = cdir.dir; + ei->hint_stat.eidx = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + ei->version = (inode_peek_iversion_raw(dir) & 0xffffffff); +#else + ei->version = (dir->i_version & 0xffffffff); +#endif + ei->hint_femp.eidx = EXFAT_HINT_NONE; + } + + /* search the file name for directories */ + dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, + num_entries, TYPE_ALL, &hint_opt); + + if (dentry < 0) + return dentry; /* -error value */ + + info->dir = cdir; + info->entry = dentry; + info->num_subdirs = 0; + + /* adjust cdir to the optimized value */ + cdir.dir = hint_opt.clu; + if (cdir.flags & ALLOC_NO_FAT_CHAIN) + cdir.size -= dentry / sbi->dentries_per_clu; + dentry = hint_opt.eidx; + es = exfat_get_dentry_set(sb, &cdir, dentry, ES_2_ENTRIES); + if (!es) + return -EIO; + ep = exfat_get_dentry_cached(es, 0); + ep2 = exfat_get_dentry_cached(es, 1); + + info->type = exfat_get_entry_type(ep); + info->attr = le16_to_cpu(ep->dentry.file.attr); + info->size = le64_to_cpu(ep2->dentry.stream.valid_size); + if ((info->type == TYPE_FILE) && (info->size == 0)) { + info->flags = ALLOC_NO_FAT_CHAIN; + info->start_clu = EXFAT_EOF_CLUSTER; + } else { + info->flags = ep2->dentry.stream.flags; + info->start_clu = + le32_to_cpu(ep2->dentry.stream.start_clu); + } + + exfat_get_entry_time(sbi, &info->crtime, + ep->dentry.file.create_tz, + ep->dentry.file.create_time, + ep->dentry.file.create_date, + ep->dentry.file.create_time_cs); + exfat_get_entry_time(sbi, &info->mtime, + ep->dentry.file.modify_tz, + ep->dentry.file.modify_time, + ep->dentry.file.modify_date, + ep->dentry.file.modify_time_cs); + exfat_get_entry_time(sbi, &info->atime, + ep->dentry.file.access_tz, + ep->dentry.file.access_time, + ep->dentry.file.access_date, + 0); + exfat_free_dentry_set(es, false); + + if (ei->start_clu == EXFAT_FREE_CLUSTER) { + exfat_fs_error(sb, + "non-zero size file starts with zero cluster (size : %llu, p_dir : %u, entry : 0x%08x)", + i_size_read(dir), ei->dir.dir, ei->entry); + return -EIO; + } + + if (info->type == TYPE_DIR) { + exfat_chain_set(&cdir, info->start_clu, + EXFAT_B_TO_CLU(info->size, sbi), info->flags); + count = exfat_count_dir_entries(sb, &cdir); + if (count < 0) + return -EIO; + + info->num_subdirs = count + EXFAT_MIN_SUBDIR; + } + return 0; +} + +static int exfat_d_anon_disconn(struct dentry *dentry) +{ + return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED); +} + +static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct dentry *alias; + struct exfat_dir_entry info; + int err; + loff_t i_pos; + mode_t i_mode; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + err = exfat_find(dir, &dentry->d_name, &info); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto unlock; + } + + i_pos = exfat_make_i_pos(&info); + inode = exfat_build_inode(sb, &info, i_pos); + err = PTR_ERR_OR_ZERO(inode); + if (err) + goto unlock; + + i_mode = inode->i_mode; + alias = d_find_alias(inode); + + /* + * Checking "alias->d_parent == dentry->d_parent" to make sure + * FS is not corrupted (especially double linked dir). + */ + if (alias && alias->d_parent == dentry->d_parent && + !exfat_d_anon_disconn(alias)) { + + /* + * Unhashed alias is able to exist because of revalidate() + * called by lookup_fast. You can easily make this status + * by calling create and lookup concurrently + * In such case, we reuse an alias instead of new dentry + */ + if (d_unhashed(alias)) { + WARN_ON(alias->d_name.hash_len != + dentry->d_name.hash_len); + exfat_info(sb, "rehashed a dentry(%p) in read lookup", + alias); + d_drop(dentry); + d_rehash(alias); + } else if (!S_ISDIR(i_mode)) { + /* + * This inode has non anonymous-DCACHE_DISCONNECTED + * dentry. This means, the user did ->lookup() by an + * another name (longname vs 8.3 alias of it) in past. + * + * Switch to new one for reason of locality if possible. + */ + d_move(alias, dentry); + } + iput(inode); + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return alias; + } + dput(alias); +out: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + if (!inode) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + exfat_d_version_set(dentry, inode_query_iversion(dir)); +#else + exfat_d_version_set(dentry, dir->i_version); +#endif + + return d_splice_alias(inode, dentry); +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return ERR_PTR(err); +} + +/* remove an entry, BUT don't truncate */ +static int exfat_unlink(struct inode *dir, struct dentry *dentry) +{ + struct exfat_chain cdir; + struct exfat_dentry *ep; + struct super_block *sb = dir->i_sb; + struct inode *inode = dentry->d_inode; + struct exfat_inode_info *ei = EXFAT_I(inode); + struct buffer_head *bh; + int num_entries, entry, err = 0; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + exfat_chain_dup(&cdir, &ei->dir); + entry = ei->entry; + if (ei->dir.dir == DIR_DELETED) { + exfat_err(sb, "abnormal access to deleted dentry"); + err = -ENOENT; + goto unlock; + } + + ep = exfat_get_dentry(sb, &cdir, entry, &bh); + if (!ep) { + err = -EIO; + goto unlock; + } + num_entries = exfat_count_ext_entries(sb, &cdir, entry, ep); + if (num_entries < 0) { + err = -EIO; + brelse(bh); + goto unlock; + } + num_entries++; + brelse(bh); + + exfat_set_volume_dirty(sb); + /* update the directory entry */ + if (exfat_remove_entries(dir, &cdir, entry, 0, num_entries)) { + err = -EIO; + goto unlock; + } + + /* This doesn't modify ei */ + ei->dir.dir = DIR_DELETED; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(dir); +#else + dir->i_version++; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + dir->i_mtime = dir->i_atime = current_time(dir); +#else + dir->i_mtime = dir->i_atime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&dir->i_atime); + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + + clear_nlink(inode); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_atime = current_time(inode); +#else + inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&inode->i_atime); + exfat_unhash_inode(inode); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + exfat_d_version_set(dentry, inode_query_iversion(dir)); +#else + exfat_d_version_set(dentry, dir->i_version); +#endif +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int exfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) +#else +static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +#endif +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct exfat_dir_entry info; + struct exfat_chain cdir; + loff_t i_pos; + int err; + + mutex_lock(&EXFAT_SB(sb)->s_lock); + exfat_set_volume_dirty(sb); + err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_DIR, + &info); + if (err) + goto unlock; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(dir); +#else + dir->i_version++; +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + dir->i_ctime = dir->i_mtime = current_time(dir); +#else + dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; +#endif + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + inc_nlink(dir); + + i_pos = exfat_make_i_pos(&info); + inode = exfat_build_inode(sb, &info, i_pos); + err = PTR_ERR_OR_ZERO(inode); + if (err) + goto unlock; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(inode); +#else + inode->i_version++; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_atime = inode->i_ctime = + EXFAT_I(inode)->i_crtime = current_time(inode); +#else + inode->i_mtime = inode->i_atime = inode->i_ctime = + EXFAT_I(inode)->i_crtime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&inode->i_atime); + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + d_instantiate(dentry, inode); + +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +static int exfat_check_dir_empty(struct super_block *sb, + struct exfat_chain *p_dir) +{ + int i, dentries_per_clu; + unsigned int type; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + dentries_per_clu = sbi->dentries_per_clu; + + exfat_chain_dup(&clu, p_dir); + + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < dentries_per_clu; i++) { + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + type = exfat_get_entry_type(ep); + brelse(bh); + if (type == TYPE_UNUSED) + return 0; + + if (type != TYPE_FILE && type != TYPE_DIR) + continue; + + return -ENOTEMPTY; + } + + if (clu.flags == ALLOC_NO_FAT_CHAIN) { + if (--clu.size > 0) + clu.dir++; + else + clu.dir = EXFAT_EOF_CLUSTER; + } else { + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + } + + return 0; +} + +static int exfat_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct exfat_dentry *ep; + struct exfat_chain cdir, clu_to_free; + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct buffer_head *bh; + int num_entries, entry, err; + + mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock); + + exfat_chain_dup(&cdir, &ei->dir); + entry = ei->entry; + + if (ei->dir.dir == DIR_DELETED) { + exfat_err(sb, "abnormal access to deleted dentry"); + err = -ENOENT; + goto unlock; + } + + exfat_chain_set(&clu_to_free, ei->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi), ei->flags); + + err = exfat_check_dir_empty(sb, &clu_to_free); + if (err) { + if (err == -EIO) + exfat_err(sb, "failed to exfat_check_dir_empty : err(%d)", + err); + goto unlock; + } + + ep = exfat_get_dentry(sb, &cdir, entry, &bh); + if (!ep) { + err = -EIO; + goto unlock; + } + + num_entries = exfat_count_ext_entries(sb, &cdir, entry, ep); + if (num_entries < 0) { + err = -EIO; + brelse(bh); + goto unlock; + } + num_entries++; + brelse(bh); + + exfat_set_volume_dirty(sb); + err = exfat_remove_entries(dir, &cdir, entry, 0, num_entries); + if (err) { + exfat_err(sb, "failed to exfat_remove_entries : err(%d)", err); + goto unlock; + } + ei->dir.dir = DIR_DELETED; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(dir); +#else + dir->i_version++; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + dir->i_mtime = dir->i_atime = current_time(dir); +#else + dir->i_mtime = dir->i_atime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&dir->i_atime); + if (IS_DIRSYNC(dir)) + exfat_sync_inode(dir); + else + mark_inode_dirty(dir); + drop_nlink(dir); + + clear_nlink(inode); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_atime = current_time(inode); +#else + inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&inode->i_atime); + exfat_unhash_inode(inode); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + exfat_d_version_set(dentry, inode_query_iversion(dir)); +#else + exfat_d_version_set(dentry, dir->i_version); +#endif +unlock: + mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); + return err; +} + +static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, + int oldentry, struct exfat_uni_name *p_uniname, + struct exfat_inode_info *ei) +{ + int ret, num_old_entries, num_new_entries; + struct exfat_dentry *epold, *epnew; + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh, *old_bh; + int sync = IS_DIRSYNC(inode); + + epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh); + if (!epold) + return -EIO; + + num_old_entries = exfat_count_ext_entries(sb, p_dir, oldentry, epold); + if (num_old_entries < 0) + return -EIO; + num_old_entries++; + + num_new_entries = exfat_calc_num_entries(p_uniname); + if (num_new_entries < 0) + return num_new_entries; + + if (num_old_entries < num_new_entries) { + int newentry; + + newentry = + exfat_find_empty_entry(inode, p_dir, num_new_entries); + if (newentry < 0) + return newentry; /* -EIO or -ENOSPC */ + + epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh); + if (!epnew) + return -EIO; + + *epnew = *epold; + if (exfat_get_entry_type(epnew) == TYPE_FILE) { + epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); + ei->attr |= ATTR_ARCHIVE; + } + exfat_update_bh(new_bh, sync); + brelse(old_bh); + brelse(new_bh); + + epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh); + if (!epold) + return -EIO; + epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh); + if (!epnew) { + brelse(old_bh); + return -EIO; + } + + *epnew = *epold; + exfat_update_bh(new_bh, sync); + brelse(old_bh); + brelse(new_bh); + + ret = exfat_init_ext_entry(inode, p_dir, newentry, + num_new_entries, p_uniname); + if (ret) + return ret; + + exfat_remove_entries(inode, p_dir, oldentry, 0, + num_old_entries); + ei->dir = *p_dir; + ei->entry = newentry; + } else { + if (exfat_get_entry_type(epold) == TYPE_FILE) { + epold->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); + ei->attr |= ATTR_ARCHIVE; + } + exfat_update_bh(old_bh, sync); + brelse(old_bh); + ret = exfat_init_ext_entry(inode, p_dir, oldentry, + num_new_entries, p_uniname); + if (ret) + return ret; + + exfat_remove_entries(inode, p_dir, oldentry, num_new_entries, + num_old_entries); + } + return 0; +} + +static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, + int oldentry, struct exfat_chain *p_newdir, + struct exfat_uni_name *p_uniname, struct exfat_inode_info *ei) +{ + int ret, newentry, num_new_entries, num_old_entries; + struct exfat_dentry *epmov, *epnew; + struct super_block *sb = inode->i_sb; + struct buffer_head *mov_bh, *new_bh; + + epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh); + if (!epmov) + return -EIO; + + num_old_entries = exfat_count_ext_entries(sb, p_olddir, oldentry, + epmov); + if (num_old_entries < 0) + return -EIO; + num_old_entries++; + + num_new_entries = exfat_calc_num_entries(p_uniname); + if (num_new_entries < 0) + return num_new_entries; + + newentry = exfat_find_empty_entry(inode, p_newdir, num_new_entries); + if (newentry < 0) + return newentry; /* -EIO or -ENOSPC */ + + epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh); + if (!epnew) + return -EIO; + + *epnew = *epmov; + if (exfat_get_entry_type(epnew) == TYPE_FILE) { + epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); + ei->attr |= ATTR_ARCHIVE; + } + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); + brelse(mov_bh); + brelse(new_bh); + + epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh); + if (!epmov) + return -EIO; + epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh); + if (!epnew) { + brelse(mov_bh); + return -EIO; + } + + *epnew = *epmov; + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); + brelse(mov_bh); + brelse(new_bh); + + ret = exfat_init_ext_entry(inode, p_newdir, newentry, num_new_entries, + p_uniname); + if (ret) + return ret; + + exfat_remove_entries(inode, p_olddir, oldentry, 0, num_old_entries); + + exfat_chain_set(&ei->dir, p_newdir->dir, p_newdir->size, + p_newdir->flags); + + ei->entry = newentry; + return 0; +} + +/* rename or move a old file into a new file */ +static int __exfat_rename(struct inode *old_parent_inode, + struct exfat_inode_info *ei, struct inode *new_parent_inode, + struct dentry *new_dentry) +{ + int ret; + int dentry; + struct exfat_chain olddir, newdir; + struct exfat_chain *p_dir = NULL; + struct exfat_uni_name uni_name; + struct exfat_dentry *ep; + struct super_block *sb = old_parent_inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + const unsigned char *new_path = new_dentry->d_name.name; + struct inode *new_inode = new_dentry->d_inode; + int num_entries; + struct exfat_inode_info *new_ei = NULL; + unsigned int new_entry_type = TYPE_UNUSED; + int new_entry = 0; + struct buffer_head *old_bh, *new_bh = NULL; + + /* check the validity of pointer parameters */ + if (new_path == NULL || strlen(new_path) == 0) + return -EINVAL; + + if (ei->dir.dir == DIR_DELETED) { + exfat_err(sb, "abnormal access to deleted source dentry"); + return -ENOENT; + } + + exfat_chain_set(&olddir, EXFAT_I(old_parent_inode)->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(old_parent_inode), sbi), + EXFAT_I(old_parent_inode)->flags); + dentry = ei->entry; + + ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh); + if (!ep) { + ret = -EIO; + goto out; + } + brelse(old_bh); + + /* check whether new dir is existing directory and empty */ + if (new_inode) { + ret = -EIO; + new_ei = EXFAT_I(new_inode); + + if (new_ei->dir.dir == DIR_DELETED) { + exfat_err(sb, "abnormal access to deleted target dentry"); + goto out; + } + + p_dir = &(new_ei->dir); + new_entry = new_ei->entry; + ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh); + if (!ep) + goto out; + + new_entry_type = exfat_get_entry_type(ep); + brelse(new_bh); + + /* if new_inode exists, update ei */ + if (new_entry_type == TYPE_DIR) { + struct exfat_chain new_clu; + + new_clu.dir = new_ei->start_clu; + new_clu.size = + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode), + sbi); + new_clu.flags = new_ei->flags; + + ret = exfat_check_dir_empty(sb, &new_clu); + if (ret) + goto out; + } + } + + /* check the validity of directory name in the given new pathname */ + ret = exfat_resolve_path(new_parent_inode, new_path, &newdir, + &uni_name); + if (ret) + goto out; + + exfat_set_volume_dirty(sb); + + if (olddir.dir == newdir.dir) + ret = exfat_rename_file(new_parent_inode, &olddir, dentry, + &uni_name, ei); + else + ret = exfat_move_file(new_parent_inode, &olddir, dentry, + &newdir, &uni_name, ei); + + if (!ret && new_inode) { + /* delete entries of new_dir */ + ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh); + if (!ep) { + ret = -EIO; + goto del_out; + } + + num_entries = exfat_count_ext_entries(sb, p_dir, new_entry, ep); + if (num_entries < 0) { + ret = -EIO; + goto del_out; + } + brelse(new_bh); + + if (exfat_remove_entries(new_inode, p_dir, new_entry, 0, + num_entries + 1)) { + ret = -EIO; + goto del_out; + } + + /* Free the clusters if new_inode is a dir(as if exfat_rmdir) */ + if (new_entry_type == TYPE_DIR) { + /* new_ei, new_clu_to_free */ + struct exfat_chain new_clu_to_free; + + exfat_chain_set(&new_clu_to_free, new_ei->start_clu, + EXFAT_B_TO_CLU_ROUND_UP(i_size_read(new_inode), + sbi), new_ei->flags); + + if (exfat_free_cluster(new_inode, &new_clu_to_free)) { + /* just set I/O error only */ + ret = -EIO; + } + + i_size_write(new_inode, 0); + new_ei->start_clu = EXFAT_EOF_CLUSTER; + new_ei->flags = ALLOC_NO_FAT_CHAIN; + } +del_out: + /* Update new_inode ei + * Prevent syncing removed new_inode + * (new_ei is already initialized above code ("if (new_inode)") + */ + new_ei->dir.dir = DIR_DELETED; + } +out: + return ret; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int exfat_rename(struct user_namespace *mnt_userns, + struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +#else +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +#else +static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +#endif +#endif +{ + struct inode *old_inode, *new_inode; + struct super_block *sb = old_dir->i_sb; + loff_t i_pos; + int err; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + /* + * The VFS already checks for existence, so for local filesystems + * the RENAME_NOREPLACE implementation is equivalent to plain rename. + * Don't support any other flags + */ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; +#endif + + mutex_lock(&EXFAT_SB(sb)->s_lock); + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + + err = __exfat_rename(old_dir, EXFAT_I(old_inode), new_dir, new_dentry); + if (err) + goto unlock; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(new_dir); +#else + new_dir->i_version++; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = + EXFAT_I(new_dir)->i_crtime = current_time(new_dir); +#else + new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = + EXFAT_I(new_dir)->i_crtime = CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&new_dir->i_atime); + if (IS_DIRSYNC(new_dir)) + exfat_sync_inode(new_dir); + else + mark_inode_dirty(new_dir); + + i_pos = ((loff_t)EXFAT_I(old_inode)->dir.dir << 32) | + (EXFAT_I(old_inode)->entry & 0xffffffff); + exfat_unhash_inode(old_inode); + exfat_hash_inode(old_inode, i_pos); + if (IS_DIRSYNC(new_dir)) + exfat_sync_inode(old_inode); + else + mark_inode_dirty(old_inode); + + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { + drop_nlink(old_dir); + if (!new_inode) + inc_nlink(new_dir); + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(old_dir); +#else + old_dir->i_version++; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); +#else + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; +#endif + if (IS_DIRSYNC(old_dir)) + exfat_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + + if (new_inode) { + exfat_unhash_inode(new_inode); + + /* skip drop_nlink if new_inode already has been dropped */ + if (new_inode->i_nlink) { + drop_nlink(new_inode); + if (S_ISDIR(new_inode->i_mode)) + drop_nlink(new_inode); + } else { + exfat_warn(sb, "abnormal access to an inode dropped"); + WARN_ON(new_inode->i_nlink == 0); + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime = + current_time(new_inode); +#else + new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime = + CURRENT_TIME_SEC; +#endif + } + +unlock: + mutex_unlock(&EXFAT_SB(sb)->s_lock); + return err; +} + +const struct inode_operations exfat_dir_inode_operations = { + .create = exfat_create, + .lookup = exfat_lookup, + .unlink = exfat_unlink, + .mkdir = exfat_mkdir, + .rmdir = exfat_rmdir, + .rename = exfat_rename, + .setattr = exfat_setattr, + .getattr = exfat_getattr, +}; diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c new file mode 100644 index 000000000000..3b9fd4670aa2 --- /dev/null +++ b/fs/exfat/nls.c @@ -0,0 +1,823 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#include +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) +#include +#endif +#include + +#include "exfat_raw.h" +#include "exfat_fs.h" + +/* Upcase table macro */ +#define EXFAT_NUM_UPCASE (2918) +#define UTBL_COUNT (0x10000) + +/* + * Upcase table in compressed format (7.2.5.1 Recommended Up-case Table + * in exfat specification, See: + * https://docs.microsoft.com/en-us/windows/win32/fileio/exfat-specification). + */ +static const unsigned short uni_def_upcase[EXFAT_NUM_UPCASE] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00f7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178, + 0x0100, 0x0100, 0x0102, 0x0102, 0x0104, 0x0104, 0x0106, 0x0106, + 0x0108, 0x0108, 0x010a, 0x010a, 0x010c, 0x010c, 0x010e, 0x010e, + 0x0110, 0x0110, 0x0112, 0x0112, 0x0114, 0x0114, 0x0116, 0x0116, + 0x0118, 0x0118, 0x011a, 0x011a, 0x011c, 0x011c, 0x011e, 0x011e, + 0x0120, 0x0120, 0x0122, 0x0122, 0x0124, 0x0124, 0x0126, 0x0126, + 0x0128, 0x0128, 0x012a, 0x012a, 0x012c, 0x012c, 0x012e, 0x012e, + 0x0130, 0x0131, 0x0132, 0x0132, 0x0134, 0x0134, 0x0136, 0x0136, + 0x0138, 0x0139, 0x0139, 0x013b, 0x013b, 0x013d, 0x013d, 0x013f, + 0x013f, 0x0141, 0x0141, 0x0143, 0x0143, 0x0145, 0x0145, 0x0147, + 0x0147, 0x0149, 0x014a, 0x014a, 0x014c, 0x014c, 0x014e, 0x014e, + 0x0150, 0x0150, 0x0152, 0x0152, 0x0154, 0x0154, 0x0156, 0x0156, + 0x0158, 0x0158, 0x015a, 0x015a, 0x015c, 0x015c, 0x015e, 0x015e, + 0x0160, 0x0160, 0x0162, 0x0162, 0x0164, 0x0164, 0x0166, 0x0166, + 0x0168, 0x0168, 0x016a, 0x016a, 0x016c, 0x016c, 0x016e, 0x016e, + 0x0170, 0x0170, 0x0172, 0x0172, 0x0174, 0x0174, 0x0176, 0x0176, + 0x0178, 0x0179, 0x0179, 0x017b, 0x017b, 0x017d, 0x017d, 0x017f, + 0x0243, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, + 0x0187, 0x0189, 0x018a, 0x018b, 0x018b, 0x018d, 0x018e, 0x018f, + 0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01f6, 0x0196, 0x0197, + 0x0198, 0x0198, 0x023d, 0x019b, 0x019c, 0x019d, 0x0220, 0x019f, + 0x01a0, 0x01a0, 0x01a2, 0x01a2, 0x01a4, 0x01a4, 0x01a6, 0x01a7, + 0x01a7, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ac, 0x01ae, 0x01af, + 0x01af, 0x01b1, 0x01b2, 0x01b3, 0x01b3, 0x01b5, 0x01b5, 0x01b7, + 0x01b8, 0x01b8, 0x01ba, 0x01bb, 0x01bc, 0x01bc, 0x01be, 0x01f7, + 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x01c4, 0x01c5, 0x01c4, 0x01c7, + 0x01c8, 0x01c7, 0x01ca, 0x01cb, 0x01ca, 0x01cd, 0x01cd, 0x01cf, + 0x01cf, 0x01d1, 0x01d1, 0x01d3, 0x01d3, 0x01d5, 0x01d5, 0x01d7, + 0x01d7, 0x01d9, 0x01d9, 0x01db, 0x01db, 0x018e, 0x01de, 0x01de, + 0x01e0, 0x01e0, 0x01e2, 0x01e2, 0x01e4, 0x01e4, 0x01e6, 0x01e6, + 0x01e8, 0x01e8, 0x01ea, 0x01ea, 0x01ec, 0x01ec, 0x01ee, 0x01ee, + 0x01f0, 0x01f1, 0x01f2, 0x01f1, 0x01f4, 0x01f4, 0x01f6, 0x01f7, + 0x01f8, 0x01f8, 0x01fa, 0x01fa, 0x01fc, 0x01fc, 0x01fe, 0x01fe, + 0x0200, 0x0200, 0x0202, 0x0202, 0x0204, 0x0204, 0x0206, 0x0206, + 0x0208, 0x0208, 0x020a, 0x020a, 0x020c, 0x020c, 0x020e, 0x020e, + 0x0210, 0x0210, 0x0212, 0x0212, 0x0214, 0x0214, 0x0216, 0x0216, + 0x0218, 0x0218, 0x021a, 0x021a, 0x021c, 0x021c, 0x021e, 0x021e, + 0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0226, 0x0226, + 0x0228, 0x0228, 0x022a, 0x022a, 0x022c, 0x022c, 0x022e, 0x022e, + 0x0230, 0x0230, 0x0232, 0x0232, 0x0234, 0x0235, 0x0236, 0x0237, + 0x0238, 0x0239, 0x2c65, 0x023b, 0x023b, 0x023d, 0x2c66, 0x023f, + 0x0240, 0x0241, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246, 0x0246, + 0x0248, 0x0248, 0x024a, 0x024a, 0x024c, 0x024c, 0x024e, 0x024e, + 0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018a, + 0x0258, 0x018f, 0x025a, 0x0190, 0x025c, 0x025d, 0x025e, 0x025f, + 0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, + 0x0197, 0x0196, 0x026a, 0x2c62, 0x026c, 0x026d, 0x026e, 0x019c, + 0x0270, 0x0271, 0x019d, 0x0273, 0x0274, 0x019f, 0x0276, 0x0277, + 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x2c64, 0x027e, 0x027f, + 0x01a6, 0x0281, 0x0282, 0x01a9, 0x0284, 0x0285, 0x0286, 0x0287, + 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x028d, 0x028e, 0x028f, + 0x0290, 0x0291, 0x01b7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, + 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, + 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, + 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, + 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x02b5, 0x02b6, 0x02b7, + 0x02b8, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, + 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, + 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, + 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, + 0x02d8, 0x02d9, 0x02da, 0x02db, 0x02dc, 0x02dd, 0x02de, 0x02df, + 0x02e0, 0x02e1, 0x02e2, 0x02e3, 0x02e4, 0x02e5, 0x02e6, 0x02e7, + 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, + 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, + 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, + 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, + 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, + 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, + 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, + 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, + 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, + 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, + 0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347, + 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, + 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, + 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, + 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, + 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, + 0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, + 0x0378, 0x0379, 0x037a, 0x03fd, 0x03fe, 0x03ff, 0x037e, 0x037f, + 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0386, 0x0387, + 0x0388, 0x0389, 0x038a, 0x038b, 0x038c, 0x038d, 0x038e, 0x038f, + 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x0386, 0x0388, 0x0389, 0x038a, + 0x03b0, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x03a3, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x03cf, + 0x03d0, 0x03d1, 0x03d2, 0x03d3, 0x03d4, 0x03d5, 0x03d6, 0x03d7, + 0x03d8, 0x03d8, 0x03da, 0x03da, 0x03dc, 0x03dc, 0x03de, 0x03de, + 0x03e0, 0x03e0, 0x03e2, 0x03e2, 0x03e4, 0x03e4, 0x03e6, 0x03e6, + 0x03e8, 0x03e8, 0x03ea, 0x03ea, 0x03ec, 0x03ec, 0x03ee, 0x03ee, + 0x03f0, 0x03f1, 0x03f9, 0x03f3, 0x03f4, 0x03f5, 0x03f6, 0x03f7, + 0x03f7, 0x03f9, 0x03fa, 0x03fa, 0x03fc, 0x03fd, 0x03fe, 0x03ff, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, + 0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, + 0x0468, 0x0468, 0x046a, 0x046a, 0x046c, 0x046c, 0x046e, 0x046e, + 0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0476, 0x0476, + 0x0478, 0x0478, 0x047a, 0x047a, 0x047c, 0x047c, 0x047e, 0x047e, + 0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, + 0x0488, 0x0489, 0x048a, 0x048a, 0x048c, 0x048c, 0x048e, 0x048e, + 0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, + 0x0498, 0x0498, 0x049a, 0x049a, 0x049c, 0x049c, 0x049e, 0x049e, + 0x04a0, 0x04a0, 0x04a2, 0x04a2, 0x04a4, 0x04a4, 0x04a6, 0x04a6, + 0x04a8, 0x04a8, 0x04aa, 0x04aa, 0x04ac, 0x04ac, 0x04ae, 0x04ae, + 0x04b0, 0x04b0, 0x04b2, 0x04b2, 0x04b4, 0x04b4, 0x04b6, 0x04b6, + 0x04b8, 0x04b8, 0x04ba, 0x04ba, 0x04bc, 0x04bc, 0x04be, 0x04be, + 0x04c0, 0x04c1, 0x04c1, 0x04c3, 0x04c3, 0x04c5, 0x04c5, 0x04c7, + 0x04c7, 0x04c9, 0x04c9, 0x04cb, 0x04cb, 0x04cd, 0x04cd, 0x04c0, + 0x04d0, 0x04d0, 0x04d2, 0x04d2, 0x04d4, 0x04d4, 0x04d6, 0x04d6, + 0x04d8, 0x04d8, 0x04da, 0x04da, 0x04dc, 0x04dc, 0x04de, 0x04de, + 0x04e0, 0x04e0, 0x04e2, 0x04e2, 0x04e4, 0x04e4, 0x04e6, 0x04e6, + 0x04e8, 0x04e8, 0x04ea, 0x04ea, 0x04ec, 0x04ec, 0x04ee, 0x04ee, + 0x04f0, 0x04f0, 0x04f2, 0x04f2, 0x04f4, 0x04f4, 0x04f6, 0x04f6, + 0x04f8, 0x04f8, 0x04fa, 0x04fa, 0x04fc, 0x04fc, 0x04fe, 0x04fe, + 0x0500, 0x0500, 0x0502, 0x0502, 0x0504, 0x0504, 0x0506, 0x0506, + 0x0508, 0x0508, 0x050a, 0x050a, 0x050c, 0x050c, 0x050e, 0x050e, + 0x0510, 0x0510, 0x0512, 0x0512, 0x0514, 0x0515, 0x0516, 0x0517, + 0x0518, 0x0519, 0x051a, 0x051b, 0x051c, 0x051d, 0x051e, 0x051f, + 0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, + 0x0528, 0x0529, 0x052a, 0x052b, 0x052c, 0x052d, 0x052e, 0x052f, + 0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, + 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, + 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, + 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, + 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, + 0x0558, 0x0559, 0x055a, 0x055b, 0x055c, 0x055d, 0x055e, 0x055f, + 0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, + 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, + 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, + 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, + 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0xffff, + 0x17f6, 0x2c63, 0x1d7e, 0x1d7f, 0x1d80, 0x1d81, 0x1d82, 0x1d83, + 0x1d84, 0x1d85, 0x1d86, 0x1d87, 0x1d88, 0x1d89, 0x1d8a, 0x1d8b, + 0x1d8c, 0x1d8d, 0x1d8e, 0x1d8f, 0x1d90, 0x1d91, 0x1d92, 0x1d93, + 0x1d94, 0x1d95, 0x1d96, 0x1d97, 0x1d98, 0x1d99, 0x1d9a, 0x1d9b, + 0x1d9c, 0x1d9d, 0x1d9e, 0x1d9f, 0x1da0, 0x1da1, 0x1da2, 0x1da3, + 0x1da4, 0x1da5, 0x1da6, 0x1da7, 0x1da8, 0x1da9, 0x1daa, 0x1dab, + 0x1dac, 0x1dad, 0x1dae, 0x1daf, 0x1db0, 0x1db1, 0x1db2, 0x1db3, + 0x1db4, 0x1db5, 0x1db6, 0x1db7, 0x1db8, 0x1db9, 0x1dba, 0x1dbb, + 0x1dbc, 0x1dbd, 0x1dbe, 0x1dbf, 0x1dc0, 0x1dc1, 0x1dc2, 0x1dc3, + 0x1dc4, 0x1dc5, 0x1dc6, 0x1dc7, 0x1dc8, 0x1dc9, 0x1dca, 0x1dcb, + 0x1dcc, 0x1dcd, 0x1dce, 0x1dcf, 0x1dd0, 0x1dd1, 0x1dd2, 0x1dd3, + 0x1dd4, 0x1dd5, 0x1dd6, 0x1dd7, 0x1dd8, 0x1dd9, 0x1dda, 0x1ddb, + 0x1ddc, 0x1ddd, 0x1dde, 0x1ddf, 0x1de0, 0x1de1, 0x1de2, 0x1de3, + 0x1de4, 0x1de5, 0x1de6, 0x1de7, 0x1de8, 0x1de9, 0x1dea, 0x1deb, + 0x1dec, 0x1ded, 0x1dee, 0x1def, 0x1df0, 0x1df1, 0x1df2, 0x1df3, + 0x1df4, 0x1df5, 0x1df6, 0x1df7, 0x1df8, 0x1df9, 0x1dfa, 0x1dfb, + 0x1dfc, 0x1dfd, 0x1dfe, 0x1dff, 0x1e00, 0x1e00, 0x1e02, 0x1e02, + 0x1e04, 0x1e04, 0x1e06, 0x1e06, 0x1e08, 0x1e08, 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, 0x1e0e, 0x1e0e, 0x1e10, 0x1e10, 0x1e12, 0x1e12, + 0x1e14, 0x1e14, 0x1e16, 0x1e16, 0x1e18, 0x1e18, 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, 0x1e1e, 0x1e1e, 0x1e20, 0x1e20, 0x1e22, 0x1e22, + 0x1e24, 0x1e24, 0x1e26, 0x1e26, 0x1e28, 0x1e28, 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, 0x1e2e, 0x1e2e, 0x1e30, 0x1e30, 0x1e32, 0x1e32, + 0x1e34, 0x1e34, 0x1e36, 0x1e36, 0x1e38, 0x1e38, 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, 0x1e3e, 0x1e3e, 0x1e40, 0x1e40, 0x1e42, 0x1e42, + 0x1e44, 0x1e44, 0x1e46, 0x1e46, 0x1e48, 0x1e48, 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, 0x1e4e, 0x1e4e, 0x1e50, 0x1e50, 0x1e52, 0x1e52, + 0x1e54, 0x1e54, 0x1e56, 0x1e56, 0x1e58, 0x1e58, 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, 0x1e5e, 0x1e5e, 0x1e60, 0x1e60, 0x1e62, 0x1e62, + 0x1e64, 0x1e64, 0x1e66, 0x1e66, 0x1e68, 0x1e68, 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, 0x1e6e, 0x1e6e, 0x1e70, 0x1e70, 0x1e72, 0x1e72, + 0x1e74, 0x1e74, 0x1e76, 0x1e76, 0x1e78, 0x1e78, 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, 0x1e7e, 0x1e7e, 0x1e80, 0x1e80, 0x1e82, 0x1e82, + 0x1e84, 0x1e84, 0x1e86, 0x1e86, 0x1e88, 0x1e88, 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, 0x1e8e, 0x1e8e, 0x1e90, 0x1e90, 0x1e92, 0x1e92, + 0x1e94, 0x1e94, 0x1e96, 0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1e9b, + 0x1e9c, 0x1e9d, 0x1e9e, 0x1e9f, 0x1ea0, 0x1ea0, 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, 0x1ea6, 0x1ea6, 0x1ea8, 0x1ea8, 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, 0x1eae, 0x1eae, 0x1eb0, 0x1eb0, 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, 0x1eb6, 0x1eb6, 0x1eb8, 0x1eb8, 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, 0x1ebe, 0x1ebe, 0x1ec0, 0x1ec0, 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, 0x1ec6, 0x1ec6, 0x1ec8, 0x1ec8, 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, 0x1ece, 0x1ece, 0x1ed0, 0x1ed0, 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, 0x1ed6, 0x1ed6, 0x1ed8, 0x1ed8, 0x1eda, 0x1eda, + 0x1edc, 0x1edc, 0x1ede, 0x1ede, 0x1ee0, 0x1ee0, 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, 0x1ee6, 0x1ee6, 0x1ee8, 0x1ee8, 0x1eea, 0x1eea, + 0x1eec, 0x1eec, 0x1eee, 0x1eee, 0x1ef0, 0x1ef0, 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, 0x1ef6, 0x1ef6, 0x1ef8, 0x1ef8, 0x1efa, 0x1efb, + 0x1efc, 0x1efd, 0x1efe, 0x1eff, 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, + 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, + 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, + 0x1f1c, 0x1f1d, 0x1f16, 0x1f17, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, + 0x1f1c, 0x1f1d, 0x1f1e, 0x1f1f, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, + 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, + 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, + 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, + 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, + 0x1f4c, 0x1f4d, 0x1f46, 0x1f47, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, + 0x1f4c, 0x1f4d, 0x1f4e, 0x1f4f, 0x1f50, 0x1f59, 0x1f52, 0x1f5b, + 0x1f54, 0x1f5d, 0x1f56, 0x1f5f, 0x1f58, 0x1f59, 0x1f5a, 0x1f5b, + 0x1f5c, 0x1f5d, 0x1f5e, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, + 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, + 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, + 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, + 0x1ffa, 0x1ffb, 0x1f7e, 0x1f7f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, + 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, + 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, + 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, + 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, + 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, + 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fb2, 0x1fbc, + 0x1fb4, 0x1fb5, 0x1fb6, 0x1fb7, 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, + 0x1fbc, 0x1fbd, 0x1fbe, 0x1fbf, 0x1fc0, 0x1fc1, 0x1fc2, 0x1fc3, + 0x1fc4, 0x1fc5, 0x1fc6, 0x1fc7, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, + 0x1fc3, 0x1fcd, 0x1fce, 0x1fcf, 0x1fd8, 0x1fd9, 0x1fd2, 0x1fd3, + 0x1fd4, 0x1fd5, 0x1fd6, 0x1fd7, 0x1fd8, 0x1fd9, 0x1fda, 0x1fdb, + 0x1fdc, 0x1fdd, 0x1fde, 0x1fdf, 0x1fe8, 0x1fe9, 0x1fe2, 0x1fe3, + 0x1fe4, 0x1fec, 0x1fe6, 0x1fe7, 0x1fe8, 0x1fe9, 0x1fea, 0x1feb, + 0x1fec, 0x1fed, 0x1fee, 0x1fef, 0x1ff0, 0x1ff1, 0x1ff2, 0x1ff3, + 0x1ff4, 0x1ff5, 0x1ff6, 0x1ff7, 0x1ff8, 0x1ff9, 0x1ffa, 0x1ffb, + 0x1ff3, 0x1ffd, 0x1ffe, 0x1fff, 0x2000, 0x2001, 0x2002, 0x2003, + 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x200b, + 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2013, + 0x2014, 0x2015, 0x2016, 0x2017, 0x2018, 0x2019, 0x201a, 0x201b, + 0x201c, 0x201d, 0x201e, 0x201f, 0x2020, 0x2021, 0x2022, 0x2023, + 0x2024, 0x2025, 0x2026, 0x2027, 0x2028, 0x2029, 0x202a, 0x202b, + 0x202c, 0x202d, 0x202e, 0x202f, 0x2030, 0x2031, 0x2032, 0x2033, + 0x2034, 0x2035, 0x2036, 0x2037, 0x2038, 0x2039, 0x203a, 0x203b, + 0x203c, 0x203d, 0x203e, 0x203f, 0x2040, 0x2041, 0x2042, 0x2043, + 0x2044, 0x2045, 0x2046, 0x2047, 0x2048, 0x2049, 0x204a, 0x204b, + 0x204c, 0x204d, 0x204e, 0x204f, 0x2050, 0x2051, 0x2052, 0x2053, + 0x2054, 0x2055, 0x2056, 0x2057, 0x2058, 0x2059, 0x205a, 0x205b, + 0x205c, 0x205d, 0x205e, 0x205f, 0x2060, 0x2061, 0x2062, 0x2063, + 0x2064, 0x2065, 0x2066, 0x2067, 0x2068, 0x2069, 0x206a, 0x206b, + 0x206c, 0x206d, 0x206e, 0x206f, 0x2070, 0x2071, 0x2072, 0x2073, + 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x207a, 0x207b, + 0x207c, 0x207d, 0x207e, 0x207f, 0x2080, 0x2081, 0x2082, 0x2083, + 0x2084, 0x2085, 0x2086, 0x2087, 0x2088, 0x2089, 0x208a, 0x208b, + 0x208c, 0x208d, 0x208e, 0x208f, 0x2090, 0x2091, 0x2092, 0x2093, + 0x2094, 0x2095, 0x2096, 0x2097, 0x2098, 0x2099, 0x209a, 0x209b, + 0x209c, 0x209d, 0x209e, 0x209f, 0x20a0, 0x20a1, 0x20a2, 0x20a3, + 0x20a4, 0x20a5, 0x20a6, 0x20a7, 0x20a8, 0x20a9, 0x20aa, 0x20ab, + 0x20ac, 0x20ad, 0x20ae, 0x20af, 0x20b0, 0x20b1, 0x20b2, 0x20b3, + 0x20b4, 0x20b5, 0x20b6, 0x20b7, 0x20b8, 0x20b9, 0x20ba, 0x20bb, + 0x20bc, 0x20bd, 0x20be, 0x20bf, 0x20c0, 0x20c1, 0x20c2, 0x20c3, + 0x20c4, 0x20c5, 0x20c6, 0x20c7, 0x20c8, 0x20c9, 0x20ca, 0x20cb, + 0x20cc, 0x20cd, 0x20ce, 0x20cf, 0x20d0, 0x20d1, 0x20d2, 0x20d3, + 0x20d4, 0x20d5, 0x20d6, 0x20d7, 0x20d8, 0x20d9, 0x20da, 0x20db, + 0x20dc, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x20e1, 0x20e2, 0x20e3, + 0x20e4, 0x20e5, 0x20e6, 0x20e7, 0x20e8, 0x20e9, 0x20ea, 0x20eb, + 0x20ec, 0x20ed, 0x20ee, 0x20ef, 0x20f0, 0x20f1, 0x20f2, 0x20f3, + 0x20f4, 0x20f5, 0x20f6, 0x20f7, 0x20f8, 0x20f9, 0x20fa, 0x20fb, + 0x20fc, 0x20fd, 0x20fe, 0x20ff, 0x2100, 0x2101, 0x2102, 0x2103, + 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210a, 0x210b, + 0x210c, 0x210d, 0x210e, 0x210f, 0x2110, 0x2111, 0x2112, 0x2113, + 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211a, 0x211b, + 0x211c, 0x211d, 0x211e, 0x211f, 0x2120, 0x2121, 0x2122, 0x2123, + 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212a, 0x212b, + 0x212c, 0x212d, 0x212e, 0x212f, 0x2130, 0x2131, 0x2132, 0x2133, + 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213a, 0x213b, + 0x213c, 0x213d, 0x213e, 0x213f, 0x2140, 0x2141, 0x2142, 0x2143, + 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214a, 0x214b, + 0x214c, 0x214d, 0x2132, 0x214f, 0x2150, 0x2151, 0x2152, 0x2153, + 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215a, 0x215b, + 0x215c, 0x215d, 0x215e, 0x215f, 0x2160, 0x2161, 0x2162, 0x2163, + 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216a, 0x216b, + 0x216c, 0x216d, 0x216e, 0x216f, 0x2160, 0x2161, 0x2162, 0x2163, + 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216a, 0x216b, + 0x216c, 0x216d, 0x216e, 0x216f, 0x2180, 0x2181, 0x2182, 0x2183, + 0x2183, 0xffff, 0x034b, 0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, + 0x24bb, 0x24bc, 0x24bd, 0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, + 0x24c3, 0x24c4, 0x24c5, 0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, + 0x24cb, 0x24cc, 0x24cd, 0x24ce, 0x24cf, 0xffff, 0x0746, 0x2c00, + 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07, 0x2c08, + 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f, 0x2c10, + 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, 0x2c18, + 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, + 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, 0x2c28, + 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x2c5f, 0x2c60, + 0x2c60, 0x2c62, 0x2c63, 0x2c64, 0x2c65, 0x2c66, 0x2c67, 0x2c67, + 0x2c69, 0x2c69, 0x2c6b, 0x2c6b, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, + 0x2c71, 0x2c72, 0x2c73, 0x2c74, 0x2c75, 0x2c75, 0x2c77, 0x2c78, + 0x2c79, 0x2c7a, 0x2c7b, 0x2c7c, 0x2c7d, 0x2c7e, 0x2c7f, 0x2c80, + 0x2c80, 0x2c82, 0x2c82, 0x2c84, 0x2c84, 0x2c86, 0x2c86, 0x2c88, + 0x2c88, 0x2c8a, 0x2c8a, 0x2c8c, 0x2c8c, 0x2c8e, 0x2c8e, 0x2c90, + 0x2c90, 0x2c92, 0x2c92, 0x2c94, 0x2c94, 0x2c96, 0x2c96, 0x2c98, + 0x2c98, 0x2c9a, 0x2c9a, 0x2c9c, 0x2c9c, 0x2c9e, 0x2c9e, 0x2ca0, + 0x2ca0, 0x2ca2, 0x2ca2, 0x2ca4, 0x2ca4, 0x2ca6, 0x2ca6, 0x2ca8, + 0x2ca8, 0x2caa, 0x2caa, 0x2cac, 0x2cac, 0x2cae, 0x2cae, 0x2cb0, + 0x2cb0, 0x2cb2, 0x2cb2, 0x2cb4, 0x2cb4, 0x2cb6, 0x2cb6, 0x2cb8, + 0x2cb8, 0x2cba, 0x2cba, 0x2cbc, 0x2cbc, 0x2cbe, 0x2cbe, 0x2cc0, + 0x2cc0, 0x2cc2, 0x2cc2, 0x2cc4, 0x2cc4, 0x2cc6, 0x2cc6, 0x2cc8, + 0x2cc8, 0x2cca, 0x2cca, 0x2ccc, 0x2ccc, 0x2cce, 0x2cce, 0x2cd0, + 0x2cd0, 0x2cd2, 0x2cd2, 0x2cd4, 0x2cd4, 0x2cd6, 0x2cd6, 0x2cd8, + 0x2cd8, 0x2cda, 0x2cda, 0x2cdc, 0x2cdc, 0x2cde, 0x2cde, 0x2ce0, + 0x2ce0, 0x2ce2, 0x2ce2, 0x2ce4, 0x2ce5, 0x2ce6, 0x2ce7, 0x2ce8, + 0x2ce9, 0x2cea, 0x2ceb, 0x2cec, 0x2ced, 0x2cee, 0x2cef, 0x2cf0, + 0x2cf1, 0x2cf2, 0x2cf3, 0x2cf4, 0x2cf5, 0x2cf6, 0x2cf7, 0x2cf8, + 0x2cf9, 0x2cfa, 0x2cfb, 0x2cfc, 0x2cfd, 0x2cfe, 0x2cff, 0x10a0, + 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, + 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, + 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, + 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0, + 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0xffff, 0xd21b, 0xff21, + 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, + 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30, 0xff31, + 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, + 0xff3a, 0xff5b, 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, + 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68, 0xff69, + 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70, 0xff71, + 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, + 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, + 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, + 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, + 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, + 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0xff9e, 0xff9f, 0xffa0, 0xffa1, + 0xffa2, 0xffa3, 0xffa4, 0xffa5, 0xffa6, 0xffa7, 0xffa8, 0xffa9, + 0xffaa, 0xffab, 0xffac, 0xffad, 0xffae, 0xffaf, 0xffb0, 0xffb1, + 0xffb2, 0xffb3, 0xffb4, 0xffb5, 0xffb6, 0xffb7, 0xffb8, 0xffb9, + 0xffba, 0xffbb, 0xffbc, 0xffbd, 0xffbe, 0xffbf, 0xffc0, 0xffc1, + 0xffc2, 0xffc3, 0xffc4, 0xffc5, 0xffc6, 0xffc7, 0xffc8, 0xffc9, + 0xffca, 0xffcb, 0xffcc, 0xffcd, 0xffce, 0xffcf, 0xffd0, 0xffd1, + 0xffd2, 0xffd3, 0xffd4, 0xffd5, 0xffd6, 0xffd7, 0xffd8, 0xffd9, + 0xffda, 0xffdb, 0xffdc, 0xffdd, 0xffde, 0xffdf, 0xffe0, 0xffe1, + 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe7, 0xffe8, 0xffe9, + 0xffea, 0xffeb, 0xffec, 0xffed, 0xffee, 0xffef, 0xfff0, 0xfff1, + 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7, 0xfff8, 0xfff9, + 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff, +}; + +/* + * Allow full-width illegal characters : + * "MS windows 7" supports full-width-invalid-name-characters. + * So we should check half-width-invalid-name-characters(ASCII) only + * for compatibility. + * + * " * / : < > ? \ | + */ +static unsigned short bad_uni_chars[] = { + 0x0022, 0x002A, 0x002F, 0x003A, + 0x003C, 0x003E, 0x003F, 0x005C, 0x007C, + 0 +}; + +static int exfat_convert_char_to_ucs2(struct nls_table *nls, + const unsigned char *ch, int ch_len, unsigned short *ucs2, + int *lossy) +{ + int len; + + *ucs2 = 0x0; + + if (ch[0] < 0x80) { + *ucs2 = ch[0]; + return 1; + } + + len = nls->char2uni(ch, ch_len, ucs2); + if (len < 0) { + /* conversion failed */ + if (lossy != NULL) + *lossy |= NLS_NAME_LOSSY; + *ucs2 = '_'; + return 1; + } + return len; +} + +static int exfat_convert_ucs2_to_char(struct nls_table *nls, + unsigned short ucs2, unsigned char *ch, int *lossy) +{ + int len; + + ch[0] = 0x0; + + if (ucs2 < 0x0080) { + ch[0] = ucs2; + return 1; + } + + len = nls->uni2char(ucs2, ch, MAX_CHARSET_SIZE); + if (len < 0) { + /* conversion failed */ + if (lossy != NULL) + *lossy |= NLS_NAME_LOSSY; + ch[0] = '_'; + return 1; + } + return len; +} + +unsigned short exfat_toupper(struct super_block *sb, unsigned short a) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return sbi->vol_utbl[a] ? sbi->vol_utbl[a] : a; +} + +static unsigned short *exfat_wstrchr(unsigned short *str, unsigned short wchar) +{ + while (*str) { + if (*(str++) == wchar) + return str; + } + return NULL; +} + +int exfat_uniname_ncmp(struct super_block *sb, unsigned short *a, + unsigned short *b, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++, a++, b++) + if (exfat_toupper(sb, *a) != exfat_toupper(sb, *b)) + return 1; + return 0; +} + +static int exfat_utf16_to_utf8(struct super_block *sb, + struct exfat_uni_name *p_uniname, unsigned char *p_cstring, + int buflen) +{ + int len; + const unsigned short *uniname = p_uniname->name; + + /* always len >= 0 */ + len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN, + p_cstring, buflen); + p_cstring[len] = '\0'; + return len; +} + +static int exfat_utf8_to_utf16(struct super_block *sb, + const unsigned char *p_cstring, const int len, + struct exfat_uni_name *p_uniname, int *p_lossy) +{ + int i, unilen, lossy = NLS_NAME_NO_LOSSY; + __le16 upname[MAX_NAME_LENGTH + 1]; + unsigned short *uniname = p_uniname->name; + + WARN_ON(!len); + + unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN, + (wchar_t *)uniname, MAX_NAME_LENGTH + 2); + if (unilen < 0) { + exfat_err(sb, "failed to %s (err : %d) nls len : %d", + __func__, unilen, len); + return unilen; + } + + if (unilen > MAX_NAME_LENGTH) { + exfat_debug(sb, "failed to %s (estr:ENAMETOOLONG) nls len : %d, unilen : %d > %d", + __func__, len, unilen, MAX_NAME_LENGTH); + return -ENAMETOOLONG; + } + + for (i = 0; i < unilen; i++) { + if (*uniname < 0x0020 || + exfat_wstrchr(bad_uni_chars, *uniname)) + lossy |= NLS_NAME_LOSSY; + + upname[i] = cpu_to_le16(exfat_toupper(sb, *uniname)); + uniname++; + } + + *uniname = '\0'; + p_uniname->name_len = unilen; + p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0, + CS_DEFAULT); + + if (p_lossy) + *p_lossy = lossy; + return unilen; +} + +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 + +static int __exfat_utf16_to_nls(struct super_block *sb, + struct exfat_uni_name *p_uniname, unsigned char *p_cstring, + int buflen) +{ + int i, j, len, out_len = 0; + unsigned char buf[MAX_CHARSET_SIZE]; + const unsigned short *uniname = p_uniname->name; + struct nls_table *nls = EXFAT_SB(sb)->nls_io; + + i = 0; + while (i < MAX_NAME_LENGTH && out_len < (buflen - 1)) { + if (*uniname == '\0') + break; + if ((*uniname & SURROGATE_MASK) != SURROGATE_PAIR) { + len = exfat_convert_ucs2_to_char(nls, *uniname, buf, + NULL); + } else { + /* Process UTF-16 surrogate pair as one character */ + if (!(*uniname & SURROGATE_LOW) && + i+1 < MAX_NAME_LENGTH && + (*(uniname+1) & SURROGATE_MASK) == SURROGATE_PAIR && + (*(uniname+1) & SURROGATE_LOW)) { + uniname++; + i++; + } + + /* + * UTF-16 surrogate pair encodes code points above + * U+FFFF. Code points above U+FFFF are not supported + * by kernel NLS framework therefore use replacement + * character + */ + len = 1; + buf[0] = '_'; + } + + if (out_len + len >= buflen) + len = buflen - 1 - out_len; + out_len += len; + + if (len > 1) { + for (j = 0; j < len; j++) + *p_cstring++ = buf[j]; + } else { /* len == 1 */ + *p_cstring++ = *buf; + } + + uniname++; + i++; + } + + *p_cstring = '\0'; + return out_len; +} + +static int exfat_nls_to_ucs2(struct super_block *sb, + const unsigned char *p_cstring, const int len, + struct exfat_uni_name *p_uniname, int *p_lossy) +{ + int i = 0, unilen = 0, lossy = NLS_NAME_NO_LOSSY; + __le16 upname[MAX_NAME_LENGTH + 1]; + unsigned short *uniname = p_uniname->name; + struct nls_table *nls = EXFAT_SB(sb)->nls_io; + + WARN_ON(!len); + + while (unilen < MAX_NAME_LENGTH && i < len) { + i += exfat_convert_char_to_ucs2(nls, p_cstring + i, len - i, + uniname, &lossy); + + if (*uniname < 0x0020 || + exfat_wstrchr(bad_uni_chars, *uniname)) + lossy |= NLS_NAME_LOSSY; + + upname[unilen] = cpu_to_le16(exfat_toupper(sb, *uniname)); + uniname++; + unilen++; + } + + if (p_cstring[i] != '\0') + lossy |= NLS_NAME_OVERLEN; + + *uniname = '\0'; + p_uniname->name_len = unilen; + p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0, + CS_DEFAULT); + + if (p_lossy) + *p_lossy = lossy; + return unilen; +} + +int exfat_utf16_to_nls(struct super_block *sb, struct exfat_uni_name *uniname, + unsigned char *p_cstring, int buflen) +{ + if (EXFAT_SB(sb)->options.utf8) + return exfat_utf16_to_utf8(sb, uniname, p_cstring, + buflen); + return __exfat_utf16_to_nls(sb, uniname, p_cstring, buflen); +} + +int exfat_nls_to_utf16(struct super_block *sb, const unsigned char *p_cstring, + const int len, struct exfat_uni_name *uniname, int *p_lossy) +{ + if (EXFAT_SB(sb)->options.utf8) + return exfat_utf8_to_utf16(sb, p_cstring, len, + uniname, p_lossy); + return exfat_nls_to_ucs2(sb, p_cstring, len, uniname, p_lossy); +} + +static int exfat_load_upcase_table(struct super_block *sb, + sector_t sector, unsigned long long num_sectors, + unsigned int utbl_checksum) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned int sect_size = sb->s_blocksize; + unsigned int i, index = 0; + u32 chksum = 0; + int ret; + unsigned char skip = false; + unsigned short *upcase_table; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); +#else + upcase_table = vzalloc(UTBL_COUNT * sizeof(unsigned short)); +#endif + if (!upcase_table) + return -ENOMEM; + + sbi->vol_utbl = upcase_table; + num_sectors += sector; + + while (sector < num_sectors) { + struct buffer_head *bh; + + bh = sb_bread(sb, sector); + if (!bh) { + exfat_err(sb, "failed to read sector(0x%llx)", + (unsigned long long)sector); + ret = -EIO; + goto free_table; + } + sector++; + for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) { + unsigned short uni = get_unaligned_le16(bh->b_data + i); + + if (skip) { + index += uni; + skip = false; + } else if (uni == index) { + index++; + } else if (uni == 0xFFFF) { + skip = true; + } else { /* uni != index , uni != 0xFFFF */ + upcase_table[index] = uni; + index++; + } + } + chksum = exfat_calc_chksum32(bh->b_data, i, chksum, CS_DEFAULT); + brelse(bh); + } + + if (index >= 0xFFFF && utbl_checksum == chksum) + return 0; + + exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)", + index, chksum, utbl_checksum); + ret = -EINVAL; +free_table: + exfat_free_upcase_table(sbi); + return ret; +} + +static int exfat_load_default_upcase_table(struct super_block *sb) +{ + int i, ret = -EIO; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned char skip = false; + unsigned short uni = 0, *upcase_table; + unsigned int index = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) + upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL); +#else + upcase_table = vzalloc(UTBL_COUNT * sizeof(unsigned short)); +#endif + if (!upcase_table) + return -ENOMEM; + + sbi->vol_utbl = upcase_table; + + for (i = 0; index <= 0xFFFF && i < EXFAT_NUM_UPCASE; i++) { + uni = uni_def_upcase[i]; + if (skip) { + index += uni; + skip = false; + } else if (uni == index) { + index++; + } else if (uni == 0xFFFF) { + skip = true; + } else { + upcase_table[index] = uni; + index++; + } + } + + if (index >= 0xFFFF) + return 0; + + /* FATAL error: default upcase table has error */ + exfat_free_upcase_table(sbi); + return ret; +} + +int exfat_create_upcase_table(struct super_block *sb) +{ + int i, ret; + unsigned int tbl_clu, type; + sector_t sector; + unsigned long long tbl_size, num_sectors; + unsigned char blksize_bits = sb->s_blocksize_bits; + struct exfat_chain clu; + struct exfat_dentry *ep; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; + + clu.dir = sbi->root_dir; + clu.flags = ALLOC_FAT_CHAIN; + + while (clu.dir != EXFAT_EOF_CLUSTER) { + for (i = 0; i < sbi->dentries_per_clu; i++) { + ep = exfat_get_dentry(sb, &clu, i, &bh); + if (!ep) + return -EIO; + + type = exfat_get_entry_type(ep); + if (type == TYPE_UNUSED) { + brelse(bh); + break; + } + + if (type != TYPE_UPCASE) { + brelse(bh); + continue; + } + + tbl_clu = le32_to_cpu(ep->dentry.upcase.start_clu); + tbl_size = le64_to_cpu(ep->dentry.upcase.size); + + sector = exfat_cluster_to_sector(sbi, tbl_clu); + num_sectors = ((tbl_size - 1) >> blksize_bits) + 1; + ret = exfat_load_upcase_table(sb, sector, num_sectors, + le32_to_cpu(ep->dentry.upcase.checksum)); + + brelse(bh); + if (ret && ret != -EIO) + goto load_default; + + /* load successfully */ + return ret; + } + + if (exfat_get_next_cluster(sb, &(clu.dir))) + return -EIO; + } + +load_default: + /* load default upcase table */ + return exfat_load_default_upcase_table(sb); +} + +void exfat_free_upcase_table(struct exfat_sb_info *sbi) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) + kvfree(sbi->vol_utbl); +#else + vfree(sbi->vol_utbl); +#endif +} diff --git a/fs/exfat/super.c b/fs/exfat/super.c new file mode 100644 index 000000000000..c5d55b0308a9 --- /dev/null +++ b/fs/exfat/super.c @@ -0,0 +1,1178 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. + */ + +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +#include +#include +#else +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) +#include +#endif + +#include "exfat_raw.h" +#include "exfat_fs.h" + +#ifndef CONFIG_EXFAT_DEFAULT_IOCHARSET /* if Kconfig lacked iocharset */ +#define CONFIG_EXFAT_DEFAULT_IOCHARSET "utf8" +#endif + +static char exfat_default_iocharset[] = CONFIG_EXFAT_DEFAULT_IOCHARSET; +static struct kmem_cache *exfat_inode_cachep; + +static void exfat_free_iocharset(struct exfat_sb_info *sbi) +{ + if (sbi->options.iocharset != exfat_default_iocharset) + kfree(sbi->options.iocharset); +} + +static void exfat_delayed_free(struct rcu_head *p) +{ + struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu); + + unload_nls(sbi->nls_io); + exfat_free_iocharset(sbi); + exfat_free_upcase_table(sbi); + kfree(sbi); +} + +static void exfat_put_super(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + mutex_lock(&sbi->s_lock); + exfat_free_bitmap(sbi); + brelse(sbi->boot_bh); + mutex_unlock(&sbi->s_lock); + + call_rcu(&sbi->rcu, exfat_delayed_free); +} + +static int exfat_sync_fs(struct super_block *sb, int wait) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int err = 0; + + if (!wait) + return 0; + + /* If there are some dirty buffers in the bdev inode */ + mutex_lock(&sbi->s_lock); + sync_blockdev(sb->s_bdev); + if (exfat_clear_volume_dirty(sb)) + err = -EIO; + mutex_unlock(&sbi->s_lock); + return err; +} + +static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + unsigned long long id = huge_encode_dev(sb->s_bdev->bd_dev); + + if (sbi->used_clusters == EXFAT_CLUSTERS_UNTRACKED) { + mutex_lock(&sbi->s_lock); + if (exfat_count_used_clusters(sb, &sbi->used_clusters)) { + mutex_unlock(&sbi->s_lock); + return -EIO; + } + mutex_unlock(&sbi->s_lock); + } + + buf->f_type = sb->s_magic; + buf->f_bsize = sbi->cluster_size; + buf->f_blocks = sbi->num_clusters - 2; /* clu 0 & 1 */ + buf->f_bfree = buf->f_blocks - sbi->used_clusters; + buf->f_bavail = buf->f_bfree; + buf->f_fsid.val[0] = (unsigned int)id; + buf->f_fsid.val[1] = (unsigned int)(id >> 32); + /* Unicode utf16 255 characters */ + buf->f_namelen = EXFAT_MAX_FILE_LEN * NLS_MAX_CHARSET_SIZE; + return 0; +} + +static int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flags) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct boot_sector *p_boot = (struct boot_sector *)sbi->boot_bh->b_data; + + /* retain persistent-flags */ + new_flags |= sbi->vol_flags_persistent; + + /* flags are not changed */ + if (sbi->vol_flags == new_flags) + return 0; + + sbi->vol_flags = new_flags; + + /* skip updating volume dirty flag, + * if this volume has been mounted with read-only + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + if (sb_rdonly(sb)) +#else + if (sb->s_flags & MS_RDONLY) +#endif + return 0; + + p_boot->vol_flags = cpu_to_le16(new_flags); + + set_buffer_uptodate(sbi->boot_bh); + mark_buffer_dirty(sbi->boot_bh); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + __sync_dirty_buffer(sbi->boot_bh, REQ_SYNC | REQ_FUA | REQ_PREFLUSH); +#else + __sync_dirty_buffer(sbi->boot_bh, WRITE_FLUSH_FUA); +#endif + + return 0; +} + +int exfat_set_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY); +} + +int exfat_clear_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY); +} + +static int exfat_show_options(struct seq_file *m, struct dentry *root) +{ + struct super_block *sb = root->d_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_mount_options *opts = &sbi->options; + + /* Show partition info */ + if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->fs_uid)); + if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->fs_gid)); + seq_printf(m, ",fmask=%04o,dmask=%04o", opts->fs_fmask, opts->fs_dmask); + if (opts->allow_utime) + seq_printf(m, ",allow_utime=%04o", opts->allow_utime); + if (opts->utf8) + seq_puts(m, ",iocharset=utf8"); + else if (sbi->nls_io) + seq_printf(m, ",iocharset=%s", sbi->nls_io->charset); + if (opts->errors == EXFAT_ERRORS_CONT) + seq_puts(m, ",errors=continue"); + else if (opts->errors == EXFAT_ERRORS_PANIC) + seq_puts(m, ",errors=panic"); + else + seq_puts(m, ",errors=remount-ro"); + if (opts->discard) + seq_puts(m, ",discard"); + if (opts->keep_last_dots) + seq_puts(m, ",keep_last_dots"); + if (opts->sys_tz) + seq_puts(m, ",sys_tz"); + else if (opts->time_offset) + seq_printf(m, ",time_offset=%d", opts->time_offset); + return 0; +} + +static struct inode *exfat_alloc_inode(struct super_block *sb) +{ + struct exfat_inode_info *ei; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) + ei = alloc_inode_sb(sb, exfat_inode_cachep, GFP_NOFS); +#else + ei = kmem_cache_alloc(exfat_inode_cachep, GFP_NOFS); +#endif + if (!ei) + return NULL; + + init_rwsem(&ei->truncate_lock); + return &ei->vfs_inode; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) +static void exfat_free_inode(struct inode *inode) +{ + kmem_cache_free(exfat_inode_cachep, EXFAT_I(inode)); +} +#else +static void exfat_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(exfat_inode_cachep, EXFAT_I(inode)); +} + +static void exfat_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, exfat_i_callback); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +static int exfat_remount(struct super_block *sb, int *flags, char *data) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + *flags |= SB_NODIRATIME; +#else + *flags |= MS_NODIRATIME; +#endif + + sync_filesystem(sb); + return 0; +} +#endif + +static const struct super_operations exfat_sops = { + .alloc_inode = exfat_alloc_inode, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + .free_inode = exfat_free_inode, +#else + .destroy_inode = exfat_destroy_inode, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) + .remount_fs = exfat_remount, +#endif + .write_inode = exfat_write_inode, + .evict_inode = exfat_evict_inode, + .put_super = exfat_put_super, + .sync_fs = exfat_sync_fs, + .statfs = exfat_statfs, + .show_options = exfat_show_options, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +enum { + Opt_uid, + Opt_gid, + Opt_umask, + Opt_dmask, + Opt_fmask, + Opt_allow_utime, + Opt_charset, + Opt_errors, + Opt_discard, + Opt_keep_last_dots, + Opt_sys_tz, + Opt_time_offset, + + /* Deprecated options */ + Opt_utf8, + Opt_debug, + Opt_namecase, + Opt_codepage, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static const struct constant_table exfat_param_enums[] = { + { "continue", EXFAT_ERRORS_CONT }, + { "panic", EXFAT_ERRORS_PANIC }, + { "remount-ro", EXFAT_ERRORS_RO }, + {} +}; +#else +static const struct fs_parameter_enum exfat_param_enums[] = { + { Opt_errors, "continue", EXFAT_ERRORS_CONT }, + { Opt_errors, "panic", EXFAT_ERRORS_PANIC }, + { Opt_errors, "remount-ro", EXFAT_ERRORS_RO }, + {} +}; +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static const struct fs_parameter_spec exfat_parameters[] = { +#else +static const struct fs_parameter_spec exfat_param_specs[] = { +#endif + fsparam_u32("uid", Opt_uid), + fsparam_u32("gid", Opt_gid), + fsparam_u32oct("umask", Opt_umask), + fsparam_u32oct("dmask", Opt_dmask), + fsparam_u32oct("fmask", Opt_fmask), + fsparam_u32oct("allow_utime", Opt_allow_utime), + fsparam_string("iocharset", Opt_charset), +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + fsparam_enum("errors", Opt_errors, exfat_param_enums), +#else + fsparam_enum("errors", Opt_errors), +#endif + fsparam_flag("discard", Opt_discard), + fsparam_flag("keep_last_dots", Opt_keep_last_dots), + fsparam_flag("sys_tz", Opt_sys_tz), + fsparam_s32("time_offset", Opt_time_offset), + __fsparam(NULL, "utf8", Opt_utf8, fs_param_deprecated, + NULL), + __fsparam(NULL, "debug", Opt_debug, fs_param_deprecated, + NULL), + __fsparam(fs_param_is_u32, "namecase", Opt_namecase, + fs_param_deprecated, NULL), + __fsparam(fs_param_is_u32, "codepage", Opt_codepage, + fs_param_deprecated, NULL), + {} +}; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(5, 6, 0) +static const struct fs_parameter_description exfat_parameters = { + .name = "exfat", + .specs = exfat_param_specs, + .enums = exfat_param_enums, +}; +#endif + +static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct exfat_sb_info *sbi = fc->s_fs_info; + struct exfat_mount_options *opts = &sbi->options; + struct fs_parse_result result; + int opt; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + opt = fs_parse(fc, exfat_parameters, param, &result); +#else + opt = fs_parse(fc, &exfat_parameters, param, &result); +#endif + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + opts->fs_uid = make_kuid(current_user_ns(), result.uint_32); + break; + case Opt_gid: + opts->fs_gid = make_kgid(current_user_ns(), result.uint_32); + break; + case Opt_umask: + opts->fs_fmask = result.uint_32; + opts->fs_dmask = result.uint_32; + break; + case Opt_dmask: + opts->fs_dmask = result.uint_32; + break; + case Opt_fmask: + opts->fs_fmask = result.uint_32; + break; + case Opt_allow_utime: + opts->allow_utime = result.uint_32 & 0022; + break; + case Opt_charset: + exfat_free_iocharset(sbi); + opts->iocharset = param->string; + param->string = NULL; + break; + case Opt_errors: + opts->errors = result.uint_32; + break; + case Opt_discard: + opts->discard = 1; + break; + case Opt_keep_last_dots: + opts->keep_last_dots = 1; + break; + case Opt_sys_tz: + opts->sys_tz = 1; + break; + case Opt_time_offset: + /* + * Make the limit 24 just in case someone invents something + * unusual. + */ + if (result.int_32 < -24 * 60 || result.int_32 > 24 * 60) + return -EINVAL; + opts->time_offset = result.int_32; + break; + case Opt_utf8: + case Opt_debug: + case Opt_namecase: + case Opt_codepage: + break; + default: + return -EINVAL; + } + + return 0; +} +#else +enum { + Opt_uid, + Opt_gid, + Opt_umask, + Opt_dmask, + Opt_fmask, + Opt_allow_utime, + Opt_charset, + Opt_err_cont, + Opt_err_panic, + Opt_err_ro, + Opt_err, + Opt_discard, + Opt_time_offset, + + /* Deprecated options */ + Opt_utf8, + Opt_debug, + Opt_namecase, + Opt_codepage, + Opt_fs, +}; + +static const match_table_t exfat_tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_umask, "umask=%o"}, + {Opt_dmask, "dmask=%o"}, + {Opt_fmask, "fmask=%o"}, + {Opt_allow_utime, "allow_utime=%o"}, + {Opt_charset, "iocharset=%s"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, + {Opt_discard, "discard"}, + {Opt_codepage, "codepage=%u"}, + {Opt_namecase, "namecase=%u"}, + {Opt_debug, "debug"}, + {Opt_utf8, "utf8"}, + {Opt_err, NULL} +}; + +static int parse_options(struct super_block *sb, char *options, int silent, + struct exfat_mount_options *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *tmpstr; + + opts->fs_uid = current_uid(); + opts->fs_gid = current_gid(); + opts->fs_fmask = opts->fs_dmask = current->fs->umask; + opts->allow_utime = -1; + opts->iocharset = exfat_default_iocharset; + opts->utf8 = 0; + opts->errors = EXFAT_ERRORS_RO; + opts->discard = 0; + + if (!options) + goto out; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + token = match_token(p, exfat_tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_uid = make_kuid(current_user_ns(), option); + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_gid = make_kgid(current_user_ns(), option); + break; + case Opt_umask: + case Opt_dmask: + case Opt_fmask: + if (match_octal(&args[0], &option)) + return 0; + if (token != Opt_dmask) + opts->fs_fmask = option; + if (token != Opt_fmask) + opts->fs_dmask = option; + break; + case Opt_allow_utime: + if (match_octal(&args[0], &option)) + return 0; + opts->allow_utime = option & (0022); + break; + case Opt_charset: + if (opts->iocharset != exfat_default_iocharset) + kfree(opts->iocharset); + tmpstr = match_strdup(&args[0]); + if (!tmpstr) + return -ENOMEM; + opts->iocharset = tmpstr; + break; + case Opt_err_cont: + opts->errors = EXFAT_ERRORS_CONT; + break; + case Opt_err_panic: + opts->errors = EXFAT_ERRORS_PANIC; + break; + case Opt_err_ro: + opts->errors = EXFAT_ERRORS_RO; + break; + case Opt_discard: + opts->discard = 1; + break; + case Opt_time_offset: + if (match_int(&args[0], &option)) + return -EINVAL; + /* + * Make the limit 24 just in case someone + * invents something unusual. + */ + if (option < -24 * 60 || option > 24 * 60) + return -EINVAL; + opts->time_offset = option; + break; + case Opt_utf8: + case Opt_debug: + case Opt_namecase: + case Opt_codepage: + break; + default: + if (!silent) { + exfat_err(sb, + "unrecognized mount option \"%s\" or missing value", + p); + } + return -EINVAL; + } + } + +out: + if (opts->allow_utime == -1) + opts->allow_utime = ~opts->fs_dmask & (0022); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) { + exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard"); + opts->discard = 0; + } +#else + if (opts->discard) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) { + exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard"); + opts->discard = 0; + } + } +#endif + + return 0; +} +#endif + + +static void exfat_hash_init(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + int i; + + spin_lock_init(&sbi->inode_hash_lock); + for (i = 0; i < EXFAT_HASH_SIZE; i++) + INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); +} + +static int exfat_read_root(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct exfat_chain cdir; + int num_subdirs, num_clu = 0; + + exfat_chain_set(&ei->dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + ei->entry = -1; + ei->start_clu = sbi->root_dir; + ei->flags = ALLOC_FAT_CHAIN; + ei->type = TYPE_DIR; + ei->version = 0; + ei->hint_bmap.off = EXFAT_EOF_CLUSTER; + ei->hint_stat.eidx = 0; + ei->hint_stat.clu = sbi->root_dir; + ei->hint_femp.eidx = EXFAT_HINT_NONE; + + exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN); + if (exfat_count_num_clusters(sb, &cdir, &num_clu)) + return -EIO; + i_size_write(inode, num_clu << sbi->cluster_size_bits); + + num_subdirs = exfat_count_dir_entries(sb, &cdir); + if (num_subdirs < 0) + return -EIO; + set_nlink(inode, num_subdirs + EXFAT_MIN_SUBDIR); + + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_inc_iversion(inode); +#else + inode->i_version++; +#endif + inode->i_generation = 0; + inode->i_mode = exfat_make_mode(sbi, ATTR_SUBDIR, 0777); + inode->i_op = &exfat_dir_inode_operations; + inode->i_fop = &exfat_dir_operations; + + inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> + inode->i_blkbits; + ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; + ei->i_size_aligned = i_size_read(inode); + ei->i_size_ondisk = i_size_read(inode); + + exfat_save_attr(inode, ATTR_SUBDIR); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = + current_time(inode); +#else + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = + CURRENT_TIME_SEC; +#endif + exfat_truncate_atime(&inode->i_atime); + return 0; +} + +static int exfat_calibrate_blocksize(struct super_block *sb, int logical_sect) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + if (!is_power_of_2(logical_sect)) { + exfat_err(sb, "bogus logical sector size %u", logical_sect); + return -EIO; + } + + if (logical_sect < sb->s_blocksize) { + exfat_err(sb, "logical sector size too small for device (logical sector size = %u)", + logical_sect); + return -EIO; + } + + if (logical_sect > sb->s_blocksize) { + brelse(sbi->boot_bh); + sbi->boot_bh = NULL; + + if (!sb_set_blocksize(sb, logical_sect)) { + exfat_err(sb, "unable to set blocksize %u", + logical_sect); + return -EIO; + } + sbi->boot_bh = sb_bread(sb, 0); + if (!sbi->boot_bh) { + exfat_err(sb, "unable to read boot sector (logical sector size = %lu)", + sb->s_blocksize); + return -EIO; + } + } + return 0; +} + +static int exfat_read_boot_sector(struct super_block *sb) +{ + struct boot_sector *p_boot; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + /* set block size to read super block */ + sb_min_blocksize(sb, 512); + + /* read boot sector */ + sbi->boot_bh = sb_bread(sb, 0); + if (!sbi->boot_bh) { + exfat_err(sb, "unable to read boot sector"); + return -EIO; + } + p_boot = (struct boot_sector *)sbi->boot_bh->b_data; + + /* check the validity of BOOT */ + if (le16_to_cpu((p_boot->signature)) != BOOT_SIGNATURE) { + exfat_err(sb, "invalid boot record signature"); + return -EINVAL; + } + + if (memcmp(p_boot->fs_name, STR_EXFAT, BOOTSEC_FS_NAME_LEN)) { + exfat_err(sb, "invalid fs_name"); /* fs_name may unprintable */ + return -EINVAL; + } + + /* + * must_be_zero field must be filled with zero to prevent mounting + * from FAT volume. + */ + if (memchr_inv(p_boot->must_be_zero, 0, sizeof(p_boot->must_be_zero))) + return -EINVAL; + + if (p_boot->num_fats != 1 && p_boot->num_fats != 2) { + exfat_err(sb, "bogus number of FAT structure"); + return -EINVAL; + } + + /* + * sect_size_bits could be at least 9 and at most 12. + */ + if (p_boot->sect_size_bits < EXFAT_MIN_SECT_SIZE_BITS || + p_boot->sect_size_bits > EXFAT_MAX_SECT_SIZE_BITS) { + exfat_err(sb, "bogus sector size bits : %u", + p_boot->sect_size_bits); + return -EINVAL; + } + + /* + * sect_per_clus_bits could be at least 0 and at most 25 - sect_size_bits. + */ + if (p_boot->sect_per_clus_bits > EXFAT_MAX_SECT_PER_CLUS_BITS(p_boot)) { + exfat_err(sb, "bogus sectors bits per cluster : %u", + p_boot->sect_per_clus_bits); + return -EINVAL; + } + + sbi->sect_per_clus = 1 << p_boot->sect_per_clus_bits; + sbi->sect_per_clus_bits = p_boot->sect_per_clus_bits; + sbi->cluster_size_bits = p_boot->sect_per_clus_bits + + p_boot->sect_size_bits; + sbi->cluster_size = 1 << sbi->cluster_size_bits; + sbi->num_FAT_sectors = le32_to_cpu(p_boot->fat_length); + sbi->FAT1_start_sector = le32_to_cpu(p_boot->fat_offset); + sbi->FAT2_start_sector = le32_to_cpu(p_boot->fat_offset); + if (p_boot->num_fats == 2) + sbi->FAT2_start_sector += sbi->num_FAT_sectors; + sbi->data_start_sector = le32_to_cpu(p_boot->clu_offset); + sbi->num_sectors = le64_to_cpu(p_boot->vol_length); + /* because the cluster index starts with 2 */ + sbi->num_clusters = le32_to_cpu(p_boot->clu_count) + + EXFAT_RESERVED_CLUSTERS; + + sbi->root_dir = le32_to_cpu(p_boot->root_cluster); + sbi->dentries_per_clu = 1 << + (sbi->cluster_size_bits - DENTRY_SIZE_BITS); + + sbi->vol_flags = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags_persistent = sbi->vol_flags & (VOLUME_DIRTY | MEDIA_FAILURE); + sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; + sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED; + + /* check consistencies */ + if ((u64)sbi->num_FAT_sectors << p_boot->sect_size_bits < + (u64)sbi->num_clusters * 4) { + exfat_err(sb, "bogus fat length"); + return -EINVAL; + } + + if (sbi->data_start_sector < + (u64)sbi->FAT1_start_sector + + (u64)sbi->num_FAT_sectors * p_boot->num_fats) { + exfat_err(sb, "bogus data start sector"); + return -EINVAL; + } + + if (sbi->vol_flags & VOLUME_DIRTY) + exfat_warn(sb, "Volume was not properly unmounted. Some data may be corrupt. Please run fsck."); + if (sbi->vol_flags & MEDIA_FAILURE) + exfat_warn(sb, "Medium has reported failures. Some data may be lost."); + + /* exFAT file size is limited by a disk volume size */ + sb->s_maxbytes = (u64)(sbi->num_clusters - EXFAT_RESERVED_CLUSTERS) << + sbi->cluster_size_bits; + + /* check logical sector size */ + if (exfat_calibrate_blocksize(sb, 1 << p_boot->sect_size_bits)) + return -EIO; + + return 0; +} + +static int exfat_verify_boot_region(struct super_block *sb) +{ + struct buffer_head *bh = NULL; + u32 chksum = 0; + __le32 *p_sig, *p_chksum; + int sn, i; + + /* read boot sector sub-regions */ + for (sn = 0; sn < 11; sn++) { + bh = sb_bread(sb, sn); + if (!bh) + return -EIO; + + if (sn != 0 && sn <= 8) { + /* extended boot sector sub-regions */ + p_sig = (__le32 *)&bh->b_data[sb->s_blocksize - 4]; + if (le32_to_cpu(*p_sig) != EXBOOT_SIGNATURE) + exfat_warn(sb, "Invalid exboot-signature(sector = %d): 0x%08x", + sn, le32_to_cpu(*p_sig)); + } + + chksum = exfat_calc_chksum32(bh->b_data, sb->s_blocksize, + chksum, sn ? CS_DEFAULT : CS_BOOT_SECTOR); + brelse(bh); + } + + /* boot checksum sub-regions */ + bh = sb_bread(sb, sn); + if (!bh) + return -EIO; + + for (i = 0; i < sb->s_blocksize; i += sizeof(u32)) { + p_chksum = (__le32 *)&bh->b_data[i]; + if (le32_to_cpu(*p_chksum) != chksum) { + exfat_err(sb, "Invalid boot checksum (boot checksum : 0x%08x, checksum : 0x%08x)", + le32_to_cpu(*p_chksum), chksum); + brelse(bh); + return -EINVAL; + } + } + brelse(bh); + return 0; +} + +/* mount the file system volume */ +static int __exfat_fill_super(struct super_block *sb) +{ + int ret; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + ret = exfat_read_boot_sector(sb); + if (ret) { + exfat_err(sb, "failed to read boot sector"); + goto free_bh; + } + + ret = exfat_verify_boot_region(sb); + if (ret) { + exfat_err(sb, "invalid boot region"); + goto free_bh; + } + + ret = exfat_create_upcase_table(sb); + if (ret) { + exfat_err(sb, "failed to load upcase table"); + goto free_bh; + } + + ret = exfat_load_bitmap(sb); + if (ret) { + exfat_err(sb, "failed to load alloc-bitmap"); + goto free_upcase_table; + } + + ret = exfat_count_used_clusters(sb, &sbi->used_clusters); + if (ret) { + exfat_err(sb, "failed to scan clusters"); + goto free_alloc_bitmap; + } + + return 0; + +free_alloc_bitmap: + exfat_free_bitmap(sbi); +free_upcase_table: + exfat_free_upcase_table(sbi); +free_bh: + brelse(sbi->boot_bh); + return ret; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) +#else +static int exfat_fill_super(struct super_block *sb, void *data, int silent) +#endif +{ + struct inode *root_inode; + int err; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + struct exfat_sb_info *sbi = sb->s_fs_info; + struct exfat_mount_options *opts = &sbi->options; + + if (opts->allow_utime == (unsigned short)-1) + opts->allow_utime = ~opts->fs_dmask & 0022; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) + if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) { + exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard"); + opts->discard = 0; + } +#else + if (opts->discard) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) { + exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard"); + opts->discard = 0; + } + } +#endif +#else + struct exfat_sb_info *sbi; + + /* + * GFP_KERNEL is ok here, because while we do hold the + * supeblock lock, memory pressure can't call back into + * the filesystem, since we're only just about to mount + * it and have no inodes etc active! + */ + sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + mutex_init(&sbi->s_lock); + mutex_init(&sbi->bitmap_lock); + sb->s_fs_info = sbi; + ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + err = parse_options(sb, data, silent, &sbi->options); + if (err) { + exfat_err(sb, "failed to parse options"); + goto check_nls_io; + } +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + sb->s_flags |= SB_NODIRATIME; +#else + sb->s_flags |= MS_NODIRATIME; +#endif + sb->s_magic = EXFAT_SUPER_MAGIC; + sb->s_op = &exfat_sops; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) + sb->s_time_gran = 10 * NSEC_PER_MSEC; + sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS; + sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS; +#endif + + err = __exfat_fill_super(sb); + if (err) { + exfat_err(sb, "failed to recognize exfat type"); + goto check_nls_io; + } + + /* set up enough so that it can read an inode */ + exfat_hash_init(sb); + + if (!strcmp(sbi->options.iocharset, "utf8")) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + opts->utf8 = 1; +#else + sbi->options.utf8 = 1; +#endif + else { + sbi->nls_io = load_nls(sbi->options.iocharset); + if (!sbi->nls_io) { + exfat_err(sb, "IO charset %s not found", + sbi->options.iocharset); + err = -EINVAL; + goto free_table; + } + } + + if (sbi->options.utf8) + sb->s_d_op = &exfat_utf8_dentry_ops; + else + sb->s_d_op = &exfat_dentry_ops; + + root_inode = new_inode(sb); + if (!root_inode) { + exfat_err(sb, "failed to allocate root inode"); + err = -ENOMEM; + goto free_table; + } + + root_inode->i_ino = EXFAT_ROOT_INO; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) + inode_set_iversion(root_inode, 1); +#else + root_inode->i_version = 1; +#endif + err = exfat_read_root(root_inode); + if (err) { + exfat_err(sb, "failed to initialize root inode"); + goto put_inode; + } + + exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos); + insert_inode_hash(root_inode); + + sb->s_root = d_make_root(root_inode); + if (!sb->s_root) { + exfat_err(sb, "failed to get the root dentry"); + err = -ENOMEM; + goto free_table; + } + + return 0; + +put_inode: + iput(root_inode); + sb->s_root = NULL; + +free_table: + exfat_free_upcase_table(sbi); + exfat_free_bitmap(sbi); + brelse(sbi->boot_bh); + +check_nls_io: + unload_nls(sbi->nls_io); + exfat_free_iocharset(sbi); + sb->s_fs_info = NULL; + kfree(sbi); + return err; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static int exfat_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, exfat_fill_super); +} + +static void exfat_free(struct fs_context *fc) +{ + struct exfat_sb_info *sbi = fc->s_fs_info; + + if (sbi) { + exfat_free_iocharset(sbi); + kfree(sbi); + } +} + +static int exfat_reconfigure(struct fs_context *fc) +{ + fc->sb_flags |= SB_NODIRATIME; + + /* volume flag will be updated in exfat_sync_fs */ + sync_filesystem(fc->root->d_sb); + return 0; +} + +static const struct fs_context_operations exfat_context_ops = { + .parse_param = exfat_parse_param, + .get_tree = exfat_get_tree, + .free = exfat_free, + .reconfigure = exfat_reconfigure, +}; + +static int exfat_init_fs_context(struct fs_context *fc) +{ + struct exfat_sb_info *sbi; + + sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + mutex_init(&sbi->s_lock); + mutex_init(&sbi->bitmap_lock); + ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + sbi->options.fs_uid = current_uid(); + sbi->options.fs_gid = current_gid(); + sbi->options.fs_fmask = current->fs->umask; + sbi->options.fs_dmask = current->fs->umask; + sbi->options.allow_utime = -1; + sbi->options.iocharset = exfat_default_iocharset; + sbi->options.errors = EXFAT_ERRORS_RO; + + fc->s_fs_info = sbi; + fc->ops = &exfat_context_ops; + return 0; +} +#else +static struct dentry *exfat_fs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, exfat_fill_super); +} +#endif + +static struct file_system_type exfat_fs_type = { + .owner = THIS_MODULE, + .name = "exfat", +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + .init_fs_context = exfat_init_fs_context, + .parameters = exfat_parameters, +#else + .mount = exfat_fs_mount, +#endif + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static void exfat_inode_init_once(void *foo) +{ + struct exfat_inode_info *ei = (struct exfat_inode_info *)foo; + + spin_lock_init(&ei->cache_lru_lock); + ei->nr_caches = 0; + ei->cache_valid_id = EXFAT_CACHE_VALID + 1; + INIT_LIST_HEAD(&ei->cache_lru); + INIT_HLIST_NODE(&ei->i_hash_fat); + inode_init_once(&ei->vfs_inode); +} + +static int __init init_exfat_fs(void) +{ + int err; + + err = exfat_cache_init(); + if (err) + return err; + + exfat_inode_cachep = kmem_cache_create("exfat_inode_cache", + sizeof(struct exfat_inode_info), + 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + exfat_inode_init_once); + if (!exfat_inode_cachep) { + err = -ENOMEM; + goto shutdown_cache; + } + + err = register_filesystem(&exfat_fs_type); + if (err) + goto destroy_cache; + + return 0; + +destroy_cache: + kmem_cache_destroy(exfat_inode_cachep); +shutdown_cache: + exfat_cache_shutdown(); + return err; +} + +static void __exit exit_exfat_fs(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(exfat_inode_cachep); + unregister_filesystem(&exfat_fs_type); + exfat_cache_shutdown(); +} + +module_init(init_exfat_fs); +module_exit(exit_exfat_fs); + +MODULE_ALIAS_FS("exfat"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("exFAT filesystem support"); +MODULE_AUTHOR("Samsung Electronics Co., Ltd."); +MODULE_VERSION(EXFAT_VERSION); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a504f0713e72..fe5896c97b70 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -18,7 +18,6 @@ #include "ext4.h" #include "xattr.h" #include "truncate.h" -#include #define EXT4_XATTR_SYSTEM_DATA "data" #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) @@ -506,17 +505,6 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) return -EAGAIN; } - if (trace_android_fs_dataread_start_enabled()) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_dataread_start(inode, page_offset(page), - PAGE_SIZE, current->pid, - path, current->comm); - } - /* * Current inline data can only exist in the 1st page, * So for all the other pages, just set them uptodate. @@ -528,8 +516,6 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) SetPageUptodate(page); } - trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE); - up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5c77f065db5e..d762e9280b3f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -44,7 +44,6 @@ #include "truncate.h" #include -#include #define MPAGE_DA_EXTENT_TAIL 0x01 @@ -1188,16 +1187,6 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; - if (trace_android_fs_datawrite_start_enabled()) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_datawrite_start(inode, pos, len, - current->pid, path, - current->comm); - } trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case @@ -1343,7 +1332,6 @@ static int ext4_write_end(struct file *file, int i_size_changed = 0; int inline_data = ext4_has_inline_data(inode); - trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_write_end(inode, pos, len, copied); if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, @@ -1449,7 +1437,6 @@ static int ext4_journalled_write_end(struct file *file, int size_changed = 0; int inline_data = ext4_has_inline_data(inode); - trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); to = from + len; @@ -2969,16 +2956,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; - if (trace_android_fs_datawrite_start_enabled()) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_datawrite_start(inode, pos, len, - current->pid, - path, current->comm); - } trace_ext4_da_write_begin(inode, pos, len, flags); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -3097,7 +3074,6 @@ static int ext4_da_write_end(struct file *file, return ext4_write_end(file, mapping, pos, len, copied, page, fsdata); - trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_da_write_end(inode, pos, len, copied); start = pos & (PAGE_SIZE - 1); end = start + copied - 1; @@ -3663,7 +3639,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; ssize_t ret; - int rw = iov_iter_rw(iter); #ifdef CONFIG_FS_ENCRYPTION if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && @@ -3681,28 +3656,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (ext4_has_inline_data(inode)) return 0; - if (trace_android_fs_dataread_start_enabled() && - (rw == READ)) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_dataread_start(inode, offset, count, - current->pid, path, - current->comm); - } - if (trace_android_fs_datawrite_start_enabled() && - (rw == WRITE)) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_datawrite_start(inode, offset, count, - current->pid, path, - current->comm); - } trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == READ) ret = ext4_direct_IO_read(iocb, iter); @@ -3710,13 +3663,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ret = ext4_direct_IO_write(iocb, iter); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); - if (trace_android_fs_dataread_start_enabled() && - (rw == READ)) - trace_android_fs_dataread_end(inode, offset, count); - if (trace_android_fs_datawrite_start_enabled() && - (rw == WRITE)) - trace_android_fs_datawrite_end(inode, offset, count); - return ret; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 89fbff1a9b2c..e32046f0848e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2387,7 +2387,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) return 0; size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); - new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); + new_groupinfo = kvzalloc(size, GFP_KERNEL); if (!new_groupinfo) { ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index c58a4d7671be..6389d13e5986 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -45,7 +45,6 @@ #include #include "ext4.h" -#include static inline bool ext4_bio_encrypted(struct bio *bio) { @@ -56,17 +55,6 @@ static inline bool ext4_bio_encrypted(struct bio *bio) #endif } -static void -ext4_trace_read_completion(struct bio *bio) -{ - struct page *first_page = bio->bi_io_vec[0].bv_page; - - if (first_page != NULL) - trace_android_fs_dataread_end(first_page->mapping->host, - page_offset(first_page), - bio->bi_iter.bi_size); -} - /* * I/O completion handler for multipage BIOs. * @@ -84,9 +72,6 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; - if (trace_android_fs_dataread_start_enabled()) - ext4_trace_read_completion(bio); - if (ext4_bio_encrypted(bio)) { if (bio->bi_error) { fscrypt_release_ctx(bio->bi_private); @@ -110,30 +95,6 @@ static void mpage_end_io(struct bio *bio) bio_put(bio); } -static void -ext4_submit_bio_read(struct bio *bio) -{ - if (trace_android_fs_dataread_start_enabled()) { - struct page *first_page = bio->bi_io_vec[0].bv_page; - - if (first_page != NULL) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - first_page->mapping->host); - trace_android_fs_dataread_start( - first_page->mapping->host, - page_offset(first_page), - bio->bi_iter.bi_size, - current->pid, - path, - current->comm); - } - } - submit_bio(bio); -} - int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages) @@ -274,7 +235,7 @@ int ext4_mpage_readpages(struct address_space *mapping, */ if (bio && (last_block_in_bio != blocks[0] - 1)) { submit_and_realloc: - ext4_submit_bio_read(bio); + submit_bio(bio); bio = NULL; } if (bio == NULL) { @@ -306,14 +267,14 @@ int ext4_mpage_readpages(struct address_space *mapping, if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_page)) { - ext4_submit_bio_read(bio); + submit_bio(bio); bio = NULL; } else last_block_in_bio = blocks[blocks_per_page - 1]; goto next_page; confused: if (bio) { - ext4_submit_bio_read(bio); + submit_bio(bio); bio = NULL; } if (!PageUptodate(page)) @@ -326,6 +287,6 @@ int ext4_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - ext4_submit_bio_read(bio); + submit_bio(bio); return 0; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4384bbe61415..f5e8c1247af5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2130,8 +2130,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - new_groups = ext4_kvzalloc(roundup_pow_of_two(size * - sizeof(*sbi->s_flex_groups)), GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { ext4_msg(sb, KERN_ERR, "not enough memory for %d flex group pointers", size); @@ -3993,7 +3993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } rcu_assign_pointer(sbi->s_group_desc, - ext4_kvmalloc(db_count * + kvmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL)); if (sbi->s_group_desc == NULL) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5795f9d80d56..1534fdbadc56 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2429,8 +2429,11 @@ static int f2fs_write_cache_pages(struct address_space *mapping, ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); - congestion_wait(BLK_RW_ASYNC, - HZ/50); +#if (CONFIG_HZ > 100) + congestion_wait(BLK_RW_ASYNC, 2); +#else + congestion_wait(BLK_RW_ASYNC, 1); +#endif goto retry_write; } continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 532afeee5137..6e22674c2e32 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1215,6 +1215,7 @@ struct f2fs_sb_info { struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ + struct rw_semaphore cp_quota_rwsem; /* blocking quota sync operations */ struct rw_semaphore node_write; /* locking node writes */ struct rw_semaphore node_change; /* locking node change */ wait_queue_head_t cp_wait; @@ -1708,12 +1709,14 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { + down_write(&sbi->cp_quota_rwsem); down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { up_write(&sbi->cp_rwsem); + up_write(&sbi->cp_quota_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) @@ -2781,16 +2784,6 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) return S_ISREG(inode->i_mode); } -static inline void *kvmalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kmalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags, PAGE_KERNEL); - return ret; -} - static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -2808,16 +2801,6 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kvmalloc(size, flags); } -static inline void *kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kzalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} - static inline int wbc_to_write_flags(struct writeback_control *wbc) { if (wbc->sync_mode == WB_SYNC_ALL) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 70e93aeb09c6..9fa679cb9e74 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3444,11 +3444,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); - if (blk_off > ENTRIES_IN_SUM) { - f2fs_bug_on(sbi, 1); - f2fs_put_page(page, 1); - return -EFAULT; - } seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b74602813a05..f95ce0bda297 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -617,7 +617,7 @@ static inline int utilization(struct f2fs_sb_info *sbi) * F2FS_IPUT_DISABLE - disable IPU. (=default option) */ #define DEF_MIN_IPU_UTIL 70 -#define DEF_MIN_FSYNC_BLOCKS 8 +#define DEF_MIN_FSYNC_BLOCKS 20 #define DEF_MIN_HOT_BLOCKS 16 #define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5388426780dd..76d0c72033b8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -1989,8 +1990,16 @@ int f2fs_quota_sync(struct super_block *sb, int type) * f2fs_dquot_commit * block_operation * down_read(quota_sem) + * + * However, we cannot use the cp_rwsem to prevent this + * deadlock, as the cp_rwsem is taken for read inside the + * f2fs_dquot_commit code, and rwsem is not recursive. + * + * We therefore use a special lock to synchronize + * f2fs_quota_sync with block_operations, as this is the only + * place where such recursion occurs. */ - f2fs_lock_op(sbi); + down_read(&sbi->cp_quota_rwsem); down_read(&sbi->quota_sem); ret = dquot_writeback_dquots(sb, type); @@ -2031,7 +2040,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (ret) set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); up_read(&sbi->quota_sem); - f2fs_unlock_op(sbi); + up_read(&sbi->cp_quota_rwsem); return ret; } @@ -3302,6 +3311,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) init_rwsem(&sbi->cp_rwsem); init_rwsem(&sbi->quota_sem); + init_rwsem(&sbi->cp_quota_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); @@ -3533,6 +3543,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + + cleancache_init_fs(sb); return 0; sync_free_meta: diff --git a/fs/file.c b/fs/file.c index be0792c0a231..33e39e40fe1e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -42,7 +42,7 @@ static void *alloc_fdmem(size_t size) if (data != NULL) return data; } - return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); } static void __free_fdtable(struct fdtable *fdt) diff --git a/fs/file_table.c b/fs/file_table.c index 251d54ee7ef7..be36504b8a01 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -334,10 +334,10 @@ void __init files_init(void) void __init files_maxfiles_init(void) { unsigned long n; - unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2; + unsigned long memreserve = (totalram_pages() - nr_free_pages()) * 3/2; - memreserve = min(memreserve, totalram_pages - 1); - n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; + memreserve = min(memreserve, totalram_pages() - 1); + n = ((totalram_pages() - memreserve) * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 75e0d1297775..5c68fa5a4c08 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -829,7 +829,7 @@ static const struct super_operations fuse_super_operations = { static void sanitize_global_limit(unsigned *limit) { if (*limit == 0) - *limit = ((totalram_pages << PAGE_SHIFT) >> 13) / + *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / sizeof(struct fuse_req); if (*limit >= 1 << 16) diff --git a/fs/namespace.c b/fs/namespace.c index 532baf2380f7..7d1e1d2492ea 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2880,9 +2880,9 @@ long do_mount(const char *dev_name, const char __user *dir_name, if (retval) goto dput_out; - /* Default to relatime unless overriden */ - if (!(flags & MS_NOATIME)) - mnt_flags |= MNT_RELATIME; + /* Default to noatime unless overriden */ + if (!(flags & MS_RELATIME)) + mnt_flags |= MNT_NOATIME; /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a8b786a648cd..da6489b472fe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2092,7 +2092,7 @@ int __init nfs_init_writepagecache(void) * This allows larger machines to have larger/more transfers. * Limit the default to 256M */ - nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10); if (nfs_congestion_kb > 256*1024) nfs_congestion_kb = 256*1024; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 54cde9a5864e..4d50b1805a5c 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -96,7 +96,7 @@ static unsigned int nfsd_cache_size_limit(void) { unsigned int limit; - unsigned long low_pages = totalram_pages - totalhigh_pages; + unsigned long low_pages = totalram_pages() - totalhigh_pages(); limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); return min_t(unsigned int, limit, 256*1024); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index f8eb04387ca4..ff042ba218e4 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -128,6 +128,7 @@ static inline void handle_bounds_compressed_page(struct page *page, /** * ntfs_decompress - decompress a compression block into an array of pages * @dest_pages: destination array of pages + * @completed_pages: scratch space to track completed pages * @dest_index: current index into @dest_pages (IN/OUT) * @dest_ofs: current offset within @dest_pages[@dest_index] (IN/OUT) * @dest_max_index: maximum index into @dest_pages (IN) @@ -162,10 +163,10 @@ static inline void handle_bounds_compressed_page(struct page *page, * Note to hackers: This function may not sleep until it has finished accessing * the compression block @cb_start as it is a per-CPU buffer. */ -static int ntfs_decompress(struct page *dest_pages[], int *dest_index, - int *dest_ofs, const int dest_max_index, const int dest_max_ofs, - const int xpage, char *xpage_done, u8 *const cb_start, - const u32 cb_size, const loff_t i_size, +static int ntfs_decompress(struct page *dest_pages[], int completed_pages[], + int *dest_index, int *dest_ofs, const int dest_max_index, + const int dest_max_ofs, const int xpage, char *xpage_done, + u8 *const cb_start, const u32 cb_size, const loff_t i_size, const s64 initialized_size) { /* @@ -190,9 +191,6 @@ static int ntfs_decompress(struct page *dest_pages[], int *dest_index, /* Variables for tag and token parsing. */ u8 tag; /* Current tag. */ int token; /* Loop counter for the eight tokens in tag. */ - - /* Need this because we can't sleep, so need two stages. */ - int completed_pages[dest_max_index - *dest_index + 1]; int nr_completed_pages = 0; /* Default error code. */ @@ -516,6 +514,7 @@ int ntfs_read_compressed_block(struct page *page) unsigned int cb_clusters, cb_max_ofs; int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; struct page **pages; + int *completed_pages; unsigned char xpage_done = 0; ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = " @@ -528,14 +527,16 @@ int ntfs_read_compressed_block(struct page *page) BUG_ON(ni->name_len); pages = kmalloc(nr_pages * sizeof(struct page *), GFP_NOFS); + completed_pages = kmalloc((nr_pages + 1) * sizeof(int), GFP_NOFS); /* Allocate memory to store the buffer heads we need. */ bhs_size = cb_size / block_size * sizeof(struct buffer_head *); bhs = kmalloc(bhs_size, GFP_NOFS); - if (unlikely(!pages || !bhs)) { + if (unlikely(!pages || !bhs || !completed_pages)) { kfree(bhs); kfree(pages); + kfree(completed_pages); unlock_page(page); ntfs_error(vol->sb, "Failed to allocate internal buffers."); return -ENOMEM; @@ -562,6 +563,7 @@ int ntfs_read_compressed_block(struct page *page) if (xpage >= max_page) { kfree(bhs); kfree(pages); + kfree(completed_pages); zero_user(page, 0, PAGE_SIZE); ntfs_debug("Compressed read outside i_size - truncated?"); SetPageUptodate(page); @@ -854,10 +856,10 @@ int ntfs_read_compressed_block(struct page *page) unsigned int prev_cur_page = cur_page; ntfs_debug("Found compressed compression block."); - err = ntfs_decompress(pages, &cur_page, &cur_ofs, - cb_max_page, cb_max_ofs, xpage, &xpage_done, - cb_pos, cb_size - (cb_pos - cb), i_size, - initialized_size); + err = ntfs_decompress(pages, completed_pages, &cur_page, + &cur_ofs, cb_max_page, cb_max_ofs, xpage, + &xpage_done, cb_pos, cb_size - (cb_pos - cb), + i_size, initialized_size); /* * We can sleep from now on, lock already dropped by * ntfs_decompress(). @@ -912,6 +914,7 @@ int ntfs_read_compressed_block(struct page *page) /* We no longer need the list of pages. */ kfree(pages); + kfree(completed_pages); /* If we have completed the requested page, we return success. */ if (likely(xpage_done)) @@ -956,5 +959,6 @@ int ntfs_read_compressed_block(struct page *page) } } kfree(pages); + kfree(completed_pages); return -EIO; } diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index ab172e5f51d9..5becc8acc8f4 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); /* return (void *)__get_free_page(gfp_mask); */ } - if (likely((size >> PAGE_SHIFT) < totalram_pages)) + if (likely((size >> PAGE_SHIFT) < totalram_pages())) return __vmalloc(size, gfp_mask, PAGE_KERNEL); return NULL; } diff --git a/fs/proc/base.c b/fs/proc/base.c index b6959f6dae5b..eec3f997e9c7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -97,6 +97,21 @@ #include "../../lib/kstrtox.h" +struct task_kill_info { + struct task_struct *task; + struct work_struct work; +}; + +static void proc_kill_task(struct work_struct *work) +{ + struct task_kill_info *kinfo = container_of(work, typeof(*kinfo), work); + struct task_struct *task = kinfo->task; + + send_sig(SIGKILL, task, 0); + put_task_struct(task); + kfree(kinfo); +} + /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during @@ -576,7 +591,7 @@ static const struct file_operations proc_lstats_operations = { static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - unsigned long totalpages = totalram_pages + total_swap_pages; + unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; points = oom_badness(task, NULL, NULL, totalpages) * @@ -1071,6 +1086,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) static DEFINE_MUTEX(oom_adj_mutex); struct mm_struct *mm = NULL; struct task_struct *task; + char task_comm[TASK_COMM_LEN]; int err = 0; task = get_proc_task(file_inode(file)); @@ -1120,6 +1136,8 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = (short)oom_adj; trace_oom_score_adj_update(task); + if (oom_adj >= 700) + strncpy(task_comm, task->comm, TASK_COMM_LEN); if (mm) { struct task_struct *p; @@ -1147,6 +1165,20 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) err_unlock: mutex_unlock(&oom_adj_mutex); put_task_struct(task); + /* These apps burn through CPU in the background. Don't let them. */ + if (!err && oom_adj >= 700) { + if (!strcmp(task_comm, "id.GoogleCamera")) { + struct task_kill_info *kinfo; + + kinfo = kmalloc(sizeof(*kinfo), GFP_KERNEL); + if (kinfo) { + get_task_struct(task); + kinfo->task = task; + INIT_WORK(&kinfo->work, proc_kill_task); + schedule_work(&kinfo->work); + } + } + } return err; } diff --git a/fs/seq_file.c b/fs/seq_file.c index 3ade39e02bb7..023d92dfffa9 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -25,24 +25,7 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { - void *buf; - gfp_t gfp = GFP_KERNEL; - - if (unlikely(size > MAX_RW_COUNT)) - return NULL; - - /* - * For high order allocations, use __GFP_NORETRY to avoid oom-killing - - * it's better to fall back to vmalloc() than to kill things. For small - * allocations, just use GFP_KERNEL which will oom kill, thus no need - * for vmalloc fallback. - */ - if (size > PAGE_SIZE) - gfp |= __GFP_NORETRY | __GFP_NOWARN; - buf = kmalloc(size, gfp); - if (!buf && size > PAGE_SIZE) - buf = vmalloc(size); - return buf; + return kvmalloc(size, GFP_KERNEL); } /** diff --git a/fs/sync.c b/fs/sync.c index 5c2420c3eb17..e2eb186bb4eb 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,9 @@ #include #include "internal.h" +bool fsync_enabled = true; +module_param(fsync_enabled, bool, 0755); + #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) @@ -153,10 +157,14 @@ void emergency_sync(void) */ SYSCALL_DEFINE1(syncfs, int, fd) { - struct fd f = fdget(fd); + struct fd f; struct super_block *sb; int ret; + if (!fsync_enabled) + return 0; + + f = fdget(fd); if (!f.file) return -EBADF; sb = f.file->f_path.dentry->d_sb; @@ -184,6 +192,9 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; + if (!fsync_enabled) + return 0; + if (!file->f_op->fsync) return -EINVAL; if (!datasync && (inode->i_state & I_DIRTY_TIME)) { @@ -212,9 +223,13 @@ EXPORT_SYMBOL(vfs_fsync); static int do_fsync(unsigned int fd, int datasync) { - struct fd f = fdget(fd); + struct fd f; int ret = -EBADF; + + if (!fsync_enabled) + return 0; + f = fdget(fd); if (f.file) { ret = vfs_fsync(f.file, datasync); fdput(f); @@ -289,6 +304,9 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, loff_t endbyte; /* inclusive */ umode_t i_mode; + if (!fsync_enabled) + return 0; + ret = -EINVAL; if (flags & ~VALID_FLAGS) goto out; diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index bb2beaef531a..0b770f861897 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -66,7 +66,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) noio_flag = memalloc_noio_save(); lflags = kmem_flags_convert(flags); - ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL); if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) memalloc_noio_restore(noio_flag); diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index 288cc9e96395..323288b6e596 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -243,4 +243,9 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_inc_not_zero(l) \ ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l)) +#define atomic_long_cond_read_relaxed(v, c) \ + ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c)) +#define atomic_long_cond_read_acquire(v, c) \ + ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c)) + #endif /* _ASM_GENERIC_ATOMIC_LONG_H */ diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index fe297b599b0a..305e03b19a26 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -221,18 +221,17 @@ do { \ #endif /** - * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees * @ptr: pointer to the variable to wait on * @cond: boolean expression to wait for * - * Equivalent to using smp_load_acquire() on the condition variable but employs - * the control dependency of the wait to reduce the barrier on many platforms. + * Equivalent to using READ_ONCE() on the condition variable. * * Due to C lacking lambda expressions we load the value of *ptr into a * pre-named variable @VAL to be used in @cond. */ -#ifndef smp_cond_load_acquire -#define smp_cond_load_acquire(ptr, cond_expr) ({ \ +#ifndef smp_cond_load_relaxed +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ typeof(ptr) __PTR = (ptr); \ typeof(*ptr) VAL; \ for (;;) { \ @@ -241,10 +240,26 @@ do { \ break; \ cpu_relax(); \ } \ - smp_acquire__after_ctrl_dep(); \ VAL; \ }) #endif +/** + * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * @ptr: pointer to the variable to wait on + * @cond: boolean expression to wait for + * + * Equivalent to using smp_load_acquire() on the condition variable but employs + * the control dependency of the wait to reduce the barrier on many platforms. + */ +#ifndef smp_cond_load_acquire +#define smp_cond_load_acquire(ptr, cond_expr) ({ \ + typeof(*ptr) _val; \ + _val = smp_cond_load_relaxed(ptr, cond_expr); \ + smp_acquire__after_ctrl_dep(); \ + _val; \ +}) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b43fa9d95a7a..1fc64a5e2d79 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -841,6 +841,10 @@ static inline bool arch_has_pfn_modify_check(void) } #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */ +#ifndef PAGE_KERNEL_EXEC +# define PAGE_KERNEL_EXEC PAGE_KERNEL +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/include/asm-generic/set_memory.h b/include/asm-generic/set_memory.h new file mode 100644 index 000000000000..83e81f8996b2 --- /dev/null +++ b/include/asm-generic/set_memory.h @@ -0,0 +1,12 @@ +#ifndef __ASM_SET_MEMORY_H +#define __ASM_SET_MEMORY_H + +/* + * Functions to change memory attributes. + */ +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); + +#endif diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 43783efc6c3b..d7755f342e03 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -47,7 +47,9 @@ enum arch_timer_reg { #define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */ #define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */ -#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */ +#define ARCH_TIMER_EVT_STREAM_PERIOD_US 100 +#define ARCH_TIMER_EVT_STREAM_FREQ \ + (USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US) struct arch_timer_kvm_info { struct timecounter timecounter; @@ -60,6 +62,7 @@ extern u32 arch_timer_get_rate(void); extern u64 (*arch_timer_read_counter)(void); extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void); extern void arch_timer_mem_get_cval(u32 *lo, u32 *hi); +extern bool arch_timer_evtstrm_available(void); #else static inline u32 arch_timer_get_rate(void) @@ -76,6 +79,12 @@ static void arch_timer_mem_get_cval(u32 *lo, u32 *hi) { *lo = *hi = ~0U; } + +static inline bool arch_timer_evtstrm_available(void) +{ + return false; +} + #endif #endif diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 12f84327ca36..ba56d6ee0bf1 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -483,30 +483,18 @@ static inline void aead_request_set_callback(struct aead_request *req, * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. * - * For both src/dst the layout is associated data, plain/cipher text, - * authentication tag. - * - * The content of the AD in the destination buffer after processing - * will either be untouched, or it will contain a copy of the AD - * from the source buffer. In order to ensure that it always has - * a copy of the AD, the user must copy the AD over either before - * or after processing. Of course this is not relevant if the user - * is doing in-place processing where src == dst. - * - * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption, - * the caller must concatenate the ciphertext followed by the - * authentication tag and provide the entire data stream to the - * decryption operation (i.e. the data length used for the - * initialization of the scatterlist and the data length for the - * decryption operation is identical). For encryption, however, - * the authentication tag is created while encrypting the data. - * The destination buffer must hold sufficient space for the - * ciphertext and the authentication tag while the encryption - * invocation must only point to the plaintext data size. The - * following code snippet illustrates the memory usage - * buffer = kmalloc(ptbuflen + (enc ? authsize : 0)); - * sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0)); - * aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv); + * The memory structure for cipher operation has the following structure: + * + * - AEAD encryption input: assoc data || plaintext + * - AEAD encryption output: assoc data || cipherntext || auth tag + * - AEAD decryption input: assoc data || ciphertext || auth tag + * - AEAD decryption output: assoc data || plaintext + * + * Albeit the kernel requires the presence of the AAD buffer, however, + * the kernel does not fill the AAD buffer in the output case. If the + * caller wants to have that data buffer filled, the caller must either + * use an in-place cipher operation (i.e. same memory location for + * input/output memory location). */ static inline void aead_request_set_crypt(struct aead_request *req, struct scatterlist *src, diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 404e9558e879..d8dccd022f71 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -17,6 +17,16 @@ #include #include +/* + * Maximum values for blocksize and alignmask, used to allocate + * static buffers that are big enough for any combination of + * algs and architectures. Ciphers have a lower maximum size. + */ +#define MAX_ALGAPI_BLOCKSIZE 160 +#define MAX_ALGAPI_ALIGNMASK 63 +#define MAX_CIPHER_BLOCKSIZE 16 +#define MAX_CIPHER_ALIGNMASK 15 + struct crypto_aead; struct crypto_instance; struct module; diff --git a/include/crypto/cbc.h b/include/crypto/cbc.h new file mode 100644 index 000000000000..3bf28beefa33 --- /dev/null +++ b/include/crypto/cbc.h @@ -0,0 +1,146 @@ +/* + * CBC: Cipher Block Chaining mode + * + * Copyright (c) 2016 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_CBC_H +#define _CRYPTO_CBC_H + +#include +#include +#include + +static inline int crypto_cbc_encrypt_segment( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + u8 *iv = walk->iv; + + do { + crypto_xor(iv, src, bsize); + fn(tfm, iv, dst); + memcpy(iv, dst, bsize); + + src += bsize; + dst += bsize; + } while ((nbytes -= bsize) >= bsize); + + return nbytes; +} + +static inline int crypto_cbc_encrypt_inplace( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *iv = walk->iv; + + do { + crypto_xor(src, iv, bsize); + fn(tfm, src, src); + iv = src; + + src += bsize; + } while ((nbytes -= bsize) >= bsize); + + memcpy(walk->iv, iv, bsize); + + return nbytes; +} + +static inline int crypto_cbc_encrypt_walk(struct skcipher_request *req, + void (*fn)(struct crypto_skcipher *, + const u8 *, u8 *)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + if (walk.src.virt.addr == walk.dst.virt.addr) + err = crypto_cbc_encrypt_inplace(&walk, tfm, fn); + else + err = crypto_cbc_encrypt_segment(&walk, tfm, fn); + err = skcipher_walk_done(&walk, err); + } + + return err; +} + +static inline int crypto_cbc_decrypt_segment( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + u8 *iv = walk->iv; + + do { + fn(tfm, src, dst); + crypto_xor(dst, iv, bsize); + iv = src; + + src += bsize; + dst += bsize; + } while ((nbytes -= bsize) >= bsize); + + memcpy(walk->iv, iv, bsize); + + return nbytes; +} + +static inline int crypto_cbc_decrypt_inplace( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 last_iv[MAX_CIPHER_BLOCKSIZE]; + + /* Start of the last block. */ + src += nbytes - (nbytes & (bsize - 1)) - bsize; + memcpy(last_iv, src, bsize); + + for (;;) { + fn(tfm, src, src); + if ((nbytes -= bsize) < bsize) + break; + crypto_xor(src, src - bsize, bsize); + src -= bsize; + } + + crypto_xor(src, walk->iv, bsize); + memcpy(walk->iv, last_iv, bsize); + + return nbytes; +} + +static inline int crypto_cbc_decrypt_blocks( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + if (walk->src.virt.addr == walk->dst.virt.addr) + return crypto_cbc_decrypt_inplace(walk, tfm, fn); + else + return crypto_cbc_decrypt_segment(walk, tfm, fn); +} + +#endif /* _CRYPTO_CBC_H */ diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index bc792d5a9e88..94418cbf9013 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -12,10 +12,10 @@ #ifndef _CRYPTO_CRYPT_H #define _CRYPTO_CRYPT_H -#include #include #include #include +#include struct cryptd_ablkcipher { struct crypto_ablkcipher base; @@ -34,6 +34,17 @@ struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm); bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm); void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm); +struct cryptd_skcipher { + struct crypto_skcipher base; +}; + +struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name, + u32 type, u32 mask); +struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm); +/* Must be called without moving CPUs. */ +bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm); +void cryptd_free_skcipher(struct cryptd_skcipher *tfm); + struct cryptd_ahash { struct crypto_ahash base; }; diff --git a/include/crypto/engine.h b/include/crypto/engine.h index 04eb5c77addd..1bf600fc99f7 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -43,8 +43,7 @@ * @prepare_hash_request: do some prepare if need before handle the current request * @unprepare_hash_request: undo any work done by prepare_hash_request() * @hash_one_request: do hash for current request - * @kworker: thread struct for request pump - * @kworker_task: pointer to task for request pump kworker thread + * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data * @cur_req: the current request which is on processing @@ -78,8 +77,7 @@ struct crypto_engine { int (*hash_one_request)(struct crypto_engine *engine, struct ahash_request *req); - struct kthread_worker kworker; - struct task_struct *kworker_task; + struct kthread_worker *kworker; struct kthread_work pump_requests; void *priv_data; diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index da2530e34b26..592d47e565a8 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -177,24 +177,23 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); static inline void gf128mul_free_4k(struct gf128mul_4k *t) { - kfree(t); + kzfree(t); } -/* 64k table optimization, implemented for lle and bbe */ +/* 64k table optimization, implemented for bbe */ struct gf128mul_64k { struct gf128mul_4k *t[16]; }; -/* first initialize with the constant factor with which you - * want to multiply and then call gf128_64k_lle with the other - * factor in the first argument, the table in the second and a - * scratch register in the third. Afterwards *a = *r. */ -struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); +/* First initialize with the constant factor with which you + * want to multiply and then call gf128mul_64k_bbe with the other + * factor in the first argument, and the table in the second. + * Afterwards, the result is stored in *a. + */ struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); void gf128mul_free_64k(struct gf128mul_64k *t); -void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); #endif /* _CRYPTO_GF128MUL_H */ diff --git a/include/crypto/hash.h b/include/crypto/hash.h index d3de3b819ec0..a20fd0a9d9e4 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -147,9 +147,13 @@ struct shash_desc { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; +#define HASH_MAX_DIGESTSIZE 64 +#define HASH_MAX_DESCSIZE 360 +#define HASH_MAX_STATESIZE 512 + #define SHASH_DESC_ON_STACK(shash, ctx) \ char __##shash##_desc[sizeof(struct shash_desc) + \ - crypto_shash_descsize(ctx)] CRYPTO_MINALIGN_ATTR; \ + HASH_MAX_DESCSIZE] CRYPTO_MINALIGN_ATTR; \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc /** diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h index 2bcfb931bc5b..71be24cd59bd 100644 --- a/include/crypto/internal/geniv.h +++ b/include/crypto/internal/geniv.h @@ -20,7 +20,7 @@ struct aead_geniv_ctx { spinlock_t lock; struct crypto_aead *child; - struct crypto_skcipher *sknull; + struct crypto_sync_skcipher *sknull; u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); }; diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h new file mode 100644 index 000000000000..429509968f68 --- /dev/null +++ b/include/crypto/internal/simd.h @@ -0,0 +1,17 @@ +/* + * Shared crypto simd helpers + */ + +#ifndef _CRYPTO_INTERNAL_SIMD_H +#define _CRYPTO_INTERNAL_SIMD_H + +struct simd_skcipher_alg; + +struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname, + const char *drvname, + const char *basename); +struct simd_skcipher_alg *simd_skcipher_create(const char *algname, + const char *basename); +void simd_skcipher_free(struct simd_skcipher_alg *alg); + +#endif /* _CRYPTO_INTERNAL_SIMD_H */ diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index a21a95e1a375..959ae6a8418f 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -15,8 +15,10 @@ #include #include +#include #include +struct aead_request; struct rtattr; struct skcipher_instance { @@ -34,6 +36,40 @@ struct crypto_skcipher_spawn { struct crypto_spawn base; }; +struct skcipher_walk { + union { + struct { + struct page *page; + unsigned long offset; + } phys; + + struct { + u8 *page; + void *addr; + } virt; + } src, dst; + + struct scatter_walk in; + unsigned int nbytes; + + struct scatter_walk out; + unsigned int total; + + struct list_head buffers; + + u8 *page; + u8 *buffer; + u8 *oiv; + void *iv; + + unsigned int ivsize; + + int flags; + unsigned int blocksize; + unsigned int chunksize; + unsigned int alignmask; +}; + extern const struct crypto_type crypto_givcipher_type; static inline struct crypto_instance *skcipher_crypto_instance( @@ -68,12 +104,6 @@ static inline void crypto_set_skcipher_spawn( int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, u32 type, u32 mask); -static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, - const char *name, u32 type, u32 mask) -{ - return crypto_grab_skcipher(spawn, name, type, mask); -} - struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask); static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn) @@ -99,12 +129,6 @@ static inline struct crypto_skcipher *crypto_spawn_skcipher( return crypto_spawn_tfm2(&spawn->base); } -static inline struct crypto_skcipher *crypto_spawn_skcipher2( - struct crypto_skcipher_spawn *spawn) -{ - return crypto_spawn_skcipher(spawn); -} - static inline void crypto_skcipher_set_reqsize( struct crypto_skcipher *skcipher, unsigned int reqsize) { @@ -118,6 +142,21 @@ void crypto_unregister_skciphers(struct skcipher_alg *algs, int count); int skcipher_register_instance(struct crypto_template *tmpl, struct skcipher_instance *inst); +int skcipher_walk_done(struct skcipher_walk *walk, int err); +int skcipher_walk_virt(struct skcipher_walk *walk, + struct skcipher_request *req, + bool atomic); +void skcipher_walk_atomise(struct skcipher_walk *walk); +int skcipher_walk_async(struct skcipher_walk *walk, + struct skcipher_request *req); +int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req, + bool atomic); +int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); +int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); +void skcipher_walk_complete(struct skcipher_walk *walk, int err); + static inline void ablkcipher_request_complete(struct ablkcipher_request *req, int err) { diff --git a/include/crypto/null.h b/include/crypto/null.h index 3f0c59fb0a61..7df307ecabee 100644 --- a/include/crypto/null.h +++ b/include/crypto/null.h @@ -8,17 +8,7 @@ #define NULL_DIGEST_SIZE 0 #define NULL_IV_SIZE 0 -struct crypto_skcipher *crypto_get_default_null_skcipher(void); +struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void); void crypto_put_default_null_skcipher(void); -static inline struct crypto_skcipher *crypto_get_default_null_skcipher2(void) -{ - return crypto_get_default_null_skcipher(); -} - -static inline void crypto_put_default_null_skcipher2(void) -{ - crypto_put_default_null_skcipher(); -} - #endif diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index cc4d98a7892e..97acb3b81bf8 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -65,6 +65,10 @@ struct crypto_skcipher { struct crypto_tfm base; }; +struct crypto_sync_skcipher { + struct crypto_skcipher base; +}; + /** * struct skcipher_alg - symmetric key cipher definition * @min_keysize: Minimum key size supported by the transformation. This is the @@ -135,9 +139,17 @@ struct skcipher_alg { struct crypto_alg base; }; -#define SKCIPHER_REQUEST_ON_STACK(name, tfm) \ +#define MAX_SYNC_SKCIPHER_REQSIZE 384 +/* + * This performs a type-check against the "tfm" argument to make sure + * all users have the correct skcipher tfm for doing on-stack requests. + */ +#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \ char __##name##_desc[sizeof(struct skcipher_request) + \ - crypto_skcipher_reqsize(tfm)] CRYPTO_MINALIGN_ATTR; \ + MAX_SYNC_SKCIPHER_REQSIZE + \ + (!(sizeof((struct crypto_sync_skcipher *)1 == \ + (typeof(tfm))1))) \ + ] CRYPTO_MINALIGN_ATTR; \ struct skcipher_request *name = (void *)__##name##_desc /** @@ -193,6 +205,9 @@ static inline struct crypto_skcipher *__crypto_skcipher_cast( struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask); +struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name, + u32 type, u32 mask); + static inline struct crypto_tfm *crypto_skcipher_tfm( struct crypto_skcipher *tfm) { @@ -208,6 +223,11 @@ static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) crypto_destroy_tfm(tfm, crypto_skcipher_tfm(tfm)); } +static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm) +{ + crypto_free_skcipher(&tfm->base); +} + /** * crypto_has_skcipher() - Search for the availability of an skcipher. * @alg_name: is the cra_name / name or cra_driver_name / driver name of the @@ -276,6 +296,12 @@ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm) return tfm->ivsize; } +static inline unsigned int crypto_sync_skcipher_ivsize( + struct crypto_sync_skcipher *tfm) +{ + return crypto_skcipher_ivsize(&tfm->base); +} + static inline unsigned int crypto_skcipher_alg_chunksize( struct skcipher_alg *alg) { @@ -322,6 +348,12 @@ static inline unsigned int crypto_skcipher_blocksize( return crypto_tfm_alg_blocksize(crypto_skcipher_tfm(tfm)); } +static inline unsigned int crypto_sync_skcipher_blocksize( + struct crypto_sync_skcipher *tfm) +{ + return crypto_skcipher_blocksize(&tfm->base); +} + static inline unsigned int crypto_skcipher_alignmask( struct crypto_skcipher *tfm) { @@ -345,6 +377,24 @@ static inline void crypto_skcipher_clear_flags(struct crypto_skcipher *tfm, crypto_tfm_clear_flags(crypto_skcipher_tfm(tfm), flags); } +static inline u32 crypto_sync_skcipher_get_flags( + struct crypto_sync_skcipher *tfm) +{ + return crypto_skcipher_get_flags(&tfm->base); +} + +static inline void crypto_sync_skcipher_set_flags( + struct crypto_sync_skcipher *tfm, u32 flags) +{ + crypto_skcipher_set_flags(&tfm->base, flags); +} + +static inline void crypto_sync_skcipher_clear_flags( + struct crypto_sync_skcipher *tfm, u32 flags) +{ + crypto_skcipher_clear_flags(&tfm->base, flags); +} + /** * crypto_skcipher_setkey() - set key for cipher * @tfm: cipher handle @@ -372,6 +422,12 @@ static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm) return tfm->keysize; } +static inline int crypto_sync_skcipher_setkey(struct crypto_sync_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_skcipher_setkey(&tfm->base, key, keylen); +} + static inline unsigned int crypto_skcipher_default_keysize( struct crypto_skcipher *tfm) { @@ -393,6 +449,14 @@ static inline struct crypto_skcipher *crypto_skcipher_reqtfm( return __crypto_skcipher_cast(req->base.tfm); } +static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm( + struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + return container_of(tfm, struct crypto_sync_skcipher, base); +} + /** * crypto_skcipher_encrypt() - encrypt plaintext * @req: reference to the skcipher_request handle that holds all information @@ -465,6 +529,12 @@ static inline void skcipher_request_set_tfm(struct skcipher_request *req, req->base.tfm = crypto_skcipher_tfm(tfm); } +static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req, + struct crypto_sync_skcipher *tfm) +{ + skcipher_request_set_tfm(req, &tfm->base); +} + static inline struct skcipher_request *skcipher_request_cast( struct crypto_async_request *req) { diff --git a/include/crypto/xts.h b/include/crypto/xts.h index ede6b97b24cc..77b630672b2c 100644 --- a/include/crypto/xts.h +++ b/include/crypto/xts.h @@ -2,8 +2,7 @@ #define _CRYPTO_XTS_H #include -#include -#include +#include #include struct scatterlist; @@ -51,4 +50,27 @@ static inline int xts_check_key(struct crypto_tfm *tfm, return 0; } +static inline int xts_verify_key(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + /* + * key consists of keys of equal size concatenated, therefore + * the length must be even. + */ + if (keylen % 2) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* ensure that the AES and tweak key are not identical */ + if ((fips_enabled || crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_WEAK_KEY) && + !crypto_memneq(key, key + (keylen / 2), keylen / 2)) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY); + return -EINVAL; + } + + return 0; +} + #endif /* _CRYPTO_XTS_H */ diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 0c4f9c67c221..240f20f15be9 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -892,6 +893,12 @@ struct drm_device { struct drm_vma_offset_manager *vma_offset_manager; /*@} */ int switch_power_state; + + struct drm_bridge *bridge; + struct task_struct *bridge_enable_task; + struct kthread_worker bridge_enable_worker; + struct kthread_work bridge_enable_work; + atomic_t bridges_enabled; }; #include diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 07b2e9684064..e56597af375b 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -229,8 +229,11 @@ void drm_bridge_post_disable(struct drm_bridge *bridge); void drm_bridge_mode_set(struct drm_bridge *bridge, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); +void __drm_bridge_pre_enable(struct drm_bridge *bridge); void drm_bridge_pre_enable(struct drm_bridge *bridge); +void __drm_bridge_enable(struct drm_bridge *bridge); void drm_bridge_enable(struct drm_bridge *bridge); +void drm_bridge_enable_all(struct drm_device *dev); int drm_bridge_connector_init(struct drm_bridge *bridge, struct drm_connector *connector); diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h index 70d4e221a3ad..d0f6cf2e5324 100644 --- a/include/drm/drm_mem_util.h +++ b/include/drm/drm_mem_util.h @@ -37,8 +37,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) if (size * nmemb <= PAGE_SIZE) return kcalloc(nmemb, size, GFP_KERNEL); - return __vmalloc(size * nmemb, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + return vzalloc(size * nmemb); } /* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. */ @@ -50,8 +49,7 @@ static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size) if (size * nmemb <= PAGE_SIZE) return kmalloc(nmemb * size, GFP_KERNEL); - return __vmalloc(size * nmemb, - GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); + return vmalloc(size * nmemb); } static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) @@ -69,8 +67,7 @@ static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) return ptr; } - return __vmalloc(size * nmemb, - gfp | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size * nmemb, gfp, PAGE_KERNEL); } static __inline void drm_free_large(void *ptr) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e71835bf60a9..81a85fd7edda 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -627,6 +627,9 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) +#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) + #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif @@ -1023,6 +1026,9 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v } #endif +#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) +#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) + #include #endif /* _LINUX_ATOMIC_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6390eea37825..cd93c20c7d00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -526,13 +526,10 @@ struct request_queue { #define QUEUE_FLAG_FAST 27 /* fast block device (e.g. ram based) */ #define QUEUE_FLAG_INLINECRYPT 28 /* inline encryption support */ -#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_STACKABLE) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_ADD_RANDOM)) +#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_STACKABLE) | \ + (1 << QUEUE_FLAG_SAME_COMP)) -#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_STACKABLE) | \ +#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_POLL)) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 27424c24a53d..35a28e4fb2dd 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -76,6 +76,7 @@ enum { CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ + CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b37dee9f10ad..04766bfc22c4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -634,6 +634,8 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) */ int subsys_cgroup_allow_attach(struct cgroup_taskset *tset); +bool cgroup_psi_enabled(void); + static inline void cgroup_init_kthreadd(void) { /* @@ -686,6 +688,11 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) return NULL; } +static inline bool cgroup_psi_enabled(void) +{ + return false; +} + static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7139ecb59c9c..b2ef03bb5b20 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -202,6 +202,7 @@ extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); extern void put_online_cpus(void); +static inline void lockdep_assert_cpus_held(void) { } extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) @@ -217,6 +218,7 @@ int cpu_down(unsigned int cpu); static inline void cpu_hotplug_begin(void) {} static inline void cpu_hotplug_done(void) {} +static inline void lockdep_assert_cpus_held(void) { } #define get_online_cpus() do { } while (0) #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 8f3274acc5cf..0a5f4058444e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -535,6 +535,20 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +#define cpufreq_governor_init(__governor) \ +static int __init __governor##_init(void) \ +{ \ + return cpufreq_register_governor(&__governor); \ +} \ +core_initcall(__governor##_init) + +#define cpufreq_governor_exit(__governor) \ +static void __exit __governor##_exit(void) \ +{ \ + return cpufreq_unregister_governor(&__governor); \ +} \ +module_exit(__governor##_exit) + struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 9a8eec9e59b2..0a1b71602edc 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -74,6 +74,7 @@ struct cpuidle_driver_kobj; struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; + unsigned int use_deepest_state:1; unsigned int cpu; int last_residency; @@ -192,11 +193,12 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver( static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #endif -#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) +#ifdef CONFIG_CPU_IDLE extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev); +extern void cpuidle_use_deepest_state(bool enable); #else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) @@ -204,6 +206,9 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } +static inline void cpuidle_use_deepest_state(bool enable) +{ +} #endif /* kernel/sched/idle.c */ @@ -235,8 +240,6 @@ struct cpuidle_governor { int (*select) (struct cpuidle_driver *drv, struct cpuidle_device *dev); void (*reflect) (struct cpuidle_device *dev, int index); - - struct module *owner; }; #ifdef CONFIG_CPU_IDLE diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index df1790150b0d..1d4451a1807b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -54,6 +54,8 @@ extern int nr_cpu_ids; * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * cpu_isolated_mask- has bit 'cpu' set iff cpu isolated + * cpu_lp_mask - has bit 'cpu' set iff cpu is part of little cluster + * cpu_perf_mask - has bit 'cpu' set iff cpu is part of big cluster * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * @@ -96,6 +98,8 @@ extern struct cpumask __cpu_isolated_mask; #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_isolated_mask ((const struct cpumask *)&__cpu_isolated_mask) +extern const struct cpumask *const cpu_lp_mask; +extern const struct cpumask *const cpu_perf_mask; #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 4a9838feb086..579dddd6a704 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -100,23 +100,23 @@ int gpiod_direction_output_raw(struct gpio_desc *desc, int value); /* Value get/set from non-sleeping context */ int gpiod_get_value(const struct gpio_desc *desc); void gpiod_set_value(struct gpio_desc *desc, int value); -void gpiod_set_array_value(unsigned int array_size, +int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); int gpiod_get_raw_value(const struct gpio_desc *desc); void gpiod_set_raw_value(struct gpio_desc *desc, int value); -void gpiod_set_raw_array_value(unsigned int array_size, +int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); /* Value get/set from sleeping context */ int gpiod_get_value_cansleep(const struct gpio_desc *desc); void gpiod_set_value_cansleep(struct gpio_desc *desc, int value); -void gpiod_set_array_value_cansleep(unsigned int array_size, +int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc); void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); @@ -305,12 +305,13 @@ static inline void gpiod_set_value(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_array_value(unsigned int array_size, +static inline int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); + return 0; } static inline int gpiod_get_raw_value(const struct gpio_desc *desc) { @@ -323,12 +324,13 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_raw_array_value(unsigned int array_size, +static inline int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); + return 0; } static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) @@ -342,12 +344,13 @@ static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_array_value_cansleep(unsigned int array_size, +static inline int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); + return 0; } static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) { @@ -361,12 +364,13 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size, +static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); + return 0; } static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 657b56524a8a..dcc6ec1d473b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -35,7 +35,31 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); -extern unsigned long totalhigh_pages; +extern atomic_long_t _totalhigh_pages; +static inline unsigned long totalhigh_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalhigh_pages); +} + +static inline void totalhigh_pages_inc(void) +{ + atomic_long_inc(&_totalhigh_pages); +} + +static inline void totalhigh_pages_dec(void) +{ + atomic_long_dec(&_totalhigh_pages); +} + +static inline void totalhigh_pages_add(long count) +{ + atomic_long_add(count, &_totalhigh_pages); +} + +static inline void totalhigh_pages_set(long val) +{ + atomic_long_set(&_totalhigh_pages, val); +} void kmap_flush_unused(void); @@ -57,7 +81,7 @@ static inline struct page *kmap_to_page(void *addr) return virt_to_page(addr); } -#define totalhigh_pages 0UL +static inline unsigned long totalhigh_pages(void) { return 0UL; } #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) diff --git a/include/linux/i2c/i2c-msm-v2.h b/include/linux/i2c/i2c-msm-v2.h index f5d699f60899..a49569d2361a 100644 --- a/include/linux/i2c/i2c-msm-v2.h +++ b/include/linux/i2c/i2c-msm-v2.h @@ -26,9 +26,12 @@ enum msm_i2_debug_level { MSM_DBG, /* Low level details. Use for debugging */ }; +#define CONFIG_I2C_MSM_DEBUG 0 #define i2c_msm_dbg(ctrl, dbg_level, fmt, ...) do {\ + if (CONFIG_I2C_MSM_DEBUG) {\ if (ctrl->dbgfs.dbg_lvl >= dbg_level)\ dev_info(ctrl->dev, pr_fmt(fmt), ##__VA_ARGS__);\ + }\ } while (0) #define BIT_IS_SET(val, idx) ((val >> idx) & 0x1) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 325f649d77ff..377417200728 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -210,6 +210,7 @@ extern struct task_group root_task_group; .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .nr_cpus_allowed= NR_CPUS, \ + .cpus_requested = CPU_MASK_ALL, \ .mm = NULL, \ .active_mm = &init_mm, \ .restart_block = { \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d6d31a019bb5..4aa4a3b0e2dd 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -62,6 +62,8 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_PERF_CRITICAL - Interrupt is critical to the overall performance of the + * system and should be processed on a fast CPU. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -75,6 +77,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_PERF_CRITICAL 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -212,6 +215,8 @@ extern void irq_wake_thread(unsigned int irq, void *dev_id); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); +extern void unaffine_perf_irqs(void); +extern void reaffine_perf_irqs(void); /** * struct irq_affinity_notify - context for notification of IRQ affinity changes diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 6883e197acb9..45884ae0d454 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -9,7 +9,7 @@ #include #include -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 134 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bab381f5431d..bb3b9b4be796 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -44,6 +44,12 @@ #define STACK_MAGIC 0xdeadbeef +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) @@ -52,6 +58,10 @@ #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ @@ -71,7 +81,15 @@ #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) +/** + * FIELD_SIZEOF - get the size of a struct's field + * @t: the target struct + * @f: the target struct's field + * Return: the size of @f in the struct definition without having a + * declared instance of @t. + */ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_UP_ULL(ll,d) \ ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; }) @@ -237,13 +255,13 @@ extern int _cond_resched(void); * @ep_ro: right open interval endpoint * * Perform a "reciprocal multiplication" in order to "scale" a value into - * range [0, ep_ro), where the upper interval endpoint is right-open. + * range [0, @ep_ro), where the upper interval endpoint is right-open. * This is useful, e.g. for accessing a index of an array containing - * ep_ro elements, for example. Think of it as sort of modulus, only that + * @ep_ro elements, for example. Think of it as sort of modulus, only that * the result isn't that of modulo. ;) Note that if initial input is a * small value, then result will return 0. * - * Return: a result based on val in interval [0, ep_ro). + * Return: a result based on @val in interval [0, @ep_ro). */ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) { @@ -587,8 +605,8 @@ do { \ * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * - * Note: __trace_printk is an internal function for trace_printk and - * the @ip is passed in via the trace_printk macro. + * Note: __trace_printk is an internal function for trace_printk() and + * the @ip is passed in via the trace_printk() macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various @@ -598,7 +616,7 @@ do { \ * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_printks scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_printk() is used) + * allocated when trace_printk() is used.) * * A little optization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. @@ -613,6 +631,9 @@ do { \ * let gcc optimize the rest. */ +#ifdef CONFIG_DISABLE_TRACE_PRINTK +#define trace_printk pr_debug +#else #define trace_printk(fmt, ...) \ do { \ char _______STR[] = __stringify((__VA_ARGS__)); \ @@ -635,6 +656,7 @@ do { \ else \ __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) +#endif extern __printf(2, 3) int __trace_bprintk(unsigned long ip, const char *fmt, ...); @@ -650,7 +672,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * the @ip is passed in via the trace_puts macro. * * This is similar to trace_printk() but is made for those really fast - * paths that a developer wants the least amount of "Heisenbug" affects, + * paths that a developer wants the least amount of "Heisenbug" effects, * where the processing of the print format is still too much. * * This function allows a kernel developer to debug fast path sections @@ -661,7 +683,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_puts scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_puts() is used) + * allocated when trace_puts() is used.) * * Returns: 0 if nothing was written, positive # if string was. * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) @@ -731,31 +753,74 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* - * min()/max()/clamp() macros that also do - * strict type-checking.. See the - * "unnecessary" pointer comparison. - */ -#define __min(t1, t2, min1, min2, x, y) ({ \ - t1 min1 = (x); \ - t2 min2 = (y); \ - (void) (&min1 == &min2); \ - min1 < min2 ? min1 : min2; }) -#define min(x, y) \ - __min(typeof(x), typeof(y), \ - __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ - x, y) - -#define __max(t1, t2, max1, max2, x, y) ({ \ - t1 max1 = (x); \ - t2 max2 = (y); \ - (void) (&max1 == &max2); \ - max1 > max2 ? max1 : max2; }) -#define max(x, y) \ - __max(typeof(x), typeof(y), \ - __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ - x, y) + * min()/max()/clamp() macros must accomplish three things: + * + * - avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - perform strict type-checking (to generate warnings instead of + * nasty runtime surprises). See the "unnecessary" pointer comparison + * in __typecheck(). + * - retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +#define __no_side_effects(x, y) \ + (__is_constexpr(x) && __is_constexpr(y)) + +#define __safe_cmp(x, y) \ + (__typecheck(x, y) && __no_side_effects(x, y)) + +#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) + +#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + __cmp(unique_x, unique_y, op); }) +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), \ + __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define min(x, y) __careful_cmp(x, y, <) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define max(x, y) __careful_cmp(x, y, >) + +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define max3(x, y, z) max((typeof(x))max(x, y), z) /** @@ -774,8 +839,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of lo/hi to make sure they are of the - * same type as val. See the unnecessary pointer comparisons. + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. */ #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) @@ -785,15 +850,22 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * * Or not use min/max/clamp at all, of course. */ -#define min_t(type, x, y) \ - __min(type, type, \ - __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ - x, y) -#define max_t(type, x, y) \ - __max(type, type, \ - __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ - x, y) +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) /** * clamp_t - return a value clamped to a given range using a given type @@ -803,7 +875,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of type - * 'type' to make all the comparisons. + * @type to make all the comparisons. */ #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) @@ -814,15 +886,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of whatever - * type the input argument 'val' is. This is useful when val is an unsigned - * type and min and max are literals that will otherwise be assigned a signed + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed * integer type. */ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) -/* - * swap - swap value of @a and @b +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value */ #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 1c2a32829620..590343f6c1b1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -22,6 +22,7 @@ #define __KMEMLEAK_H #include +#include #ifdef CONFIG_DEBUG_KMEMLEAK @@ -30,6 +31,8 @@ extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, gfp_t gfp) __ref; +extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size, + gfp_t gfp) __ref; extern void kmemleak_free(const void *ptr) __ref; extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; @@ -81,6 +84,10 @@ static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, gfp_t gfp) { } +static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size, + gfp_t gfp) +{ +} static inline void kmemleak_free(const void *ptr) { } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index a6e82a69c363..09b823683cec 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -48,6 +48,23 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), __k; \ }) +/** + * kthread_run_perf_critical - create and wake a performance-critical thread. + * + * Same as kthread_run(), but with the kthread bound to performance CPUs. + */ +#define kthread_run_perf_critical(threadfn, data, namefmt, ...) \ +({ \ + struct task_struct *__k \ + = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__k)) { \ + __k->flags |= PF_PERF_CRITICAL; \ + kthread_bind_mask(__k, cpu_perf_mask); \ + wake_up_process(__k); \ + } \ + __k; \ +}) + void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 05aa860daa5c..dad7fb0c949c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -762,8 +762,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void *kvm_kvzalloc(unsigned long size); - #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { diff --git a/include/linux/list.h b/include/linux/list.h index 3ef3ade9930e..85baa403499d 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -556,6 +556,19 @@ static inline void list_splice_tail_init(struct list_head *list, for (; &pos->member != (head); \ pos = list_next_entry(pos, member)) +/** + * list_for_each_entry_from_reverse - iterate backwards over list of given type + * from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, continuing from current position. + */ +#define list_for_each_entry_from_reverse(pos, head, member) \ + for (; &pos->member != (head); \ + pos = list_prev_entry(pos, member)) + /** * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop cursor. diff --git a/include/linux/llist.h b/include/linux/llist.h index ac6796138ba0..9bbc3ed179c8 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -121,6 +121,25 @@ static inline void init_llist_head(struct llist_head *list) #define llist_for_each(pos, node) \ for ((pos) = (node); pos; (pos) = (pos)->next) +/** + * llist_for_each_safe - iterate over some deleted entries of a lock-less list + * safe against removal of list entry + * @pos: the &struct llist_node to use as a loop cursor + * @n: another &struct llist_node to use as temporary storage + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_safe(pos, n, node) \ + for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n)) + /** * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type * @pos: the type * to use as a loop cursor. diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 1b0f8ca0f9c8..394e3d9213b8 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -172,18 +172,6 @@ static inline int LZ4_compressBound(size_t isize) return LZ4_COMPRESSBOUND(isize); } -/** - * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound - * @isize: Size of the input data - * - * Return: Max. size LZ4 may output in a "worst case" szenario - * (data not compressible) - */ -static inline int lz4_compressbound(size_t isize) -{ - return LZ4_COMPRESSBOUND(isize); -} - /** * LZ4_compress_default() - Compress data from source to dest * @source: source address of the original data @@ -257,20 +245,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize, int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr, int targetDestSize, void *wrkmem); -/* - * lz4_compress() - For backward compatibility, see LZ4_compress_default - * @src: source address of the original data - * @src_len: size of the original data - * @dst: output buffer address of the compressed data. This requires 'dst' - * of size LZ4_COMPRESSBOUND - * @dst_len: is the output size, which is returned after compress done - * @workmem: address of the working memory. - * - * Return: Success if return 0, Error if return < 0 - */ -int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, - size_t *dst_len, void *wrkmem); - /*-************************************************************************ * Decompression Functions **************************************************************************/ @@ -346,34 +320,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, int LZ4_decompress_safe_partial(const char *source, char *dest, int compressedSize, int targetOutputSize, int maxDecompressedSize); -/* - * lz4_decompress_unknownoutputsize() - For backwards compatibility, - * see LZ4_decompress_safe - * @src: source address of the compressed data - * @src_len: is the input size, therefore the compressed size - * @dest: output buffer address of the decompressed data - * which must be already allocated - * @dest_len: is the max size of the destination buffer, which is - * returned with actual size of decompressed data after decompress done - * - * Return: Success if return 0, Error if return (< 0) - */ -int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len); - -/** - * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast - * @src: source address of the compressed data - * @src_len: is the input size, which is returned after decompress done - * @dest: output buffer address of the decompressed data, - * which must be already allocated - * @actual_dest_len: is the size of uncompressed data, supposing it's known - * - * Return: Success if return 0, Error if return (< 0) - */ -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len); - /*-************************************************************************ * LZ4 HC Compression **************************************************************************/ @@ -400,21 +346,6 @@ int lz4_decompress(const unsigned char *src, size_t *src_len, int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, int compressionLevel, void *wrkmem); -/** - * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC - * @src: source address of the original data - * @src_len: size of the original data - * @dst: output buffer address of the compressed data. This requires 'dst' - * of size LZ4_COMPRESSBOUND. - * @dst_len: is the output size, which is returned after compress done - * @wrkmem: address of the working memory. - * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. - * - * Return : Success if return 0, Error if return (< 0) - */ -int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, - size_t *dst_len, void *wrkmem); - /** * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure diff --git a/include/linux/lzo.h b/include/linux/lzo.h index a0848d9377e5..8e6795464b1d 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -17,12 +17,16 @@ #define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) #define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS -#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) +#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2) /* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem); +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ +int lzorle1x_1_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e2807ed281c..43cb6c04e3c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -44,7 +44,32 @@ static inline void set_max_mapnr(unsigned long limit) static inline void set_max_mapnr(unsigned long limit) { } #endif -extern unsigned long totalram_pages; +extern atomic_long_t _totalram_pages; +static inline unsigned long totalram_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalram_pages); +} + +static inline void totalram_pages_inc(void) +{ + atomic_long_inc(&_totalram_pages); +} + +static inline void totalram_pages_dec(void) +{ + atomic_long_dec(&_totalram_pages); +} + +static inline void totalram_pages_add(long count) +{ + atomic_long_add(count, &_totalram_pages); +} + +static inline void totalram_pages_set(long val) +{ + atomic_long_set(&_totalram_pages, val); +} + extern void * high_memory; extern int page_cluster; @@ -484,16 +509,16 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ - +static inline bool is_vmalloc_addr(const void *x) +{ #ifdef CONFIG_MMU -extern int is_vmalloc_addr(const void *x); + unsigned long addr = (unsigned long)x; + + return addr >= VMALLOC_START && addr < VMALLOC_END; #else -static inline int is_vmalloc_addr(const void *x) -{ - return 0; -} + return false; #endif - +} #ifdef CONFIG_MMU extern int is_vmalloc_or_module_addr(const void *x); #else @@ -503,6 +528,20 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + extern void kvfree(const void *addr); /* @@ -2161,8 +2200,8 @@ int write_one_page(struct page *page, int wait); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -#define VM_MAX_READAHEAD 128 /* kbytes */ -#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ +#define VM_MAX_READAHEAD 0 /* kbytes */ +#define VM_MIN_READAHEAD 0 /* kbytes (includes current page) */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); diff --git a/include/linux/oom.h b/include/linux/oom.h index 34ed57717a5a..e69445a2c74a 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -97,4 +97,8 @@ extern void wake_oom_reaper(struct task_struct *tsk); extern int sysctl_oom_dump_tasks; extern int sysctl_oom_kill_allocating_task; extern int sysctl_panic_on_oom; +extern int sysctl_reap_mem_on_sigkill; + +/* calls for LMK reaper */ +extern void add_to_oom_reaper(struct task_struct *p); #endif /* _INCLUDE_LINUX_OOM_H */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index cead4419f933..34c7660772d6 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -8,15 +8,16 @@ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H -/* 14 pointers + two long's align the pagevec structure to a power of two */ -#define PAGEVEC_SIZE 14 +/* 15 pointers + header align the pagevec structure to a power of two */ +#define PAGEVEC_SIZE 15 struct page; struct address_space; struct pagevec { - unsigned long nr; - unsigned long cold; + unsigned char nr; + bool cold; + bool drained; struct page *pages[PAGEVEC_SIZE]; }; @@ -45,6 +46,7 @@ static inline void pagevec_init(struct pagevec *pvec, int cold) { pvec->nr = 0; pvec->cold = cold; + pvec->drained = false; } static inline void pagevec_reinit(struct pagevec *pvec) diff --git a/include/linux/profile.h b/include/linux/profile.h index b537a25ffa17..3303d415c820 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -66,9 +66,6 @@ static inline void profile_hit(int type, void *ip) struct task_struct; struct mm_struct; -/* task is in do_exit() */ -void profile_task_exit(struct task_struct * task); - /* task is dead, free task struct ? Returns 1 if * the task was taken, 0 if the task should be freed. */ @@ -80,9 +77,6 @@ void profile_munmap(unsigned long addr); int task_handoff_register(struct notifier_block * n); int task_handoff_unregister(struct notifier_block * n); -int profile_event_register(enum profile_type, struct notifier_block * n); -int profile_event_unregister(enum profile_type, struct notifier_block * n); - struct pt_regs; #else @@ -119,20 +113,15 @@ static inline int task_handoff_unregister(struct notifier_block * n) return -ENOSYS; } -static inline int profile_event_register(enum profile_type t, struct notifier_block * n) -{ - return -ENOSYS; -} - -static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n) -{ - return -ENOSYS; -} - -#define profile_task_exit(a) do { } while (0) #define profile_handoff_task(a) (0) #define profile_munmap(a) do { } while (0) #endif /* CONFIG_PROFILING */ +/* task is in do_exit() */ +void profile_task_exit(struct task_struct * task); + +int profile_event_register(enum profile_type, struct notifier_block * n); +int profile_event_unregister(enum profile_type, struct notifier_block * n); + #endif /* _LINUX_PROFILE_H */ diff --git a/include/linux/psi.h b/include/linux/psi.h index 093186257f7d..0aa98f8a1d28 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -17,6 +17,8 @@ extern struct static_key_false psi_disabled; void psi_init(void); void psi_task_change(struct task_struct *task, int clear, int set); +void psi_task_switch(struct task_struct *prev, struct task_struct *next, + bool sleep); void psi_memstall_tick(struct task_struct *task, int cpu); void psi_memstall_enter(unsigned long *flags); diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 0023052eab23..c22fe2e96d03 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -14,13 +14,21 @@ enum psi_task_count { NR_IOWAIT, NR_MEMSTALL, NR_RUNNING, - NR_PSI_TASK_COUNTS = 3, + /* + * This can't have values other than 0 or 1 and could be + * implemented as a bit flag. But for now we still have room + * in the first cacheline of psi_group_cpu, and this way we + * don't have to special case any state tracking for it. + */ + NR_ONCPU, + NR_PSI_TASK_COUNTS = 4, }; /* Task state bitmasks */ #define TSK_IOWAIT (1 << NR_IOWAIT) #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) +#define TSK_ONCPU (1 << NR_ONCPU) /* Resources that workloads could be stalled on */ enum psi_res { diff --git a/include/linux/rslib.h b/include/linux/rslib.h index 746580c1939c..5974cedd008c 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -1,28 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * include/linux/rslib.h - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) * * RS code lifted from reed solomon library written by Phil Karn * Copyright 2002 Phil Karn, KA9Q - * - * $Id: rslib.h,v 1.4 2005/11/07 11:14:52 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ - #ifndef _RSLIB_H_ #define _RSLIB_H_ #include +#include /* for gfp_t */ +#include /* for GFP_KERNEL */ /** - * struct rs_control - rs control structure + * struct rs_codec - rs codec data * * @mm: Bits per symbol * @nn: Symbols per block (= (1<= 3 * rs->nn */ -static inline int rs_modnn(struct rs_control *rs, int x) +static inline int rs_modnn(struct rs_codec *rs, int x) { while (x >= rs->nn) { x -= rs->nn; diff --git a/include/linux/sched.h b/include/linux/sched.h index 45a72ce82245..f30a8eeb37f8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -365,7 +365,7 @@ extern int lockdep_tasklist_lock_is_held(void); extern void sched_init(void); extern void sched_init_smp(void); extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu, bool hotplug); +extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); extern cpumask_var_t cpu_isolated_map; @@ -1743,6 +1743,7 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; cpumask_t cpus_allowed; + cpumask_t cpus_requested; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; @@ -1813,6 +1814,10 @@ struct task_struct { /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; #endif +#ifdef CONFIG_PSI + /* Stalled due to lack of memory */ + unsigned in_memstall:1; +#endif unsigned long atomic_flags; /* Flags needing atomic access. */ @@ -2528,6 +2533,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ +#define PF_PERF_CRITICAL 0x01000000 /* Thread is performance-critical */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c014c38cf639..9330186d2933 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -34,6 +34,8 @@ extern unsigned int sysctl_sched_boost; extern unsigned int sysctl_sched_group_upmigrate_pct; extern unsigned int sysctl_sched_group_downmigrate_pct; extern unsigned int sysctl_sched_walt_rotate_big_tasks; +extern unsigned int sysctl_sched_min_task_util_for_boost_colocation; +extern unsigned int sysctl_sched_little_cluster_coloc_fmin_khz; extern int walt_proc_update_handler(struct ctl_table *table, int write, @@ -137,4 +139,9 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_SCHED_WALT +extern int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table, + int write, void __user *buffer, + size_t *lenp, loff_t *ppos); +#endif #endif /* _SCHED_SYSCTL_H */ diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h new file mode 100644 index 000000000000..b5071497b8cb --- /dev/null +++ b/include/linux/set_memory.h @@ -0,0 +1,57 @@ +/* + * Copyright 2017, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + */ +#ifndef _LINUX_SET_MEMORY_H_ +#define _LINUX_SET_MEMORY_H_ + +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +#include +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif + +#ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP +static inline int set_direct_map_invalid_noflush(struct page *page) +{ + return 0; +} +static inline int set_direct_map_default_noflush(struct page *page) +{ + return 0; +} +#endif + +#ifndef set_mce_nospec +static inline int set_mce_nospec(unsigned long pfn) +{ + return 0; +} +#endif + +#ifndef clear_mce_nospec +static inline int clear_mce_nospec(unsigned long pfn) +{ + return 0; +} +#endif + +#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT +static inline int set_memory_encrypted(unsigned long addr, int numpages) +{ + return 0; +} + +static inline int set_memory_decrypted(unsigned long addr, int numpages) +{ + return 0; +} +#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ + +#endif /* _LINUX_SET_MEMORY_H_ */ diff --git a/include/linux/string.h b/include/linux/string.h index a26fce1b2bc0..dbefca095c49 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -427,4 +427,34 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) #endif +/* + * Replace some common string helpers with faster alternatives when one of the + * arguments is a constant (i.e., literal string). This uses strlen instead of + * sizeof for calculating the string length in order to silence compiler + * warnings that may arise due to what the compiler thinks is incorrect sizeof + * usage. The strlen calls on constants are folded into scalar values at compile + * time, so performance is not reduced by using strlen. + */ +#define strcpy(dest, src) \ + __builtin_choose_expr(__builtin_constant_p(src), \ + memcpy((dest), (src), strlen(src) + 1), \ + (strcpy)((dest), (src))) + +#define strcat(dest, src) \ + __builtin_choose_expr(__builtin_constant_p(src), \ + ({ \ + memcpy((dest) + strlen(dest), (src), strlen(src) + 1); \ + (dest); \ + }), \ + (strcat)((dest), (src))) + +#define strcmp(left, right) \ + __builtin_choose_expr(__builtin_constant_p(left), \ + __builtin_choose_expr(__builtin_constant_p(right), \ + (strcmp)((left), (right)), \ + memcmp((left), (right), strlen(left) + 1)), \ + __builtin_choose_expr(__builtin_constant_p(right), \ + memcmp((left), (right), strlen(right) + 1), \ + (strcmp)((left), (right)))) + #endif /* _LINUX_STRING_H_ */ diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 7df625d41e35..f6e8ceafafd8 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -71,10 +71,10 @@ struct gss_krb5_enctype { const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_skcipher *tfm, + u32 (*encrypt) (struct crypto_sync_skcipher *tfm, void *iv, void *in, void *out, int length); /* encryption function */ - u32 (*decrypt) (struct crypto_skcipher *tfm, + u32 (*decrypt) (struct crypto_sync_skcipher *tfm, void *iv, void *in, void *out, int length); /* decryption function */ u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, @@ -98,12 +98,12 @@ struct krb5_ctx { u32 enctype; u32 flags; const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_skcipher *enc; - struct crypto_skcipher *seq; - struct crypto_skcipher *acceptor_enc; - struct crypto_skcipher *initiator_enc; - struct crypto_skcipher *acceptor_enc_aux; - struct crypto_skcipher *initiator_enc_aux; + struct crypto_sync_skcipher *enc; + struct crypto_sync_skcipher *seq; + struct crypto_sync_skcipher *acceptor_enc; + struct crypto_sync_skcipher *initiator_enc; + struct crypto_sync_skcipher *acceptor_enc_aux; + struct crypto_sync_skcipher *initiator_enc_aux; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; s32 endtime; @@ -262,24 +262,24 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, u32 -krb5_encrypt(struct crypto_skcipher *key, +krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); u32 -krb5_decrypt(struct crypto_skcipher *key, +krb5_decrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); int -gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *outbuf, +gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, int offset, struct page **pages); int -gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *inbuf, +gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf, int offset); s32 krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_skcipher *key, + struct crypto_sync_skcipher *key, int direction, u32 seqnum, unsigned char *cksum, unsigned char *buf); @@ -320,12 +320,12 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, int krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, - struct crypto_skcipher *cipher, + struct crypto_sync_skcipher *cipher, unsigned char *cksum); int krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, - struct crypto_skcipher *cipher, + struct crypto_sync_skcipher *cipher, s32 seqnum); void gss_krb5_make_confounder(char *p, u32 conflen); diff --git a/include/linux/swait.h b/include/linux/swait.h index c1f9c62a8a50..4a4e180d0a35 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -169,4 +169,59 @@ do { \ __ret; \ }) +#define __swait_event_idle(wq, condition) \ + (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule()) + +/** + * swait_event_idle - wait without system load contribution + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_IDLE) until the @condition evaluates to + * true. The @condition is checked each time the waitqueue @wq is woken up. + * + * This function is mostly used when a kthread or workqueue waits for some + * condition and doesn't want to contribute to system load. Signals are + * ignored. + */ +#define swait_event_idle(wq, condition) \ +do { \ + if (condition) \ + break; \ + __swait_event_idle(wq, condition); \ +} while (0) + +#define __swait_event_idle_timeout(wq, condition, timeout) \ + ___swait_event(wq, ___wait_cond_timeout(condition), \ + TASK_IDLE, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * swait_event_idle_timeout - wait up to timeout without load contribution + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout at which we'll give up in jiffies + * + * The process is put to sleep (TASK_IDLE) until the @condition evaluates to + * true. The @condition is checked each time the waitqueue @wq is woken up. + * + * This function is mostly used when a kthread or workqueue waits for some + * condition and doesn't want to contribute to system load. Signals are + * ignored. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. + */ +#define swait_event_idle_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __swait_event_idle_timeout(wq, \ + condition, timeout); \ + __ret; \ +}) + #endif /* _LINUX_SWAIT_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 72d03e816293..8ac4a1089e5a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -27,6 +27,7 @@ struct bio; #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) +#define SWAP_BATCH 64 static inline int current_is_kswapd(void) { @@ -178,6 +179,12 @@ enum { * protected by swap_info_struct.lock. */ struct swap_cluster_info { + spinlock_t lock; /* + * Protect swap_cluster_info fields + * and swap_info_struct->swap_map + * elements correspond to the swap + * cluster + */ unsigned int data:24; unsigned int flags:8; }; @@ -286,7 +293,6 @@ static inline void workingset_node_shadows_dec(struct radix_tree_node *node) } /* linux/mm/page_alloc.c */ -extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; extern unsigned long nr_free_buffer_pages(void); extern unsigned long nr_free_pagecache_pages(void); @@ -366,8 +372,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); /* linux/mm/swap_state.c */ -extern struct address_space swapper_spaces[]; -#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) +/* One swap address space for each 64M swap space */ +#define SWAP_ADDRESS_SPACE_SHIFT 14 +#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) +extern struct address_space *swapper_spaces[]; +#define swap_address_space(entry) \ + (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ + >> SWAP_ADDRESS_SPACE_SHIFT]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *, struct list_head *list); @@ -390,6 +401,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern bool is_swap_fast(swp_entry_t entry); +extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(struct swap_info_struct *si) @@ -412,23 +424,31 @@ static inline long get_nr_swap_pages(void) extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); +extern int get_swap_pages(int n, swp_entry_t swp_entries[]); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free(swp_entry_t); +extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; +extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); +extern void exit_swap_address_space(unsigned int type); + +extern int get_swap_slots(int n, swp_entry_t *slots); +extern void swapcache_free_batch(swp_entry_t *entries, int n); #else /* CONFIG_SWAP */ @@ -516,6 +536,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int __swp_swapcount(swp_entry_t entry) +{ + return 0; +} + static inline int swp_swapcount(swp_entry_t entry) { return 0; diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h new file mode 100644 index 000000000000..6ef92d17633d --- /dev/null +++ b/include/linux/swap_slots.h @@ -0,0 +1,30 @@ +#ifndef _LINUX_SWAP_SLOTS_H +#define _LINUX_SWAP_SLOTS_H + +#include +#include +#include + +#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH +#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE) +#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE) + +struct swap_slots_cache { + bool lock_initialized; + struct mutex alloc_lock; /* protects slots, nr, cur */ + swp_entry_t *slots; + int nr; + int cur; + spinlock_t free_lock; /* protects slots_ret, n_ret */ + swp_entry_t *slots_ret; + int n_ret; +}; + +void disable_swap_slots_cache_lock(void); +void reenable_swap_slots_cache_unlock(void); +int enable_swap_slots_cache(void); +int free_swap_slot(swp_entry_t entry); + +extern bool swap_slot_cache_enabled; + +#endif /* _LINUX_SWAP_SLOTS_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ff496573f8af..49cc6201105b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -192,7 +192,6 @@ struct tcp_sock { u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - unsigned long tsq_flags; /* Data for direct copy to user */ struct { @@ -292,6 +291,8 @@ struct tcp_sock { u32 sacked_out; /* SACK'd packets */ u32 fackets_out; /* FACK'd packets */ + struct hrtimer pacing_timer; + /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; @@ -371,7 +372,7 @@ struct tcp_sock { u32 *saved_syn; }; -enum tsq_flags { +enum tsq_enum { TSQ_THROTTLED, TSQ_QUEUED, TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ @@ -382,6 +383,15 @@ enum tsq_flags { */ }; +enum tsq_flags { + TSQF_THROTTLED = (1UL << TSQ_THROTTLED), + TSQF_QUEUED = (1UL << TSQ_QUEUED), + TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), +}; + static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 3306dd1da848..994b7780a96f 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -32,7 +32,7 @@ #include #define THERMAL_TRIPS_NONE -1 -#define THERMAL_MAX_TRIPS 12 +#define THERMAL_MAX_TRIPS 16 /* invalid cooling state */ #define THERMAL_CSTATE_INVALID -1UL diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 74e05e6a5d60..7e3f195e8138 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -19,8 +19,11 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ -#define VM_LOWMEM 0x00000100 /* Tracking of direct mapped lowmem */ - +/* + * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with + * vfree_atomic(). + */ +#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ /* bits [20..32] reserved for arch specific ioremap internals */ /* @@ -45,12 +48,16 @@ struct vm_struct { struct vmap_area { unsigned long va_start; unsigned long va_end; + + /* + * Largest available free size in subtree. + */ + unsigned long subtree_max_size; unsigned long flags; struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ struct llist_node purge_list; /* "lazy purge" list */ struct vm_struct *vm; - struct rcu_head rcu_head; }; /* @@ -84,6 +91,17 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +#ifndef CONFIG_MMU +extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); +static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, + gfp_t flags, void *caller) +{ + return __vmalloc_node_flags(size, node, flags); +} +#else +extern void *__vmalloc_node_flags_caller(unsigned long size, + int node, gfp_t flags, void *caller); +#endif extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); @@ -134,6 +152,13 @@ extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); +static inline void set_vm_flush_reset_perms(void *addr) +{ + struct vm_struct *vm = find_vm_area(addr); + + if (vm) + vm->flags |= VM_FLUSH_RESET_PERMS; +} #else static inline int map_kernel_range_noflush(unsigned long start, unsigned long size, @@ -149,6 +174,9 @@ static inline void unmap_kernel_range(unsigned long addr, unsigned long size) { } +static inline void set_vm_flush_reset_perms(void *addr) +{ +} #endif /* Allocate/destroy a 'vmalloc' VM area. */ @@ -165,13 +193,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); -extern __init int vm_area_check_early(struct vm_struct *vm); -#ifdef CONFIG_ENABLE_VMALLOC_SAVING -extern void mark_vmalloc_reserved_area(void *addr, unsigned long size); -#else -static inline void mark_vmalloc_reserved_area(void *addr, unsigned long size) -{ }; -#endif #ifdef CONFIG_SMP # ifdef CONFIG_MMU @@ -197,12 +218,7 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) #endif #ifdef CONFIG_MMU -#ifdef CONFIG_ENABLE_VMALLOC_SAVING -extern unsigned long total_vmalloc_size; -#define VMALLOC_TOTAL total_vmalloc_size -#else #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -#endif #else #define VMALLOC_TOTAL 0UL #endif diff --git a/include/linux/wake_gestures.h b/include/linux/wake_gestures.h new file mode 100644 index 000000000000..4711ea2a4203 --- /dev/null +++ b/include/linux/wake_gestures.h @@ -0,0 +1,35 @@ +/* + * include/linux/wake_gestures.h + * + * Copyright (c) 2013-15, Aaron Segaert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_WAKE_GESTURES_H +#define _LINUX_WAKE_GESTURES_H + +#include + +extern bool wg_switch; +extern bool wg_switch_temp; +extern bool wg_changed; + +bool scr_suspended_blueline(void); +bool scr_suspended(void); +void set_vibrate(void); + +#endif /* _LINUX_WAKE_GESTURES_H */ + diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b4eee186b602..cb7d053d0e47 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,7 +23,7 @@ struct fib_rule { u32 table; u8 action; u8 l3mdev; - /* 2 bytes hole, try to use */ + u8 ip_proto; u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -36,6 +36,8 @@ struct fib_rule { char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct fib_kuid_range uid_range; + struct fib_rule_port_range sport_range; + struct fib_rule_port_range dport_range; struct rcu_head rcu; }; @@ -134,6 +136,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) return frh->table; } +static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range) +{ + return range->start != 0 && range->end != 0; +} + +static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, + __be16 port) +{ + return ntohs(port) >= a->start && + ntohs(port) <= a->end; +} + +static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) +{ + return a->start != 0 && a->end != 0 && a->end < 0xffff && + a->start <= a->end; +} + +static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, + struct fib_rule_port_range *b) +{ + return a->start == b->start && + a->end == b->end; +} + +static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) +{ + return rule->ip_proto || + fib_rule_port_range_set(&rule->sport_range) || + fib_rule_port_range_set(&rule->dport_range); +} + struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); void fib_rules_unregister(struct fib_rules_ops *); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index af0745f316fe..acf1031c78a7 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -60,7 +60,7 @@ struct rtnl_link_ops { size_t priv_size; void (*setup)(struct net_device *dev); - int maxtype; + unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], struct nlattr *data[]); @@ -86,7 +86,7 @@ struct rtnl_link_ops { unsigned int (*get_num_rx_queues)(void); bool netns_refund; - int slave_maxtype; + unsigned int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_validate)(struct nlattr *tb[], struct nlattr *data[]); diff --git a/include/net/sock.h b/include/net/sock.h index e850b221cf29..d578abdbf40d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -250,6 +250,7 @@ struct sock_common { * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes * @sk_padding: unused element for alignment @@ -344,6 +345,9 @@ struct sock { #define sk_rxhash __sk_common.skc_rxhash socket_lock_t sk_lock; + atomic_t sk_drops; + int sk_rcvlowat; + struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -360,14 +364,13 @@ struct sock { struct sk_buff *tail; } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - __u32 sk_txhash; + int sk_forward_alloc; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int sk_napi_id; unsigned int sk_ll_usec; + /* ===== mostly read cache line ===== */ + unsigned int sk_napi_id; #endif - atomic_t sk_drops; int sk_rcvbuf; struct sk_filter __rcu *sk_filter; @@ -380,11 +383,31 @@ struct sock { #endif struct dst_entry __rcu *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - /* Note: 32bit hole on 64bit arches */ - atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; + + /* ===== cache line for TX ===== */ + int sk_wmem_queued; + atomic_t sk_wmem_alloc; + unsigned long sk_tsq_flags; + struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; + __s32 sk_peek_off; + int sk_write_pending; + u32 sk_pacing_status; /* see enum sk_pacing */ + long sk_sndtimeo; + struct timer_list sk_timer; + __u32 sk_priority; + __u32 sk_mark; + u32 sk_pacing_rate; /* bytes per second */ + u32 sk_max_pacing_rate; + struct page_frag sk_frag; + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; + gfp_t sk_allocation; + __u32 sk_txhash; /* * Because of non atomicity rules, all @@ -400,34 +423,20 @@ struct sock { #define SK_PROTOCOL_MAX U8_MAX kmemcheck_bitfield_end(flags); - int sk_wmem_queued; - gfp_t sk_allocation; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; - netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - int sk_gso_type; - unsigned int sk_gso_max_size; u16 sk_gso_max_segs; - int sk_rcvlowat; unsigned long sk_lingertime; - struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - __u32 sk_priority; - __u32 sk_mark; kuid_t sk_uid; spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; - long sk_sndtimeo; - struct timer_list sk_timer; ktime_t sk_stamp; #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; @@ -437,10 +446,6 @@ struct sock { u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; - struct page_frag sk_frag; - struct sk_buff *sk_send_head; - __s32 sk_peek_off; - int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; #endif @@ -457,6 +462,12 @@ struct sock { struct rcu_head sk_rcu; }; +enum sk_pacing { + SK_PACING_NONE = 0, + SK_PACING_NEEDED = 1, + SK_PACING_FQ = 2, +}; + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) #define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) diff --git a/include/net/tcp.h b/include/net/tcp.h index a65d01da7b80..4598f2a52932 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,7 +224,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_INIT_CWND 10 /* Bit Flags for sysctl_tcp_fastopen */ -#define TFO_CLIENT_ENABLE 1 +#define TFO_CLIENT_ENABLE 3 #define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ @@ -559,8 +559,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); @@ -600,6 +598,7 @@ void tcp_check_space(struct sock *sk); void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { + hrtimer_cancel(&tcp_sk(sk)->pacing_timer); inet_csk_clear_xmit_timers(sk); } @@ -725,11 +724,14 @@ u32 __tcp_select_window(struct sock *sk); void tcp_send_window_probe(struct sock *sk); -/* TCP timestamps are only 32-bits, this causes a slight - * complication on 64-bit systems since we store a snapshot - * of jiffies in the buffer control blocks below. We decided - * to use only the low 32-bits of jiffies and hide the ugly - * casts with the following macro. +/* TCP uses 32bit jiffies to save some space. + * Note that this is different from tcp_time_stamp, which + * historically has been the same until linux-4.13. + */ +#define tcp_jiffies32 ((u32)jiffies) + +/* Generator for TCP TS option (RFC 7323) + * Currently tied to 'jiffies' but will soon be driven by 1 ms clock. */ #define tcp_time_stamp ((__u32)(jiffies)) @@ -926,6 +928,7 @@ struct rate_sample { u32 prior_in_flight; /* in flight before this ACK */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ }; struct tcp_congestion_ops { @@ -952,8 +955,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* suggest number of segments for each skb to transmit (optional) */ - u32 (*tso_segs_goal)(struct sock *sk); + /* override sysctl_tcp_min_tso_segs */ + u32 (*min_tso_segs)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); /* call when packets are delivered to update cwnd and pacing rate, @@ -984,6 +987,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); +u32 tcp_reno_undo_cwnd(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; @@ -1180,6 +1184,17 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) return false; } +/* BBR congestion control needs pacing. + * Same remark for SO_MAX_PACING_RATE. + * sch_fq packet scheduler is efficiently handling pacing, + * but is not always installed/used. + * Return true if TCP stack should pace packets itself. + */ +static inline bool tcp_needs_internal_pacing(const struct sock *sk) +{ + return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; +} + /* Something is really bad, we could not queue an additional packet, * because qdisc is full or receiver sent a 0 window. * We do not want to add fuel to the fire, or abort too early, @@ -1282,7 +1297,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) if (!sysctl_tcp_slow_start_after_idle || tp->packets_out) return; - delta = tcp_time_stamp - tp->lsndtime; + delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) tcp_cwnd_restart(sk, delta); } @@ -1345,8 +1360,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) { const struct inet_connection_sock *icsk = &tp->inet_conn; - return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime, - tcp_time_stamp - tp->rcv_tstamp); + return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime, + tcp_jiffies32 - tp->rcv_tstamp); } static inline int tcp_fin_time(const struct sock *sk) @@ -1993,4 +2008,6 @@ static inline void tcp_listendrop(const struct sock *sk) __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); + #endif /* _TCP_H */ diff --git a/include/soc/qcom/lpm_levels.h b/include/soc/qcom/lpm_levels.h index 3665cafba98a..7104ed5e64d7 100644 --- a/include/soc/qcom/lpm_levels.h +++ b/include/soc/qcom/lpm_levels.h @@ -15,7 +15,7 @@ struct system_pm_ops { int (*enter)(struct cpumask *mask); - void (*exit)(void); + void (*exit)(bool success); int (*update_wakeup)(bool); bool (*sleep_allowed)(void); }; diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h new file mode 100644 index 000000000000..27f5caa6299a --- /dev/null +++ b/include/trace/events/erofs.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM erofs + +#if !defined(_TRACE_EROFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EROFS_H + +#include +#include + +struct erofs_map_blocks; + +#define show_dev(dev) MAJOR(dev), MINOR(dev) +#define show_dev_nid(entry) show_dev(entry->dev), entry->nid + +#define show_file_type(type) \ + __print_symbolic(type, \ + { 0, "FILE" }, \ + { 1, "DIR" }) + +#define show_map_flags(flags) __print_flags(flags, "|", \ + { EROFS_GET_BLOCKS_RAW, "RAW" }) + +#define show_mflags(flags) __print_flags(flags, "", \ + { EROFS_MAP_MAPPED, "M" }, \ + { EROFS_MAP_META, "I" }, \ + { EROFS_MAP_ZIPPED, "Z" }) + +TRACE_EVENT(erofs_lookup, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(const char *, name ) + __field(unsigned int, flags ) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->nid = EROFS_I(dir)->nid; + __entry->name = dentry->d_name.name; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", + show_dev_nid(__entry), + __entry->name, + __entry->flags) +); + +TRACE_EVENT(erofs_fill_inode, + TP_PROTO(struct inode *inode, int isdir), + TP_ARGS(inode, isdir), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(erofs_blk_t, blkaddr ) + __field(unsigned int, ofs ) + __field(int, isdir ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_I(inode)->nid; + __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid)); + __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid)); + __entry->isdir = isdir; + ), + + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d", + show_dev_nid(__entry), + __entry->blkaddr, __entry->ofs, + __entry->isdir) +); + +TRACE_EVENT(erofs_readpage, + + TP_PROTO(struct page *page, bool raw), + + TP_ARGS(page, raw), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(int, dir ) + __field(pgoff_t, index ) + __field(int, uptodate) + __field(bool, raw ) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->nid = EROFS_I(page->mapping->host)->nid; + __entry->dir = S_ISDIR(page->mapping->host->i_mode); + __entry->index = page->index; + __entry->uptodate = PageUptodate(page); + __entry->raw = raw; + ), + + TP_printk("dev = (%d,%d), nid = %llu, %s, index = %lu, uptodate = %d " + "raw = %d", + show_dev_nid(__entry), + show_file_type(__entry->dir), + (unsigned long)__entry->index, + __entry->uptodate, + __entry->raw) +); + +TRACE_EVENT(erofs_readpages, + + TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage, + bool raw), + + TP_ARGS(inode, page, nrpage, raw), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(erofs_nid_t, nid ) + __field(pgoff_t, start ) + __field(unsigned int, nrpage ) + __field(bool, raw ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_I(inode)->nid; + __entry->start = page->index; + __entry->nrpage = nrpage; + __entry->raw = raw; + ), + + TP_printk("dev = (%d,%d), nid = %llu, start = %lu nrpage = %u raw = %d", + show_dev_nid(__entry), + (unsigned long)__entry->start, + __entry->nrpage, + __entry->raw) +); + +DECLARE_EVENT_CLASS(erofs__map_blocks_enter, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned int flags), + + TP_ARGS(inode, map, flags), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( erofs_nid_t, nid ) + __field( erofs_off_t, la ) + __field( u64, llen ) + __field( unsigned int, flags ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_I(inode)->nid; + __entry->la = map->m_la; + __entry->llen = map->m_llen; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s", + show_dev_nid(__entry), + __entry->la, __entry->llen, + __entry->flags ? show_map_flags(__entry->flags) : "NULL") +); + +DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned flags), + + TP_ARGS(inode, map, flags) +); + +DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned int flags), + + TP_ARGS(inode, map, flags) +); + +DECLARE_EVENT_CLASS(erofs__map_blocks_exit, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned int flags, int ret), + + TP_ARGS(inode, map, flags, ret), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( erofs_nid_t, nid ) + __field( unsigned int, flags ) + __field( erofs_off_t, la ) + __field( erofs_off_t, pa ) + __field( u64, llen ) + __field( u64, plen ) + __field( unsigned int, mflags ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_I(inode)->nid; + __entry->flags = flags; + __entry->la = map->m_la; + __entry->pa = map->m_pa; + __entry->llen = map->m_llen; + __entry->plen = map->m_plen; + __entry->mflags = map->m_flags; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), nid = %llu, flags %s " + "la %llu pa %llu llen %llu plen %llu mflags %s ret %d", + show_dev_nid(__entry), + __entry->flags ? show_map_flags(__entry->flags) : "NULL", + __entry->la, __entry->pa, __entry->llen, __entry->plen, + show_mflags(__entry->mflags), __entry->ret) +); + +DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned flags, int ret), + + TP_ARGS(inode, map, flags, ret) +); + +DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, + unsigned int flags, int ret), + + TP_ARGS(inode, map, flags, ret) +); + +TRACE_EVENT(erofs_destroy_inode, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( erofs_nid_t, nid ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = EROFS_I(inode)->nid; + ), + + TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) +); + +#endif /* _TRACE_EROFS_H */ + + /* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index db75f333a50c..92332a715cbf 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -596,8 +596,14 @@ DEFINE_EVENT(sched_cpu_load, sched_cpu_load_lb, TRACE_EVENT(sched_load_to_gov, - TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load, u64 freq_aggr_thresh, u64 load, int policy, int big_task_rotation), - TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy, big_task_rotation), + TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load, + u64 freq_aggr_thresh, u64 load, int policy, + int big_task_rotation, + unsigned int sysctl_sched_little_cluster_coloc_fmin_khz, + u64 coloc_boost_load), + TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy, + big_task_rotation, sysctl_sched_little_cluster_coloc_fmin_khz, + coloc_boost_load), TP_STRUCT__entry( __field( int, cpu ) @@ -613,6 +619,9 @@ TRACE_EVENT(sched_load_to_gov, __field( u64, pl ) __field( u64, load ) __field( int, big_task_rotation ) + __field(unsigned int, + sysctl_sched_little_cluster_coloc_fmin_khz) + __field( u64, coloc_boost_load ) ), TP_fast_assign( @@ -629,14 +638,19 @@ TRACE_EVENT(sched_load_to_gov, __entry->pl = rq->walt_stats.pred_demands_sum; __entry->load = load; __entry->big_task_rotation = big_task_rotation; + __entry->sysctl_sched_little_cluster_coloc_fmin_khz = + sysctl_sched_little_cluster_coloc_fmin_khz; + __entry->coloc_boost_load = coloc_boost_load; ), - TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu big_task_rotation=%d", + TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu big_task_rotation=%d sysctl_sched_little_cluster_coloc_fmin_khz=%u coloc_boost_load=%llu", __entry->cpu, __entry->policy, __entry->ed_task_pid, __entry->aggr_grp_load, __entry->freq_aggr_thresh, __entry->tt_load, __entry->rq_ps, __entry->grp_rq_ps, __entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load, - __entry->big_task_rotation) + __entry->big_task_rotation, + __entry->sysctl_sched_little_cluster_coloc_fmin_khz, + __entry->coloc_boost_load) ); #endif @@ -657,6 +671,10 @@ TRACE_EVENT(sched_cpu_util, __field(unsigned int, capacity_orig ) __field(int, idle_state ) __field(u64, irqload ) + __field(int, online ) + __field(int, isolated ) + __field(int, reserved ) + __field(int, high_irq_load ) ), TP_fast_assign( @@ -669,10 +687,14 @@ TRACE_EVENT(sched_cpu_util, __entry->capacity_orig = capacity_orig_of(cpu); __entry->idle_state = idle_get_state_idx(cpu_rq(cpu)); __entry->irqload = sched_irqload(cpu); + __entry->online = cpu_online(cpu); + __entry->isolated = cpu_isolated(cpu); + __entry->reserved = is_reserved(cpu); + __entry->high_irq_load = sched_cpu_high_irqload(cpu); ), - TP_printk("cpu=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld capacity_curr=%u capacity=%u capacity_orig=%u idle_state=%d irqload=%llu", - __entry->cpu, __entry->nr_running, __entry->cpu_util, __entry->cpu_util_cum, __entry->capacity_curr, __entry->capacity, __entry->capacity_orig, __entry->idle_state, __entry->irqload) + TP_printk("cpu=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld capacity_curr=%u capacity=%u capacity_orig=%u idle_state=%d irqload=%llu online=%u isolated=%u reserved=%u high_irq_load=%u", + __entry->cpu, __entry->nr_running, __entry->cpu_util, __entry->cpu_util_cum, __entry->capacity_curr, __entry->capacity, __entry->capacity_orig, __entry->idle_state, __entry->irqload, __entry->online, __entry->isolated, __entry->reserved, __entry->high_irq_load) ); TRACE_EVENT(sched_energy_diff, @@ -1635,10 +1657,11 @@ TRACE_EVENT(sched_find_best_target, TP_PROTO(struct task_struct *tsk, bool prefer_idle, unsigned long min_util, int start_cpu, - int best_idle, int best_active, int target), + int best_idle, int best_active, int target, + int backup_cpu), TP_ARGS(tsk, prefer_idle, min_util, start_cpu, - best_idle, best_active, target), + best_idle, best_active, target, backup_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -1649,6 +1672,7 @@ TRACE_EVENT(sched_find_best_target, __field( int, best_idle ) __field( int, best_active ) __field( int, target ) + __field( int, backup_cpu ) ), TP_fast_assign( @@ -1660,14 +1684,16 @@ TRACE_EVENT(sched_find_best_target, __entry->best_idle = best_idle; __entry->best_active = best_active; __entry->target = target; + __entry->backup_cpu = backup_cpu; ), TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d " - "best_idle=%d best_active=%d target=%d", + "best_idle=%d best_active=%d target=%d backup=%d", __entry->pid, __entry->comm, __entry->prefer_idle, __entry->start_cpu, __entry->best_idle, __entry->best_active, - __entry->target) + __entry->target, + __entry->backup_cpu) ); TRACE_EVENT(sched_group_energy, diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index bbf02a63a011..b67b5946e94d 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -34,6 +34,11 @@ struct fib_rule_uid_range { __u32 end; }; +struct fib_rule_port_range { + __u16 start; + __u16 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -57,6 +62,9 @@ enum { FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ + FRA_IP_PROTO, /* ip proto */ + FRA_SPORT_RANGE, /* sport */ + FRA_DPORT_RANGE, /* dport */ __FRA_MAX }; diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h index 943ba9ec33f6..96053c68f85e 100644 --- a/include/uapi/linux/msm_kgsl.h +++ b/include/uapi/linux/msm_kgsl.h @@ -65,6 +65,7 @@ #define KGSL_CONTEXT_TYPE_CL 2 #define KGSL_CONTEXT_TYPE_C2D 3 #define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_VK 5 #define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E #define KGSL_CONTEXT_INVALIDATE_ON_FAULT 0x10000000 @@ -332,6 +333,8 @@ enum kgsl_timestamp_type { #define KGSL_PROP_UBWC_MODE 0x1B #define KGSL_PROP_DEVICE_QTIMER 0x20 #define KGSL_PROP_L3_PWR_CONSTRAINT 0x22 +#define KGSL_PROP_SECURE_BUFFER_ALIGNMENT 0x23 +#define KGSL_PROP_SECURE_CTXT_SUPPORT 0x24 struct kgsl_shadowprop { unsigned long gpuaddr; diff --git a/init/Kconfig b/init/Kconfig index 5ac515188a57..fb26f15ff056 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1188,6 +1188,7 @@ config FAIR_GROUP_SCHED config CFS_BANDWIDTH bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" depends on FAIR_GROUP_SCHED + depends on !SCHED_WALT default n help This option allows users to define CPU bandwidth rates (limits) for @@ -1699,6 +1700,16 @@ config KALLSYMS_BASE_RELATIVE time constants, and no relocation pass is required at runtime to fix up the entries based on the runtime load address of the kernel. +config PROC_KALLSYMS + bool "Expose kallsyms data in procfs" + depends on KALLSYMS + help + This option will make the kernel expose all the kernel symbol data + offered by kallsyms to userspace through procfs. Symbols will be + accessible from userspace under the /proc/kallsyms node. This may be + desirable for debugging, but can be a security risk because it exposes + information about the kernel's internals. + config PRINTK default y bool "Enable support for printk" if EXPERT diff --git a/init/initramfs.c b/init/initramfs.c index bf3af10c500a..2a2292b800ac 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -607,6 +607,16 @@ static void __init clean_rootfs(void) } #endif +static int __initdata skip_override = 0; + +static int __init skip_override_param(char *str) +{ + skip_override = 1; + return 1; +} +__setup("skip_override", skip_override_param); + + static int __initdata do_skip_initramfs; static int __init skip_initramfs_param(char *str) @@ -622,7 +632,7 @@ static int __init populate_rootfs(void) { char *err; - if (do_skip_initramfs) { + if (!skip_override && do_skip_initramfs) { if (initrd_start) free_initrd(); return default_rootfs(); diff --git a/init/main.c b/init/main.c index 2a857b9479b8..705adee5cb81 100644 --- a/init/main.c +++ b/init/main.c @@ -201,13 +201,13 @@ EXPORT_SYMBOL(loops_per_jiffy); static int __init debug_kernel(char *str) { - console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + console_loglevel = 0; return 0; } static int __init quiet_kernel(char *str) { - console_loglevel = CONSOLE_LOGLEVEL_QUIET; + console_loglevel = 0; return 0; } @@ -674,8 +674,6 @@ asmlinkage __visible void __init start_kernel(void) efi_free_boot_services(); } - ftrace_init(); - /* Do the rest non-__init'ed, we're now alive */ rest_init(); diff --git a/ipc/util.c b/ipc/util.c index 76d4afcde7bb..721b96d8b9c3 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -403,12 +403,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) */ void *ipc_alloc(int size) { - void *out; - if (size > PAGE_SIZE) - out = vmalloc(size); - else - out = kmalloc(size, GFP_KERNEL); - return out; + return kvmalloc(size, GFP_KERNEL); } /** diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2b1a925489cf..b513a02a015b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -75,8 +75,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog_aux *aux; struct bpf_prog *fp; @@ -104,8 +103,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_alloc); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; u32 pages, delta; int ret; @@ -431,8 +429,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 37bbced91701..3bdcd41aed3d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -68,8 +68,7 @@ void *bpf_map_area_alloc(size_t size) return area; } - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, - PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL); } void bpf_map_area_free(void *area) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 930e5aec1bcd..5c6deb033c96 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -238,6 +238,22 @@ static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; +/* cgroup optional features */ +enum cgroup_opt_features { +#ifdef CONFIG_PSI + OPT_FEATURE_PRESSURE, +#endif + OPT_FEATURE_COUNT +}; + +static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = { +#ifdef CONFIG_PSI + "pressure", +#endif +}; + +static u16 cgroup_feature_disable_mask __read_mostly; + static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); static void cgroup_lock_and_drain_offline(struct cgroup *cgrp); static int cgroup_apply_control(struct cgroup *cgrp); @@ -3634,6 +3650,18 @@ static void cgroup_pressure_release(struct kernfs_open_file *of) { psi_trigger_destroy(of->priv); } + +bool cgroup_psi_enabled(void) +{ + return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0; +} + +#else /* CONFIG_PSI */ +bool cgroup_psi_enabled(void) +{ + return false; +} + #endif /* CONFIG_PSI */ static int cgroup_file_open(struct kernfs_open_file *of) @@ -3871,6 +3899,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, restart: for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) @@ -3948,6 +3978,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) WARN_ON(cft->ss || cft->kf_ops); + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) + continue; + if (cft->seq_start) kf_ops = &cgroup_kf_ops; else @@ -5092,7 +5125,7 @@ static struct cftype cgroup_dfl_base_files[] = { #ifdef CONFIG_PSI { .name = "io.pressure", - .flags = CFTYPE_NOT_ON_ROOT, + .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE, .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@ -5100,7 +5133,7 @@ static struct cftype cgroup_dfl_base_files[] = { }, { .name = "memory.pressure", - .flags = CFTYPE_NOT_ON_ROOT, + .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE, .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@ -5108,7 +5141,7 @@ static struct cftype cgroup_dfl_base_files[] = { }, { .name = "cpu.pressure", - .flags = CFTYPE_NOT_ON_ROOT, + .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE, .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, @@ -6353,6 +6386,15 @@ static int __init cgroup_disable(char *str) pr_info("Disabling %s control group subsystem\n", ss->name); } + + for (i = 0; i < OPT_FEATURE_COUNT; i++) { + if (strcmp(token, cgroup_opt_feature_names[i])) + continue; + cgroup_feature_disable_mask |= 1 << i; + pr_info("Disabling %s control group feature\n", + cgroup_opt_feature_names[i]); + break; + } } return 1; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 10f36f20dbe7..83ef94ce4841 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1091,8 +1091,15 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) static int do_cpu_down(unsigned int cpu, enum cpuhp_state target) { + struct cpumask newmask; int err; + cpumask_andnot(&newmask, cpu_online_mask, cpumask_of(cpu)); + /* One big cluster CPU and one little cluster CPU must remain online */ + if (!cpumask_intersects(&newmask, cpu_perf_mask) || + !cpumask_intersects(&newmask, cpu_lp_mask)) + return -EINVAL; + cpu_maps_update_begin(); err = cpu_down_maps_locked(cpu, target); cpu_maps_update_done(); @@ -1299,6 +1306,7 @@ int freeze_secondary_cpus(int primary) { int cpu, error = 0; + unaffine_perf_irqs(); cpu_maps_update_begin(); if (!cpu_online(primary)) primary = cpumask_first(cpu_online_mask); @@ -1308,7 +1316,7 @@ int freeze_secondary_cpus(int primary) */ cpumask_clear(frozen_cpus); - pr_info("Disabling non-boot CPUs ...\n"); + pr_debug("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == primary) continue; @@ -1326,7 +1334,7 @@ int freeze_secondary_cpus(int primary) if (!error) BUG_ON(num_online_cpus() > 1); else - pr_err("Non-boot CPUs are not disabled\n"); + pr_debug("Non-boot CPUs are not disabled\n"); /* * Make sure the CPUs won't be enabled by someone else. We need to do @@ -1358,7 +1366,7 @@ void enable_nonboot_cpus(void) if (cpumask_empty(frozen_cpus)) goto out; - pr_info("Enabling non-boot CPUs ...\n"); + pr_debug("Enabling non-boot CPUs ...\n"); arch_enable_nonboot_cpus_begin(); @@ -1367,10 +1375,10 @@ void enable_nonboot_cpus(void) error = _cpu_up(cpu, 1, CPUHP_ONLINE); trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { - pr_info("CPU%d is up\n", cpu); + pr_debug("CPU%d is up\n", cpu); cpu_device = get_cpu_device(cpu); if (!cpu_device) - pr_err("%s: failed to get cpu%d device\n", + pr_debug("%s: failed to get cpu%d device\n", __func__, cpu); else kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE); @@ -1384,6 +1392,7 @@ void enable_nonboot_cpus(void) cpumask_clear(frozen_cpus); out: cpu_maps_update_done(); + reaffine_perf_irqs(); } static int __init alloc_frozen_cpus(void) @@ -2280,6 +2289,21 @@ EXPORT_SYMBOL(__cpu_active_mask); struct cpumask __cpu_isolated_mask __read_mostly; EXPORT_SYMBOL(__cpu_isolated_mask); +/* + * This assumes that half of the CPUs are little and that they have lower + * CPU numbers than the big CPUs (e.g., on an 8-core system, CPUs 0-3 would be + * little and CPUs 4-7 would be big). + */ +#define LITTLE_CPU_MASK ((1UL << (NR_CPUS / 2)) - 1) +#define BIG_CPU_MASK (((1UL << NR_CPUS) - 1) & ~LITTLE_CPU_MASK) +static const unsigned long little_cluster_cpus = LITTLE_CPU_MASK; +const struct cpumask *const cpu_lp_mask = to_cpumask(&little_cluster_cpus); +EXPORT_SYMBOL(cpu_lp_mask); + +static const unsigned long big_cluster_cpus = BIG_CPU_MASK; +const struct cpumask *const cpu_perf_mask = to_cpumask(&big_cluster_cpus); +EXPORT_SYMBOL(cpu_perf_mask); + void init_cpu_present(const struct cpumask *src) { cpumask_copy(&__cpu_present_mask, src); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d50bf8f893c3..2c66ee095192 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -698,7 +698,8 @@ static int generate_sched_domains(cpumask_var_t **domains, cpumask_intersects(cp->cpus_allowed, non_isolated_cpus))) continue; - if (is_sched_load_balance(cp)) + if (is_sched_load_balance(cp) && + !cpumask_empty(cp->effective_cpus)) csa[csn++] = cp; /* skip @cp's subtree */ @@ -852,6 +853,20 @@ void rebuild_sched_domains(void) put_online_cpus(); } +static int update_cpus_allowed(struct cpuset *cs, struct task_struct *p, + const struct cpumask *new_mask) +{ + int ret; + + if (cpumask_subset(&p->cpus_requested, cs->cpus_requested)) { + ret = set_cpus_allowed_ptr(p, &p->cpus_requested); + if (!ret) + return ret; + } + + return set_cpus_allowed_ptr(p, new_mask); +} + /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed @@ -867,7 +882,7 @@ static void update_tasks_cpumask(struct cpuset *cs) css_task_iter_start(&cs->css, &it); while ((task = css_task_iter_next(&it))) - set_cpus_allowed_ptr(task, cs->effective_cpus); + update_cpus_allowed(cs, task, cs->effective_cpus); css_task_iter_end(&it); } @@ -1550,7 +1565,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) * can_attach beforehand should guarantee that this doesn't * fail. TODO: have a better way to handle failure here */ - WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); + WARN_ON_ONCE(update_cpus_allowed(cs, task, cpus_attach)); cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, task); diff --git a/kernel/events/core.c b/kernel/events/core.c index 1bc7f1d7448b..9b16c1315f7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -439,7 +439,7 @@ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' */ #define DEFAULT_MAX_SAMPLE_RATE 100000 #define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE) -#define DEFAULT_CPU_TIME_MAX_PERCENT 25 +#define DEFAULT_CPU_TIME_MAX_PERCENT 5 int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; @@ -5075,8 +5075,8 @@ void perf_event_update_userpage(struct perf_event *event) userpg = rb->user_page; /* - * Disable preemption so as to not let the corresponding user-space - * spin too long if we get preempted. + * Disable preemption to guarantee consistent time stamps are stored to + * the user page. */ preempt_disable(); ++userpg->lock; diff --git a/kernel/exit.c b/kernel/exit.c index 37a88fd0bf1b..bab79bb82491 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -742,6 +742,32 @@ static void check_stack_usage(void) static inline void check_stack_usage(void) {} #endif +#ifndef CONFIG_PROFILING +static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); + +int profile_event_register(enum profile_type t, struct notifier_block *n) +{ + if (t == PROFILE_TASK_EXIT) + return blocking_notifier_chain_register(&task_exit_notifier, n); + + return -ENOSYS; +} + +int profile_event_unregister(enum profile_type t, struct notifier_block *n) +{ + if (t == PROFILE_TASK_EXIT) + return blocking_notifier_chain_unregister(&task_exit_notifier, + n); + + return -ENOSYS; +} + +void profile_task_exit(struct task_struct *tsk) +{ + blocking_notifier_call_chain(&task_exit_notifier, 0, tsk); +} +#endif + void __noreturn do_exit(long code) { struct task_struct *tsk = current; diff --git a/kernel/fork.c b/kernel/fork.c index ba845cfab7be..96bcf919b20c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -199,7 +199,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, VMALLOC_START, VMALLOC_END, - THREADINFO_GFP | __GFP_HIGHMEM, + THREADINFO_GFP, PAGE_KERNEL, 0, node, __builtin_return_address(0)); @@ -420,10 +420,10 @@ static void set_max_threads(unsigned int max_threads_suggested) * The number of threads shall be limited such that the thread * structures may only consume a small part of the available memory. */ - if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) + if (fls64(totalram_pages()) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else - threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, + threads = div64_u64((u64) totalram_pages() * (u64) PAGE_SIZE, (u64) THREAD_SIZE * 8UL); if (threads > max_threads_suggested) @@ -928,7 +928,7 @@ void mmput_async(struct mm_struct *mm) { if (atomic_dec_and_test(&mm->mm_users)) { INIT_WORK(&mm->async_put_work, mmput_async_fn); - schedule_work(&mm->async_put_work); + queue_work(system_highpri_wq, &mm->async_put_work); } } #endif @@ -2070,7 +2070,7 @@ struct task_struct *fork_idle(int cpu) cpu_to_node(cpu)); if (!IS_ERR(task)) { init_idle_pids(task->pids); - init_idle(task, cpu, false); + init_idle(task, cpu); } return task; diff --git a/kernel/groups.c b/kernel/groups.c index 94bde5210e3d..b5bb6e88376e 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -18,7 +18,7 @@ struct group_info *groups_alloc(int gidsetsize) len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize; gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY); if (!gi) - gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL); + gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); if (!gi) return NULL; diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 4684b7595e63..b10279145e1a 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -106,10 +106,6 @@ void irq_migrate_all_off_this_cpu(void) raw_spin_lock(&desc->lock); affinity_broken = migrate_one_irq(desc); raw_spin_unlock(&desc->lock); - - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", - irq, smp_processor_id()); } local_irq_restore(flags); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 33770ef64d21..aac42c976b3f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -21,6 +21,14 @@ #include "internals.h" +struct irq_desc_list { + struct list_head list; + struct irq_desc *desc; +} perf_crit_irqs = { + .list = LIST_HEAD_INIT(perf_crit_irqs.list) +}; +static DEFINE_RAW_SPINLOCK(perf_irqs_lock); + #ifdef CONFIG_IRQ_FORCED_THREADING __read_mostly bool force_irqthreads; @@ -201,6 +209,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; + /* IRQs only run on the first CPU in the affinity mask; reflect that */ + mask = cpumask_of(cpumask_first(mask)); ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: @@ -1134,6 +1144,72 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) return 0; } +static void add_desc_to_perf_list(struct irq_desc *desc) +{ + struct irq_desc_list *item; + unsigned long flags; + + item = kmalloc(sizeof(*item), GFP_ATOMIC); + if (WARN_ON(!item)) + return; + + item->desc = desc; + + raw_spin_lock_irqsave(&perf_irqs_lock, flags); + list_add(&item->list, &perf_crit_irqs.list); + raw_spin_unlock_irqrestore(&perf_irqs_lock, flags); +} + +static void affine_one_perf_thread(struct task_struct *t) +{ + t->flags |= PF_PERF_CRITICAL; + set_cpus_allowed_ptr(t, cpu_perf_mask); +} + +static void unaffine_one_perf_thread(struct task_struct *t) +{ + t->flags &= ~PF_PERF_CRITICAL; + set_cpus_allowed_ptr(t, cpu_all_mask); +} + +void unaffine_perf_irqs(void) +{ + struct irq_desc_list *data; + unsigned long outer_flags; + + raw_spin_lock_irqsave(&perf_irqs_lock, outer_flags); + list_for_each_entry(data, &perf_crit_irqs.list, list) { + struct irq_desc *desc = data->desc; + unsigned long flags; + + raw_spin_lock_irqsave(&desc->lock, flags); + irq_set_affinity_locked(&desc->irq_data, cpu_all_mask, true); + if (desc->action->thread) + unaffine_one_perf_thread(desc->action->thread); + raw_spin_unlock_irqrestore(&desc->lock, flags); + } + raw_spin_unlock_irqrestore(&perf_irqs_lock, outer_flags); +} + +void reaffine_perf_irqs(void) +{ + struct irq_desc_list *data; + unsigned long outer_flags; + + raw_spin_lock_irqsave(&perf_irqs_lock, outer_flags); + list_for_each_entry(data, &perf_crit_irqs.list, list) { + struct irq_desc *desc = data->desc; + unsigned long flags; + + raw_spin_lock_irqsave(&desc->lock, flags); + irq_set_affinity_locked(&desc->irq_data, cpu_perf_mask, true); + if (desc->action->thread) + affine_one_perf_thread(desc->action->thread); + raw_spin_unlock_irqrestore(&desc->lock, flags); + } + raw_spin_unlock_irqrestore(&perf_irqs_lock, outer_flags); +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -1201,6 +1277,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (ret) goto out_thread; } + + if (new->flags & IRQF_PERF_CRITICAL) + affine_one_perf_thread(new->thread); } if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -1363,7 +1442,14 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } /* Set default affinity mask once everything is setup */ - setup_affinity(desc, mask); + if (new->flags & IRQF_PERF_CRITICAL) { + add_desc_to_perf_list(desc); + irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); + irq_set_affinity_locked(&desc->irq_data, + cpu_perf_mask, true); + } else { + setup_affinity(desc, mask); + } } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 5707f97a3e6a..504836cf36c2 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -426,7 +426,7 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) desc->irqs_unhandled = 0; } -bool noirqdebug __read_mostly; +bool noirqdebug __read_mostly = true; int noirqdebug_setup(char *str) { diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index a0c3365c884b..f3996788d53b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -620,6 +620,7 @@ static const struct seq_operations kallsyms_op = { .show = s_show }; +#ifdef CONFIG_PROC_KALLSYMS static int kallsyms_open(struct inode *inode, struct file *file) { /* @@ -635,6 +636,7 @@ static int kallsyms_open(struct inode *inode, struct file *file) return 0; } +#endif #ifdef CONFIG_KGDB_KDB const char *kdb_walk_kallsyms(loff_t *pos) @@ -656,6 +658,7 @@ const char *kdb_walk_kallsyms(loff_t *pos) } #endif /* CONFIG_KGDB_KDB */ +#ifdef CONFIG_PROC_KALLSYMS static const struct file_operations kallsyms_operations = { .open = kallsyms_open, .read = seq_read, @@ -669,3 +672,4 @@ static int __init kallsyms_init(void) return 0; } device_initcall(kallsyms_init); +#endif diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index f5ab72ebda11..1a5ab1a64a14 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -223,13 +223,13 @@ int sanity_check_segment_list(struct kimage *image) * wasted allocating pages, which can cause a soft lockup. */ for (i = 0; i < nr_segments; i++) { - if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages / 2) + if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages() / 2) return -EINVAL; total_pages += PAGE_COUNT(image->segment[i].memsz); } - if (total_pages > totalram_pages / 2) + if (total_pages > totalram_pages() / 2) return -EINVAL; /* diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index ee1bc1bb8feb..4ef94087a6a0 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -145,7 +145,7 @@ static ssize_t fscaps_show(struct kobject *kobj, KERNEL_ATTR_RO(fscaps); #ifndef CONFIG_TINY_RCU -int rcu_expedited; +int rcu_expedited = 1; static ssize_t rcu_expedited_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { diff --git a/kernel/module.c b/kernel/module.c index 42717759b3e8..19a818b06a93 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2879,7 +2879,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, /* Suck in entire file: we'll want most of it. */ info->hdr = __vmalloc(info->len, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL); + GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL); if (!info->hdr) return -ENOMEM; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b4184971ac91..1f02995d69b2 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -353,3 +353,9 @@ config CPU_PM config DEDUCE_WAKEUP_REASONS bool default n + + +config BOEFFLA_WL_BLOCKER + bool "Boeffla generic wakelock blocker driver" + depends on PM + default N diff --git a/kernel/power/process.c b/kernel/power/process.c index 4a0c7b1a3820..f1841a61f0e8 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -23,8 +23,9 @@ /* * Timeout for stopping processes + * Put a lower value coz we need to freeze stuffs more */ -unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; +unsigned int __read_mostly freeze_timeout_msecs = 2 * MSEC_PER_SEC; static int try_to_freeze_tasks(bool user_only) { @@ -84,12 +85,12 @@ static int try_to_freeze_tasks(bool user_only) elapsed_msecs = ktime_to_ms(elapsed); if (wakeup) { - pr_cont("\n"); - pr_err("Freezing of tasks aborted after %d.%03d seconds", + pr_debug("\n"); + pr_debug("Freezing of tasks aborted after %d.%03d seconds", elapsed_msecs / 1000, elapsed_msecs % 1000); } else if (todo) { - pr_cont("\n"); - pr_err("Freezing of tasks failed after %d.%03d seconds" + pr_debug("\n"); + pr_debug("Freezing of tasks failed after %d.%03d seconds" " (%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); @@ -105,7 +106,7 @@ static int try_to_freeze_tasks(bool user_only) } read_unlock(&tasklist_lock); } else { - pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, + pr_debug("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); } @@ -134,14 +135,14 @@ int freeze_processes(void) atomic_inc(&system_freezing_cnt); pm_wakeup_clear(); - pr_info("Freezing user space processes ... "); + pr_debug("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) { __usermodehelper_set_disable_depth(UMH_DISABLED); - pr_cont("done."); + pr_debug("done."); } - pr_cont("\n"); + pr_debug("\n"); BUG_ON(in_atomic()); /* @@ -170,14 +171,14 @@ int freeze_kernel_threads(void) { int error; - pr_info("Freezing remaining freezable tasks ... "); + pr_debug("Freezing remaining freezable tasks ... "); pm_nosig_freezing = true; error = try_to_freeze_tasks(false); if (!error) - pr_cont("done."); + pr_debug("done."); - pr_cont("\n"); + pr_debug("\n"); BUG_ON(in_atomic()); if (error) @@ -219,7 +220,7 @@ void thaw_processes(void) usermodehelper_enable(); schedule(); - pr_cont("done.\n"); + pr_debug("done.\n"); trace_suspend_resume(TPS("thaw_processes"), 0, false); } @@ -228,7 +229,7 @@ void thaw_kernel_threads(void) struct task_struct *g, *p; pm_nosig_freezing = false; - pr_info("Restarting kernel threads ... "); + pr_debug("Restarting kernel threads ... "); thaw_workqueues(); @@ -240,5 +241,5 @@ void thaw_kernel_threads(void) read_unlock(&tasklist_lock); schedule(); - pr_cont("done.\n"); + pr_debug("done.\n"); } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 0469d8033bd6..d2eed22be147 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -879,6 +879,9 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, s32 value; struct pm_qos_request *req; + /* Don't let userspace impose restrictions on CPU idle levels */ + return count; + if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) return -EFAULT; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index b02850cfc8ee..e14e98c5de2d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -101,7 +101,7 @@ unsigned long image_size; void __init hibernate_image_size_init(void) { - image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; + image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE; } /* diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 1a70e71f1d1f..fccefad17690 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -610,7 +610,7 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { - pr_err("PM: Some devices failed to suspend, or early wake event detected\n"); + pr_debug("PM: Some devices failed to suspend, or early wake event detected\n"); log_suspend_abort_reason("Some devices failed to suspend, or early wake event detected"); goto Recover_platform; } @@ -683,9 +683,9 @@ static int enter_state(suspend_state_t state) #ifndef CONFIG_SUSPEND_SKIP_SYNC trace_suspend_resume(TPS("sync_filesystems"), 0, true); - pr_info("PM: Syncing filesystems ... "); + pr_debug("PM: Syncing filesystems ... "); sys_sync(); - pr_cont("done.\n"); + pr_debug("done.\n"); trace_suspend_resume(TPS("sync_filesystems"), 0, false); #endif diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 78e354b1c593..8c76649b2de5 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "power.h" @@ -78,7 +79,7 @@ static inline void decrement_wakelocks_number(void) {} #ifdef CONFIG_PM_WAKELOCKS_GC #define WL_GC_COUNT_MAX 100 -#define WL_GC_TIME_SEC 300 +#define WL_GC_TIME_SEC 60 static void __wakelocks_gc(struct work_struct *work); static LIST_HEAD(wakelocks_lru_list); @@ -131,9 +132,32 @@ static void __wakelocks_gc(struct work_struct *work) static void wakelocks_gc(void) { + bool expedite; + int cpu = get_cpu(); + + /* + * If the current CPU isn't occupied, go ahead and + * garbage collect inactive wakelocks. + */ + expedite = idle_cpu(cpu); + + put_cpu(); + + if (expedite) + goto do_gc; + + /* + * If our CPU is busy, allow wakelocks to + * accumulate before attempting to garbage collect. + * If by the time we register WL_GC_COUNT_MAX + * wakelocks and the current CPU is still busy, + * run the garbage collecton anyway. + */ + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) return; +do_gc: schedule_work(&wakelock_work); } #else /* !CONFIG_PM_WAKELOCKS_GC */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 512b55025e88..e9d24e256061 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -720,7 +720,7 @@ struct devkmsg_user { static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) { - char *buf, *line; + char buf[LOG_LINE_MAX + 1], *line; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ struct file *file = iocb->ki_filp; @@ -741,10 +741,6 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } - buf = kmalloc(len+1, GFP_KERNEL); - if (buf == NULL) - return -ENOMEM; - buf[len] = '\0'; if (copy_from_iter(buf, len, from) != len) { kfree(buf); @@ -776,8 +772,12 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) } } + if (unlikely(strncmp("healthd", line, 7) == 0 || strncmp("Trustonic TEE", line, 13) == 0)) + { + return len; + } + printk_emit(facility, level, NULL, 0, "%s", line); - kfree(buf); return ret; } @@ -1298,14 +1298,10 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *bu static int syslog_print(char __user *buf, int size) { - char *text; + char text[LOG_LINE_MAX + PREFIX_MAX]; struct printk_log *msg; int len = 0; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); - if (!text) - return -ENOMEM; - while (size > 0) { size_t n; size_t skip; @@ -1353,7 +1349,6 @@ static int syslog_print(char __user *buf, int size) buf += n; } - kfree(text); return len; } @@ -2161,7 +2156,7 @@ int add_preferred_console(char *name, int idx, char *options) return __add_preferred_console(name, idx, options, NULL); } -bool console_suspend_enabled = true; +bool console_suspend_enabled = false; EXPORT_SYMBOL(console_suspend_enabled); static int __init console_suspend_disable(char *str) @@ -2189,7 +2184,6 @@ void suspend_console(void) { if (!console_suspend_enabled) return; - printk("Suspending console(s) (use no_console_suspend to debug)\n"); console_lock(); console_suspended = 1; up_console_sem(); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9e03db9ea9c0..f8a00dc95bd2 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -62,7 +62,7 @@ #ifndef CONFIG_TINY_RCU module_param(rcu_expedited, int, 0); module_param(rcu_normal, int, 0); -static int rcu_normal_after_boot; +static int rcu_normal_after_boot = 1; module_param(rcu_normal_after_boot, int, 0); #endif /* #ifndef CONFIG_TINY_RCU */ @@ -654,6 +654,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) struct rcu_head *list; struct rcu_head *next; LIST_HEAD(rcu_tasks_holdouts); + int fract; /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ housekeeping_affine(current); @@ -735,13 +736,25 @@ static int __noreturn rcu_tasks_kthread(void *arg) * holdouts. When the list is empty, we are done. */ lastreport = jiffies; - while (!list_empty(&rcu_tasks_holdouts)) { + + /* Start off with HZ/10 wait and slowly back off to 1 HZ wait*/ + fract = 10; + + for (;;) { bool firstreport; bool needreport; int rtst; struct task_struct *t1; - schedule_timeout_interruptible(HZ); + if (list_empty(&rcu_tasks_holdouts)) + break; + + /* Slowly back off waiting for holdouts */ + schedule_timeout_interruptible(HZ/fract); + + if (fract > 1) + fract--; + rtst = READ_ONCE(rcu_task_stall_timeout); needreport = rtst > 0 && time_after(jiffies, lastreport + rtst); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 92c14359d233..0c5972b8ec62 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -139,7 +139,7 @@ const_debug unsigned int sysctl_sched_features = * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. */ -const_debug unsigned int sysctl_sched_nr_migrate = 32; +const_debug unsigned int sysctl_sched_nr_migrate = NR_CPUS; /* * period over which we average the RT time consumption, measured @@ -990,6 +990,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) } #ifdef CONFIG_SMP + +static inline bool is_per_cpu_kthread(struct task_struct *p) +{ + if (!(p->flags & PF_KTHREAD)) + return false; + + if (p->nr_cpus_allowed != 1) + return false; + + return true; +} + +/* + * Per-CPU kthreads are allowed to run on !actie && online CPUs, see + * __set_cpus_allowed_ptr() and select_fallback_rq(). + */ +static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +{ + if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + return false; + + if (is_per_cpu_kthread(p)) + return cpu_online(cpu); + + return cpu_active(cpu); +} + /* * This is how migration works: * @@ -1046,16 +1073,8 @@ struct migration_arg { */ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu) { - if (p->flags & PF_KTHREAD) { - if (unlikely(!cpu_online(dest_cpu))) - return rq; - } else { - if (unlikely(!cpu_active(dest_cpu))) - return rq; - } - /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (!is_cpu_allowed(p, dest_cpu)) return rq; rq = move_queued_task(rq, p, dest_cpu); @@ -1120,11 +1139,24 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma p->nr_cpus_allowed = cpumask_weight(new_mask); } +static const struct cpumask * +get_adjusted_cpumask(const struct task_struct *p, + const struct cpumask *orig_mask) +{ + /* Force all performance-critical kthreads onto the big cluster */ + if (p->flags & PF_PERF_CRITICAL) + return cpu_perf_mask; + + return orig_mask; +} + void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { struct rq *rq = task_rq(p); bool queued, running; + new_mask = get_adjusted_cpumask(p, new_mask); + lockdep_assert_held(&p->pi_lock); queued = task_on_rq_queued(p); @@ -1168,6 +1200,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, int ret = 0; cpumask_t allowed_mask; + new_mask = get_adjusted_cpumask(p, new_mask); + rq = task_rq_lock(p, &rf); update_rq_clock(rq); @@ -1589,9 +1623,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso) for (;;) { /* Any allowed, online CPU? */ for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { - if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) - continue; - if (!cpu_online(dest_cpu)) + if (!is_cpu_allowed(p, dest_cpu)) continue; if (cpu_isolated(dest_cpu)) { if (allow_iso) @@ -1639,7 +1671,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso) * leave kernel. */ if (p->mm && printk_ratelimit()) { - printk_deferred("process %d (%s) no longer affine to cpu%d\n", + pr_debug("process %d (%s) no longer affine to cpu%d\n", task_pid_nr(p), p->comm, cpu); } } @@ -1957,8 +1989,10 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) struct rq_flags rf; #if defined(CONFIG_SMP) - if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { - sched_clock_cpu(cpu); /* sync clocks x-cpu */ + if (sched_feat(TTWU_QUEUE) && + !idle_cpu(cpu) && + !cpus_share_cache(smp_processor_id(), cpu)) { + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ttwu_queue_remote(p, cpu, wake_flags); return; } @@ -2167,7 +2201,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) rq = cpu_rq(task_cpu(p)); raw_spin_lock(&rq->lock); old_load = task_load(p); - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); raw_spin_unlock(&rq->lock); @@ -2253,7 +2287,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) trace_sched_waking(p); if (!task_on_rq_queued(p)) { - u64 wallclock = ktime_get_ns(); + u64 wallclock = sched_ktime_clock(); update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); @@ -2495,7 +2529,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) unsigned long flags; int cpu; - init_new_task_load(p, false); + init_new_task_load(p); cpu = get_cpu(); __sched_fork(clone_flags, p); @@ -3250,7 +3284,7 @@ void scheduler_tick(void) old_load = task_load(curr); set_window_start(rq); - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); update_rq_clock(rq); @@ -3622,7 +3656,7 @@ static void __sched notrace __schedule(bool preempt) clear_preempt_need_resched(); rq->clock_skip_update = 0; - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); if (likely(prev != next)) { prev->last_cpu_deselected_ts = wallclock; if (!prev->on_rq) @@ -3634,6 +3668,8 @@ static void __sched notrace __schedule(bool preempt) rq->curr = next; ++*switch_count; + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + trace_sched_switch(preempt, prev, next); rq = context_switch(rq, prev, next, &rf); /* unlocks the rq */ } else { @@ -3932,8 +3968,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) if (dl_prio(prio)) { struct task_struct *pi_task = rt_mutex_get_top_task(p); if (!dl_prio(p->normal_prio) || - (pi_task && dl_prio(pi_task->prio) && - dl_entity_preempt(&pi_task->dl, &p->dl))) { + (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; queue_flag |= ENQUEUE_REPLENISH; } else @@ -5009,6 +5044,9 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) retval = -EINVAL; } + if (!retval && !(p->flags & PF_KTHREAD)) + cpumask_and(&p->cpus_requested, in_mask, cpu_possible_mask); + out_free_new_mask: free_cpumask_var(new_mask); out_free_cpus_allowed: @@ -5521,12 +5559,11 @@ void init_idle_bootup_task(struct task_struct *idle) * init_idle - set up an idle thread for a given CPU * @idle: task in question * @cpu: cpu the idle task belongs to - * @cpu_up: differentiate between initial boot vs hotplug * * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void init_idle(struct task_struct *idle, int cpu, bool cpu_up) +void init_idle(struct task_struct *idle, int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; @@ -5535,9 +5572,6 @@ void init_idle(struct task_struct *idle, int cpu, bool cpu_up) __sched_fork(0, idle); - if (!cpu_up) - init_new_task_load(idle, true); - raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_lock(&rq->lock); @@ -7680,11 +7714,14 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, static int build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr) { - enum s_alloc alloc_state; + enum s_alloc alloc_state = sa_none; struct sched_domain *sd; struct s_data d; int i, ret = -ENOMEM; + if (WARN_ON(cpumask_empty(cpu_map))) + goto error; + alloc_state = __visit_domain_allocation_hell(&d, cpu_map); if (alloc_state != sa_rootdomain) goto error; @@ -8033,6 +8070,7 @@ int sched_cpu_activate(unsigned int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); update_max_interval(); + walt_update_min_max_capacity(); return 0; } @@ -8074,6 +8112,7 @@ int sched_cpu_deactivate(unsigned int cpu) return ret; } sched_domains_numa_masks_clear(cpu); + walt_update_min_max_capacity(); return 0; } @@ -8093,6 +8132,7 @@ int sched_cpu_starting(unsigned int cpu) { set_cpu_rq_start_time(cpu); sched_rq_cpu_starting(cpu); + clear_walt_request(cpu); return 0; } @@ -8150,6 +8190,7 @@ void __init sched_init_smp(void) /* Move init over to a non-isolated CPU */ if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) BUG(); + cpumask_copy(¤t->cpus_requested, cpu_possible_mask); sched_init_granularity(); free_cpumask_var(non_isolated_cpus); @@ -8370,7 +8411,8 @@ void __init sched_init(void) * but because we are the idle thread, we just pick up running again * when this runqueue becomes "idle". */ - init_idle(current, smp_processor_id(), false); + init_idle(current, smp_processor_id()); + init_new_task_load(current); calc_load_update = jiffies + LOAD_FREQ; @@ -9630,7 +9672,7 @@ void sched_exit(struct task_struct *p) rq = task_rq_lock(p, &rf); /* rq->curr == p */ - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); dequeue_task(rq, p, 0); /* diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c index 7c28319b68cf..2f79e8105c15 100644 --- a/kernel/sched/core_ctl.c +++ b/kernel/sched/core_ctl.c @@ -789,6 +789,7 @@ static void try_to_isolate(struct cluster_data *cluster, unsigned int need) unsigned long flags; unsigned int num_cpus = cluster->num_cpus; unsigned int nr_isolated = 0; + bool first_pass = cluster->nr_not_preferred_cpus; /* * Protect against entry being removed (and added at tail) by other @@ -834,6 +835,7 @@ static void try_to_isolate(struct cluster_data *cluster, unsigned int need) cluster->nr_isolated_cpus += nr_isolated; spin_unlock_irqrestore(&state_lock, flags); +again: /* * If the number of active CPUs is within the limits, then * don't force isolation of any busy CPUs. @@ -853,6 +855,9 @@ static void try_to_isolate(struct cluster_data *cluster, unsigned int need) if (cluster->active_cpus <= cluster->max_cpus) break; + if (first_pass && !c->not_preferred) + continue; + spin_unlock_irqrestore(&state_lock, flags); pr_debug("Trying to isolate CPU%u\n", c->cpu); @@ -869,6 +874,10 @@ static void try_to_isolate(struct cluster_data *cluster, unsigned int need) cluster->nr_isolated_cpus += nr_isolated; spin_unlock_irqrestore(&state_lock, flags); + if (first_pass && cluster->active_cpus > cluster->max_cpus) { + first_pass = false; + goto again; + } } static void __try_to_unisolate(struct cluster_data *cluster, diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5bdef43b9830..9621505230c7 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -72,8 +72,9 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned long iowait_boost; - unsigned long iowait_boost_max; + bool iowait_boost_pending; + unsigned int iowait_boost; + unsigned int iowait_boost_max; u64 last_update; struct sched_walt_cpu_load walt_load; @@ -160,7 +161,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, trace_cpu_frequency(next_freq, smp_processor_id()); } else { sg_policy->work_in_progress = true; - irq_work_queue(&sg_policy->irq_work); + sched_irq_work_queue(&sg_policy->irq_work); } } @@ -225,31 +226,56 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, if (!sg_policy->tunables->iowait_boost_enable) return; - if (flags & SCHED_CPUFREQ_IOWAIT) { - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; - } else if (sg_cpu->iowait_boost) { + if (sg_cpu->iowait_boost) { s64 delta_ns = time - sg_cpu->last_update; /* Clear iowait_boost if the CPU apprears to have been idle. */ - if (delta_ns > TICK_NSEC) + if (delta_ns > TICK_NSEC) { sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_pending = false; + } + } + if (flags & SCHED_CPUFREQ_IOWAIT) { + if (sg_cpu->iowait_boost_pending) + return; + + sg_cpu->iowait_boost_pending = true; + + if (sg_cpu->iowait_boost) { + sg_cpu->iowait_boost <<= 1; + if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else { + sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; + } } } static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, unsigned long *max) { - unsigned long boost_util = sg_cpu->iowait_boost; - unsigned long boost_max = sg_cpu->iowait_boost_max; + unsigned int boost_util, boost_max; - if (!boost_util) + if (!sg_cpu->iowait_boost) return; + if (sg_cpu->iowait_boost_pending) { + sg_cpu->iowait_boost_pending = false; + } else { + sg_cpu->iowait_boost >>= 1; + if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { + sg_cpu->iowait_boost = 0; + return; + } + } + + boost_util = sg_cpu->iowait_boost; + boost_max = sg_cpu->iowait_boost_max; + if (*util * boost_max < *max * boost_util) { *util = boost_util; *max = boost_max; } - sg_cpu->iowait_boost >>= 1; } static unsigned long freq_to_util(struct sugov_policy *sg_policy, @@ -433,6 +459,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) delta_ns = time - j_sg_cpu->last_update; if (delta_ns > stale_ns) { j_sg_cpu->iowait_boost = 0; + j_sg_cpu->iowait_boost_pending = false; continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) { @@ -517,7 +544,7 @@ static void sugov_work(struct kthread_work *work) mutex_lock(&sg_policy->work_lock); raw_spin_lock_irqsave(&sg_policy->update_lock, flags); sugov_track_cycles(sg_policy, sg_policy->policy->cur, - ktime_get_ns()); + sched_ktime_clock()); raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags); __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, CPUFREQ_RELATION_L); @@ -589,6 +616,9 @@ static ssize_t up_rate_limit_us_store(struct gov_attr_set *attr_set, struct sugov_policy *sg_policy; unsigned int rate_limit_us; + /* Don't let userspace change this */ + return count; + if (kstrtouint(buf, 10, &rate_limit_us)) return -EINVAL; @@ -609,6 +639,9 @@ static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set, struct sugov_policy *sg_policy; unsigned int rate_limit_us; + /* Don't let userspace change this */ + return count; + if (kstrtouint(buf, 10, &rate_limit_us)) return -EINVAL; @@ -905,6 +938,10 @@ static int sugov_init(struct cpufreq_policy *policy) tunables->down_rate_limit_us *= lat; } + /* Hard-code some sane rate-limit values */ + tunables->up_rate_limit_us = 10000; + tunables->down_rate_limit_us = 20000; + tunables->iowait_boost_enable = false; policy->governor_data = sg_policy; @@ -1025,7 +1062,7 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_lock(&sg_policy->work_lock); raw_spin_lock_irqsave(&sg_policy->update_lock, flags); sugov_track_cycles(sg_policy, sg_policy->policy->cur, - ktime_get_ns()); + sched_ktime_clock()); raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags); cpufreq_policy_apply_limits(policy); mutex_unlock(&sg_policy->work_lock); @@ -1051,8 +1088,4 @@ struct cpufreq_governor *cpufreq_default_governor(void) } #endif -static int __init sugov_register(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} -fs_initcall(sugov_register); +cpufreq_governor_init(schedutil_gov); diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 87bea1e2605e..aec86a2693cf 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -133,6 +133,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (lowest_mask) { cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask); + cpumask_andnot(lowest_mask, lowest_mask, + cpu_isolated_mask); if (drop_nopreempts) drop_nopreempt_cpus(lowest_mask); /* diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c index b75b13acf078..953973a2a64a 100644 --- a/kernel/sched/energy.c +++ b/kernel/sched/energy.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,17 @@ static void free_resources(void) } } +static int update_topology; + +/* + * Ideally this should be arch specific implementation, + * let's define here to help rebuild sched_domain with new capacities. + */ +int arch_update_cpu_topology(void) +{ + return update_topology; +} + void init_sched_energy_costs(void) { struct device_node *cn, *cp; @@ -289,8 +301,24 @@ static int sched_energy_probe(struct platform_device *pdev) kfree(max_frequencies); - if (is_sge_valid) + if (is_sge_valid) { + /* + * Sched_domains might have built with default cpu capacity + * values on bootup. + * + * Let's rebuild them again with actual cpu capacities. + * And partition_sched_domain() expects update in cpu topology + * to rebuild the domains, so make it satisfied.. + */ + update_topology = 1; + rebuild_sched_domains(); + update_topology = 0; + walt_sched_energy_populated_callback(); + } + + walt_map_freq_to_load(); + dev_info(&pdev->dev, "Sched-energy-costs capacity updated\n"); return 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 48b2f9c61743..f6bc0cdffe55 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -166,6 +166,11 @@ unsigned int sysctl_sched_capacity_margin = 1078; /* ~5% margin */ unsigned int sysctl_sched_capacity_margin_down = 1205; /* ~15% margin */ #define capacity_margin sysctl_sched_capacity_margin +#ifdef CONFIG_SCHED_WALT +unsigned int sysctl_sched_min_task_util_for_boost_colocation = 1; +#endif +static unsigned int __maybe_unused sched_small_task_threshold = 102; + static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; @@ -2390,7 +2395,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) struct numa_group *ng; int priv; - if (!static_branch_likely(&sched_numa_balancing)) + if (!IS_ENABLED(CONFIG_NUMA_BALANCING) || + !static_branch_likely(&sched_numa_balancing)) return; /* for example, ksmd faulting in a user's mm */ @@ -3925,6 +3931,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) { struct sched_entity *left = __pick_first_entity(cfs_rq); struct sched_entity *se; + bool strict_skip = false; /* * If curr is set we have to see if its left of the leftmost entity @@ -3944,13 +3951,16 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) if (se == curr) { second = __pick_first_entity(cfs_rq); + if (sched_feat(STRICT_SKIP_BUDDY)) + strict_skip = true; } else { second = __pick_next_entity(se); if (!second || (curr && entity_before(curr, second))) second = curr; } - if (second && wakeup_preempt_entity(second, left) < 1) + if (second && (strict_skip || + wakeup_preempt_entity(second, left) < 1)) se = second; } @@ -4097,6 +4107,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) now = sched_clock_cpu(smp_processor_id()); cfs_b->runtime = cfs_b->quota; cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4119,6 +4130,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); u64 amount = 0, min_amount, expires; + int expires_seq; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -4135,6 +4147,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } + expires_seq = cfs_b->expires_seq; expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); @@ -4144,8 +4157,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) * spread between our sched_clock and the one on which runtime was * issued. */ - if ((s64)(expires - cfs_rq->runtime_expires) > 0) + if (cfs_rq->expires_seq != expires_seq) { + cfs_rq->expires_seq = expires_seq; cfs_rq->runtime_expires = expires; + } return cfs_rq->runtime_remaining > 0; } @@ -4171,12 +4186,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) * has not truly expired. * * Fortunately we can check determine whether this the case by checking - * whether the global deadline has advanced. It is valid to compare - * cfs_b->runtime_expires without any locks since we only care about - * exact equality, so a partial write will still work. + * whether the global deadline(cfs_b->expires_seq) has advanced. */ - - if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { + if (cfs_rq->expires_seq == cfs_b->expires_seq) { /* extend local deadline, drift is bounded above by 2 ticks */ cfs_rq->runtime_expires += TICK_NSEC; } else { @@ -4770,13 +4782,18 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { + u64 overrun; + lockdep_assert_held(&cfs_b->lock); - if (!cfs_b->period_active) { - cfs_b->period_active = 1; - hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); - hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); - } + if (cfs_b->period_active) + return; + + cfs_b->period_active = 1; + overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); + cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; + hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); } static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) @@ -4940,6 +4957,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; #endif + bool prefer_idle = schedtune_prefer_idle(p) > 0; #ifdef CONFIG_SCHED_WALT p->misfit = !task_fits_max(p, rq->cpu); @@ -4949,7 +4967,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * utilization updates, so do it here explicitly with the IOWAIT flag * passed. */ - if (p->in_iowait) + if (p->in_iowait && prefer_idle) cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); for_each_sched_entity(se) { @@ -5784,6 +5802,9 @@ static int group_idle_state(struct energy_env *eenv, int cpu_idx) for_each_cpu(i, sched_group_cpus(sg)) state = min(state, idle_get_state_idx(cpu_rq(i))); + if (unlikely(state == INT_MAX)) + return -EINVAL; + /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; @@ -5850,7 +5871,7 @@ static int group_idle_state(struct energy_env *eenv, int cpu_idx) * The required scaling will be performed just one time, by the calling * functions, once we accumulated the contributons for all the SGs. */ -static void calc_sg_energy(struct energy_env *eenv) +static int calc_sg_energy(struct energy_env *eenv) { struct sched_group *sg = eenv->sg; int busy_energy, idle_energy; @@ -5879,6 +5900,11 @@ static void calc_sg_energy(struct energy_env *eenv) /* Compute IDLE energy */ idle_idx = group_idle_state(eenv, cpu_idx); + if (unlikely(idle_idx < 0)) + return idle_idx; + if (idle_idx > sg->sge->nr_idle_states - 1) + idle_idx = sg->sge->nr_idle_states - 1; + idle_power = sg->sge->idle_states[idle_idx].power; idle_energy = SCHED_CAPACITY_SCALE - sg_util; @@ -5887,6 +5913,7 @@ static void calc_sg_energy(struct energy_env *eenv) total_energy = busy_energy + idle_energy; eenv->cpu[cpu_idx].energy += total_energy; } + return 0; } /* @@ -5948,7 +5975,8 @@ static int compute_energy(struct energy_env *eenv) * CPUs in the current visited SG. */ eenv->sg = sg; - calc_sg_energy(eenv); + if (calc_sg_energy(eenv)) + return -EINVAL; /* remove CPUs we have just visited */ if (!sd->child) { @@ -6221,13 +6249,19 @@ static inline bool task_fits_max(struct task_struct *p, int cpu) if (capacity == max_capacity) return true; - if (sched_boost_policy() == SCHED_BOOST_ON_BIG && - task_sched_boost(p)) + if (task_boost_policy(p) == SCHED_BOOST_ON_BIG || + schedtune_task_boost(p) > 0) return false; return __task_fits(p, cpu, 0); } +static inline bool cpu_check_overutil_condition(int cpu, + unsigned long util) +{ + return (capacity_orig_of(cpu) * 1024) < (util * capacity_margin); +} + bool __cpu_overutilized(int cpu, int delta) { return (capacity_orig_of(cpu) * 1024) < @@ -6888,7 +6922,7 @@ static int cpu_util_wake(int cpu, struct task_struct *p) struct find_best_target_env { struct cpumask *rtg_target; bool need_idle; - bool placement_boost; + int placement_boost; bool avoid_prev_cpu; }; @@ -6916,11 +6950,11 @@ static unsigned long cpu_estimated_capacity(int cpu, struct task_struct *p) static bool is_packing_eligible(struct task_struct *p, int target_cpu, struct find_best_target_env *fbt_env, unsigned int target_cpus_count, - int best_idle_cstate) + int best_idle_cstate, bool boosted) { unsigned long estimated_capacity; - if (fbt_env->placement_boost || fbt_env->need_idle) + if (fbt_env->placement_boost || fbt_env->need_idle || boosted) return false; if (best_idle_cstate == -1) @@ -6940,37 +6974,30 @@ static bool is_packing_eligible(struct task_struct *p, int target_cpu, return (estimated_capacity <= capacity_curr_of(target_cpu)); } -static inline bool skip_sg(struct task_struct *p, struct sched_group *sg, - struct cpumask *rtg_target) +static int start_cpu(struct task_struct *p, bool boosted, + struct cpumask *rtg_target) { - int fcpu = group_first_cpu(sg); - - /* Are all CPUs isolated in this group? */ - if (!sg->group_weight) - return true; - - /* - * Don't skip a group if a task affinity allows it - * to run only on that group. - */ - if (cpumask_subset(tsk_cpus_allowed(p), sched_group_cpus(sg))) - return false; + struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + int start_cpu = -1; - if (!task_fits_max(p, fcpu)) - return true; + if (boosted) + return rd->max_cap_orig_cpu; - if (rtg_target && !cpumask_test_cpu(fcpu, rtg_target)) - return true; + /* A task always fits on its rtg_target */ + if (rtg_target) { + int rtg_target_cpu = cpumask_first_and(rtg_target, + cpu_online_mask); - return false; -} - -static int start_cpu(bool boosted) -{ - struct root_domain *rd = cpu_rq(smp_processor_id())->rd; - int start_cpu; + if (rtg_target_cpu < nr_cpu_ids) + return rtg_target_cpu; + } - start_cpu = boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; + /* Where the task should land based on its demand */ + if (rd->min_cap_orig_cpu != -1 + && task_fits_max(p, rd->min_cap_orig_cpu)) + start_cpu = rd->min_cap_orig_cpu; + else + start_cpu = rd->max_cap_orig_cpu; return walt_start_cpu(start_cpu); } @@ -6994,7 +7021,11 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, int best_idle_cpu = -1; int target_cpu = -1; int cpu, i; + long spare_cap, most_spare_cap = 0; + int most_spare_cap_cpu = -1; unsigned int active_cpus_count = 0; + int isolated_candidate = -1; + int prev_cpu = task_cpu(p); *backup_cpu = -1; @@ -7002,7 +7033,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, schedstat_inc(this_rq()->eas_stats.fbt_attempts); /* Find start CPU based on boost value */ - cpu = start_cpu(boosted); + cpu = start_cpu(p, boosted, fbt_env->rtg_target); if (cpu < 0) { schedstat_inc(p->se.statistics.nr_wakeups_fbt_no_cpu); schedstat_inc(this_rq()->eas_stats.fbt_no_cpu); @@ -7023,9 +7054,6 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, cpumask_t search_cpus; bool do_rotate = false, avoid_prev_cpu = false; - if (skip_sg(p, sg, fbt_env->rtg_target)) - continue; - cpumask_copy(&search_cpus, tsk_cpus_allowed(p)); cpumask_and(&search_cpus, &search_cpus, sched_group_cpus(sg)); i = find_first_cpu_bit(p, &search_cpus, sg, &avoid_prev_cpu, @@ -7040,16 +7068,18 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, unsigned long wake_util, new_util, min_capped_util; cpumask_clear_cpu(i, &search_cpus); - if (avoid_prev_cpu && i == task_cpu(p)) - continue; - if (!cpu_online(i) || cpu_isolated(i) || is_reserved(i)) + trace_sched_cpu_util(i); + if (!cpu_online(i) || cpu_isolated(i)) continue; - if (walt_cpu_high_irqload(i)) + isolated_candidate = i; + + if (avoid_prev_cpu && i == prev_cpu) continue; - trace_sched_cpu_util(i); + if (walt_cpu_high_irqload(i) || is_reserved(i)) + continue; /* * p's blocked utilization is still accounted for on prev_cpu @@ -7058,6 +7088,12 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, */ wake_util = cpu_util_wake(i, p); new_util = wake_util + task_util(p); + spare_cap = capacity_orig_of(i) - wake_util; + + if (spare_cap > most_spare_cap) { + most_spare_cap = spare_cap; + most_spare_cap_cpu = i; + } /* * Ensure minimum capacity to grant the required boost. @@ -7075,7 +7111,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, */ min_capped_util = max(new_util, capacity_min_of(i)); - if (new_util > capacity_orig) + if (cpu_check_overutil_condition(i, new_util)) continue; /* @@ -7119,7 +7155,8 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, trace_sched_find_best_target(p, prefer_idle, min_util, cpu, best_idle_cpu, - best_active_cpu, i); + best_active_cpu, + i, -1); return i; } @@ -7275,7 +7312,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, if (best_idle_cpu != -1) break; - if (fbt_env->placement_boost) { + if (fbt_env->placement_boost != SCHED_BOOST_NONE) { target_capacity = ULONG_MAX; continue; } @@ -7292,11 +7329,18 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, target_capacity = ULONG_MAX; } + /* + * if we have found a target cpu within a group, don't bother + * checking other groups. + */ + if (target_capacity != ULONG_MAX) + break; + } while (sg = sg->next, sg != sd->groups); if (best_idle_cpu != -1 && !is_packing_eligible(p, target_cpu, fbt_env, - active_cpus_count, best_idle_cstate)) { - if (target_cpu == task_cpu(p)) + active_cpus_count, best_idle_cstate, boosted)) { + if (target_cpu == prev_cpu) fbt_env->avoid_prev_cpu = true; target_cpu = best_idle_cpu; @@ -7332,9 +7376,36 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, ? best_active_cpu : best_idle_cpu; + if (target_cpu == -1 && most_spare_cap_cpu != -1 && + /* ensure we use active cpu for active migration */ + !(p->state == TASK_RUNNING && !idle_cpu(most_spare_cap_cpu))) + target_cpu = most_spare_cap_cpu; + + if (target_cpu == -1 && cpu_isolated(prev_cpu) && + isolated_candidate != -1) { + target_cpu = isolated_candidate; + fbt_env->avoid_prev_cpu = true; + } + + /* + * - It is possible for target and backup + * to select same CPU - if so, drop backup + * + * - The next step of energy evaluation includes + * prev_cpu. Drop target or backup if it is + * same as prev_cpu. + */ + if (*backup_cpu == target_cpu || *backup_cpu == prev_cpu) + *backup_cpu = -1; + + if (target_cpu == prev_cpu) { + target_cpu = *backup_cpu; + *backup_cpu = -1; + } + trace_sched_find_best_target(p, prefer_idle, min_util, cpu, best_idle_cpu, best_active_cpu, - target_cpu); + target_cpu, *backup_cpu); schedstat_inc(p->se.statistics.nr_wakeups_fbt_count); schedstat_inc(this_rq()->eas_stats.fbt_count); @@ -7396,9 +7467,9 @@ task_is_boosted(struct task_struct *p) { * to run on any cpu. */ static inline bool -cpu_is_in_target_set(struct task_struct *p, int cpu) +cpu_is_in_target_set(struct task_struct *p, int cpu, struct cpumask *rtg_target) { - int first_cpu = start_cpu(task_is_boosted(p)); + int first_cpu = start_cpu(p, task_is_boosted(p), rtg_target); int next_usable_cpu = cpumask_next(first_cpu - 1, tsk_cpus_allowed(p)); return cpu >= next_usable_cpu || next_usable_cpu >= nr_cpu_ids; } @@ -7417,7 +7488,7 @@ bias_to_prev_cpu(struct task_struct *p, struct cpumask *rtg_target) u64 ms = sched_clock(); #endif - if (!cpu_is_in_target_set(p, prev_cpu)) { + if (!cpu_is_in_target_set(p, prev_cpu, rtg_target)) { return false; } @@ -7449,7 +7520,9 @@ static inline struct cpumask *find_rtg_target(struct task_struct *p) rcu_read_lock(); grp = task_related_thread_group(p); - if (grp && grp->preferred_cluster) { + if (grp && grp->preferred_cluster && + (task_util(p) > + sysctl_sched_min_task_util_for_boost_colocation)) { rtg_target = &grp->preferred_cluster->cpus; if (!task_fits_max(p, cpumask_first(rtg_target))) rtg_target = NULL; @@ -7508,9 +7581,8 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, } else { fbt_env.need_idle = wake_to_idle(p); } - fbt_env.placement_boost = task_sched_boost(p) ? - sched_boost_policy() != SCHED_BOOST_NONE : - false; + + fbt_env.placement_boost = task_boost_policy(p); fbt_env.avoid_prev_cpu = false; if (bias_to_prev_cpu(p, rtg_target)) { @@ -7537,7 +7609,8 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, if (fbt_env.placement_boost || fbt_env.need_idle || fbt_env.avoid_prev_cpu || (rtg_target && - !cpumask_test_cpu(prev_cpu, rtg_target))) { + (!cpumask_test_cpu(prev_cpu, rtg_target) || + cpumask_test_cpu(next_cpu, rtg_target)))) { target_cpu = next_cpu; goto unlock; } @@ -7639,7 +7712,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sysctl_sched_sync_hint_enable && sync && !_wake_cap && about_to_idle && - cpu_is_in_target_set(p, cpu)) + cpu_is_in_target_set(p, cpu, NULL)) return cpu; } @@ -7656,7 +7729,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f * that the selection algorithm for a boosted task * should be used. */ - bool sync_boost = sync && cpu >= start_cpu(true); + bool sync_boost = sync && cpu >= start_cpu(p, true, NULL); return select_energy_cpu_brute(p, prev_cpu, sync_boost); } @@ -7664,7 +7737,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f rcu_read_lock(); for_each_domain(cpu, tmp) { if (!(tmp->flags & SD_LOAD_BALANCE)) - break; + continue; /* * If both cpu and prev_cpu are part of this domain, @@ -8435,8 +8508,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) return 0; } + /* Record that we found atleast one task that could run on dst_cpu */ + env->flags &= ~LBF_ALL_PINNED; + if (energy_aware() && !env->dst_rq->rd->overutilized && - env->idle == CPU_NEWLY_IDLE) { + env->idle == CPU_NEWLY_IDLE && + !task_in_related_thread_group(p)) { long util_cum_dst, util_cum_src; unsigned long demand; @@ -8448,9 +8525,6 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) return 0; } - /* Record that we found atleast one task that could run on dst_cpu */ - env->flags &= ~LBF_ALL_PINNED; - #ifdef CONFIG_SCHED_WALT if (env->flags & LBF_IGNORE_PREFERRED_CLUSTER_TASKS && !preferred_cluster(cpu_rq(env->dst_cpu)->cluster, p)) @@ -8462,6 +8536,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) return 0; #endif + /* Dont allow boosted tasks to be pulled to small cores unless + * big cores are overutilized + */ + if (!env->src_rq->rd->overutilized && + env->flags & LBF_IGNORE_BIG_TASKS && + (schedtune_task_boost(p) > 0)) + return 0; + if (task_running(env->src_rq, p)) { schedstat_inc(p->se.statistics.nr_failed_migrations_running); return 0; @@ -8603,7 +8685,17 @@ static int detach_tasks(struct lb_env *env) if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) goto next; - if ((load / 2) > env->imbalance) + /* + * p is not running task when we goes until here, so if p is one + * of the 2 task in src cpu rq and not the running one, + * that means it is the only task that can be balanced. + * So only when there is other tasks can be balanced or + * there is situation to ignore big task, it is needed + * to skip the task load bigger than 2*imbalance. + */ + if (((cpu_rq(env->src_cpu)->nr_running > 2) || + (env->flags & LBF_IGNORE_BIG_TASKS)) && + ((load / 2) > env->imbalance)) goto next; detach_task(p, env); @@ -8946,7 +9038,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) (max_capacity < capacity)) { mcc->val = capacity; mcc->cpu = cpu; -#ifdef CONFIG_SCHED_DEBUG +#if 0 raw_spin_unlock_irqrestore(&mcc->lock, flags); printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n", cpu, capacity); @@ -9686,6 +9778,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s } env->imbalance = 0; + if (busiest->group_type == group_overloaded && + local->group_type <= group_misfit_task) { + env->imbalance = busiest->load_per_task; + return; + } return fix_small_imbalance(env, sds); } @@ -9729,8 +9826,23 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * a think about bumping its value to force at least one task to be * moved */ - if (env->imbalance < busiest->load_per_task) + if (env->imbalance < busiest->load_per_task) { + /* + * The busiest group is overloaded so it could use help + * from the other groups. If the local group has idle CPUs + * and it is not overloaded and has no imbalance with in + * the group, allow the load balance by bumping the + * imbalance. + */ + if (busiest->group_type == group_overloaded && + local->group_type <= group_misfit_task && + env->idle != CPU_NOT_IDLE) { + env->imbalance = busiest->load_per_task; + return; + } + return fix_small_imbalance(env, sds); + } } /******* find_busiest_group() helpers end here *********************/ @@ -9761,7 +9873,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (energy_aware() && !env->dst_rq->rd->overutilized) { int cpu_local, cpu_busiest; - long util_cum; unsigned long energy_local, energy_busiest; if (env->idle != CPU_NEWLY_IDLE) @@ -9781,10 +9892,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env) } else if (energy_local == energy_busiest) { if (cpu_rq(cpu_busiest)->nr_running < 2) goto out_balanced; - - util_cum = cpu_util_cum(cpu_busiest, 0); - if (util_cum < cpu_util_cum(cpu_local, 0)) - goto out_balanced; } } @@ -9813,8 +9920,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (busiest->group_type == group_imbalanced) goto force_balance; - /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ - if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) && + /* + * When dst_cpu is idle, prevent SMP nice and/or asymmetric group + * capacities from resulting in underutilization due to avg_load. + */ + if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) && busiest->group_no_capacity) goto force_balance; @@ -9911,6 +10021,18 @@ static struct rq *find_busiest_queue(struct lb_env *env, capacity = capacity_of(i); + /* + * For ASYM_CPUCAPACITY domains, don't pick a cpu that could + * eventually lead to active_balancing high->low capacity. + * Higher per-cpu capacity is considered better than balancing + * average load. + */ + if (env->sd->flags & SD_ASYM_CPUCAPACITY && + capacity_of(env->dst_cpu) < capacity && + (rq->nr_running == 1 || (rq->nr_running == 2 && + task_util(rq->curr) < sched_small_task_threshold))) + continue; + wl = weighted_cpuload(i); /* @@ -9992,8 +10114,10 @@ static int need_active_balance(struct lb_env *env) * It's worth migrating the task if the src_cpu's capacity is reduced * because of other sched_class or IRQs if more capacity stays * available on dst_cpu. + * Avoid pulling the CFS task if it is the only task running. */ if ((env->idle != CPU_NOT_IDLE) && + (env->src_rq->nr_running > 1) && (env->src_rq->cfs.h_nr_running == 1)) { if ((check_cpu_capacity(env->src_rq, sd)) && (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100)) @@ -10285,6 +10409,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, busiest->active_balance = 1; busiest->push_cpu = this_cpu; active_balance = 1; + mark_reserved(this_cpu); } raw_spin_unlock_irqrestore(&busiest->lock, flags); @@ -10395,6 +10520,27 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance) *next_balance = next; } +#ifdef CONFIG_SCHED_WALT +static inline bool min_cap_cluster_has_misfit_task(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (!is_min_capacity_cpu(cpu)) + break; + if (cpu_rq(cpu)->walt_stats.nr_big_tasks) + return true; + } + + return false; +} +#else +static inline bool min_cap_cluster_has_misfit_task(void) +{ + return false; +} +#endif + /* * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. @@ -10406,17 +10552,25 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; + bool force_lb = false; if (cpu_isolated(this_cpu)) return 0; + /* + * Force higher capacity CPUs doing load balance, when the lower + * capacity CPUs has some misfit tasks. + */ + if (!is_min_capacity_cpu(this_cpu) && min_cap_cluster_has_misfit_task()) + force_lb = true; + /* * We must set idle_stamp _before_ calling idle_balance(), such that we * measure the duration of idle_balance() as idle time. */ this_rq->idle_stamp = rq_clock(this_rq); - if (!energy_aware() && + if (!energy_aware() && !force_lb && (this_rq->avg_idle < sysctl_sched_migration_cost || !this_rq->rd->overload)) { rcu_read_lock(); @@ -10439,7 +10593,8 @@ static int idle_balance(struct rq *this_rq) if (!(sd->flags & SD_LOAD_BALANCE)) continue; - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) { + if (!force_lb && + this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) { update_next_balance(sd, &next_balance); break; } @@ -10589,6 +10744,7 @@ static int active_load_balance_cpu_stop(void *data) busiest_rq->active_balance = 0; push_task = busiest_rq->push_task; target_cpu = busiest_rq->push_cpu; + clear_reserved(target_cpu); if (push_task) busiest_rq->push_task = NULL; @@ -10599,7 +10755,6 @@ static int active_load_balance_cpu_stop(void *data) if (push_task_detached) attach_one_task(target_rq, push_task); put_task_struct(push_task); - clear_reserved(target_cpu); } if (p) @@ -10633,8 +10788,12 @@ static inline int find_new_ilb(int type) rcu_read_lock(); sd = rcu_dereference_check_sched_domain(rq->sd); if (sd) { - cpumask_and(&cpumask, nohz.idle_cpus_mask, - sched_domain_span(sd)); + if (energy_aware() && rq->misfit_task) + cpumask_andnot(&cpumask, nohz.idle_cpus_mask, + sched_domain_span(sd)); + else + cpumask_and(&cpumask, nohz.idle_cpus_mask, + sched_domain_span(sd)); cpumask_andnot(&cpumask, &cpumask, cpu_isolated_mask); ilb = cpumask_first(&cpumask); @@ -10645,7 +10804,7 @@ static inline int find_new_ilb(int type) if (!energy_aware() || (capacity_orig_of(cpu) == cpu_rq(cpu)->rd->max_cpu_capacity.val || - cpu_overutilized(cpu))) { + (cpu_overutilized(cpu) && rq->nr_running > 1))) { cpumask_andnot(&cpumask, nohz.idle_cpus_mask, cpu_isolated_mask); ilb = cpumask_first(&cpumask); @@ -10740,11 +10899,19 @@ void set_cpu_sd_state_idle(void) */ void nohz_balance_enter_idle(int cpu) { - /* - * If this cpu is going down, then nothing needs to be done. - */ - if (!cpu_active(cpu)) + if (!cpu_active(cpu)) { + /* + * A CPU can be paused while it is idle with it's tick + * stopped. nohz_balance_exit_idle() should be called + * from the local CPU, so it can't be called during + * pause. This results in paused CPU participating in + * the nohz idle balance, which should be avoided. + * When the paused CPU exits idle and enters again, + * exempt the paused CPU from nohz_balance_exit_idle. + */ + nohz_balance_exit_idle(cpu); return; + } if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) return; @@ -11137,7 +11304,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) entity_tick(cfs_rq, se, queued); } - if (static_branch_unlikely(&sched_numa_balancing)) + if (IS_ENABLED(CONFIG_NUMA_BALANCING) && + static_branch_unlikely(&sched_numa_balancing)) task_tick_numa(rq, curr); #ifdef CONFIG_SMP @@ -11935,7 +12103,7 @@ static void walt_check_for_rotation(struct rq *src_rq) if (is_max_capacity_cpu(src_cpu)) return; - wc = ktime_get_ns(); + wc = sched_ktime_clock(); for_each_possible_cpu(i) { struct rq *rq = cpu_rq(i); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 785543471e56..fe3711c6f174 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -3,7 +3,7 @@ * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. */ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, false) /* * Place new tasks ahead so that they do not starve already running @@ -16,7 +16,7 @@ SCHED_FEAT(START_DEBIT, true) * wakeup-preemption), since its likely going to consume data we * touched, increases cache locality. */ -SCHED_FEAT(NEXT_BUDDY, false) +SCHED_FEAT(NEXT_BUDDY, true) /* * Prefer to schedule the task that ran last (when we did @@ -25,6 +25,12 @@ SCHED_FEAT(NEXT_BUDDY, false) */ SCHED_FEAT(LAST_BUDDY, true) +/* + * skip buddy i.e task called yield() is always skipped and the + * next entity is selected to run irrespective of the vruntime + */ +SCHED_FEAT(STRICT_SKIP_BUDDY, true) + /* * Consider buddies to be cache hot, decreases the likelyness of a * cache buddy being migrated away, increases cache locality. diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index cf75f00f7037..cc5a96cf8020 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -165,11 +165,14 @@ static void cpuidle_idle_call(void) * timekeeping to prevent timer interrupts from kicking us out of idle * until a proper wakeup interrupt happens. */ - if (idle_should_freeze()) { - entered_state = cpuidle_enter_freeze(drv, dev); - if (entered_state > 0) { - local_irq_enable(); - goto exit_idle; + + if (idle_should_freeze() || dev->use_deepest_state) { + if (idle_should_freeze()) { + entered_state = cpuidle_enter_freeze(drv, dev); + if (entered_state > 0) { + local_irq_enable(); + goto exit_idle; + } } next_state = cpuidle_find_deepest_state(drv, dev); @@ -218,7 +221,6 @@ static void cpu_idle_loop(void) */ __current_set_polling(); - quiet_vmstat(); tick_nohz_idle_enter(); while (!need_resched()) { diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index d3dda5d9bb61..e10de088b8fc 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -145,6 +145,7 @@ static int psi_bug __read_mostly; DEFINE_STATIC_KEY_FALSE(psi_disabled); +DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled); #ifdef CONFIG_PSI_DEFAULT_DISABLED static bool psi_enable; @@ -187,7 +188,7 @@ static void group_init(struct psi_group *group) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); group->avg_last_update = sched_clock(); group->avg_next_update = group->avg_last_update + psi_period; - INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); + INIT_DEFERRABLE_WORK(&group->avgs_work, psi_avgs_work); mutex_init(&group->avgs_lock); /* Init trigger-related members */ atomic_set(&group->poll_scheduled, 0); @@ -209,6 +210,9 @@ void __init psi_init(void) return; } + if (!cgroup_psi_enabled()) + static_branch_disable(&psi_cgroups_enabled); + psi_period = jiffies_to_nsecs(PSI_FREQ); group_init(&psi_system); } @@ -225,7 +229,7 @@ static bool test_state(unsigned int *tasks, enum psi_states state) case PSI_MEM_FULL: return tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]; case PSI_CPU_SOME: - return tasks[NR_RUNNING] > 1; + return tasks[NR_RUNNING] > tasks[NR_ONCPU]; case PSI_NONIDLE: return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] || tasks[NR_RUNNING]; @@ -669,13 +673,14 @@ static void record_times(struct psi_group_cpu *groupc, int cpu, groupc->times[PSI_NONIDLE] += delta; } -static u32 psi_group_change(struct psi_group *group, int cpu, - unsigned int clear, unsigned int set) +static void psi_group_change(struct psi_group *group, int cpu, + unsigned int clear, unsigned int set, + bool wake_clock) { struct psi_group_cpu *groupc; + u32 state_mask = 0; unsigned int t, m; enum psi_states s; - u32 state_mask = 0; groupc = per_cpu_ptr(group->pcpu, cpu); @@ -695,10 +700,10 @@ static u32 psi_group_change(struct psi_group *group, int cpu, if (!(m & (1 << t))) continue; if (groupc->tasks[t] == 0 && !psi_bug) { - printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u] clear=%x set=%x\n", + printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n", cpu, t, groupc->tasks[0], groupc->tasks[1], groupc->tasks[2], - clear, set); + groupc->tasks[3], clear, set); psi_bug = 1; } groupc->tasks[t]--; @@ -717,54 +722,63 @@ static u32 psi_group_change(struct psi_group *group, int cpu, write_seqcount_end(&groupc->seq); - return state_mask; + if (state_mask & group->poll_states) + psi_schedule_poll_work(group, 1); + + if (wake_clock && !delayed_work_pending(&group->avgs_work)) + schedule_delayed_work(&group->avgs_work, PSI_FREQ); } static struct psi_group *iterate_groups(struct task_struct *task, void **iter) { + if (*iter == &psi_system) + return NULL; + #ifdef CONFIG_CGROUPS - struct cgroup *cgroup = NULL; + if (static_branch_likely(&psi_cgroups_enabled)) { + struct cgroup *cgroup = NULL; - if (!*iter) - cgroup = task->cgroups->dfl_cgrp; - else if (*iter == &psi_system) - return NULL; - else - cgroup = cgroup_parent(*iter); + if (!*iter) + cgroup = task->cgroups->dfl_cgrp; + else + cgroup = cgroup_parent(*iter); - if (cgroup && cgroup_parent(cgroup)) { - *iter = cgroup; - return cgroup_psi(cgroup); + if (cgroup && cgroup_parent(cgroup)) { + *iter = cgroup; + return cgroup_psi(cgroup); + } } -#else - if (*iter) - return NULL; #endif *iter = &psi_system; return &psi_system; } -void psi_task_change(struct task_struct *task, int clear, int set) +static void psi_flags_change(struct task_struct *task, int clear, int set) { - int cpu = task_cpu(task); - struct psi_group *group; - bool wake_clock = true; - void *iter = NULL; - - if (!task->pid) - return; - if (((task->psi_flags & set) || (task->psi_flags & clear) != clear) && !psi_bug) { printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n", - task->pid, task->comm, cpu, + task->pid, task->comm, task_cpu(task), task->psi_flags, clear, set); psi_bug = 1; } task->psi_flags &= ~clear; task->psi_flags |= set; +} + +void psi_task_change(struct task_struct *task, int clear, int set) +{ + int cpu = task_cpu(task); + struct psi_group *group; + bool wake_clock = true; + void *iter = NULL; + + if (!task->pid) + return; + + psi_flags_change(task, clear, set); /* * Periodic aggregation shuts off if there is a period of no @@ -777,14 +791,51 @@ void psi_task_change(struct task_struct *task, int clear, int set) wq_worker_last_func(task) == psi_avgs_work)) wake_clock = false; - while ((group = iterate_groups(task, &iter))) { - u32 state_mask = psi_group_change(group, cpu, clear, set); + while ((group = iterate_groups(task, &iter))) + psi_group_change(group, cpu, clear, set, wake_clock); +} + +void psi_task_switch(struct task_struct *prev, struct task_struct *next, + bool sleep) +{ + struct psi_group *group, *common = NULL; + int cpu = task_cpu(prev); + void *iter; + + if (next->pid) { + psi_flags_change(next, 0, TSK_ONCPU); + /* + * When moving state between tasks, the group that + * contains them both does not change: we can stop + * updating the tree once we reach the first common + * ancestor. Iterate @next's ancestors until we + * encounter @prev's state. + */ + iter = NULL; + while ((group = iterate_groups(next, &iter))) { + if (per_cpu_ptr(group->pcpu, cpu)->tasks[NR_ONCPU]) { + common = group; + break; + } + + psi_group_change(group, cpu, 0, TSK_ONCPU, true); + } + } + + /* + * If this is a voluntary sleep, dequeue will have taken care + * of the outgoing TSK_ONCPU alongside TSK_RUNNING already. We + * only need to deal with it during preemption. + */ + if (sleep) + return; - if (state_mask & group->poll_states) - psi_schedule_poll_work(group, 1); + if (prev->pid) { + psi_flags_change(prev, TSK_ONCPU, 0); - if (wake_clock && !delayed_work_pending(&group->avgs_work)) - schedule_delayed_work(&group->avgs_work, PSI_FREQ); + iter = NULL; + while ((group = iterate_groups(prev, &iter)) && group != common) + psi_group_change(group, cpu, TSK_ONCPU, 0, true); } } @@ -818,17 +869,17 @@ void psi_memstall_enter(unsigned long *flags) if (static_branch_likely(&psi_disabled)) return; - *flags = current->flags & PF_MEMSTALL; + *flags = current->in_memstall; if (*flags) return; /* - * PF_MEMSTALL setting & accounting needs to be atomic wrt + * in_memstall setting & accounting needs to be atomic wrt * changes to the task's scheduling state, otherwise we can * race with CPU migration. */ rq = this_rq_lock_irq(&rf); - current->flags |= PF_MEMSTALL; + current->in_memstall = 1; psi_task_change(current, 0, TSK_MEMSTALL); rq_unlock_irq(rq, &rf); @@ -851,13 +902,13 @@ void psi_memstall_leave(unsigned long *flags) if (*flags) return; /* - * PF_MEMSTALL clearing & accounting needs to be atomic wrt + * in_memstall clearing & accounting needs to be atomic wrt * changes to the task's scheduling state, otherwise we could * race with CPU migration. */ rq = this_rq_lock_irq(&rf); - current->flags &= ~PF_MEMSTALL; + current->in_memstall = 0; psi_task_change(current, TSK_MEMSTALL, 0); rq_unlock_irq(rq, &rf); @@ -916,12 +967,14 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) rq = task_rq_lock(task, &rf); - if (task_on_rq_queued(task)) + if (task_on_rq_queued(task)) { task_flags = TSK_RUNNING; - else if (task->in_iowait) + if (task_current(rq, task)) + task_flags |= TSK_ONCPU; + } else if (task->in_iowait) task_flags = TSK_IOWAIT; - if (task->flags & PF_MEMSTALL) + if (task->in_memstall) task_flags |= TSK_MEMSTALL; if (task_flags) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 73396704a28d..39a1498da825 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -88,6 +88,7 @@ struct sched_cluster { int notifier_sent; bool wake_up_idle; u64 aggr_grp_load; + u64 coloc_boost_load; }; extern unsigned int sched_disable_window_stats; @@ -324,8 +325,9 @@ struct cfs_bandwidth { u64 quota, runtime; s64 hierarchical_quota; u64 runtime_expires; + int expires_seq; - int idle, period_active; + short idle, period_active; struct hrtimer period_timer, slack_timer; struct list_head throttled_cfs_rq; @@ -532,6 +534,7 @@ struct cfs_rq { #endif int runtime_enabled; + int expires_seq; u64 runtime_expires; s64 runtime_remaining; @@ -2333,8 +2336,13 @@ static inline u64 irq_time_read(int cpu) #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ #ifdef CONFIG_SCHED_WALT +u64 sched_ktime_clock(void); void note_task_waking(struct task_struct *p, u64 wallclock); #else /* CONFIG_SCHED_WALT */ +static inline u64 sched_ktime_clock(void) +{ + return 0; +} static inline void note_task_waking(struct task_struct *p, u64 wallclock) { } #endif /* CONFIG_SCHED_WALT */ @@ -2370,7 +2378,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, cpu_of(rq))); if (data) - data->func(data, ktime_get_ns(), flags); + data->func(data, sched_ktime_clock(), flags); } static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) @@ -2511,7 +2519,11 @@ static inline void __update_min_max_capacity(void) int i; int max_cap = 0, min_cap = INT_MAX; - for_each_online_cpu(i) { + for_each_possible_cpu(i) { + + if (!cpu_active(i)) + continue; + max_cap = max(max_cap, cpu_capacity(i)); min_cap = min(min_cap, cpu_capacity(i)); } @@ -2776,6 +2788,33 @@ static inline unsigned int power_cost(int cpu, bool max) } extern void walt_sched_energy_populated_callback(void); +extern void walt_update_min_max_capacity(void); + +static inline enum sched_boost_policy task_boost_policy(struct task_struct *p) +{ + enum sched_boost_policy boost_on_big = task_sched_boost(p) ? + sched_boost_policy() : SCHED_BOOST_NONE; + + if (boost_on_big) { + /* + * Filter out tasks less than min task util threshold + * under conservative boost. + */ + if (sysctl_sched_boost == CONSERVATIVE_BOOST && + task_util(p) <= + sysctl_sched_min_task_util_for_boost_colocation) + boost_on_big = SCHED_BOOST_NONE; + } + + return boost_on_big; +} + +extern void walt_map_freq_to_load(void); + +static inline bool is_min_capacity_cluster(struct sched_cluster *cluster) +{ + return is_min_capacity_cpu(cluster_first_cpu(cluster)); +} #else /* CONFIG_SCHED_WALT */ @@ -2787,7 +2826,10 @@ static inline bool task_sched_boost(struct task_struct *p) { return true; } - +static inline enum sched_boost_policy task_boost_policy(struct task_struct *p) +{ + return SCHED_BOOST_NONE; +} static inline void check_for_migration(struct rq *rq, struct task_struct *p) { } static inline int sched_boost(void) @@ -2909,7 +2951,9 @@ static inline unsigned int power_cost(int cpu, bool max) } static inline void walt_sched_energy_populated_callback(void) { } +static inline void walt_update_min_max_capacity(void) { } +static inline void walt_map_freq_to_load(void) { } #endif /* CONFIG_SCHED_WALT */ static inline bool energy_aware(void) @@ -2932,3 +2976,13 @@ find_first_cpu_bit(struct task_struct *p, const cpumask_t *search_cpus, #else #define find_first_cpu_bit(...) -1 #endif + +#ifdef CONFIG_SMP +static inline void sched_irq_work_queue(struct irq_work *work) +{ + if (likely(cpu_online(raw_smp_processor_id()))) + irq_work_queue(work); + else + irq_work_queue_on(work, cpumask_any(cpu_online_mask)); +} +#endif diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 3c0d8f133eb3..546b9ebd47c5 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -69,7 +69,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) return; if (!wakeup || p->sched_psi_wake_requeue) { - if (p->flags & PF_MEMSTALL) + if (p->in_memstall) set |= TSK_MEMSTALL; if (p->sched_psi_wake_requeue) p->sched_psi_wake_requeue = 0; @@ -89,9 +89,17 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep) return; if (!sleep) { - if (p->flags & PF_MEMSTALL) + if (p->in_memstall) clear |= TSK_MEMSTALL; } else { + /* + * When a task sleeps, schedule() dequeues it before + * switching to the next one. Merge the clearing of + * TSK_RUNNING and TSK_ONCPU to save an unnecessary + * psi_task_change() call in psi_sched_switch(). + */ + clear |= TSK_ONCPU; + if (p->in_iowait) set |= TSK_IOWAIT; } @@ -108,14 +116,14 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) * deregister its sleep-persistent psi states from the old * queue, and let psi_enqueue() know it has to requeue. */ - if (unlikely(p->in_iowait || (p->flags & PF_MEMSTALL))) { + if (unlikely(p->in_iowait || p->in_memstall)) { struct rq_flags rf; struct rq *rq; int clear = 0; if (p->in_iowait) clear |= TSK_IOWAIT; - if (p->flags & PF_MEMSTALL) + if (p->in_memstall) clear |= TSK_MEMSTALL; rq = __task_rq_lock(p, &rf); @@ -125,18 +133,31 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) } } +static inline void psi_sched_switch(struct task_struct *prev, + struct task_struct *next, + bool sleep) +{ + if (static_branch_likely(&psi_disabled)) + return; + + psi_task_switch(prev, next, sleep); +} + static inline void psi_task_tick(struct rq *rq) { if (static_branch_likely(&psi_disabled)) return; - if (unlikely(rq->curr->flags & PF_MEMSTALL)) + if (unlikely(rq->curr->in_memstall)) psi_memstall_tick(rq->curr, cpu_of(rq)); } #else /* CONFIG_PSI */ static inline void psi_enqueue(struct task_struct *p, bool wakeup) {} static inline void psi_dequeue(struct task_struct *p, bool sleep) {} static inline void psi_ttwu_dequeue(struct task_struct *p) {} +static inline void psi_sched_switch(struct task_struct *prev, + struct task_struct *next, + bool sleep) {} static inline void psi_task_tick(struct rq *rq) {} #endif /* CONFIG_PSI */ diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 7bbb6fdebf36..500eccf4adc7 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2016-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -19,6 +19,7 @@ * and Todd Kjos */ +#include #include #include #include @@ -41,6 +42,8 @@ const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP", #define EARLY_DETECTION_DURATION 9500000 +static ktime_t ktime_last; +static bool sched_ktime_suspended; static struct cpu_cycle_counter_cb cpu_cycle_counter_cb; static bool use_cycle_counter; DEFINE_MUTEX(cluster_lock); @@ -50,6 +53,37 @@ u64 walt_load_reported_window; static struct irq_work walt_cpufreq_irq_work; static struct irq_work walt_migration_irq_work; +u64 sched_ktime_clock(void) +{ + if (unlikely(sched_ktime_suspended)) + return ktime_to_ns(ktime_last); + return ktime_get_ns(); +} + +static void sched_resume(void) +{ + sched_ktime_suspended = false; +} + +static int sched_suspend(void) +{ + ktime_last = ktime_get(); + sched_ktime_suspended = true; + return 0; +} + +static struct syscore_ops sched_syscore_ops = { + .resume = sched_resume, + .suspend = sched_suspend +}; + +static int __init sched_init_ops(void) +{ + register_syscore_ops(&sched_syscore_ops); + return 0; +} +late_initcall(sched_init_ops); + static void acquire_rq_locks_irqsave(const cpumask_t *cpus, unsigned long *flags) { @@ -275,12 +309,7 @@ update_window_start(struct rq *rq, u64 wallclock, int event) u64 old_window_start = rq->window_start; delta = wallclock - rq->window_start; - /* If the MPM global timer is cleared, set delta as 0 to avoid kernel BUG happening */ - if (delta < 0) { - delta = 0; - WARN_ONCE(1, "WALT wallclock appears to have gone backwards or reset\n"); - } - + BUG_ON(delta < 0); if (delta < sched_ravg_window) return old_window_start; @@ -294,14 +323,19 @@ update_window_start(struct rq *rq, u64 wallclock, int event) int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) { + unsigned long flags; + mutex_lock(&cluster_lock); if (!cb->get_cpu_cycle_counter) { mutex_unlock(&cluster_lock); return -EINVAL; } + acquire_rq_locks_irqsave(cpu_possible_mask, &flags); cpu_cycle_counter_cb = *cb; use_cycle_counter = true; + release_rq_locks_irqrestore(cpu_possible_mask, &flags); + mutex_unlock(&cluster_lock); return 0; @@ -368,10 +402,17 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock) if (!rq->window_start || sched_disable_window_stats) return; + /* + * We don’t have to note down an irqstart event when cycle + * counter is not used. + */ + if (!use_cycle_counter) + return; + if (is_idle_task(curr)) { /* We're here without rq->lock held, IRQ disabled */ raw_spin_lock(&rq->lock); - update_task_cpu_cycles(curr, cpu, ktime_get_ns()); + update_task_cpu_cycles(curr, cpu, sched_ktime_clock()); raw_spin_unlock(&rq->lock); } } @@ -432,7 +473,7 @@ void sched_account_irqtime(int cpu, struct task_struct *curr, cur_jiffies_ts = get_jiffies_64(); if (is_idle_task(curr)) - update_task_ravg(curr, rq, IRQ_UPDATE, ktime_get_ns(), + update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(), delta); nr_windows = cur_jiffies_ts - rq->irqload_ts; @@ -486,6 +527,7 @@ u64 freq_policy_load(struct rq *rq) struct sched_cluster *cluster = rq->cluster; u64 aggr_grp_load = cluster->aggr_grp_load; u64 load, tt_load = 0; + u64 coloc_boost_load = cluster->coloc_boost_load; if (rq->ed_task != NULL) { load = sched_ravg_window; @@ -497,6 +539,9 @@ u64 freq_policy_load(struct rq *rq) else load = rq->prev_runnable_sum + rq->grp_time.prev_runnable_sum; + if (coloc_boost_load) + load = max_t(u64, load, coloc_boost_load); + tt_load = top_task_load(rq); switch (reporting_policy) { case FREQ_REPORT_MAX_CPU_LOAD_TOP_TASK: @@ -513,7 +558,9 @@ u64 freq_policy_load(struct rq *rq) done: trace_sched_load_to_gov(rq, aggr_grp_load, tt_load, freq_aggr_thresh, - load, reporting_policy, walt_rotation_enabled); + load, reporting_policy, walt_rotation_enabled, + sysctl_sched_little_cluster_coloc_fmin_khz, + coloc_boost_load); return load; } @@ -765,7 +812,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu) if (sched_disable_window_stats) goto done; - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); update_task_ravg(task_rq(p)->curr, task_rq(p), TASK_UPDATE, @@ -844,7 +891,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu) if (!same_freq_domain(new_cpu, task_cpu(p))) { src_rq->notif_pending = true; dest_rq->notif_pending = true; - irq_work_queue(&walt_migration_irq_work); + sched_irq_work_queue(&walt_migration_irq_work); } if (p == src_rq->ed_task) { @@ -1907,7 +1954,7 @@ static inline void run_walt_irq_work(u64 old_window_start, struct rq *rq) result = atomic64_cmpxchg(&walt_irq_work_lastq_ws, old_window_start, rq->window_start); if (result == old_window_start) - irq_work_queue(&walt_cpufreq_irq_work); + sched_irq_work_queue(&walt_cpufreq_irq_work); } /* Reflect task activity on its demand and cpu's busy time statistics */ @@ -1963,7 +2010,7 @@ int sched_set_init_task_load(struct task_struct *p, int init_load_pct) return 0; } -void init_new_task_load(struct task_struct *p, bool idle_task) +void init_new_task_load(struct task_struct *p) { int i; u32 init_load_windows; @@ -1981,9 +2028,6 @@ void init_new_task_load(struct task_struct *p, bool idle_task) /* Don't have much choice. CPU frequency would be bogus */ BUG_ON(!p->ravg.curr_window_cpu || !p->ravg.prev_window_cpu); - if (idle_task) - return; - if (current->init_load_pct) init_load_pct = current->init_load_pct; else @@ -2052,7 +2096,7 @@ void mark_task_starting(struct task_struct *p) return; } - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); p->ravg.mark_start = p->last_wake_ts = wallclock; p->last_enqueued_ts = wallclock; p->last_switch_out_ts = 0; @@ -2150,7 +2194,7 @@ static int compute_max_possible_capacity(struct sched_cluster *cluster) return capacity; } -static void update_min_max_capacity(void) +void walt_update_min_max_capacity(void) { unsigned long flags; @@ -2350,6 +2394,7 @@ struct sched_cluster init_cluster = { .notifier_sent = 0, .wake_up_idle = 0, .aggr_grp_load = 0, + .coloc_boost_load = 0, }; void init_clusters(void) @@ -2376,7 +2421,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, return 0; if (val == CPUFREQ_REMOVE_POLICY || val == CPUFREQ_CREATE_POLICY) { - update_min_max_capacity(); + walt_update_min_max_capacity(); return 0; } @@ -2454,7 +2499,7 @@ static int cpufreq_notifier_trans(struct notifier_block *nb, raw_spin_lock_irqsave(&rq->lock, flags); update_task_ravg(rq->curr, rq, TASK_UPDATE, - ktime_get_ns(), 0); + sched_ktime_clock(), 0); raw_spin_unlock_irqrestore(&rq->lock, flags); } } @@ -2592,7 +2637,6 @@ static void _set_preferred_cluster(struct related_thread_group *grp) { struct task_struct *p; u64 combined_demand = 0; - bool boost_on_big = sched_boost_policy() == SCHED_BOOST_ON_BIG; bool group_boost = false; u64 wallclock; @@ -2604,7 +2648,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp) return; } - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); /* * wakeup of two or more related tasks could race with each other and @@ -2616,7 +2660,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp) return; list_for_each_entry(p, &grp->tasks, grp_list) { - if (boost_on_big && task_sched_boost(p)) { + if (task_boost_policy(p) == SCHED_BOOST_ON_BIG) { group_boost = true; break; } @@ -2631,7 +2675,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp) grp->preferred_cluster = best_cluster(grp, combined_demand, group_boost); - grp->last_update = ktime_get_ns(); + grp->last_update = sched_ktime_clock(); trace_sched_set_preferred_cluster(grp, combined_demand); } @@ -2655,7 +2699,7 @@ int update_preferred_cluster(struct related_thread_group *grp, * has passed since we last updated preference */ if (abs(new_load - old_load) > sched_ravg_window / 4 || - ktime_get_ns() - grp->last_update > sched_ravg_window) + sched_ktime_clock() - grp->last_update > sched_ravg_window) return 1; return 0; @@ -3038,7 +3082,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, bool new_task; int i; - wallclock = ktime_get_ns(); + wallclock = sched_ktime_clock(); update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0); @@ -3126,6 +3170,70 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, BUG_ON((s64)*src_nt_prev_runnable_sum < 0); } +unsigned int sysctl_sched_little_cluster_coloc_fmin_khz; +static u64 coloc_boost_load; + +void walt_map_freq_to_load(void) +{ + struct sched_cluster *cluster; + + for_each_sched_cluster(cluster) { + if (is_min_capacity_cluster(cluster)) { + int fcpu = cluster_first_cpu(cluster); + + coloc_boost_load = div64_u64( + ((u64)sched_ravg_window * + arch_scale_cpu_capacity(NULL, fcpu) * + sysctl_sched_little_cluster_coloc_fmin_khz), + (u64)1024 * cpu_max_possible_freq(fcpu)); + coloc_boost_load = div64_u64(coloc_boost_load << 2, 5); + break; + } + } +} + +static void walt_update_coloc_boost_load(void) +{ + struct related_thread_group *grp; + struct sched_cluster *cluster; + + if (!sysctl_sched_little_cluster_coloc_fmin_khz || + sysctl_sched_boost == CONSERVATIVE_BOOST) + return; + + grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID); + if (!grp || !grp->preferred_cluster || + is_min_capacity_cluster(grp->preferred_cluster)) + return; + + for_each_sched_cluster(cluster) { + if (is_min_capacity_cluster(cluster)) { + cluster->coloc_boost_load = coloc_boost_load; + break; + } + } +} + +int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table, + int write, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + goto done; + + walt_map_freq_to_load(); + +done: + mutex_unlock(&mutex); + return ret; +} + /* * Runs in hard-irq context. This should ideally run just after the latest * window roll-over. @@ -3135,7 +3243,7 @@ void walt_irq_work(struct irq_work *irq_work) struct sched_cluster *cluster; struct rq *rq; int cpu; - u64 wc; + u64 wc, total_grp_load = 0; bool is_migration = false; int level = 0; @@ -3151,8 +3259,9 @@ void walt_irq_work(struct irq_work *irq_work) level++; } - wc = ktime_get_ns(); + wc = sched_ktime_clock(); walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws); + for_each_sched_cluster(cluster) { u64 aggr_grp_load = 0; @@ -3169,10 +3278,15 @@ void walt_irq_work(struct irq_work *irq_work) } cluster->aggr_grp_load = aggr_grp_load; + total_grp_load = aggr_grp_load; + cluster->coloc_boost_load = 0; raw_spin_unlock(&cluster->load_lock); } + if (total_grp_load) + walt_update_coloc_boost_load(); + for_each_sched_cluster(cluster) { for_each_cpu(cpu, &cluster->cpus) { int nflag = 0; diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index be06e7d40888..7d505c300015 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -150,7 +150,7 @@ extern void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p, extern void inc_rq_walt_stats(struct rq *rq, struct task_struct *p); extern void dec_rq_walt_stats(struct rq *rq, struct task_struct *p); extern void fixup_busy_time(struct task_struct *p, int new_cpu); -extern void init_new_task_load(struct task_struct *p, bool idle_task); +extern void init_new_task_load(struct task_struct *p); extern void mark_task_starting(struct task_struct *p); extern void set_window_start(struct rq *rq); void account_irqtime(int cpu, struct task_struct *curr, u64 delta, @@ -292,7 +292,7 @@ static inline int walt_start_cpu(int prev_cpu) static inline void walt_update_last_enqueue(struct task_struct *p) { - p->last_enqueued_ts = ktime_get_ns(); + p->last_enqueued_ts = sched_ktime_clock(); } extern void walt_rotate_work_init(void); extern void walt_rotation_checkpoint(int nr_big); @@ -337,7 +337,7 @@ static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, } static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { } -static inline void init_new_task_load(struct task_struct *p, bool idle_task) +static inline void init_new_task_load(struct task_struct *p) { } diff --git a/kernel/signal.c b/kernel/signal.c index 13f8cff95156..4d0f34170cf0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #define CREATE_TRACE_POINTS #include @@ -1327,8 +1329,11 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) ret = check_kill_permission(sig, info, p); rcu_read_unlock(); - if (!ret && sig) + if (!ret && sig) { ret = do_send_sig_info(sig, info, p, true); + if (capable(CAP_KILL) && sig == SIGKILL) + add_to_oom_reaper(p); + } return ret; } diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 10c5a3a8d638..abf30efca6dd 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -31,7 +31,7 @@ struct task_struct *idle_thread_get(unsigned int cpu) if (!tsk) return ERR_PTR(-ENOMEM); - init_idle(tsk, cpu, true); + init_idle(tsk, cpu); return tsk; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bce3254b37dc..a5f438dca4c6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -130,8 +130,9 @@ static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; -#ifdef CONFIG_PERF_EVENTS -static int one_thousand = 1000; +static int __maybe_unused one_thousand = 1000; +#ifdef CONFIG_SCHED_WALT +static int two_million = 2000000; #endif #ifdef CONFIG_PRINTK static int ten_thousand = 10000; @@ -390,6 +391,24 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sched_min_task_util_for_boost_colocation", + .data = &sysctl_sched_min_task_util_for_boost_colocation, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_thousand, + }, + { + .procname = "sched_little_cluster_coloc_fmin_khz", + .data = &sysctl_sched_little_cluster_coloc_fmin_khz, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_little_cluster_coloc_fmin_khz_handler, + .extra1 = &zero, + .extra2 = &two_million, + }, #endif { .procname = "sched_upmigrate", @@ -1441,6 +1460,13 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "reap_mem_on_sigkill", + .data = &sysctl_reap_mem_on_sigkill, + .maxlen = sizeof(sysctl_reap_mem_on_sigkill), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "overcommit_ratio", .data = &sysctl_overcommit_ratio, diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e14fe2780e90..ec631f02ac0b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -59,7 +59,7 @@ static void alarmtimer_triggered_func(void *p) if (!(rtc->irq_data & RTC_AF)) return; - __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + __pm_wakeup_event(ws, MSEC_PER_SEC / 2); } static struct rtc_task alarmtimer_rtc_task = { @@ -279,8 +279,8 @@ static int alarmtimer_suspend(struct device *dev) if (min.tv64 == 0) return 0; - if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { - __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + if (ktime_to_ns(min) < NSEC_PER_SEC / 2) { + __pm_wakeup_event(ws, MSEC_PER_SEC / 2); return -EBUSY; } @@ -293,7 +293,7 @@ static int alarmtimer_suspend(struct device *dev) /* Set alarm, if in the past reject suspend briefly to handle */ ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); if (ret < 0) - __pm_wakeup_event(ws, MSEC_PER_SEC); + __pm_wakeup_event(ws, MSEC_PER_SEC / 2); return ret; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 6ecf6b0541de..b0f4d6b6e454 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -32,7 +32,6 @@ */ #include -#include #include #include #include @@ -50,6 +49,7 @@ #include #include #include +#include #include @@ -150,6 +150,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); } cpu_relax(); + ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS); } } @@ -1046,7 +1047,8 @@ int hrtimer_cancel(struct hrtimer *timer) if (ret >= 0) return ret; - udelay(1); + cpu_relax(); + ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS); } } EXPORT_SYMBOL_GPL(hrtimer_cancel); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4bdb59604526..fce8ea4a5c94 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -559,7 +559,7 @@ static void sync_cmos_clock(struct work_struct *work) if (!fail || fail == -ENODEV) next.tv_sec = 659; else - next.tv_sec = 0; + next.tv_sec = 10; if (next.tv_nsec >= NSEC_PER_SEC) { next.tv_sec++; diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 4776585d785a..f4a20bd1544f 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -285,7 +285,7 @@ int sched_clock_suspend(void) suspend_ns = rd->epoch_ns; suspend_cycles = rd->epoch_cyc; - pr_info("suspend ns:%17llu suspend cycles:%17llu\n", + pr_debug("suspend ns:%17llu suspend cycles:%17llu\n", rd->epoch_ns, rd->epoch_cyc); hrtimer_cancel(&sched_clock_timer); rd->read_sched_clock = suspended_sched_clock_read; @@ -299,7 +299,7 @@ void sched_clock_resume(void) rd->epoch_cyc = cd.actual_read_sched_clock(); resume_cycles = rd->epoch_cyc; - pr_info("resume cycles:%17llu\n", rd->epoch_cyc); + pr_debug("resume cycles:%17llu\n", rd->epoch_cyc); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); rd->read_sched_clock = cd.actual_read_sched_clock; } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f738251000fe..0059764b8dc7 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -165,3 +165,4 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); void timer_clear_idle(void); +#define TIMER_LOCK_TIGHT_LOOP_DELAY_NS 350 diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 8f482de0adc0..70a5cec48520 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -822,6 +823,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, nohz_balance_enter_idle(cpu); calc_load_enter_idle(); cpu_load_update_nohz_start(); + quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -947,8 +949,6 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) ktime_t now, expires; int cpu = smp_processor_id(); - now = tick_nohz_start_idle(ts); - #ifdef CONFIG_SMP if (check_pending_deferrable_timers(cpu)) raise_softirq_irqoff(TIMER_SOFTIRQ); @@ -957,6 +957,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) if (can_stop_idle_tick(cpu, ts)) { int was_stopped = ts->tick_stopped; + now = tick_nohz_start_idle(ts); ts->idle_calls++; expires = tick_nohz_stop_sched_tick(ts, now, cpu); diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index 38bc4d2208e8..8c73276a2d55 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -75,7 +75,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t) int bin = min(fls(t->tv_sec), NUM_BINS-1); sleep_time_bin[bin]++; - printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n", + pr_debug(KERN_INFO "Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } diff --git a/kernel/time/timer.c b/kernel/time/timer.c index cc40addad625..4d15d91b194d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -212,7 +212,7 @@ struct timer_base timer_base_deferrable; static atomic_t deferrable_pending; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -unsigned int sysctl_timer_migration = 1; +unsigned int sysctl_timer_migration = 0; void timers_update_migration(bool update_nohz) { @@ -943,6 +943,7 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, spin_unlock_irqrestore(&base->lock, *flags); } cpu_relax(); + ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS); } } @@ -1264,7 +1265,8 @@ int del_timer_sync(struct timer_list *timer) int ret = try_to_del_timer_sync(timer); if (ret >= 0) return ret; - udelay(1); + cpu_relax(); + ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS); } } EXPORT_SYMBOL(del_timer_sync); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ac67f2196904..3420a626f96e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -135,6 +135,20 @@ config GENERIC_TRACER bool select TRACING +if TRACING + +config DISABLE_TRACE_PRINTK + bool "Force disable trace_printk() usage" + default y + help + When trace_printk() is used in any of the kernel source, it enables + debugging functions which are not desired for production kernel. + Enabling this option will replace trace_printk() with pr_debug(). + + If in doubt, say Y. + +endif + # # Minimum requirements an architecture has to meet for us to # be able to offer generic tracing facilities: @@ -165,6 +179,7 @@ config FUNCTION_TRACER select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER + default n help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation @@ -181,6 +196,7 @@ config FUNCTION_GRAPH_TRACER depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE depends on ROP_PROTECTION_NONE default y + default n help Enable the kernel to trace a function at both its return and its entry. @@ -429,6 +445,7 @@ config STACK_TRACER select FUNCTION_TRACER select STACKTRACE select KALLSYMS + default n help This special tracer records the maximum stack footprint of the kernel and displays it in /sys/kernel/debug/tracing/stack_trace. @@ -522,7 +539,7 @@ config DYNAMIC_FTRACE bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER depends on HAVE_DYNAMIC_FTRACE - default y + default n help This option will modify all the calls to function tracing dynamically (will patch them out of the binary image and diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index b4f8e1691fdc..1ebc359dbc21 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,16 +1,16 @@ # Do not instrument the tracer itself: -ifdef CONFIG_FUNCTION_TRACER -ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) +#ifdef CONFIG_FUNCTION_TRACER +#ORIG_CFLAGS := $(KBUILD_CFLAGS) +#KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) -ifdef CONFIG_FTRACE_SELFTEST +#ifdef CONFIG_FTRACE_SELFTEST # selftest needs instrumentation -CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE) -obj-y += trace_selftest_dynamic.o -endif -endif +#CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE) +#obj-y += trace_selftest_dynamic.o +#endif +#endif # If unlikely tracing is enabled, do not trace these files ifdef CONFIG_TRACING_BRANCHES diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 90e15f86d59c..de73ba5564f1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -297,11 +297,11 @@ static inline struct trace_array *top_trace_array(void) __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN -#define IF_ASSIGN(var, entry, etype, id) \ - if (FTRACE_CMP_TYPE(var, etype)) { \ - var = (typeof(var))(entry); \ - WARN_ON(id && (entry)->type != id); \ - break; \ +#define IF_ASSIGN(var, entry, etype, id) \ + if (FTRACE_CMP_TYPE(var, etype)) { \ + var = (typeof(var))(entry); \ + WARN_ON(id != 0 && (entry)->type != id); \ + break; \ } /* Will cause compile errors if type is not found. */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 134200a12196..b91893080cdb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -292,7 +292,7 @@ module_param_named(disable_numa, wq_disable_numa, bool, 0444); /* see the comment above the definition of WQ_POWER_EFFICIENT */ static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); -module_param_named(power_efficient, wq_power_efficient, bool, 0444); +module_param_named(power_efficient, wq_power_efficient, bool, 0644); bool wq_online; /* can kworkers be created yet? */ @@ -4424,7 +4424,7 @@ static void show_pwq(struct pool_workqueue *pwq) bool has_in_flight = false, has_pending = false; int bkt; - pr_info(" pwq %d:", pool->id); + pr_debug(" pwq %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" active=%d/%d refcnt=%d%s\n", @@ -4440,7 +4440,7 @@ static void show_pwq(struct pool_workqueue *pwq) if (has_in_flight) { bool comma = false; - pr_info(" in-flight:"); + pr_debug(" in-flight:"); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (worker->current_pwq != pwq) continue; @@ -4465,7 +4465,7 @@ static void show_pwq(struct pool_workqueue *pwq) if (has_pending) { bool comma = false; - pr_info(" pending:"); + pr_debug(" pending:"); list_for_each_entry(work, &pool->worklist, entry) { if (get_work_pwq(work) != pwq) continue; @@ -4479,7 +4479,7 @@ static void show_pwq(struct pool_workqueue *pwq) if (!list_empty(&pwq->delayed_works)) { bool comma = false; - pr_info(" delayed:"); + pr_debug(" delayed:"); list_for_each_entry(work, &pwq->delayed_works, entry) { pr_cont_work(comma, work); comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); @@ -4503,7 +4503,7 @@ void show_workqueue_state(void) rcu_read_lock_sched(); - pr_info("Showing busy workqueues and worker pools:\n"); + pr_debug("Showing busy workqueues and worker pools:\n"); list_for_each_entry_rcu(wq, &workqueues, list) { struct pool_workqueue *pwq; @@ -4518,7 +4518,7 @@ void show_workqueue_state(void) if (idle) continue; - pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); + pr_debug("workqueue %s: flags=0x%x\n", wq->name, wq->flags); for_each_pwq(pwq, wq) { spin_lock_irqsave(&pwq->pool->lock, flags); @@ -4542,7 +4542,7 @@ void show_workqueue_state(void) if (pool->nr_workers == pool->nr_idle) goto next_pool; - pr_info("pool %d:", pool->id); + pr_debug("pool %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" hung=%us workers=%d", jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000, @@ -5560,7 +5560,7 @@ static void __init wq_numa_init(void) return; if (wq_disable_numa) { - pr_info("workqueue: NUMA affinity support disabled\n"); + pr_debug("workqueue: NUMA affinity support disabled\n"); return; } diff --git a/lib/crc32.c b/lib/crc32.c index 0d450462b0bd..7db4034acc69 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -182,21 +182,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, } #if CRC_LE_BITS == 1 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32table_le, CRCPOLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); @@ -205,6 +205,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); +u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); + /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 124fdf238b3d..73b861e03d4b 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -21,7 +21,7 @@ * that would just muddy the log. So we report the first one and * shut up after that. */ -int debug_locks = 1; +int debug_locks = 0; EXPORT_SYMBOL_GPL(debug_locks); /* diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 6d35274170bc..40209fe61eef 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -15,25 +15,33 @@ * * A very rough approximation to the sqrt() function. */ -unsigned long int_sqrt(unsigned long x) +inline unsigned long int_sqrt(unsigned long x) { - unsigned long b, m, y = 0; + register unsigned long tmp; + register unsigned long place; + register unsigned long root = 0; if (x <= 1) return x; - m = 1UL << (__fls(x) & ~1UL); - while (m != 0) { - b = y + m; - y >>= 1; + place = 1UL << (BITS_PER_LONG - 2); - if (x >= b) { - x -= b; - y += m; + do{ + place >>= 2; + }while(place > x); + + do { + tmp = root + place; + root >>= 1; + + if (x >= tmp) + { + x -= tmp; + root += place; } - m >>= 2; - } + place >>= 2; + }while (place != 0); - return y; + return root; } EXPORT_SYMBOL(int_sqrt); diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index 668400bd38ee..90bb67994688 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -446,7 +446,7 @@ static FORCE_INLINE int LZ4_compress_generic( *op++ = (BYTE)(lastRun << ML_BITS); } - memcpy(op, anchor, lastRun); + LZ4_memcpy(op, anchor, lastRun); op += lastRun; } @@ -708,7 +708,7 @@ static int LZ4_compress_destSize_generic( } else { *op++ = (BYTE)(lastRunSize< decode everything */ - if ((partialDecoding) && (oexit > oend - MFLIMIT)) - oexit = oend - MFLIMIT; + assert(lowPrefix <= op); + assert(src != NULL); /* Empty output buffer */ if ((endOnInput) && (unlikely(outputSize == 0))) - return ((inputSize == 1) && (*ip == 0)) ? 0 : -1; + return ((srcSize == 1) && (*ip == 0)) ? 0 : -1; if ((!endOnInput) && (unlikely(outputSize == 0))) return (*ip == 0 ? 1 : -1); + if ((endOnInput) && unlikely(srcSize == 0)) + return -1; + /* Main Loop : decode sequences */ while (1) { size_t length; @@ -111,12 +125,77 @@ static FORCE_INLINE int LZ4_decompress_generic( /* get literal length */ unsigned int const token = *ip++; - length = token>>ML_BITS; + /* ip < iend before the increment */ + assert(!endOnInput || ip <= iend); + + /* + * A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough + * space, enter the shortcut and copy 16 bytes on behalf + * of the literals (in the fast mode, only 8 bytes can be + * safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes + * in a similar manner; but we ensure that there's enough + * space in the output for those 18 bytes earlier, upon + * entering the shortcut (in other words, there is a + * combined check for both stages). + * + * The & in the likely() below is intentionally not && so that + * some compilers can produce better parallelized runtime code + */ + if ((endOnInput ? length != RUN_MASK : length <= 8) + /* + * strictly "less than" on input, to re-enter + * the loop with at least one byte + */ + && likely((endOnInput ? ip < shortiend : 1) & + (op <= shortoend))) { + /* Copy the literals */ + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* + * The second stage: + * prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. + */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ((length != ML_MASK) && + (offset >= 8) && + (dict == withPrefix64k || match >= lowPrefix)) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op + 16, match + 16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* + * The second stage didn't work out, but the info + * is ready. Propel it right to the point of match + * copying. + */ + goto _copy_match; + } + + /* decode literal length */ if (length == RUN_MASK) { unsigned int s; + if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) { + /* overflow detection */ + goto _output_error; + } do { s = *ip++; length += s; @@ -125,14 +204,14 @@ static FORCE_INLINE int LZ4_decompress_generic( : 1) & (s == 255)); if ((safeDecode) - && unlikely( - (size_t)(op + length) < (size_t)(op))) { + && unlikely((uptrval)(op) + + length < (uptrval)(op))) { /* overflow detection */ goto _output_error; } if ((safeDecode) - && unlikely( - (size_t)(ip + length) < (size_t)(ip))) { + && unlikely((uptrval)(ip) + + length < (uptrval)(ip))) { /* overflow detection */ goto _output_error; } @@ -140,16 +219,19 @@ static FORCE_INLINE int LZ4_decompress_generic( /* copy literals */ cpy = op + length; - if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT)) + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + + if (((endOnInput) && ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS)))) || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) { if (partialDecoding) { if (cpy > oend) { /* - * Error : - * write attempt beyond end of output buffer + * Partial decoding : + * stop in the middle of literal segment */ - goto _output_error; + cpy = oend; + length = oend - op; } if ((endOnInput) && (ip + length > iend)) { @@ -181,32 +263,54 @@ static FORCE_INLINE int LZ4_decompress_generic( } } - memcpy(op, ip, length); + /* + * supports overlapping memory regions; only matters + * for in-place decompression scenarios + */ + LZ4_memmove(op, ip, length); ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - break; - } - LZ4_wildCopy(op, ip, cpy); - ip += length; - op = cpy; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) + break; + } else { + /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + LZ4_wildCopy(op, ip, cpy); + ip += length; + op = cpy; + } /* get offset */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; - if ((checkOffset) && (unlikely(match < lowLimit))) { + /* get matchlength */ + length = token & ML_MASK; + +_copy_match: + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { /* Error : offset outside buffers */ goto _output_error; } /* costs ~1%; silence an msan warning when offset == 0 */ - LZ4_write32(op, (U32)offset); + /* + * note : when partialDecoding, there is no guarantee that + * at least 4 bytes remain available in output buffer + */ + if (!partialDecoding) { + assert(oend > op); + assert(oend - op >= 4); + + LZ4_write32(op, (U32)offset); + } - /* get matchlength */ - length = token & ML_MASK; if (length == ML_MASK) { unsigned int s; @@ -221,7 +325,7 @@ static FORCE_INLINE int LZ4_decompress_generic( if ((safeDecode) && unlikely( - (size_t)(op + length) < (size_t)op)) { + (uptrval)(op) + length < (uptrval)op)) { /* overflow detection */ goto _output_error; } @@ -229,32 +333,33 @@ static FORCE_INLINE int LZ4_decompress_generic( length += MINMATCH; - /* check external dictionary */ + /* match starting within external dictionary */ if ((dict == usingExtDict) && (match < lowPrefix)) { if (unlikely(op + length > oend - LASTLITERALS)) { /* doesn't respect parsing restriction */ - goto _output_error; + if (!partialDecoding) + goto _output_error; + length = min(length, (size_t)(oend - op)); } if (length <= (size_t)(lowPrefix - match)) { /* - * match can be copied as a single segment - * from external dictionary + * match fits entirely within external + * dictionary : just copy */ memmove(op, dictEnd - (lowPrefix - match), length); op += length; } else { /* - * match encompass external + * match stretches into both external * dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE * const endOfMatch = op + restSize; @@ -263,27 +368,48 @@ static FORCE_INLINE int LZ4_decompress_generic( while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } - continue; } /* copy match within block */ cpy = op + length; - if (unlikely(offset < 8)) { - const int dec64 = dec64table[offset]; + /* + * partialDecoding : + * may not respect endBlock parsing restrictions + */ + assert(op <= oend); + if (partialDecoding && + (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = min(length, (size_t)(oend - op)); + const BYTE * const matchEnd = match + mlen; + BYTE * const copyEnd = op + mlen; + + if (matchEnd > op) { + /* overlap copy */ + while (op < copyEnd) + *op++ = *match++; + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) + break; + continue; + } + if (unlikely(offset < 8)) { op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; - match += dec32table[offset]; - memcpy(op + 4, match, 4); - match -= dec64; + match += inc32table[offset]; + LZ4_memcpy(op + 4, match, 4); + match -= dec64table[offset]; } else { LZ4_copy8(op, match); match += 8; @@ -291,7 +417,7 @@ static FORCE_INLINE int LZ4_decompress_generic( op += 8; - if (unlikely(cpy > oend - 12)) { + if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); if (cpy > oend - LASTLITERALS) { @@ -307,60 +433,139 @@ static FORCE_INLINE int LZ4_decompress_generic( match += oCopyLimit - op; op = oCopyLimit; } - while (op < cpy) *op++ = *match++; } else { LZ4_copy8(op, match); - if (length > 16) LZ4_wildCopy(op + 8, match + 8, cpy); } - - op = cpy; /* correction */ + op = cpy; /* wildcopy correction */ } /* end of decoding */ if (endOnInput) { /* Nb of output bytes decoded */ - return (int) (((char *)op) - dest); + return (int) (((char *)op) - dst); } else { /* Nb of input bytes read */ - return (int) (((const char *)ip) - source); + return (int) (((const char *)ip) - src); } /* Overflow error detected */ _output_error: - return -1; + return (int) (-(((const char *)ip) - src)) - 1; } int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, int maxDecompressedSize) { - return LZ4_decompress_generic(source, dest, compressedSize, - maxDecompressedSize, endOnInputSize, full, 0, - noDict, (BYTE *)dest, NULL, 0); + return LZ4_decompress_generic(source, dest, + compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, + noDict, (BYTE *)dest, NULL, 0); } -int LZ4_decompress_safe_partial(const char *source, char *dest, - int compressedSize, int targetOutputSize, int maxDecompressedSize) +int LZ4_decompress_safe_partial(const char *src, char *dst, + int compressedSize, int targetOutputSize, int dstCapacity) { - return LZ4_decompress_generic(source, dest, compressedSize, - maxDecompressedSize, endOnInputSize, partial, - targetOutputSize, noDict, (BYTE *)dest, NULL, 0); + dstCapacity = min(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE *)dst, NULL, 0); } int LZ4_decompress_fast(const char *source, char *dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, withPrefix64k, - (BYTE *)(dest - 64 * KB), NULL, 64 * KB); + endOnOutputSize, decode_full_block, + withPrefix64k, + (BYTE *)dest - 64 * KB, NULL, 0); +} + +/* ===== Instantiate a few more decoding cases, used more than once. ===== */ + +static int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest, + int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + withPrefix64k, + (BYTE *)dest - 64 * KB, NULL, 0); +} + +static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest, + int compressedSize, + int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + noDict, + (BYTE *)dest - prefixSize, NULL, 0); +} + +static int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); +} + +static int LZ4_decompress_fast_extDict(const char *source, char *dest, + int originalSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + 0, originalSize, + endOnOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); } +/* + * The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). + */ +static FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + size_t prefixSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); +} + +static FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char *source, char *dest, + int originalSize, size_t prefixSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + 0, originalSize, + endOnOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); +} + +/* ===== streaming decompression functions ===== */ + int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, const char *dictionary, int dictSize) { - LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; lz4sd->prefixSize = (size_t) dictSize; lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize; @@ -382,35 +587,51 @@ int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, const char *source, char *dest, int compressedSize, int maxOutputSize) { - LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE *)dest) { - result = LZ4_decompress_generic(source, dest, - compressedSize, - maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, - lz4sd->externalDict, - lz4sd->extDictSize); - + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, + compressedSize, maxOutputSize); + if (result <= 0) + return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + /* They're rolling the current segment. */ + if (lz4sd->prefixSize >= 64 * KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, + compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, + dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, + compressedSize, maxOutputSize, + lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; + lz4sd->prefixEnd += result; } else { + /* + * The buffer wraps around, or they're + * switching to another buffer. + */ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE *)dest, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE *)dest + result; + lz4sd->prefixEnd = (BYTE *)dest + result; } return result; @@ -422,115 +643,66 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE *)dest) { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, - lz4sd->prefixEnd - lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - + if (lz4sd->prefixSize == 0) { + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) + return result; + lz4sd->prefixSize = originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + if (lz4sd->prefixSize >= 64 * KB - 1 || + lz4sd->extDictSize == 0) + result = LZ4_decompress_fast(source, dest, + originalSize); + else + result = LZ4_decompress_fast_doubleDict(source, dest, + originalSize, lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += originalSize; + lz4sd->prefixEnd += originalSize; } else { lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE *)dest, - lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_fast_extDict(source, dest, + originalSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE *)dest + originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; } - return result; } -/* - * Advanced decoding functions : - * *_usingDict() : - * These decoding functions work the same as "_continue" ones, - * the dictionary must be explicitly provided within parameters - */ -static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source, - char *dest, int compressedSize, int maxOutputSize, int safe, - const char *dictStart, int dictSize) +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const char *dictStart, int dictSize) { if (dictSize == 0) - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, safe, full, 0, - noDict, (BYTE *)dest, NULL, 0); - if (dictStart + dictSize == dest) { - if (dictSize >= (int)(64 * KB - 1)) - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, safe, full, 0, - withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, - maxOutputSize, safe, full, 0, noDict, - (BYTE *)dest - dictSize, NULL, 0); + return LZ4_decompress_safe(source, dest, + compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 * KB - 1) + return LZ4_decompress_safe_withPrefix64k(source, dest, + compressedSize, maxOutputSize); + return LZ4_decompress_safe_withSmallPrefix(source, dest, + compressedSize, maxOutputSize, dictSize); } - return LZ4_decompress_generic(source, dest, compressedSize, - maxOutputSize, safe, full, 0, usingExtDict, - (BYTE *)dest, (const BYTE *)dictStart, dictSize); -} - -int LZ4_decompress_safe_usingDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - const char *dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, - compressedSize, maxOutputSize, 1, dictStart, dictSize); + return LZ4_decompress_safe_forceExtDict(source, dest, + compressedSize, maxOutputSize, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char *source, char *dest, - int originalSize, const char *dictStart, int dictSize) + int originalSize, + const char *dictStart, int dictSize) { - return LZ4_decompress_usingDict_generic(source, dest, 0, - originalSize, 0, dictStart, dictSize); -} - -/*-****************************** - * For backwards compatibility - ********************************/ -int lz4_decompress_unknownoutputsize(const unsigned char *src, - size_t src_len, unsigned char *dest, size_t *dest_len) { - *dest_len = LZ4_decompress_safe(src, dest, - src_len, *dest_len); - - /* - * Prior lz4_decompress_unknownoutputsize will return - * 0 for success and a negative result for error - * new LZ4_decompress_safe returns - * - the length of data read on success - * - and also a negative result on error - * meaning when result > 0, we just return 0 here - */ - if (src_len > 0) - return 0; - else - return -1; -} + if (dictSize == 0 || dictStart + dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len) { - *src_len = LZ4_decompress_fast(src, dest, actual_dest_len); - - /* - * Prior lz4_decompress will return - * 0 for success and a negative result for error - * new LZ4_decompress_fast returns - * - the length of data read on success - * - and also a negative result on error - * meaning when result > 0, we just return 0 here - */ - if (*src_len > 0) - return 0; - else - return -1; + return LZ4_decompress_fast_extDict(source, dest, originalSize, + dictStart, dictSize); } #ifndef STATIC @@ -542,8 +714,6 @@ EXPORT_SYMBOL(LZ4_decompress_safe_continue); EXPORT_SYMBOL(LZ4_decompress_fast_continue); EXPORT_SYMBOL(LZ4_decompress_safe_usingDict); EXPORT_SYMBOL(LZ4_decompress_fast_usingDict); -EXPORT_SYMBOL(lz4_decompress_unknownoutputsize); -EXPORT_SYMBOL(lz4_decompress); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("LZ4 decompressor"); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 00a0b58a0871..673bd206aa98 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -75,6 +75,11 @@ typedef uintptr_t uptrval; #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 #define MFLIMIT (WILDCOPYLENGTH + MINMATCH) +/* + * ensure it's possible to write 2 x wildcopyLength + * without overflowing output buffer + */ +#define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH) /* Increase this value ==> compression run slower on incompressible data */ #define LZ4_SKIPTRIGGER 6 @@ -132,6 +137,17 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) return put_unaligned_le16(value, memPtr); } +/* + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so apply its specialized memcpy() inlining logic. When + * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() + * as-if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. + */ +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) + static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { #if LZ4_ARCH64 @@ -222,6 +238,8 @@ typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c)) #endif diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index e1fbf1561e55..e7ac8694b797 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -570,7 +570,7 @@ static int LZ4HC_compress_generic( *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun<nextToUpdate = ctxPtr->dictLimit; } -EXPORT_SYMBOL(LZ4HC_setExternalDict); static int LZ4_compressHC_continue_generic( LZ4_streamHC_t *LZ4_streamHCPtr, @@ -765,28 +764,5 @@ int LZ4_saveDictHC( } EXPORT_SYMBOL(LZ4_saveDictHC); -/*-****************************** - * For backwards compatibility - ********************************/ -int lz4hc_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem) -{ - *dst_len = LZ4_compress_HC(src, dst, src_len, - *dst_len, LZ4HC_DEFAULT_CLEVEL, wrkmem); - - /* - * Prior lz4hc_compress will return -1 in case of error - * and 0 on success - * while new LZ4_compress_HC - * returns 0 in case of error - * and the output length on success - */ - if (!*dst_len) - return -1; - else - return 0; -} -EXPORT_SYMBOL(lz4hc_compress); - MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("LZ4 HC compressor"); diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 236eb21167b5..a8ede77afe0d 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -20,7 +20,8 @@ static noinline size_t lzo1x_1_do_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - size_t ti, void *wrkmem) + size_t ti, void *wrkmem, signed char *state_offset, + const unsigned char bitstream_version) { const unsigned char *ip; unsigned char *op; @@ -35,27 +36,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, ip += ti < 4 ? 4 - ti : 0; for (;;) { - const unsigned char *m_pos; + const unsigned char *m_pos = NULL; size_t t, m_len, m_off; u32 dv; + u32 run_length = 0; literal: ip += 1 + ((ip - ii) >> 5); next: if (unlikely(ip >= ip_end)) break; dv = get_unaligned_le32(ip); - t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; - m_pos = in + dict[t]; - dict[t] = (lzo_dict_t) (ip - in); - if (unlikely(dv != get_unaligned_le32(m_pos))) - goto literal; + + if (dv == 0 && bitstream_version) { + const unsigned char *ir = ip + 4; + const unsigned char *limit = ip_end + < (ip + MAX_ZERO_RUN_LENGTH + 1) + ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + defined(LZO_FAST_64BIT_MEMORY_ACCESS) + u64 dv64; + + for (; (ir + 32) <= limit; ir += 32) { + dv64 = get_unaligned((u64 *)ir); + dv64 |= get_unaligned((u64 *)ir + 1); + dv64 |= get_unaligned((u64 *)ir + 2); + dv64 |= get_unaligned((u64 *)ir + 3); + if (dv64) + break; + } + for (; (ir + 8) <= limit; ir += 8) { + dv64 = get_unaligned((u64 *)ir); + if (dv64) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctzll(dv64) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clzll(dv64) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#else + while ((ir < (const unsigned char *) + ALIGN((uintptr_t)ir, 4)) && + (ir < limit) && (*ir == 0)) + ir++; + for (; (ir + 4) <= limit; ir += 4) { + dv = *((u32 *)ir); + if (dv) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctz(dv) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clz(dv) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#endif + while (likely(ir < limit) && unlikely(*ir == 0)) + ir++; + run_length = ir - ip; + if (run_length > MAX_ZERO_RUN_LENGTH) + run_length = MAX_ZERO_RUN_LENGTH; + } else { + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + } ii -= ti; ti = 0; t = ip - ii; if (t != 0) { if (t <= 3) { - op[-2] |= t; + op[*state_offset] |= t; COPY4(op, ii); op += t; } else if (t <= 16) { @@ -88,6 +147,17 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, } } + if (unlikely(run_length)) { + ip += run_length; + run_length -= MIN_ZERO_RUN_LENGTH; + put_unaligned_le32((run_length << 21) | 0xfffc18 + | (run_length & 0x7), op); + op += 4; + run_length = 0; + *state_offset = -3; + goto finished_writing_instruction; + } + m_len = 4; { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) @@ -170,7 +240,6 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, m_off = ip - m_pos; ip += m_len; - ii = ip; if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { m_off -= 1; *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); @@ -207,29 +276,48 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, *op++ = (m_off << 2); *op++ = (m_off >> 6); } + *state_offset = -2; +finished_writing_instruction: + ii = ip; goto next; } *out_len = op - out; return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, +int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - void *wrkmem) + void *wrkmem, const unsigned char bitstream_version) { const unsigned char *ip = in; unsigned char *op = out; + unsigned char *data_start; size_t l = in_len; size_t t = 0; + signed char state_offset = -2; + unsigned int m4_max_offset; + + // LZO v0 will never write 17 as first byte (except for zero-length + // input), so this is used to version the bitstream + if (bitstream_version > 0) { + *op++ = 17; + *op++ = bitstream_version; + m4_max_offset = M4_MAX_OFFSET_V1; + } else { + m4_max_offset = M4_MAX_OFFSET_V0; + } + + data_start = op; while (l > 20) { - size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); + size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; if ((ll_end + ((t + ll) >> 5)) <= ll_end) break; BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); - t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, + &state_offset, bitstream_version); ip += ll; op += *out_len; l -= ll; @@ -239,10 +327,10 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, if (t > 0) { const unsigned char *ii = in + in_len - t; - if (op == out && t <= 238) { + if (op == data_start && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { - op[-2] |= t; + op[state_offset] |= t; } else if (t <= 18) { *op++ = (t - 3); } else { @@ -273,7 +361,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, *out_len = op - out; return LZO_E_OK; } + +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); +} + +int lzorle1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, + wrkmem, LZO_VERSION); +} + EXPORT_SYMBOL_GPL(lzo1x_1_compress); +EXPORT_SYMBOL_GPL(lzorle1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index a1c387f6afba..9e07e9ef1aad 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -46,11 +46,21 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; + unsigned char bitstream_version; + op = out; ip = in; if (unlikely(in_len < 3)) goto input_overrun; + + if (likely(in_len >= 5) && likely(*ip == 17)) { + bitstream_version = ip[1]; + ip += 2; + } else { + bitstream_version = 0; + } + if (*ip > 17) { t = *ip++ - 17; if (t < 4) { @@ -154,32 +164,49 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, m_pos -= next >> 2; next &= 3; } else { - m_pos = op; - m_pos -= (t & 8) << 11; - t = (t & 7) + (3 - 1); - if (unlikely(t == 2)) { - size_t offset; - const unsigned char *ip_last = ip; + NEED_IP(2); + next = get_unaligned_le16(ip); + if (((next & 0xfffc) == 0xfffc) && + ((t & 0xf8) == 0x18) && + likely(bitstream_version)) { + NEED_IP(3); + t &= 7; + t |= ip[2] << 3; + t += MIN_ZERO_RUN_LENGTH; + NEED_OP(t); + memset(op, 0, t); + op += t; + next &= 3; + ip += 3; + goto match_next; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; - while (unlikely(*ip == 0)) { - ip++; - NEED_IP(1); - } - offset = ip - ip_last; - if (unlikely(offset > MAX_255_COUNT)) - return LZO_E_ERROR; + while (unlikely(*ip == 0)) { + ip++; + NEED_IP(1); + } + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; - offset = (offset << 8) - offset; - t += offset + 7 + *ip++; - NEED_IP(2); + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; + NEED_IP(2); + next = get_unaligned_le16(ip); + } + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; } - next = get_unaligned_le16(ip); - ip += 2; - m_pos -= next >> 2; - next &= 3; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; } TEST_LB(m_pos); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 6710b83ce72e..b60851fcf6ce 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * lzodefs.h -- architecture, OS and compiler specific defines * @@ -12,9 +13,15 @@ */ +/* Version + * 0: original lzo version + * 1: lzo with support for RLE + */ +#define LZO_VERSION 1 + #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) -#if defined(__x86_64__) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define COPY8(dst, src) \ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) #else @@ -24,19 +31,21 @@ #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) #error "conflicting endian definitions" -#elif defined(__x86_64__) +#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define LZO_USE_CTZ64 1 #define LZO_USE_CTZ32 1 -#elif defined(__i386__) || defined(__powerpc__) +#define LZO_FAST_64BIT_MEMORY_ACCESS +#elif defined(CONFIG_X86) || defined(CONFIG_PPC) #define LZO_USE_CTZ32 1 -#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) +#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5) #define LZO_USE_CTZ32 1 #endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 #define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbfff +#define M4_MAX_OFFSET_V0 0xbfff +#define M4_MAX_OFFSET_V1 0xbffe #define M1_MIN_LEN 2 #define M1_MAX_LEN 2 @@ -52,6 +61,9 @@ #define M3_MARKER 32 #define M4_MARKER 16 +#define MIN_ZERO_RUN_LENGTH 4 +#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH) + #define lzo_dict_t unsigned short #define D_BITS 13 #define D_SIZE (1u << D_BITS) diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c index a5d313381539..121beb2f0930 100644 --- a/lib/reed_solomon/decode_rs.c +++ b/lib/reed_solomon/decode_rs.c @@ -1,22 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * lib/reed_solomon/decode_rs.c - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright 2002, Phil Karn, KA9Q * May be used under the terms of the GNU General Public License (GPL) * * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) * - * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $ - * - */ - -/* Generic data width independent code which is included by the - * wrappers. + * Generic data width independent code which is included by the wrappers. */ { + struct rs_codec *rs = rsc->codec; int deg_lambda, el, deg_omega; int i, j, r, k, pad; int nn = rs->nn; @@ -27,16 +21,22 @@ uint16_t *alpha_to = rs->alpha_to; uint16_t *index_of = rs->index_of; uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error; - /* Err+Eras Locator poly and syndrome poly The maximum value - * of nroots is 8. So the necessary stack size will be about - * 220 bytes max. - */ - uint16_t lambda[nroots + 1], syn[nroots]; - uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1]; - uint16_t root[nroots], reg[nroots + 1], loc[nroots]; int count = 0; uint16_t msk = (uint16_t) rs->nn; + /* + * The decoder buffers are in the rs control struct. They are + * arrays sized [nroots + 1] + */ + uint16_t *lambda = rsc->buffers + RS_DECODE_LAMBDA * (nroots + 1); + uint16_t *syn = rsc->buffers + RS_DECODE_SYN * (nroots + 1); + uint16_t *b = rsc->buffers + RS_DECODE_B * (nroots + 1); + uint16_t *t = rsc->buffers + RS_DECODE_T * (nroots + 1); + uint16_t *omega = rsc->buffers + RS_DECODE_OMEGA * (nroots + 1); + uint16_t *root = rsc->buffers + RS_DECODE_ROOT * (nroots + 1); + uint16_t *reg = rsc->buffers + RS_DECODE_REG * (nroots + 1); + uint16_t *loc = rsc->buffers + RS_DECODE_LOC * (nroots + 1); + /* Check length parameter for validity */ pad = nn - nroots - len; BUG_ON(pad < 0 || pad >= nn); diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c index 0b5b1a6728ec..9112d46e869e 100644 --- a/lib/reed_solomon/encode_rs.c +++ b/lib/reed_solomon/encode_rs.c @@ -1,23 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * lib/reed_solomon/encode_rs.c - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright 2002, Phil Karn, KA9Q * May be used under the terms of the GNU General Public License (GPL) * * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) * - * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $ - * - */ - -/* Generic data width independent code which is included by the - * wrappers. - * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par) + * Generic data width independent code which is included by the wrappers. */ { + struct rs_codec *rs = rsc->codec; int i, j, pad; int nn = rs->nn; int nroots = rs->nroots; diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index 06d04cfa9339..dfcf54242fb9 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -1,43 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * lib/reed_solomon/reed_solomon.c - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) * * Reed Solomon code lifted from reed solomon library written by Phil Karn * Copyright 2002 Phil Karn, KA9Q * - * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Description: * * The generic Reed Solomon library provides runtime configurable * encoding / decoding of RS codes. - * Each user must call init_rs to get a pointer to a rs_control - * structure for the given rs parameters. This structure is either - * generated or a already available matching control structure is used. - * If a structure is generated then the polynomial arrays for - * fast encoding / decoding are built. This can take some time so - * make sure not to call this function from a time critical path. - * Usually a module / driver should initialize the necessary - * rs_control structure on module / driver init and release it - * on exit. - * The encoding puts the calculated syndrome into a given syndrome - * buffer. - * The decoding is a two step process. The first step calculates - * the syndrome over the received (data + syndrome) and calls the - * second stage, which does the decoding / error correction itself. - * Many hw encoders provide a syndrome calculation over the received - * data + syndrome and can call the second stage directly. * + * Each user must call init_rs to get a pointer to a rs_control structure + * for the given rs parameters. The control struct is unique per instance. + * It points to a codec which can be shared by multiple control structures. + * If a codec is newly allocated then the polynomial arrays for fast + * encoding / decoding are built. This can take some time so make sure not + * to call this function from a time critical path. Usually a module / + * driver should initialize the necessary rs_control structure on module / + * driver init and release it on exit. + * + * The encoding puts the calculated syndrome into a given syndrome buffer. + * + * The decoding is a two step process. The first step calculates the + * syndrome over the received (data + syndrome) and calls the second stage, + * which does the decoding / error correction itself. Many hw encoders + * provide a syndrome calculation over the received data + syndrome and can + * call the second stage directly. */ - #include #include #include @@ -46,32 +37,44 @@ #include #include -/* This list holds all currently allocated rs control structures */ -static LIST_HEAD (rslist); +enum { + RS_DECODE_LAMBDA, + RS_DECODE_SYN, + RS_DECODE_B, + RS_DECODE_T, + RS_DECODE_OMEGA, + RS_DECODE_ROOT, + RS_DECODE_REG, + RS_DECODE_LOC, + RS_DECODE_NUM_BUFFERS +}; + +/* This list holds all currently allocated rs codec structures */ +static LIST_HEAD(codec_list); /* Protection for the list */ static DEFINE_MUTEX(rslistlock); /** - * rs_init - Initialize a Reed-Solomon codec + * codec_init - Initialize a Reed-Solomon codec * @symsize: symbol size, bits (1-8) * @gfpoly: Field generator polynomial coefficients * @gffunc: Field generator function * @fcr: first root of RS code generator polynomial, index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) + * @gfp: GFP_ flags for allocations * - * Allocate a control structure and the polynom arrays for faster + * Allocate a codec structure and the polynom arrays for faster * en/decoding. Fill the arrays according to the given parameters. */ -static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), - int fcr, int prim, int nroots) +static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int), + int fcr, int prim, int nroots, gfp_t gfp) { - struct rs_control *rs; int i, j, sr, root, iprim; + struct rs_codec *rs; - /* Allocate the control structure */ - rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL); - if (rs == NULL) + rs = kzalloc(sizeof(*rs), gfp); + if (!rs) return NULL; INIT_LIST_HEAD(&rs->list); @@ -85,17 +88,17 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), rs->gffunc = gffunc; /* Allocate the arrays */ - rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL); + rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp); if (rs->alpha_to == NULL) - goto errrs; + goto err; - rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL); + rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp); if (rs->index_of == NULL) - goto erralp; + goto err; - rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL); + rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), gfp); if(rs->genpoly == NULL) - goto erridx; + goto err; /* Generate Galois field lookup tables */ rs->index_of[0] = rs->nn; /* log(zero) = -inf */ @@ -120,7 +123,7 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), } /* If it's not primitive, exit */ if(sr != rs->alpha_to[0]) - goto errpol; + goto err; /* Find prim-th root of 1, used in decoding */ for(iprim = 1; (iprim % prim) != 0; iprim += rs->nn); @@ -148,42 +151,52 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), /* convert rs->genpoly[] to index form for quicker encoding */ for (i = 0; i <= nroots; i++) rs->genpoly[i] = rs->index_of[rs->genpoly[i]]; + + rs->users = 1; + list_add(&rs->list, &codec_list); return rs; - /* Error exit */ -errpol: +err: kfree(rs->genpoly); -erridx: kfree(rs->index_of); -erralp: kfree(rs->alpha_to); -errrs: kfree(rs); return NULL; } /** - * free_rs - Free the rs control structure, if it is no longer used - * @rs: the control structure which is not longer used by the + * free_rs - Free the rs control structure + * @rs: The control structure which is not longer used by the * caller + * + * Free the control structure. If @rs is the last user of the associated + * codec, free the codec as well. */ void free_rs(struct rs_control *rs) { + struct rs_codec *cd; + + if (!rs) + return; + + cd = rs->codec; mutex_lock(&rslistlock); - rs->users--; - if(!rs->users) { - list_del(&rs->list); - kfree(rs->alpha_to); - kfree(rs->index_of); - kfree(rs->genpoly); - kfree(rs); + cd->users--; + if(!cd->users) { + list_del(&cd->list); + kfree(cd->alpha_to); + kfree(cd->index_of); + kfree(cd->genpoly); + kfree(cd); } mutex_unlock(&rslistlock); + kfree(rs); } +EXPORT_SYMBOL_GPL(free_rs); /** - * init_rs_internal - Find a matching or allocate a new rs control structure + * init_rs_internal - Allocate rs control, find a matching codec or allocate a new one * @symsize: the symbol size (number of bits) * @gfpoly: the extended Galois field generator polynomial coefficients, * with the 0th coefficient in the low order bit. The polynomial @@ -191,55 +204,69 @@ void free_rs(struct rs_control *rs) * @gffunc: pointer to function to generate the next field element, * or the multiplicative identity element if given 0. Used * instead of gfpoly if gfpoly is 0 - * @fcr: the first consecutive root of the rs code generator polynomial + * @fcr: the first consecutive root of the rs code generator polynomial * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) + * @gfp: GFP_ flags for allocations */ static struct rs_control *init_rs_internal(int symsize, int gfpoly, - int (*gffunc)(int), int fcr, - int prim, int nroots) + int (*gffunc)(int), int fcr, + int prim, int nroots, gfp_t gfp) { - struct list_head *tmp; - struct rs_control *rs; + struct list_head *tmp; + struct rs_control *rs; + unsigned int bsize; /* Sanity checks */ if (symsize < 1) return NULL; if (fcr < 0 || fcr >= (1<= (1<= (1<mm) + list_for_each(tmp, &codec_list) { + struct rs_codec *cd = list_entry(tmp, struct rs_codec, list); + + if (symsize != cd->mm) continue; - if (gfpoly != rs->gfpoly) + if (gfpoly != cd->gfpoly) continue; - if (gffunc != rs->gffunc) + if (gffunc != cd->gffunc) continue; - if (fcr != rs->fcr) + if (fcr != cd->fcr) continue; - if (prim != rs->prim) + if (prim != cd->prim) continue; - if (nroots != rs->nroots) + if (nroots != cd->nroots) continue; /* We have a matching one already */ - rs->users++; + cd->users++; + rs->codec = cd; goto out; } /* Create a new one */ - rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots); - if (rs) { - rs->users = 1; - list_add(&rs->list, &rslist); + rs->codec = codec_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp); + if (!rs->codec) { + kfree(rs); + rs = NULL; } out: mutex_unlock(&rslistlock); @@ -247,45 +274,48 @@ static struct rs_control *init_rs_internal(int symsize, int gfpoly, } /** - * init_rs - Find a matching or allocate a new rs control structure + * init_rs_gfp - Create a RS control struct and initialize it * @symsize: the symbol size (number of bits) * @gfpoly: the extended Galois field generator polynomial coefficients, * with the 0th coefficient in the low order bit. The polynomial * must be primitive; - * @fcr: the first consecutive root of the rs code generator polynomial + * @fcr: the first consecutive root of the rs code generator polynomial * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) + * @gfp: GFP_ flags for allocations */ -struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim, - int nroots) +struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim, + int nroots, gfp_t gfp) { - return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots); + return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots, gfp); } +EXPORT_SYMBOL_GPL(init_rs_gfp); /** - * init_rs_non_canonical - Find a matching or allocate a new rs control - * structure, for fields with non-canonical - * representation + * init_rs_non_canonical - Allocate rs control struct for fields with + * non-canonical representation * @symsize: the symbol size (number of bits) * @gffunc: pointer to function to generate the next field element, * or the multiplicative identity element if given 0. Used * instead of gfpoly if gfpoly is 0 - * @fcr: the first consecutive root of the rs code generator polynomial + * @fcr: the first consecutive root of the rs code generator polynomial * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) */ struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int), - int fcr, int prim, int nroots) + int fcr, int prim, int nroots) { - return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots); + return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots, + GFP_KERNEL); } +EXPORT_SYMBOL_GPL(init_rs_non_canonical); #ifdef CONFIG_REED_SOLOMON_ENC8 /** * encode_rs8 - Calculate the parity for data values (8bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @len: data length * @par: parity data, must be initialized by caller (usually all 0) @@ -295,7 +325,7 @@ struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int), * symbol size > 8. The calling code must take care of encoding of the * syndrome result for storage itself. */ -int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par, +int encode_rs8(struct rs_control *rsc, uint8_t *data, int len, uint16_t *par, uint16_t invmsk) { #include "encode_rs.c" @@ -306,7 +336,7 @@ EXPORT_SYMBOL_GPL(encode_rs8); #ifdef CONFIG_REED_SOLOMON_DEC8 /** * decode_rs8 - Decode codeword (8bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @par: received parity data field * @len: data length @@ -319,9 +349,14 @@ EXPORT_SYMBOL_GPL(encode_rs8); * The syndrome and parity uses a uint16_t data type to enable * symbol size > 8. The calling code must take care of decoding of the * syndrome result and the received parity before calling this code. + * + * Note: The rs_control struct @rsc contains buffers which are used for + * decoding, so the caller has to ensure that decoder invocations are + * serialized. + * * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. */ -int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len, +int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, uint16_t *corr) { @@ -333,7 +368,7 @@ EXPORT_SYMBOL_GPL(decode_rs8); #ifdef CONFIG_REED_SOLOMON_ENC16 /** * encode_rs16 - Calculate the parity for data values (16bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @len: data length * @par: parity data, must be initialized by caller (usually all 0) @@ -341,7 +376,7 @@ EXPORT_SYMBOL_GPL(decode_rs8); * * Each field in the data array contains up to symbol size bits of valid data. */ -int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par, +int encode_rs16(struct rs_control *rsc, uint16_t *data, int len, uint16_t *par, uint16_t invmsk) { #include "encode_rs.c" @@ -352,7 +387,7 @@ EXPORT_SYMBOL_GPL(encode_rs16); #ifdef CONFIG_REED_SOLOMON_DEC16 /** * decode_rs16 - Decode codeword (16bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @par: received parity data field * @len: data length @@ -363,9 +398,14 @@ EXPORT_SYMBOL_GPL(encode_rs16); * @corr: buffer to store correction bitmask on eras_pos * * Each field in the data array contains up to symbol size bits of valid data. + * + * Note: The rc_control struct @rsc contains buffers which are used for + * decoding, so the caller has to ensure that decoder invocations are + * serialized. + * * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. */ -int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, +int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, uint16_t *corr) { @@ -374,10 +414,6 @@ int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, EXPORT_SYMBOL_GPL(decode_rs16); #endif -EXPORT_SYMBOL_GPL(init_rs); -EXPORT_SYMBOL_GPL(init_rs_non_canonical); -EXPORT_SYMBOL_GPL(free_rs); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Reed Solomon encoder/decoder"); MODULE_AUTHOR("Phil Karn, Thomas Gleixner"); diff --git a/mm/Kconfig b/mm/Kconfig index 17c842eb4e1a..a377622dc3c9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -759,3 +759,8 @@ config FORCE_ALLOC_FROM_DMA_ZONE always using ZONE_DMA memory. If unsure, say "n". + +config VMSTAT_INTERVAL + int "Default interval in seconds to update vmstat" + default 1 + \ No newline at end of file diff --git a/mm/Makefile b/mm/Makefile index 457892dc4b9f..56dff0ae4e53 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -38,7 +38,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o vmacache.o \ + compaction.o vmacache.o swap_slots.o \ interval_tree.o list_lru.o workingset.o \ debug.o $(mmu-y) showmem.o vmpressure.o diff --git a/mm/bootmem.c b/mm/bootmem.c index d14efd6fda06..0b8c5b3ab621 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -162,7 +162,7 @@ void free_bootmem_late(unsigned long physaddr, unsigned long size) for (; cursor < end; cursor++) { __free_pages_bootmem(pfn_to_page(cursor), cursor, 0); - totalram_pages++; + totalram_pages_inc(); } } @@ -278,7 +278,7 @@ unsigned long __init free_all_bootmem(void) list_for_each_entry(bdata, &bdata_list, list) total_pages += free_all_bootmem_core(bdata); - totalram_pages += total_pages; + totalram_pages_add(total_pages); return total_pages; } diff --git a/mm/cma.c b/mm/cma.c index 0676bb69ba14..1d9235029feb 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -543,7 +543,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align) trace_cma_alloc(pfn, page, count, align); if (ret) { - pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n", + pr_debug("%s: alloc failed, req-size: %zu pages, ret: %d\n", __func__, count, ret); cma_debug_show_areas(cma); } diff --git a/mm/compaction.c b/mm/compaction.c index 450161d04292..8d4ab768af60 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -767,11 +767,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * Periodically drop the lock (if held) regardless of its * contention, to give chance to IRQs. Abort async compaction * if contended. - */ - if (!(low_pfn % SWAP_CLUSTER_MAX) + * contention, to give chance to IRQs. Abort completely if + * a fatal signal is pending. + */ + if (!(low_pfn % SWAP_CLUSTER_MAX) && compact_unlock_should_abort(zone_lru_lock(zone), flags, - &locked, cc)) - break; + &locked, cc)) { + low_pfn = 0; + goto fatal_pending; + } if (!pfn_valid_within(low_pfn)) goto isolate_fail; @@ -957,6 +961,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn, nr_scanned, nr_isolated); +fatal_pending: count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned); if (nr_isolated) count_compact_events(COMPACTISOLATED, nr_isolated); @@ -1205,7 +1210,7 @@ typedef enum { * Allow userspace to control policy on scanning the unevictable LRU for * compactable pages. */ -int sysctl_compact_unevictable_allowed __read_mostly = 1; +int sysctl_compact_unevictable_allowed __read_mostly = 0; /* * Isolate all pages that can be migrated from the first suitable block, diff --git a/mm/highmem.c b/mm/highmem.c index 50b4ca6787f0..a81bbfe69049 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -104,9 +104,8 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) } #endif -unsigned long totalhigh_pages __read_mostly; -EXPORT_SYMBOL(totalhigh_pages); - +atomic_long_t _totalhigh_pages __read_mostly; +EXPORT_SYMBOL(_totalhigh_pages); EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 34511b5b22b1..f9714777813a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -409,7 +409,7 @@ static int __init hugepage_init(void) * where the extra memory used could hurt more than TLB overhead * is likely to save. The admin can still enable it through /sys. */ - if (totalram_pages < (512 << (20 - PAGE_SHIFT))) { + if (totalram_pages() < (512 << (20 - PAGE_SHIFT))) { transparent_hugepage_flags = 0; return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b9128eaafffe..69c1194db950 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2220,7 +2220,7 @@ static void __init gather_bootmem_prealloc(void) prep_new_huge_page(h, page, page_to_nid(page)); /* * If we had gigantic hugepages allocated at boot time, we need - * to restore the 'stolen' pages to totalram_pages in order to + * to restore the 'stolen' pages to totalram_pages() in order to * fix confusing memory reports from free(1) and another * side-effects, like CommitLimit going negative. */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 3c572104eb76..2d33a92a89fb 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -691,7 +691,7 @@ int kasan_module_alloc(void *addr, size_t size) ret = __vmalloc_node_range(shadow_size, 1, shadow_start, shadow_start + shadow_size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, __builtin_return_address(0)); diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 3a8ddf8baf7d..daffe1da66d6 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -236,7 +236,7 @@ void quarantine_reduce(void) * Update quarantine size in case of hotplug. Allocate a fraction of * the installed memory to quarantine minus per-cpu queue limits. */ - total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) / + total_size = (totalram_pages() << PAGE_SHIFT) / QUARANTINE_FRACTION; percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus(); new_quarantine_size = (total_size < percpu_quarantines) ? diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 3d822d1491c4..83ecadc66586 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -148,7 +148,7 @@ struct kmemleak_scan_area { */ struct kmemleak_object { spinlock_t lock; - unsigned long flags; /* object status flags */ + unsigned int flags; /* object status flags */ struct list_head object_list; struct list_head gray_list; struct rb_node rb_node; @@ -157,6 +157,8 @@ struct kmemleak_object { atomic_t use_count; unsigned long pointer; size_t size; + /* pass surplus references to this pointer */ + unsigned long excess_ref; /* minimum number of a pointers found before it is considered leak */ int min_count; /* the total number of pointers found pointing to this object */ @@ -263,7 +265,8 @@ enum { KMEMLEAK_NOT_LEAK, KMEMLEAK_IGNORE, KMEMLEAK_SCAN_AREA, - KMEMLEAK_NO_SCAN + KMEMLEAK_NO_SCAN, + KMEMLEAK_SET_EXCESS_REF }; /* @@ -272,9 +275,12 @@ enum { */ struct early_log { int op_type; /* kmemleak operation type */ - const void *ptr; /* allocated/freed memory block */ - size_t size; /* memory block size */ int min_count; /* minimum reference count */ + const void *ptr; /* allocated/freed memory block */ + union { + size_t size; /* memory block size */ + unsigned long excess_ref; /* surplus reference passing */ + }; unsigned long trace[MAX_TRACE]; /* stack trace */ unsigned int trace_len; /* stack trace length */ }; @@ -403,7 +409,7 @@ static void dump_object_info(struct kmemleak_object *object) object->comm, object->pid, object->jiffies); pr_notice(" min_count = %d\n", object->min_count); pr_notice(" count = %d\n", object->count); - pr_notice(" flags = 0x%lx\n", object->flags); + pr_notice(" flags = 0x%x\n", object->flags); pr_notice(" checksum = %u\n", object->checksum); pr_notice(" backtrace:\n"); print_stack_trace(&trace, 4); @@ -572,6 +578,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, object->flags = OBJECT_ALLOCATED; object->pointer = ptr; object->size = size; + object->excess_ref = 0; object->min_count = min_count; object->count = 0; /* white color initially */ object->jiffies = jiffies; @@ -804,6 +811,30 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) put_object(object); } +/* + * Any surplus references (object already gray) to 'ptr' are passed to + * 'excess_ref'. This is used in the vmalloc() case where a pointer to + * vm_struct may be used as an alternative reference to the vmalloc'ed object + * (see free_thread_stack()). + */ +static void object_set_excess_ref(unsigned long ptr, unsigned long excess_ref) +{ + unsigned long flags; + struct kmemleak_object *object; + + object = find_and_get_object(ptr, 0); + if (!object) { + kmemleak_warn("Setting excess_ref on unknown object at 0x%08lx\n", + ptr); + return; + } + + spin_lock_irqsave(&object->lock, flags); + object->excess_ref = excess_ref; + spin_unlock_irqrestore(&object->lock, flags); + put_object(object); +} + /* * Set the OBJECT_NO_SCAN flag for the object corresponding to the give * pointer. Such object will not be scanned by kmemleak but references to it @@ -918,7 +949,7 @@ static void early_alloc_percpu(struct early_log *log) * @gfp: kmalloc() flags used for kmemleak internal memory allocations * * This function is called from the kernel allocators when a new object - * (memory block) is allocated (kmem_cache_alloc, kmalloc, vmalloc etc.). + * (memory block) is allocated (kmem_cache_alloc, kmalloc etc.). */ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) @@ -961,6 +992,36 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, } EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu); +/** + * kmemleak_vmalloc - register a newly vmalloc'ed object + * @area: pointer to vm_struct + * @size: size of the object + * @gfp: __vmalloc() flags used for kmemleak internal memory allocations + * + * This function is called from the vmalloc() kernel allocator when a new + * object (memory block) is allocated. + */ +void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp) +{ + pr_debug("%s(0x%p, %zu)\n", __func__, area, size); + + /* + * A min_count = 2 is needed because vm_struct contains a reference to + * the virtual address of the vmalloc'ed block. + */ + if (kmemleak_enabled) { + create_object((unsigned long)area->addr, size, 2, gfp); + object_set_excess_ref((unsigned long)area, + (unsigned long)area->addr); + } else if (kmemleak_early_log) { + log_early(KMEMLEAK_ALLOC, area->addr, size, 2); + /* reusing early_log.size for storing area->addr */ + log_early(KMEMLEAK_SET_EXCESS_REF, + area, (unsigned long)area->addr, 0); + } +} +EXPORT_SYMBOL_GPL(kmemleak_vmalloc); + /** * kmemleak_free - unregister a previously registered object * @ptr: pointer to beginning of the object @@ -1197,6 +1258,30 @@ static bool update_checksum(struct kmemleak_object *object) return object->checksum != old_csum; } +/* + * Update an object's references. object->lock must be held by the caller. + */ +static void update_refs(struct kmemleak_object *object) +{ + if (!color_white(object)) { + /* non-orphan, ignored or new */ + return; + } + + /* + * Increase the object's reference count (number of pointers to the + * memory block). If this count reaches the required minimum, the + * object's color will become gray and it will be added to the + * gray_list. + */ + object->count++; + if (color_gray(object)) { + /* put_object() called when removing from gray_list */ + WARN_ON(!get_object(object)); + list_add_tail(&object->gray_list, &gray_list); + } +} + /* * Memory scanning is a long process and it needs to be interruptable. This * function checks whether such interrupt condition occurred. @@ -1234,6 +1319,7 @@ static void scan_block(void *_start, void *_end, for (ptr = start; ptr < end; ptr++) { struct kmemleak_object *object; unsigned long pointer; + unsigned long excess_ref; if (scan_should_stop()) break; @@ -1269,25 +1355,27 @@ static void scan_block(void *_start, void *_end, * enclosed by scan_mutex. */ spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING); - if (!color_white(object)) { - /* non-orphan, ignored or new */ - spin_unlock(&object->lock); - continue; - } - - /* - * Increase the object's reference count (number of pointers - * to the memory block). If this count reaches the required - * minimum, the object's color will become gray and it will be - * added to the gray_list. - */ - object->count++; + /* only pass surplus references (object already gray) */ if (color_gray(object)) { - /* put_object() called when removing from gray_list */ - WARN_ON(!get_object(object)); - list_add_tail(&object->gray_list, &gray_list); + excess_ref = object->excess_ref; + /* no need for update_refs() if object already gray */ + } else { + excess_ref = 0; + update_refs(object); } spin_unlock(&object->lock); + + if (excess_ref) { + object = lookup_object(excess_ref, 0); + if (!object) + continue; + if (object == scanned) + /* circular reference, ignore */ + continue; + spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING); + update_refs(object); + spin_unlock(&object->lock); + } } read_unlock_irqrestore(&kmemleak_lock, flags); } @@ -1994,6 +2082,10 @@ void __init kmemleak_init(void) case KMEMLEAK_NO_SCAN: kmemleak_no_scan(log->ptr); break; + case KMEMLEAK_SET_EXCESS_REF: + object_set_excess_ref((unsigned long)log->ptr, + log->excess_ref); + break; default: kmemleak_warn("Unknown early log operation: %d\n", log->op_type); diff --git a/mm/memblock.c b/mm/memblock.c index 9c96c537b68d..422cd907008f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1397,7 +1397,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) for (; cursor < end; cursor++) { __free_pages_bootmem(pfn_to_page(cursor), cursor, 0); - totalram_pages++; + totalram_pages_inc(); } } diff --git a/mm/mm_init.c b/mm/mm_init.c index 5b72266b4b03..db3a196826da 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -151,7 +151,7 @@ static void __meminit mm_compute_batch(void) s32 batch = max_t(s32, nr*2, 32); /* batch size set to 0.4% of (total memory/#cpus), or max int32 */ - memsized_batch = min_t(u64, (totalram_pages/nr)/256, 0x7fffffff); + memsized_batch = min_t(u64, (totalram_pages()/nr)/256, 0x7fffffff); vm_committed_as_batch = max_t(s32, memsized_batch, batch); } diff --git a/mm/nobootmem.c b/mm/nobootmem.c index aa59572cbac6..88817aab4393 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -91,7 +91,7 @@ void free_bootmem_late(unsigned long addr, unsigned long size) for (; cursor < end; cursor++) { __free_pages_bootmem(pfn_to_page(cursor), cursor, 0); - totalram_pages++; + totalram_pages_inc(); } } @@ -184,7 +184,7 @@ unsigned long __init free_all_bootmem(void) reset_all_zones_managed_pages(); pages = free_low_memory_core_early(); - totalram_pages += pages; + totalram_pages_add(pages); return pages; } diff --git a/mm/nommu.c b/mm/nommu.c index b40ec74f364c..10be196aec37 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -236,12 +236,16 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); +void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) +{ + return __vmalloc(size, flags, PAGE_KERNEL); +} + void *vmalloc_user(unsigned long size) { void *ret; - ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); if (ret) { struct vm_area_struct *vma; @@ -359,10 +363,6 @@ void *vzalloc_node(unsigned long size, int node) } EXPORT_SYMBOL(vzalloc_node); -#ifndef PAGE_KERNEL_EXEC -# define PAGE_KERNEL_EXEC PAGE_KERNEL -#endif - /** * vmalloc_exec - allocate virtually contiguous, executable memory * @size: allocation size diff --git a/mm/oom_kill.c b/mm/oom_kill.c index a72d15dd5304..5ca6780ffc06 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -49,6 +49,7 @@ int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks = 1; +int sysctl_reap_mem_on_sigkill = 1; DEFINE_MUTEX(oom_lock); @@ -237,7 +238,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc) } /* Default to all available memory */ - oc->totalpages = totalram_pages + total_swap_pages; + oc->totalpages = totalram_pages() + total_swap_pages; if (!IS_ENABLED(CONFIG_NUMA)) return CONSTRAINT_NONE; @@ -545,7 +546,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end); } } - pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", + pr_debug("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), @@ -614,10 +615,13 @@ void wake_oom_reaper(struct task_struct *tsk) if (!oom_reaper_th) return; - /* move the lock here to avoid scenario of queuing - * the same task by both OOM killer and LMK. + /* + * Move the lock here to avoid scenario of queuing + * the same task by both OOM killer and any other SIGKILL + * path. */ spin_lock(&oom_reaper_lock); + /* mm is already queued? */ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) { spin_unlock(&oom_reaper_lock); @@ -650,6 +654,16 @@ static inline void wake_oom_reaper(struct task_struct *tsk) } #endif /* CONFIG_MMU */ +static void __mark_oom_victim(struct task_struct *tsk) +{ + struct mm_struct *mm = tsk->mm; + + if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) { + atomic_inc(&tsk->signal->oom_mm->mm_count); + set_bit(MMF_OOM_VICTIM, &mm->flags); + } +} + /** * mark_oom_victim - mark the given task as OOM victim * @tsk: task to mark @@ -662,18 +676,13 @@ static inline void wake_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { - struct mm_struct *mm = tsk->mm; - WARN_ON(oom_killer_disabled); /* OOM killer might race with memcg OOM */ if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE)) return; /* oom_mm is bound to the signal struct life time. */ - if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) { - atomic_inc(&tsk->signal->oom_mm->mm_count); - set_bit(MMF_OOM_VICTIM, &mm->flags); - } + __mark_oom_victim(tsk); /* * Make sure that the task is woken up from uninterruptible sleep @@ -1094,3 +1103,22 @@ void pagefault_out_of_memory(void) if (__ratelimit(&pfoom_rs)) pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n"); } + +void add_to_oom_reaper(struct task_struct *p) + __releases(p->alloc_lock) +{ + if (!sysctl_reap_mem_on_sigkill) + return; + + p = find_lock_task_mm(p); + if (!p) + return; + + get_task_struct(p); + if (task_will_free_mem(p)) { + __mark_oom_victim(p); + wake_oom_reaper(p); + } + task_unlock(p); + put_task_struct(p); +} diff --git a/mm/page-writeback.c b/mm/page-writeback.c index dedc1fac8f35..f3bfef0b3394 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2066,7 +2066,7 @@ static int page_writeback_cpu_online(unsigned int cpu) * However, that was when we used "dirty_ratio" to scale with * all memory, and we don't do that any more. "dirty_ratio" * is now applied to total non-HIGHPAGE memory (by subtracting - * totalhigh_pages from vm_total_pages), and as such we can't + * totalhigh_pages() from vm_total_pages), and as such we can't * get into the old insane situation any more where we had * large amounts of dirty pages compared to a small amount of * non-HIGHMEM memory. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 532fa53c1316..fcb17aa08248 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -121,7 +122,8 @@ EXPORT_SYMBOL(node_states); /* Protect totalram_pages and zone->managed_pages */ static DEFINE_SPINLOCK(managed_page_count_lock); -unsigned long totalram_pages __read_mostly; +atomic_long_t _totalram_pages __read_mostly; +EXPORT_SYMBOL(_totalram_pages); unsigned long totalreserve_pages __read_mostly; unsigned long totalcma_pages __read_mostly; @@ -4375,11 +4377,11 @@ EXPORT_SYMBOL_GPL(si_mem_available); void si_meminfo(struct sysinfo *val) { - val->totalram = totalram_pages; + val->totalram = totalram_pages(); val->sharedram = global_node_page_state(NR_SHMEM); val->freeram = global_page_state(NR_FREE_PAGES); val->bufferram = nr_blockdev_pages(); - val->totalhigh = totalhigh_pages; + val->totalhigh = totalhigh_pages(); val->freehigh = nr_free_highpages(); val->mem_unit = PAGE_SIZE; } @@ -6641,10 +6643,10 @@ void adjust_managed_page_count(struct page *page, long count) { spin_lock(&managed_page_count_lock); page_zone(page)->managed_pages += count; - totalram_pages += count; + totalram_pages_add(count); #ifdef CONFIG_HIGHMEM if (PageHighMem(page)) - totalhigh_pages += count; + totalhigh_pages_add(count); #endif spin_unlock(&managed_page_count_lock); } @@ -6675,9 +6677,9 @@ EXPORT_SYMBOL(free_reserved_area); void free_highmem_page(struct page *page) { __free_reserved_page(page); - totalram_pages++; + totalram_pages_inc(); page_zone(page)->managed_pages++; - totalhigh_pages++; + totalhigh_pages_inc(); } #endif @@ -6726,10 +6728,10 @@ void __init mem_init_print_info(const char *str) physpages << (PAGE_SHIFT - 10), codesize >> 10, datasize >> 10, rosize >> 10, (init_data_size + init_code_size) >> 10, bss_size >> 10, - (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10), + (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10), totalcma_pages << (PAGE_SHIFT - 10), #ifdef CONFIG_HIGHMEM - totalhigh_pages << (PAGE_SHIFT - 10), + totalhigh_pages() << (PAGE_SHIFT - 10), #endif str ? ", " : "", str ? str : ""); } diff --git a/mm/shmem.c b/mm/shmem.c index a612d765dc59..a06511d6ceaa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -101,12 +101,12 @@ struct shmem_falloc { #ifdef CONFIG_TMPFS static unsigned long shmem_default_max_blocks(void) { - return totalram_pages / 2; + return totalram_pages() / 2; } static unsigned long shmem_default_max_inodes(void) { - return min(totalram_pages - totalhigh_pages, totalram_pages / 2); + return min(totalram_pages() - totalhigh_pages(), totalram_pages() / 2); } #endif @@ -3380,7 +3380,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, size = memparse(value,&rest); if (*rest == '%') { size <<= PAGE_SHIFT; - size *= totalram_pages; + size *= totalram_pages(); do_div(size, 100); rest++; } diff --git a/mm/slab.c b/mm/slab.c index a671328e0610..89fe1cf2d241 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1256,7 +1256,7 @@ void __init kmem_cache_init(void) * page orders on machines with more than 32MB of memory if * not overridden on the command line. */ - if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT) + if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT) slab_max_order = SLAB_MAX_ORDER_HI; /* Bootstrap is tricky, because several objects are allocated diff --git a/mm/swap.c b/mm/swap.c index 0b01f9d7c1e5..0de718722f8d 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -41,7 +41,7 @@ #include /* How many pages do we try to swap or page in/out together? */ -int page_cluster; +int page_cluster = 0; static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); @@ -810,7 +810,10 @@ EXPORT_SYMBOL(release_pages); */ void __pagevec_release(struct pagevec *pvec) { - lru_add_drain(); + if (!pvec->drained) { + lru_add_drain(); + pvec->drained = true; + } release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); pagevec_reinit(pvec); } @@ -981,19 +984,6 @@ EXPORT_SYMBOL(pagevec_lookup_range_nr_tag); */ void __init swap_setup(void) { - unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); -#ifdef CONFIG_SWAP - int i; - - for (i = 0; i < MAX_SWAPFILES; i++) - spin_lock_init(&swapper_spaces[i].tree_lock); -#endif - - /* Use a smaller cluster for small-memory machines */ - if (megs < 16) - page_cluster = 2; - else - page_cluster = 3; /* * Right now other parts of the system means that we * _really_ don't want to cluster much more diff --git a/mm/swap_ratio.c b/mm/swap_ratio.c index 4ca5783af413..3e1f45ac556f 100644 --- a/mm/swap_ratio.c +++ b/mm/swap_ratio.c @@ -17,8 +17,9 @@ #define SWAP_RATIO_GROUP_START (SWAP_FLAG_PRIO_MASK - 9) /* 32758 */ #define SWAP_RATIO_GROUP_END (SWAP_FLAG_PRIO_MASK) /* 32767 */ -#define SWAP_FAST_WRITES (SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8)) -#define SWAP_SLOW_WRITES SWAPFILE_CLUSTER +#define SWAP_FAST_WRITES \ + ((SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8)) / SWAP_BATCH) +#define SWAP_SLOW_WRITES (SWAPFILE_CLUSTER / SWAP_BATCH) /* * The fast/slow swap write ratio. diff --git a/mm/swap_slots.c b/mm/swap_slots.c new file mode 100644 index 000000000000..7376d2ffb2db --- /dev/null +++ b/mm/swap_slots.c @@ -0,0 +1,345 @@ +/* + * Manage cache of swap slots to be used for and returned from + * swap. + * + * Copyright(c) 2016 Intel Corporation. + * + * Author: Tim Chen + * + * We allocate the swap slots from the global pool and put + * it into local per cpu caches. This has the advantage + * of no needing to acquire the swap_info lock every time + * we need a new slot. + * + * There is also opportunity to simply return the slot + * to local caches without needing to acquire swap_info + * lock. We do not reuse the returned slots directly but + * move them back to the global pool in a batch. This + * allows the slots to coaellesce and reduce fragmentation. + * + * The swap entry allocated is marked with SWAP_HAS_CACHE + * flag in map_count that prevents it from being allocated + * again from the global pool. + * + * The swap slots cache is protected by a mutex instead of + * a spin lock as when we search for slots with scan_swap_map, + * we can possibly sleep. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SWAP + +static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots); +static bool swap_slot_cache_active; +bool swap_slot_cache_enabled; +static bool swap_slot_cache_initialized; +DEFINE_MUTEX(swap_slots_cache_mutex); +/* Serialize swap slots cache enable/disable operations */ +DEFINE_MUTEX(swap_slots_cache_enable_mutex); + +static void __drain_swap_slots_cache(unsigned int type); +static void deactivate_swap_slots_cache(void); +static void reactivate_swap_slots_cache(void); + +#define use_swap_slot_cache (swap_slot_cache_active && \ + swap_slot_cache_enabled && swap_slot_cache_initialized) +#define SLOTS_CACHE 0x1 +#define SLOTS_CACHE_RET 0x2 + +static void deactivate_swap_slots_cache(void) +{ + mutex_lock(&swap_slots_cache_mutex); + swap_slot_cache_active = false; + __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET); + mutex_unlock(&swap_slots_cache_mutex); +} + +static void reactivate_swap_slots_cache(void) +{ + mutex_lock(&swap_slots_cache_mutex); + swap_slot_cache_active = true; + mutex_unlock(&swap_slots_cache_mutex); +} + +/* Must not be called with cpu hot plug lock */ +void disable_swap_slots_cache_lock(void) +{ + mutex_lock(&swap_slots_cache_enable_mutex); + swap_slot_cache_enabled = false; + if (swap_slot_cache_initialized) { + /* serialize with cpu hotplug operations */ + get_online_cpus(); + __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET); + put_online_cpus(); + } +} + +static void __reenable_swap_slots_cache(void) +{ + swap_slot_cache_enabled = has_usable_swap(); +} + +void reenable_swap_slots_cache_unlock(void) +{ + __reenable_swap_slots_cache(); + mutex_unlock(&swap_slots_cache_enable_mutex); +} + +static bool check_cache_active(void) +{ + long pages; + + if (!swap_slot_cache_enabled || !swap_slot_cache_initialized) + return false; + + pages = get_nr_swap_pages(); + if (!swap_slot_cache_active) { + if (pages > num_online_cpus() * + THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE) + reactivate_swap_slots_cache(); + goto out; + } + + /* if global pool of slot caches too low, deactivate cache */ + if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE) + deactivate_swap_slots_cache(); +out: + return swap_slot_cache_active; +} + +static int alloc_swap_slot_cache(unsigned int cpu) +{ + struct swap_slots_cache *cache; + swp_entry_t *slots, *slots_ret; + + /* + * Do allocation outside swap_slots_cache_mutex + * as kvzalloc could trigger reclaim and get_swap_page, + * which can lock swap_slots_cache_mutex. + */ + slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE, + GFP_KERNEL); + if (!slots) + return -ENOMEM; + + slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE, + GFP_KERNEL); + if (!slots_ret) { + kvfree(slots); + return -ENOMEM; + } + + mutex_lock(&swap_slots_cache_mutex); + cache = &per_cpu(swp_slots, cpu); + if (cache->slots || cache->slots_ret) + /* cache already allocated */ + goto out; + if (!cache->lock_initialized) { + mutex_init(&cache->alloc_lock); + spin_lock_init(&cache->free_lock); + cache->lock_initialized = true; + } + cache->nr = 0; + cache->cur = 0; + cache->n_ret = 0; + cache->slots = slots; + slots = NULL; + cache->slots_ret = slots_ret; + slots_ret = NULL; +out: + mutex_unlock(&swap_slots_cache_mutex); + if (slots) + kvfree(slots); + if (slots_ret) + kvfree(slots_ret); + return 0; +} + +static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type, + bool free_slots) +{ + struct swap_slots_cache *cache; + swp_entry_t *slots = NULL; + + cache = &per_cpu(swp_slots, cpu); + if ((type & SLOTS_CACHE) && cache->slots) { + mutex_lock(&cache->alloc_lock); + swapcache_free_entries(cache->slots + cache->cur, cache->nr); + cache->cur = 0; + cache->nr = 0; + if (free_slots && cache->slots) { + kvfree(cache->slots); + cache->slots = NULL; + } + mutex_unlock(&cache->alloc_lock); + } + if ((type & SLOTS_CACHE_RET) && cache->slots_ret) { + spin_lock_irq(&cache->free_lock); + swapcache_free_entries(cache->slots_ret, cache->n_ret); + cache->n_ret = 0; + if (free_slots && cache->slots_ret) { + slots = cache->slots_ret; + cache->slots_ret = NULL; + } + spin_unlock_irq(&cache->free_lock); + if (slots) + kvfree(slots); + } +} + +static void __drain_swap_slots_cache(unsigned int type) +{ + unsigned int cpu; + + /* + * This function is called during + * 1) swapoff, when we have to make sure no + * left over slots are in cache when we remove + * a swap device; + * 2) disabling of swap slot cache, when we run low + * on swap slots when allocating memory and need + * to return swap slots to global pool. + * + * We cannot acquire cpu hot plug lock here as + * this function can be invoked in the cpu + * hot plug path: + * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback + * -> memory allocation -> direct reclaim -> get_swap_page + * -> drain_swap_slots_cache + * + * Hence the loop over current online cpu below could miss cpu that + * is being brought online but not yet marked as online. + * That is okay as we do not schedule and run anything on a + * cpu before it has been marked online. Hence, we will not + * fill any swap slots in slots cache of such cpu. + * There are no slots on such cpu that need to be drained. + */ + for_each_online_cpu(cpu) + drain_slots_cache_cpu(cpu, type, false); +} + +static int free_slot_cache(unsigned int cpu) +{ + mutex_lock(&swap_slots_cache_mutex); + drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true); + mutex_unlock(&swap_slots_cache_mutex); + return 0; +} + +int enable_swap_slots_cache(void) +{ + int ret = 0; + + mutex_lock(&swap_slots_cache_enable_mutex); + if (swap_slot_cache_initialized) { + __reenable_swap_slots_cache(); + goto out_unlock; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache", + alloc_swap_slot_cache, free_slot_cache); + if (ret < 0) + goto out_unlock; + swap_slot_cache_initialized = true; + __reenable_swap_slots_cache(); +out_unlock: + mutex_unlock(&swap_slots_cache_enable_mutex); + return 0; +} + +/* called with swap slot cache's alloc lock held */ +static int refill_swap_slots_cache(struct swap_slots_cache *cache) +{ + if (!use_swap_slot_cache || cache->nr) + return 0; + + cache->cur = 0; + if (swap_slot_cache_active) + cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots); + + return cache->nr; +} + +int free_swap_slot(swp_entry_t entry) +{ + struct swap_slots_cache *cache; + + BUG_ON(!swap_slot_cache_initialized); + + cache = &get_cpu_var(swp_slots); + if (use_swap_slot_cache && cache->slots_ret) { + spin_lock_irq(&cache->free_lock); + /* Swap slots cache may be deactivated before acquiring lock */ + if (!use_swap_slot_cache) { + spin_unlock_irq(&cache->free_lock); + goto direct_free; + } + if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) { + /* + * Return slots to global pool. + * The current swap_map value is SWAP_HAS_CACHE. + * Set it to 0 to indicate it is available for + * allocation in global pool + */ + swapcache_free_entries(cache->slots_ret, cache->n_ret); + cache->n_ret = 0; + } + cache->slots_ret[cache->n_ret++] = entry; + spin_unlock_irq(&cache->free_lock); + } else { +direct_free: + swapcache_free_entries(&entry, 1); + } + put_cpu_var(swp_slots); + + return 0; +} + +swp_entry_t get_swap_page(void) +{ + swp_entry_t entry, *pentry; + struct swap_slots_cache *cache; + + /* + * Preemption is allowed here, because we may sleep + * in refill_swap_slots_cache(). But it is safe, because + * accesses to the per-CPU data structure are protected by the + * mutex cache->alloc_lock. + * + * The alloc path here does not touch cache->slots_ret + * so cache->free_lock is not taken. + */ + cache = raw_cpu_ptr(&swp_slots); + + entry.val = 0; + if (check_cache_active()) { + mutex_lock(&cache->alloc_lock); + if (cache->slots) { +repeat: + if (cache->nr) { + pentry = &cache->slots[cache->cur++]; + entry = *pentry; + pentry->val = 0; + cache->nr--; + } else { + if (refill_swap_slots_cache(cache)) + goto repeat; + } + } + mutex_unlock(&cache->alloc_lock); + if (entry.val) + return entry; + } + + get_swap_pages(1, &entry); + + return entry; +} + +#endif /* CONFIG_SWAP */ diff --git a/mm/swap_state.c b/mm/swap_state.c index b6818adcc248..3a04230ca0d7 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include "internal.h" @@ -33,15 +35,8 @@ static const struct address_space_operations swap_aops = { #endif }; -struct address_space swapper_spaces[MAX_SWAPFILES] = { - [0 ... MAX_SWAPFILES - 1] = { - .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .i_mmap_writable = ATOMIC_INIT(0), - .a_ops = &swap_aops, - /* swap cache doesn't use writeback related tags */ - .flags = 1 << AS_NO_WRITEBACK_TAGS, - } -}; +struct address_space *swapper_spaces[MAX_SWAPFILES]; +static unsigned int nr_swapper_spaces[MAX_SWAPFILES]; #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) @@ -54,11 +49,26 @@ static struct { unsigned long total_swapcache_pages(void) { - int i; + unsigned int i, j, nr; unsigned long ret = 0; + struct address_space *spaces; - for (i = 0; i < MAX_SWAPFILES; i++) - ret += swapper_spaces[i].nrpages; + rcu_read_lock(); + for (i = 0; i < MAX_SWAPFILES; i++) { + /* + * The corresponding entries in nr_swapper_spaces and + * swapper_spaces will be reused only after at least + * one grace period. So it is impossible for them + * belongs to different usage. + */ + nr = nr_swapper_spaces[i]; + spaces = rcu_dereference(swapper_spaces[i]); + if (!nr || !spaces) + continue; + for (j = 0; j < nr; j++) + ret += spaces[j].nrpages; + } + rcu_read_unlock(); return ret; } @@ -315,6 +325,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, if (found_page) break; + /* + * Just skip read ahead for unused swap slot. + * During swap_off when swap_slot_cache is disabled, + * we have to handle the race between putting + * swap entry in swap cache and marking swap slot + * as SWAP_HAS_CACHE. That's done in later part of code or + * else swap_off will be aborted if we return NULL. + */ + if (!__swp_swapcount(entry) && swap_slot_cache_enabled) + break; + /* * Get a new page to read into from swap. */ @@ -507,3 +528,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, skip: return read_swap_cache_async(entry, gfp_mask, vma, addr); } + +int init_swap_address_space(unsigned int type, unsigned long nr_pages) +{ + struct address_space *spaces, *space; + unsigned int i, nr; + + nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); + spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL); + if (!spaces) + return -ENOMEM; + for (i = 0; i < nr; i++) { + space = spaces + i; + INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN); + atomic_set(&space->i_mmap_writable, 0); + space->a_ops = &swap_aops; + /* swap cache doesn't use writeback related tags */ + mapping_set_no_writeback_tags(space); + spin_lock_init(&space->tree_lock); + } + nr_swapper_spaces[type] = nr; + rcu_assign_pointer(swapper_spaces[type], spaces); + + return 0; +} + +void exit_swap_address_space(unsigned int type) +{ + struct address_space *spaces; + + spaces = swapper_spaces[type]; + nr_swapper_spaces[type] = 0; + rcu_assign_pointer(swapper_spaces[type], NULL); + synchronize_rcu(); + kvfree(spaces); +} diff --git a/mm/swapfile.c b/mm/swapfile.c index 2c6c23deeb08..1b775a4e836b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,47 @@ static inline void cluster_set_null(struct swap_cluster_info *info) info->data = 0; } +static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, + unsigned long offset) +{ + struct swap_cluster_info *ci; + + ci = si->cluster_info; + if (ci) { + ci += offset / SWAPFILE_CLUSTER; + spin_lock(&ci->lock); + } + return ci; +} + +static inline void unlock_cluster(struct swap_cluster_info *ci) +{ + if (ci) + spin_unlock(&ci->lock); +} + +static inline struct swap_cluster_info *lock_cluster_or_swap_info( + struct swap_info_struct *si, + unsigned long offset) +{ + struct swap_cluster_info *ci; + + ci = lock_cluster(si, offset); + if (!ci) + spin_lock(&si->lock); + + return ci; +} + +static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si, + struct swap_cluster_info *ci) +{ + if (ci) + unlock_cluster(ci); + else + spin_unlock(&si->lock); +} + static inline bool cluster_list_empty(struct swap_cluster_list *list) { return cluster_is_null(&list->head); @@ -300,9 +342,17 @@ static void cluster_list_add_tail(struct swap_cluster_list *list, cluster_set_next_flag(&list->head, idx, 0); cluster_set_next_flag(&list->tail, idx, 0); } else { + struct swap_cluster_info *ci_tail; unsigned int tail = cluster_next(&list->tail); - cluster_set_next(&ci[tail], idx); + /* + * Nested cluster lock, but both cluster locks are + * only acquired when we held swap_info_struct->lock + */ + ci_tail = ci + tail; + spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); + cluster_set_next(ci_tail, idx); + unlock_cluster(ci_tail); cluster_set_next_flag(&list->tail, idx, 0); } } @@ -347,7 +397,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si, */ static void swap_do_scheduled_discard(struct swap_info_struct *si) { - struct swap_cluster_info *info; + struct swap_cluster_info *info, *ci; unsigned int idx; info = si->cluster_info; @@ -360,10 +410,14 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si) SWAPFILE_CLUSTER); spin_lock(&si->lock); - cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE); + ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); + cluster_set_flag(ci, CLUSTER_FLAG_FREE); + unlock_cluster(ci); cluster_list_add_tail(&si->free_clusters, info, idx); + ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); memset(si->swap_map + idx * SWAPFILE_CLUSTER, 0, SWAPFILE_CLUSTER); + unlock_cluster(ci); } } @@ -462,12 +516,13 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si, * Try to get a swap entry from current cpu's swap entry pool (a cluster). This * might involve allocating a new cluster for current CPU too. */ -static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, +static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, unsigned long *offset, unsigned long *scan_base) { struct percpu_cluster *cluster; + struct swap_cluster_info *ci; bool found_free; - unsigned long tmp; + unsigned long tmp, max; new_cluster: cluster = this_cpu_ptr(si->percpu_cluster); @@ -485,7 +540,7 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, *scan_base = *offset = si->cluster_next; goto new_cluster; } else - return; + return false; } found_free = false; @@ -495,14 +550,21 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, * check if there is still free entry in the cluster */ tmp = cluster->next; - while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) * - SWAPFILE_CLUSTER) { + max = min_t(unsigned long, si->max, + (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); + if (tmp >= max) { + cluster_set_null(&cluster->index); + goto new_cluster; + } + ci = lock_cluster(si, tmp); + while (tmp < max) { if (!si->swap_map[tmp]) { found_free = true; break; } tmp++; } + unlock_cluster(ci); if (!found_free) { cluster_set_null(&cluster->index); goto new_cluster; @@ -510,15 +572,22 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, cluster->next = tmp + 1; *offset = tmp; *scan_base = tmp; + return found_free; } -static unsigned long scan_swap_map(struct swap_info_struct *si, - unsigned char usage) +static int scan_swap_map_slots(struct swap_info_struct *si, + unsigned char usage, int nr, + swp_entry_t slots[]) { + struct swap_cluster_info *ci; unsigned long offset; unsigned long scan_base; unsigned long last_in_cluster = 0; int latency_ration = LATENCY_LIMIT; + int n_ret = 0; + + if (nr > SWAP_BATCH) + nr = SWAP_BATCH; /* * We try to cluster swap pages by allocating them sequentially @@ -536,8 +605,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, /* SSD algorithm */ if (si->cluster_info) { - scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); - goto checks; + if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) + goto checks; + else + goto scan; } if (unlikely(!si->cluster_nr--)) { @@ -581,8 +652,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, checks: if (si->cluster_info) { - while (scan_swap_map_ssd_cluster_conflict(si, offset)) - scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); + while (scan_swap_map_ssd_cluster_conflict(si, offset)) { + /* take a break if we already got some slots */ + if (n_ret) + goto done; + if (!scan_swap_map_try_ssd_cluster(si, &offset, + &scan_base)) + goto scan; + } } if (!(si->flags & SWP_WRITEOK)) goto no_page; @@ -591,9 +668,11 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, if (offset > si->highest_bit) scan_base = offset = si->lowest_bit; + ci = lock_cluster(si, offset); /* reuse swap entry of cache-only swap if not busy. */ if (vm_swap_full(si) && si->swap_map[offset] == SWAP_HAS_CACHE) { int swap_was_freed; + unlock_cluster(ci); spin_unlock(&si->lock); swap_was_freed = __try_to_reclaim_swap(si, offset); spin_lock(&si->lock); @@ -603,8 +682,16 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, goto scan; /* check next one */ } - if (si->swap_map[offset]) - goto scan; + if (si->swap_map[offset]) { + unlock_cluster(ci); + if (!n_ret) + goto scan; + else + goto done; + } + si->swap_map[offset] = usage; + inc_cluster_info_page(si, si->cluster_info, offset); + unlock_cluster(ci); if (offset == si->lowest_bit) si->lowest_bit++; @@ -618,12 +705,44 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, plist_del(&si->avail_list, &swap_avail_head); spin_unlock(&swap_avail_lock); } - si->swap_map[offset] = usage; - inc_cluster_info_page(si, si->cluster_info, offset); si->cluster_next = offset + 1; - si->flags -= SWP_SCANNING; + slots[n_ret++] = swp_entry(si->type, offset); + + /* got enough slots or reach max slots? */ + if ((n_ret == nr) || (offset >= si->highest_bit)) + goto done; + + /* search for next available slot */ + + /* time to take a break? */ + if (unlikely(--latency_ration < 0)) { + if (n_ret) + goto done; + spin_unlock(&si->lock); + cond_resched(); + spin_lock(&si->lock); + latency_ration = LATENCY_LIMIT; + } + + /* try to get more slots in cluster */ + if (si->cluster_info) { + if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) + goto checks; + else + goto done; + } + /* non-ssd case */ + ++offset; - return offset; + /* non-ssd case, still more slots in cluster? */ + if (si->cluster_nr && !si->swap_map[offset]) { + --si->cluster_nr; + goto checks; + } + +done: + si->flags -= SWP_SCANNING; + return n_ret; scan: spin_unlock(&si->lock); @@ -663,18 +782,42 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, no_page: si->flags -= SWP_SCANNING; - return 0; + return n_ret; +} + +static unsigned long scan_swap_map(struct swap_info_struct *si, + unsigned char usage) +{ + swp_entry_t entry; + int n_ret; + + n_ret = scan_swap_map_slots(si, usage, 1, &entry); + + if (n_ret) + return swp_offset(entry); + else + return 0; + } -swp_entry_t get_swap_page(void) +int get_swap_pages(int n_goal, swp_entry_t swp_entries[]) { struct swap_info_struct *si, *next; - pgoff_t offset; + long avail_pgs; + int n_ret = 0; int swap_ratio_off = 0; - if (atomic_long_read(&nr_swap_pages) <= 0) + avail_pgs = atomic_long_read(&nr_swap_pages); + if (avail_pgs <= 0) goto noswap; - atomic_long_dec(&nr_swap_pages); + + if (n_goal > SWAP_BATCH) + n_goal = SWAP_BATCH; + + if (n_goal > avail_pgs) + n_goal = avail_pgs; + + atomic_long_sub(n_goal, &nr_swap_pages); lock_and_start: spin_lock(&swap_avail_lock); @@ -720,14 +863,14 @@ swp_entry_t get_swap_page(void) spin_unlock(&si->lock); goto nextsi; } - - /* This is called for allocating swap entry for cache */ - offset = scan_swap_map(si, SWAP_HAS_CACHE); + n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, + n_goal, swp_entries); spin_unlock(&si->lock); - if (offset) - return swp_entry(si->type, offset); + if (n_ret) + goto check_out; pr_debug("scan_swap_map of si %d failed to find offset\n", - si->type); + si->type); + spin_lock(&swap_avail_lock); nextsi: /* @@ -738,7 +881,8 @@ swp_entry_t get_swap_page(void) * up between us dropping swap_avail_lock and taking si->lock. * Since we dropped the swap_avail_lock, the swap_avail_head * list may have been modified; so if next is still in the - * swap_avail_head list then try it, otherwise start over. + * swap_avail_head list then try it, otherwise start over + * if we have not gotten any slots. */ if (plist_node_empty(&next->avail_list)) goto start_over; @@ -746,9 +890,11 @@ swp_entry_t get_swap_page(void) spin_unlock(&swap_avail_lock); - atomic_long_inc(&nr_swap_pages); +check_out: + if (n_ret < n_goal) + atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages); noswap: - return (swp_entry_t) {0}; + return n_ret; } /* The only caller of this function is now suspend routine */ @@ -773,7 +919,7 @@ swp_entry_t get_swap_page_of_type(int type) return (swp_entry_t) {0}; } -static struct swap_info_struct *swap_info_get(swp_entry_t entry) +static struct swap_info_struct *__swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; unsigned long offset, type; @@ -789,34 +935,76 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry) offset = swp_offset(entry); if (offset >= p->max) goto bad_offset; - if (!p->swap_map[offset]) - goto bad_free; - spin_lock(&p->lock); return p; -bad_free: - pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val); - goto out; bad_offset: - pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val); + pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val); goto out; bad_device: - pr_err("swap_free: %s%08lx\n", Unused_file, entry.val); + pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val); goto out; bad_nofile: - pr_err("swap_free: %s%08lx\n", Bad_file, entry.val); + pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val); +out: + return NULL; +} + +static struct swap_info_struct *_swap_info_get(swp_entry_t entry) +{ + struct swap_info_struct *p; + + p = __swap_info_get(entry); + if (!p) + goto out; + if (!p->swap_map[swp_offset(entry)]) + goto bad_free; + return p; + +bad_free: + pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val); + goto out; out: return NULL; } -static unsigned char swap_entry_free(struct swap_info_struct *p, - swp_entry_t entry, unsigned char usage) +static struct swap_info_struct *swap_info_get(swp_entry_t entry) +{ + struct swap_info_struct *p; + + p = _swap_info_get(entry); + if (p) + spin_lock(&p->lock); + return p; +} + +static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry, + struct swap_info_struct *q) +{ + struct swap_info_struct *p; + + p = _swap_info_get(entry); + + if (p != q) { + if (q != NULL) + spin_unlock(&q->lock); + if (p != NULL) + spin_lock(&p->lock); + } + return p; +} + +static unsigned char __swap_entry_free(struct swap_info_struct *p, + swp_entry_t entry, unsigned char usage) { + struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char count; unsigned char has_cache; + ci = lock_cluster_or_swap_info(p, offset); + count = p->swap_map[offset]; + has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; @@ -840,38 +1028,52 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage; - - /* free if no reference */ - if (!usage) { - mem_cgroup_uncharge_swap(entry); - dec_cluster_info_page(p, p->cluster_info, offset); - if (offset < p->lowest_bit) - p->lowest_bit = offset; - if (offset > p->highest_bit) { - bool was_full = !p->highest_bit; - p->highest_bit = offset; - if (was_full && (p->flags & SWP_WRITEOK)) { - spin_lock(&swap_avail_lock); - WARN_ON(!plist_node_empty(&p->avail_list)); - if (plist_node_empty(&p->avail_list)) - plist_add(&p->avail_list, - &swap_avail_head); - spin_unlock(&swap_avail_lock); - } - } - atomic_long_inc(&nr_swap_pages); - p->inuse_pages--; - frontswap_invalidate_page(p->type, offset); - if (p->flags & SWP_BLKDEV) { - struct gendisk *disk = p->bdev->bd_disk; - if (disk->fops->swap_slot_free_notify) - disk->fops->swap_slot_free_notify(p->bdev, - offset); + p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + + unlock_cluster_or_swap_info(p, ci); + + return usage; +} + +static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char count; + + ci = lock_cluster(p, offset); + count = p->swap_map[offset]; + VM_BUG_ON(count != SWAP_HAS_CACHE); + p->swap_map[offset] = 0; + dec_cluster_info_page(p, p->cluster_info, offset); + unlock_cluster(ci); + + mem_cgroup_uncharge_swap(entry); + if (offset < p->lowest_bit) + p->lowest_bit = offset; + if (offset > p->highest_bit) { + bool was_full = !p->highest_bit; + + p->highest_bit = offset; + if (was_full && (p->flags & SWP_WRITEOK)) { + spin_lock(&swap_avail_lock); + WARN_ON(!plist_node_empty(&p->avail_list)); + if (plist_node_empty(&p->avail_list)) + plist_add(&p->avail_list, + &swap_avail_head); + spin_unlock(&swap_avail_lock); } } + atomic_long_inc(&nr_swap_pages); + p->inuse_pages--; + frontswap_invalidate_page(p->type, offset); + if (p->flags & SWP_BLKDEV) { + struct gendisk *disk = p->bdev->bd_disk; - return usage; + if (disk->fops->swap_slot_free_notify) + disk->fops->swap_slot_free_notify(p->bdev, + offset); + } } /* @@ -882,10 +1084,10 @@ void swap_free(swp_entry_t entry) { struct swap_info_struct *p; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - swap_entry_free(p, entry, 1); - spin_unlock(&p->lock); + if (!__swap_entry_free(p, entry, 1)) + free_swap_slot(entry); } } @@ -896,13 +1098,33 @@ void swapcache_free(swp_entry_t entry) { struct swap_info_struct *p; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - swap_entry_free(p, entry, SWAP_HAS_CACHE); - spin_unlock(&p->lock); + if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE)) + free_swap_slot(entry); } } +void swapcache_free_entries(swp_entry_t *entries, int n) +{ + struct swap_info_struct *p, *prev; + int i; + + if (n <= 0) + return; + + prev = NULL; + p = NULL; + for (i = 0; i < n; ++i) { + p = swap_info_get_cont(entries[i], prev); + if (p) + swap_entry_free(p, entries[i]); + prev = p; + } + if (p) + spin_unlock(&p->lock); +} + /* * How many references to page are currently swapped out? * This does not give an exact answer when swap count is continued, @@ -912,17 +1134,49 @@ int page_swapcount(struct page *page) { int count = 0; struct swap_info_struct *p; + struct swap_cluster_info *ci; swp_entry_t entry; + unsigned long offset; entry.val = page_private(page); - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - count = swap_count(p->swap_map[swp_offset(entry)]); - spin_unlock(&p->lock); + offset = swp_offset(entry); + ci = lock_cluster_or_swap_info(p, offset); + count = swap_count(p->swap_map[offset]); + unlock_cluster_or_swap_info(p, ci); } return count; } +static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +{ + int count = 0; + pgoff_t offset = swp_offset(entry); + struct swap_cluster_info *ci; + + ci = lock_cluster_or_swap_info(si, offset); + count = swap_count(si->swap_map[offset]); + unlock_cluster_or_swap_info(si, ci); + return count; +} + +/* + * How many references to @entry are currently swapped out? + * This does not give an exact answer when swap count is continued, + * but does include the high COUNT_CONTINUED flag to allow for that. + */ +int __swp_swapcount(swp_entry_t entry) +{ + int count = 0; + struct swap_info_struct *si; + + si = __swap_info_get(entry); + if (si) + count = swap_swapcount(si, entry); + return count; +} + /* * How many references to @entry are currently swapped out? * This considers COUNT_CONTINUED so it returns exact answer. @@ -931,22 +1185,26 @@ int swp_swapcount(swp_entry_t entry) { int count, tmp_count, n; struct swap_info_struct *p; + struct swap_cluster_info *ci; struct page *page; pgoff_t offset; unsigned char *map; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (!p) return 0; - count = swap_count(p->swap_map[swp_offset(entry)]); + offset = swp_offset(entry); + + ci = lock_cluster_or_swap_info(p, offset); + + count = swap_count(p->swap_map[offset]); if (!(count & COUNT_CONTINUED)) goto out; count &= ~COUNT_CONTINUED; n = SWAP_MAP_MAX + 1; - offset = swp_offset(entry); page = vmalloc_to_page(p->swap_map + offset); offset &= ~PAGE_MASK; VM_BUG_ON(page_private(page) != SWP_CONTINUED); @@ -961,7 +1219,7 @@ int swp_swapcount(swp_entry_t entry) n *= (SWAP_CONT_MAX + 1); } while (tmp_count & COUNT_CONTINUED); out: - spin_unlock(&p->lock); + unlock_cluster_or_swap_info(p, ci); return count; } @@ -1053,21 +1311,23 @@ int free_swap_and_cache(swp_entry_t entry) { struct swap_info_struct *p; struct page *page = NULL; + unsigned char count; if (non_swap_entry(entry)) return 1; - p = swap_info_get(entry); + p = _swap_info_get(entry); if (p) { - if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { + count = __swap_entry_free(p, entry, 1); + if (count == SWAP_HAS_CACHE) { page = find_get_page(swap_address_space(entry), swp_offset(entry)); if (page && !trylock_page(page)) { put_page(page); page = NULL; } - } - spin_unlock(&p->lock); + } else if (!count) + free_swap_slot(entry); } if (page) { /* @@ -1075,7 +1335,8 @@ int free_swap_and_cache(swp_entry_t entry) * Also recheck PageSwapCache now page is locked (above). */ if (PageSwapCache(page) && !PageWriteback(page) && - (!page_mapped(page) || mem_cgroup_swap_full(page))) { + (!page_mapped(page) || mem_cgroup_swap_full(page)) && + !swap_swapcount(p, entry)) { delete_from_swap_cache(page); SetPageDirty(page); } @@ -1290,6 +1551,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, pmd = pmd_offset(pud, addr); do { + cond_resched(); next = pmd_addr_end(addr, end); if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; @@ -1369,6 +1631,7 @@ static int unuse_mm(struct mm_struct *mm, for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->anon_vma && (ret = unuse_vma(vma, entry, page))) break; + cond_resched(); } up_read(&mm->mmap_sem); return (ret < 0)? ret: 0; @@ -1406,15 +1669,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, prev = 0; i = 1; } - if (frontswap) { - if (frontswap_test(si, i)) - break; - else - continue; - } count = READ_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) - break; + if (!frontswap || frontswap_test(si, i)) + break; + if ((i % LATENCY_LIMIT) == 0) + cond_resched(); } return i; } @@ -1896,6 +2156,17 @@ static void reinsert_swap_info(struct swap_info_struct *p) spin_unlock(&swap_lock); } +bool has_usable_swap(void) +{ + bool ret = true; + + spin_lock(&swap_lock); + if (plist_head_empty(&swap_active_head)) + ret = false; + spin_unlock(&swap_lock); + return ret; +} + SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; @@ -1966,6 +2237,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&p->lock); spin_unlock(&swap_lock); + disable_swap_slots_cache_lock(); + set_current_oom_origin(); err = try_to_unuse(p->type, false, 0); /* force unuse all pages */ clear_current_oom_origin(); @@ -1973,9 +2246,12 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); + reenable_swap_slots_cache_unlock(); goto out_dput; } + reenable_swap_slots_cache_unlock(); + flush_work(&p->discard_work); destroy_swap_extents(p); @@ -2014,10 +2290,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; vfree(swap_map); - vfree(cluster_info); - vfree(frontswap_map); + kvfree(cluster_info); + kvfree(frontswap_map); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); + exit_swap_address_space(p->type); inode = mapping->host; if (S_ISBLK(inode->i_mode)) { @@ -2359,6 +2636,13 @@ static unsigned long read_swap_header(struct swap_info_struct *p, return maxpages; } +#define SWAP_CLUSTER_INFO_COLS \ + DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) +#define SWAP_CLUSTER_SPACE_COLS \ + DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) +#define SWAP_CLUSTER_COLS \ + max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) + static int setup_swap_map_and_extents(struct swap_info_struct *p, union swap_header *swap_header, unsigned char *swap_map, @@ -2366,11 +2650,12 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, unsigned long maxpages, sector_t *span) { - int i; + unsigned int j, k; unsigned int nr_good_pages; int nr_extents; unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); - unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER; + unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; + unsigned long i, idx; nr_good_pages = maxpages - 1; /* omit header page */ @@ -2418,15 +2703,23 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p, if (!cluster_info) return nr_extents; - for (i = 0; i < nr_clusters; i++) { - if (!cluster_count(&cluster_info[idx])) { + + /* + * Reduce false cache line sharing between cluster_info and + * sharing same address space. + */ + for (k = 0; k < SWAP_CLUSTER_COLS; k++) { + j = (k + col) % SWAP_CLUSTER_COLS; + for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { + idx = i * SWAP_CLUSTER_COLS + j; + if (idx >= nr_clusters) + continue; + if (cluster_count(&cluster_info[idx])) + continue; cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); cluster_list_add_tail(&p->free_clusters, cluster_info, idx); } - idx++; - if (idx == nr_clusters) - idx = 0; } return nr_extents; } @@ -2529,6 +2822,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { int cpu; + unsigned long ci, nr_cluster; p->flags |= SWP_SOLIDSTATE; /* @@ -2536,13 +2830,18 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) * SSD */ p->cluster_next = 1 + (prandom_u32() % p->highest_bit); + nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); - cluster_info = vzalloc(DIV_ROUND_UP(maxpages, - SWAPFILE_CLUSTER) * sizeof(*cluster_info)); + cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info), + GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; goto bad_swap; } + + for (ci = 0; ci < nr_cluster; ci++) + spin_lock_init(&((cluster_info + ci)->lock)); + p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; @@ -2567,7 +2866,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) - frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); + frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long), + GFP_KERNEL); if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { /* @@ -2602,6 +2902,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (p->bdev && blk_queue_fast(bdev_get_queue(p->bdev))) p->flags |= SWP_FAST; + error = init_swap_address_space(p->type, maxpages); + if (error) + goto bad_swap; + mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) { @@ -2642,7 +2946,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); - vfree(cluster_info); + kvfree(cluster_info); + kvfree(frontswap_map); if (swap_file) { if (inode && S_ISREG(inode->i_mode)) { inode_unlock(inode); @@ -2659,6 +2964,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) putname(name); if (inode && S_ISREG(inode->i_mode)) inode_unlock(inode); + if (!error) + enable_swap_slots_cache(); return error; } @@ -2693,6 +3000,7 @@ void si_swapinfo(struct sysinfo *val) static int __swap_duplicate(swp_entry_t entry, unsigned char usage) { struct swap_info_struct *p; + struct swap_cluster_info *ci; unsigned long offset, type; unsigned char count; unsigned char has_cache; @@ -2706,10 +3014,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) goto bad_file; p = swap_info[type]; offset = swp_offset(entry); - - spin_lock(&p->lock); if (unlikely(offset >= p->max)) - goto unlock_out; + goto out; + + ci = lock_cluster_or_swap_info(p, offset); count = p->swap_map[offset]; @@ -2752,7 +3060,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) p->swap_map[offset] = count | has_cache; unlock_out: - spin_unlock(&p->lock); + unlock_cluster_or_swap_info(p, ci); out: return err; @@ -2841,6 +3149,7 @@ EXPORT_SYMBOL_GPL(__page_file_index); int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) { struct swap_info_struct *si; + struct swap_cluster_info *ci; struct page *head; struct page *page; struct page *list_page; @@ -2864,6 +3173,9 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) } offset = swp_offset(entry); + + ci = lock_cluster(si, offset); + count = si->swap_map[offset] & ~SWAP_HAS_CACHE; if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) { @@ -2876,6 +3188,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) } if (!page) { + unlock_cluster(ci); spin_unlock(&si->lock); return -ENOMEM; } @@ -2924,6 +3237,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out: + unlock_cluster(ci); spin_unlock(&si->lock); outer: if (page) @@ -2937,7 +3251,8 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. - * Called while __swap_duplicate() or swap_entry_free() holds swap_lock. + * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster + * lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) diff --git a/mm/util.c b/mm/util.c index 734b2d0e4a49..571f4100be6a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -381,6 +381,52 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, } EXPORT_SYMBOL(vm_mmap); +/** + * kvmalloc_node - attempt to allocate physically contiguous memory, but upon + * failure, fall back to non-contiguous (vmalloc) allocation. + * @size: size of the request. + * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. + * @node: numa node to allocate from + * + * Uses kmalloc to get the memory but if the allocation fails then falls back + * to the vmalloc allocator. Use kvfree for freeing the memory. + * + * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not supported + * + * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. + */ +void *kvmalloc_node(size_t size, gfp_t flags, int node) +{ + gfp_t kmalloc_flags = flags; + void *ret; + + /* + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) + * so the given set of flags has to be compatible. + */ + WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); + + /* + * Make sure that larger requests are not too disruptive - no OOM + * killer and no allocation failure warnings as we have a fallback + */ + if (size > PAGE_SIZE) + kmalloc_flags |= __GFP_NORETRY | __GFP_NOWARN; + + ret = kmalloc_node(size, kmalloc_flags, node); + + /* + * It doesn't really make sense to fallback to vmalloc for sub page + * requests + */ + if (ret || size <= PAGE_SIZE) + return ret; + + return __vmalloc_node_flags_caller(size, node, flags, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(kvmalloc_node); + void kvfree(const void *addr) { if (is_vmalloc_addr(addr)) @@ -527,7 +573,7 @@ unsigned long vm_commit_limit(void) if (sysctl_overcommit_kbytes) allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10); else - allowed = ((totalram_pages - hugetlb_total_pages()) + allowed = ((totalram_pages() - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100); allowed += total_swap_pages; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 75081da0bf19..4fd41aa90b06 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include @@ -50,12 +52,10 @@ static void __vunmap(const void *, int); static void free_work(struct work_struct *w) { struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq); - struct llist_node *llnode = llist_del_all(&p->list); - while (llnode) { - void *p = llnode; - llnode = llist_next(llnode); - __vunmap(p, 1); - } + struct llist_node *t, *llnode; + + llist_for_each_safe(llnode, t, llist_del_all(&p->list)) + __vunmap((void *)llnode, 1); } /*** Page table manipulation functions ***/ @@ -286,6 +286,10 @@ EXPORT_SYMBOL(vmalloc_to_pfn); /*** Global kva allocator ***/ +#define DEBUG_AUGMENT_PROPAGATE_CHECK 0 +#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0 + +#define VM_LAZY_FREE 0x02 #define VM_VM_AREA 0x04 static DEFINE_SPINLOCK(vmap_area_lock); @@ -293,65 +297,67 @@ static DEFINE_SPINLOCK(vmap_area_lock); LIST_HEAD(vmap_area_list); static LLIST_HEAD(vmap_purge_list); static struct rb_root vmap_area_root = RB_ROOT; +static bool vmap_initialized __read_mostly; -/* The vmap cache globals are protected by vmap_area_lock */ -static struct rb_node *free_vmap_cache; -static unsigned long cached_hole_size; -static unsigned long cached_vstart; -static unsigned long cached_align; - -static unsigned long vmap_area_pcpu_hole; - -#ifdef CONFIG_ENABLE_VMALLOC_SAVING -#define POSSIBLE_VMALLOC_START PAGE_OFFSET - -#define VMALLOC_BITMAP_SIZE ((VMALLOC_END - PAGE_OFFSET) >> \ - PAGE_SHIFT) -#define VMALLOC_TO_BIT(addr) ((addr - PAGE_OFFSET) >> PAGE_SHIFT) -#define BIT_TO_VMALLOC(i) (PAGE_OFFSET + i * PAGE_SIZE) +/* + * This kmem_cache is used for vmap_area objects. Instead of + * allocating from slab we reuse an object from this cache to + * make things faster. Especially in "no edge" splitting of + * free block. + */ +static struct kmem_cache *vmap_area_cachep; -unsigned long total_vmalloc_size; -unsigned long vmalloc_reserved; +/* + * This linked list is used in pair with free_vmap_area_root. + * It gives O(1) access to prev/next to perform fast coalescing. + */ +static LIST_HEAD(free_vmap_area_list); -DECLARE_BITMAP(possible_areas, VMALLOC_BITMAP_SIZE); +/* + * This augment red-black tree represents the free vmap space. + * All vmap_area objects in this tree are sorted by va->va_start + * address. It is used for allocation and merging when a vmap + * object is released. + * + * Each vmap_area node contains a maximum available free block + * of its sub-tree, right or left. Therefore it is possible to + * find a lowest match of free area. + */ +static struct rb_root free_vmap_area_root = RB_ROOT; -void mark_vmalloc_reserved_area(void *x, unsigned long size) +static __always_inline unsigned long +va_size(struct vmap_area *va) { - unsigned long addr = (unsigned long)x; - - bitmap_set(possible_areas, VMALLOC_TO_BIT(addr), size >> PAGE_SHIFT); - vmalloc_reserved += size; + return (va->va_end - va->va_start); } -int is_vmalloc_addr(const void *x) +static __always_inline unsigned long +get_subtree_max_size(struct rb_node *node) { - unsigned long addr = (unsigned long)x; - - if (addr < POSSIBLE_VMALLOC_START || addr >= VMALLOC_END) - return 0; - - if (test_bit(VMALLOC_TO_BIT(addr), possible_areas)) - return 0; + struct vmap_area *va; - return 1; + va = rb_entry_safe(node, struct vmap_area, rb_node); + return va ? va->subtree_max_size : 0; } -static void calc_total_vmalloc_size(void) +/* + * Gets called when remove the node and rotate. + */ +static __always_inline unsigned long +compute_subtree_max_size(struct vmap_area *va) { - total_vmalloc_size = VMALLOC_END - POSSIBLE_VMALLOC_START - - vmalloc_reserved; + return max3(va_size(va), + get_subtree_max_size(va->rb_node.rb_left), + get_subtree_max_size(va->rb_node.rb_right)); } -#else -int is_vmalloc_addr(const void *x) -{ - unsigned long addr = (unsigned long)x; - return addr >= VMALLOC_START && addr < VMALLOC_END; -} +RB_DECLARE_CALLBACKS(static, free_vmap_area_rb_augment_cb, + struct vmap_area, rb_node, unsigned long, subtree_max_size, + compute_subtree_max_size) -static void calc_total_vmalloc_size(void) { } -#endif -EXPORT_SYMBOL(is_vmalloc_addr); +static void purge_vmap_area_lazy(void); +static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); +static unsigned long lazy_max_pages(void); static atomic_long_t nr_vmalloc_pages; @@ -379,157 +385,706 @@ static struct vmap_area *__find_vmap_area(unsigned long addr) return NULL; } -static void __insert_vmap_area(struct vmap_area *va) -{ - struct rb_node **p = &vmap_area_root.rb_node; - struct rb_node *parent = NULL; - struct rb_node *tmp; +/* + * This function returns back addresses of parent node + * and its left or right link for further processing. + */ +static __always_inline struct rb_node ** +find_va_links(struct vmap_area *va, + struct rb_root *root, struct rb_node *from, + struct rb_node **parent) +{ + struct vmap_area *tmp_va; + struct rb_node **link; + + if (root) { + link = &root->rb_node; + if (unlikely(!*link)) { + *parent = NULL; + return link; + } + } else { + link = &from; + } - while (*p) { - struct vmap_area *tmp_va; + /* + * Go to the bottom of the tree. When we hit the last point + * we end up with parent rb_node and correct direction, i name + * it link, where the new va->rb_node will be attached to. + */ + do { + tmp_va = rb_entry(*link, struct vmap_area, rb_node); - parent = *p; - tmp_va = rb_entry(parent, struct vmap_area, rb_node); - if (va->va_start < tmp_va->va_end) - p = &(*p)->rb_left; - else if (va->va_end > tmp_va->va_start) - p = &(*p)->rb_right; + /* + * During the traversal we also do some sanity check. + * Trigger the BUG() if there are sides(left/right) + * or full overlaps. + */ + if (va->va_start < tmp_va->va_end && + va->va_end <= tmp_va->va_start) + link = &(*link)->rb_left; + else if (va->va_end > tmp_va->va_start && + va->va_start >= tmp_va->va_end) + link = &(*link)->rb_right; else BUG(); + } while (*link); + + *parent = &tmp_va->rb_node; + return link; +} + +static __always_inline struct list_head * +get_va_next_sibling(struct rb_node *parent, struct rb_node **link) +{ + struct list_head *list; + + if (unlikely(!parent)) + /* + * The red-black tree where we try to find VA neighbors + * before merging or inserting is empty, i.e. it means + * there is no free vmap space. Normally it does not + * happen but we handle this case anyway. + */ + return NULL; + + list = &rb_entry(parent, struct vmap_area, rb_node)->list; + return (&parent->rb_right == link ? list->next : list); +} + +static __always_inline void +link_va(struct vmap_area *va, struct rb_root *root, + struct rb_node *parent, struct rb_node **link, struct list_head *head) +{ + /* + * VA is still not in the list, but we can + * identify its future previous list_head node. + */ + if (likely(parent)) { + head = &rb_entry(parent, struct vmap_area, rb_node)->list; + if (&parent->rb_right != link) + head = head->prev; } - rb_link_node(&va->rb_node, parent, p); - rb_insert_color(&va->rb_node, &vmap_area_root); + /* Insert to the rb-tree */ + rb_link_node(&va->rb_node, parent, link); + if (root == &free_vmap_area_root) { + /* + * Some explanation here. Just perform simple insertion + * to the tree. We do not set va->subtree_max_size to + * its current size before calling rb_insert_augmented(). + * It is because of we populate the tree from the bottom + * to parent levels when the node _is_ in the tree. + * + * Therefore we set subtree_max_size to zero after insertion, + * to let __augment_tree_propagate_from() puts everything to + * the correct order later on. + */ + rb_insert_augmented(&va->rb_node, + root, &free_vmap_area_rb_augment_cb); + va->subtree_max_size = 0; + } else { + rb_insert_color(&va->rb_node, root); + } - /* address-sort this list */ - tmp = rb_prev(&va->rb_node); - if (tmp) { - struct vmap_area *prev; - prev = rb_entry(tmp, struct vmap_area, rb_node); - list_add_rcu(&va->list, &prev->list); - } else - list_add_rcu(&va->list, &vmap_area_list); + /* Address-sort this list */ + list_add(&va->list, head); } -static void purge_vmap_area_lazy(void); +static __always_inline void +unlink_va(struct vmap_area *va, struct rb_root *root) +{ + /* + * During merging a VA node can be empty, therefore + * not linked with the tree nor list. Just check it. + */ + if (!RB_EMPTY_NODE(&va->rb_node)) { + if (root == &free_vmap_area_root) + rb_erase_augmented(&va->rb_node, + root, &free_vmap_area_rb_augment_cb); + else + rb_erase(&va->rb_node, root); -static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); + list_del(&va->list); + RB_CLEAR_NODE(&va->rb_node); + } +} + +#if DEBUG_AUGMENT_PROPAGATE_CHECK +static void +augment_tree_propagate_check(struct rb_node *n) +{ + struct vmap_area *va; + struct rb_node *node; + unsigned long size; + bool found = false; + + if (n == NULL) + return; + + va = rb_entry(n, struct vmap_area, rb_node); + size = va->subtree_max_size; + node = n; + + while (node) { + va = rb_entry(node, struct vmap_area, rb_node); + + if (get_subtree_max_size(node->rb_left) == size) { + node = node->rb_left; + } else { + if (va_size(va) == size) { + found = true; + break; + } + + node = node->rb_right; + } + } + + if (!found) { + va = rb_entry(n, struct vmap_area, rb_node); + pr_emerg("tree is corrupted: %lu, %lu\n", + va_size(va), va->subtree_max_size); + } + + augment_tree_propagate_check(n->rb_left); + augment_tree_propagate_check(n->rb_right); +} +#endif + +#if DEBUG_AUGMENT_PROPAGATE_CHECK +static void +augment_tree_propagate_check(struct rb_node *n) +{ + struct vmap_area *va; + struct rb_node *node; + unsigned long size; + bool found = false; + + if (n == NULL) + return; + + va = rb_entry(n, struct vmap_area, rb_node); + size = va->subtree_max_size; + node = n; + + while (node) { + va = rb_entry(node, struct vmap_area, rb_node); + + if (get_subtree_max_size(node->rb_left) == size) { + node = node->rb_left; + } else { + if (va_size(va) == size) { + found = true; + break; + } + + node = node->rb_right; + } + } + + if (!found) { + va = rb_entry(n, struct vmap_area, rb_node); + pr_emerg("tree is corrupted: %lu, %lu\n", + va_size(va), va->subtree_max_size); + } + + augment_tree_propagate_check(n->rb_left); + augment_tree_propagate_check(n->rb_right); +} +#endif /* - * Allocate a region of KVA of the specified size and alignment, within the - * vstart and vend. + * This function populates subtree_max_size from bottom to upper + * levels starting from VA point. The propagation must be done + * when VA size is modified by changing its va_start/va_end. Or + * in case of newly inserting of VA to the tree. + * + * It means that __augment_tree_propagate_from() must be called: + * - After VA has been inserted to the tree(free path); + * - After VA has been shrunk(allocation path); + * - After VA has been increased(merging path). + * + * Please note that, it does not mean that upper parent nodes + * and their subtree_max_size are recalculated all the time up + * to the root node. + * + * 4--8 + * /\ + * / \ + * / \ + * 2--2 8--8 + * + * For example if we modify the node 4, shrinking it to 2, then + * no any modification is required. If we shrink the node 2 to 1 + * its subtree_max_size is updated only, and set to 1. If we shrink + * the node 8 to 6, then its subtree_max_size is set to 6 and parent + * node becomes 4--6. */ -static struct vmap_area *alloc_vmap_area(unsigned long size, - unsigned long align, - unsigned long vstart, unsigned long vend, - int node, gfp_t gfp_mask) +static __always_inline void +augment_tree_propagate_from(struct vmap_area *va) { - struct vmap_area *va; - struct rb_node *n; - unsigned long addr; - int purged = 0; - struct vmap_area *first; + struct rb_node *node = &va->rb_node; + unsigned long new_va_sub_max_size; - BUG_ON(!size); - BUG_ON(offset_in_page(size)); - BUG_ON(!is_power_of_2(align)); + while (node) { + va = rb_entry(node, struct vmap_area, rb_node); + new_va_sub_max_size = compute_subtree_max_size(va); - might_sleep(); + /* + * If the newly calculated maximum available size of the + * subtree is equal to the current one, then it means that + * the tree is propagated correctly. So we have to stop at + * this point to save cycles. + */ + if (va->subtree_max_size == new_va_sub_max_size) + break; - va = kmalloc_node(sizeof(struct vmap_area), - gfp_mask & GFP_RECLAIM_MASK, node); - if (unlikely(!va)) - return ERR_PTR(-ENOMEM); + va->subtree_max_size = new_va_sub_max_size; + node = rb_parent(&va->rb_node); + } + +#if DEBUG_AUGMENT_PROPAGATE_CHECK + augment_tree_propagate_check(free_vmap_area_root.rb_node); +#endif +} + +static void +insert_vmap_area(struct vmap_area *va, + struct rb_root *root, struct list_head *head) +{ + struct rb_node **link; + struct rb_node *parent; + + link = find_va_links(va, root, NULL, &parent); + link_va(va, root, parent, link, head); +} + +static void +insert_vmap_area_augment(struct vmap_area *va, + struct rb_node *from, struct rb_root *root, + struct list_head *head) +{ + struct rb_node **link; + struct rb_node *parent; + + if (from) + link = find_va_links(va, NULL, from, &parent); + else + link = find_va_links(va, root, NULL, &parent); + + link_va(va, root, parent, link, head); + augment_tree_propagate_from(va); +} + +/* + * Merge de-allocated chunk of VA memory with previous + * and next free blocks. If coalesce is not done a new + * free area is inserted. If VA has been merged, it is + * freed. + */ +static __always_inline void +merge_or_add_vmap_area(struct vmap_area *va, + struct rb_root *root, struct list_head *head) +{ + struct vmap_area *sibling; + struct list_head *next; + struct rb_node **link; + struct rb_node *parent; + bool merged = false; /* - * Only scan the relevant parts containing pointers to other objects - * to avoid false negatives. + * Find a place in the tree where VA potentially will be + * inserted, unless it is merged with its sibling/siblings. */ - kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); + link = find_va_links(va, root, NULL, &parent); -retry: - spin_lock(&vmap_area_lock); /* - * Invalidate cache if we have more permissive parameters. - * cached_hole_size notes the largest hole noticed _below_ - * the vmap_area cached in free_vmap_cache: if size fits - * into that hole, we want to scan from vstart to reuse - * the hole instead of allocating above free_vmap_cache. - * Note that __free_vmap_area may update free_vmap_cache - * without updating cached_hole_size or cached_align. + * Get next node of VA to check if merging can be done. */ - if (!free_vmap_cache || - size < cached_hole_size || - vstart < cached_vstart || - align < cached_align) { -nocache: - cached_hole_size = 0; - free_vmap_cache = NULL; - } - /* record if we encounter less permissive parameters */ - cached_vstart = vstart; - cached_align = align; - - /* find starting point for our search */ - if (free_vmap_cache) { - first = rb_entry(free_vmap_cache, struct vmap_area, rb_node); - addr = ALIGN(first->va_end, align); - if (addr < vstart) - goto nocache; - if (addr + size < addr) - goto overflow; + next = get_va_next_sibling(parent, link); + if (unlikely(next == NULL)) + goto insert; - } else { - addr = ALIGN(vstart, align); - if (addr + size < addr) - goto overflow; + /* + * start end + * | | + * |<------VA------>|<-----Next----->| + * | | + * start end + */ + if (next != head) { + sibling = list_entry(next, struct vmap_area, list); + if (sibling->va_start == va->va_end) { + sibling->va_start = va->va_start; + + /* Check and update the tree if needed. */ + augment_tree_propagate_from(sibling); - n = vmap_area_root.rb_node; - first = NULL; + /* Remove this VA, it has been merged. */ + unlink_va(va, root); - while (n) { - struct vmap_area *tmp; - tmp = rb_entry(n, struct vmap_area, rb_node); - if (tmp->va_end >= addr) { - first = tmp; - if (tmp->va_start <= addr) + /* Free vmap_area object. */ + kmem_cache_free(vmap_area_cachep, va); + + /* Point to the new merged area. */ + va = sibling; + merged = true; + } + } + + /* + * start end + * | | + * |<-----Prev----->|<------VA------>| + * | | + * start end + */ + if (next->prev != head) { + sibling = list_entry(next->prev, struct vmap_area, list); + if (sibling->va_end == va->va_start) { + sibling->va_end = va->va_end; + + /* Check and update the tree if needed. */ + augment_tree_propagate_from(sibling); + + /* Remove this VA, it has been merged. */ + unlink_va(va, root); + + /* Free vmap_area object. */ + kmem_cache_free(vmap_area_cachep, va); + + return; + } + } + +insert: + if (!merged) { + link_va(va, root, parent, link, head); + augment_tree_propagate_from(va); + } + +#if DEBUG_AUGMENT_PROPAGATE_CHECK + augment_tree_propagate_check(free_vmap_area_root.rb_node); +#endif +} + +static __always_inline bool +is_within_this_va(struct vmap_area *va, unsigned long size, + unsigned long align, unsigned long vstart) +{ + unsigned long nva_start_addr; + + if (va->va_start > vstart) + nva_start_addr = ALIGN(va->va_start, align); + else + nva_start_addr = ALIGN(vstart, align); + + /* Can be overflowed due to big size or alignment. */ + if (nva_start_addr + size < nva_start_addr || + nva_start_addr < vstart) + return false; + + return (nva_start_addr + size <= va->va_end); +} + +/* + * Find the first free block(lowest start address) in the tree, + * that will accomplish the request corresponding to passing + * parameters. + */ +static __always_inline struct vmap_area * +find_vmap_lowest_match(unsigned long size, + unsigned long align, unsigned long vstart) +{ + struct vmap_area *va; + struct rb_node *node; + unsigned long length; + + /* Start from the root. */ + node = free_vmap_area_root.rb_node; + + /* Adjust the search size for alignment overhead. */ + length = size + align - 1; + + while (node) { + va = rb_entry(node, struct vmap_area, rb_node); + + if (get_subtree_max_size(node->rb_left) >= length && + vstart < va->va_start) { + node = node->rb_left; + } else { + if (is_within_this_va(va, size, align, vstart)) + return va; + + /* + * Does not make sense to go deeper towards the right + * sub-tree if it does not have a free block that is + * equal or bigger to the requested search length. + */ + if (get_subtree_max_size(node->rb_right) >= length) { + node = node->rb_right; + continue; + } + + /* + * OK. We roll back and find the fist right sub-tree, + * that will satisfy the search criteria. It can happen + * only once due to "vstart" restriction. + */ + while ((node = rb_parent(node))) { + va = rb_entry(node, struct vmap_area, rb_node); + if (is_within_this_va(va, size, align, vstart)) + return va; + + if (get_subtree_max_size(node->rb_right) >= length && + vstart <= va->va_start) { + node = node->rb_right; break; - n = n->rb_left; - } else - n = n->rb_right; + } + } } + } + + return NULL; +} + +#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK +#include + +static struct vmap_area * +find_vmap_lowest_linear_match(unsigned long size, + unsigned long align, unsigned long vstart) +{ + struct vmap_area *va; + + list_for_each_entry(va, &free_vmap_area_list, list) { + if (!is_within_this_va(va, size, align, vstart)) + continue; + + return va; + } + + return NULL; +} + +static void +find_vmap_lowest_match_check(unsigned long size) +{ + struct vmap_area *va_1, *va_2; + unsigned long vstart; + unsigned int rnd; + + get_random_bytes(&rnd, sizeof(rnd)); + vstart = VMALLOC_START + rnd; + + va_1 = find_vmap_lowest_match(size, 1, vstart); + va_2 = find_vmap_lowest_linear_match(size, 1, vstart); + + if (va_1 != va_2) + pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n", + va_1, va_2, vstart); +} +#endif + +enum fit_type { + NOTHING_FIT = 0, + FL_FIT_TYPE = 1, /* full fit */ + LE_FIT_TYPE = 2, /* left edge fit */ + RE_FIT_TYPE = 3, /* right edge fit */ + NE_FIT_TYPE = 4 /* no edge fit */ +}; + +static __always_inline enum fit_type +classify_va_fit_type(struct vmap_area *va, + unsigned long nva_start_addr, unsigned long size) +{ + enum fit_type type; + + /* Check if it is within VA. */ + if (nva_start_addr < va->va_start || + nva_start_addr + size > va->va_end) + return NOTHING_FIT; + + /* Now classify. */ + if (va->va_start == nva_start_addr) { + if (va->va_end == nva_start_addr + size) + type = FL_FIT_TYPE; + else + type = LE_FIT_TYPE; + } else if (va->va_end == nva_start_addr + size) { + type = RE_FIT_TYPE; + } else { + type = NE_FIT_TYPE; + } + + return type; +} + +static __always_inline int +adjust_va_to_fit_type(struct vmap_area *va, + unsigned long nva_start_addr, unsigned long size, + enum fit_type type) +{ + struct vmap_area *lva; + + if (type == FL_FIT_TYPE) { + /* + * No need to split VA, it fully fits. + * + * | | + * V NVA V + * |---------------| + */ + unlink_va(va, &free_vmap_area_root); + kmem_cache_free(vmap_area_cachep, va); + } else if (type == LE_FIT_TYPE) { + /* + * Split left edge of fit VA. + * + * | | + * V NVA V R + * |-------|-------| + */ + va->va_start += size; + } else if (type == RE_FIT_TYPE) { + /* + * Split right edge of fit VA. + * + * | | + * L V NVA V + * |-------|-------| + */ + va->va_end = nva_start_addr; + } else if (type == NE_FIT_TYPE) { + /* + * Split no edge of fit VA. + * + * | | + * L V NVA V R + * |---|-------|---| + */ + lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT); + if (unlikely(!lva)) + return -1; + + /* + * Build the remainder. + */ + lva->va_start = va->va_start; + lva->va_end = nva_start_addr; + + /* + * Shrink this VA to remaining size. + */ + va->va_start = nva_start_addr + size; + } else { + return -1; + } + + if (type != FL_FIT_TYPE) { + augment_tree_propagate_from(va); + + if (type == NE_FIT_TYPE) + insert_vmap_area_augment(lva, &va->rb_node, + &free_vmap_area_root, &free_vmap_area_list); + } + + return 0; +} + +/* + * Returns a start address of the newly allocated area, if success. + * Otherwise a vend is returned that indicates failure. + */ +static __always_inline unsigned long +__alloc_vmap_area(unsigned long size, unsigned long align, + unsigned long vstart, unsigned long vend, int node) +{ + unsigned long nva_start_addr; + struct vmap_area *va; + enum fit_type type; + int ret; + + va = find_vmap_lowest_match(size, align, vstart); + if (unlikely(!va)) + return vend; + + if (va->va_start > vstart) + nva_start_addr = ALIGN(va->va_start, align); + else + nva_start_addr = ALIGN(vstart, align); + + /* Check the "vend" restriction. */ + if (nva_start_addr + size > vend) + return vend; + + /* Classify what we have found. */ + type = classify_va_fit_type(va, nva_start_addr, size); + if (WARN_ON_ONCE(type == NOTHING_FIT)) + return vend; + + /* Update the free vmap_area. */ + ret = adjust_va_to_fit_type(va, nva_start_addr, size, type); + if (ret) + return vend; + +#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK + find_vmap_lowest_match_check(size); +#endif + + return nva_start_addr; +} + +/* + * Allocate a region of KVA of the specified size and alignment, within the + * vstart and vend. + */ +static struct vmap_area *alloc_vmap_area(unsigned long size, + unsigned long align, + unsigned long vstart, unsigned long vend, + int node, gfp_t gfp_mask) +{ + struct vmap_area *va; + unsigned long addr; + int purged = 0; + + BUG_ON(!size); + BUG_ON(offset_in_page(size)); + BUG_ON(!is_power_of_2(align)); - if (!first) - goto found; - } + if (unlikely(!vmap_initialized)) + return ERR_PTR(-EBUSY); - /* from the starting point, walk areas until a suitable hole is found */ - while (addr + size > first->va_start && addr + size <= vend) { - if (addr + cached_hole_size < first->va_start) - cached_hole_size = first->va_start - addr; - addr = ALIGN(first->va_end, align); - if (addr + size < addr) - goto overflow; + might_sleep(); - if (list_is_last(&first->list, &vmap_area_list)) - goto found; + va = kmem_cache_alloc_node(vmap_area_cachep, + gfp_mask & GFP_RECLAIM_MASK, node); + if (unlikely(!va)) + return ERR_PTR(-ENOMEM); - first = list_next_entry(first, list); - } + /* + * Only scan the relevant parts containing pointers to other objects + * to avoid false negatives. + */ + kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); + +retry: + spin_lock(&vmap_area_lock); -found: /* - * Check also calculated address against the vstart, - * because it can be 0 because of big align request. + * If an allocation fails, the "vend" address is + * returned. Therefore trigger the overflow path. */ - if (addr + size > vend || addr < vstart) + addr = __alloc_vmap_area(size, align, vstart, vend, node); + if (unlikely(addr == vend)) goto overflow; va->va_start = addr; va->va_end = addr + size; va->flags = 0; - __insert_vmap_area(va); - free_vmap_cache = &va->rb_node; + insert_vmap_area(va, &vmap_area_root, &vmap_area_list); + spin_unlock(&vmap_area_lock); BUG_ON(!IS_ALIGNED(va->va_start, align)); @@ -558,7 +1113,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, if (printk_ratelimit()) pr_warn("vmap allocation for size %lu failed: use vmalloc= to increase size\n", size); - kfree(va); + + kmem_cache_free(vmap_area_cachep, va); return ERR_PTR(-EBUSY); } @@ -578,35 +1134,16 @@ static void __free_vmap_area(struct vmap_area *va) { BUG_ON(RB_EMPTY_NODE(&va->rb_node)); - if (free_vmap_cache) { - if (va->va_end < cached_vstart) { - free_vmap_cache = NULL; - } else { - struct vmap_area *cache; - cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node); - if (va->va_start <= cache->va_start) { - free_vmap_cache = rb_prev(&va->rb_node); - /* - * We don't try to update cached_hole_size or - * cached_align, but it won't go very wrong. - */ - } - } - } - rb_erase(&va->rb_node, &vmap_area_root); - RB_CLEAR_NODE(&va->rb_node); - list_del_rcu(&va->list); - /* - * Track the highest possible candidate for pcpu area - * allocation. Areas outside of vmalloc area can be returned - * here too, consider only end addresses which fall inside - * vmalloc area proper. + * Remove from the busy tree/list. */ - if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) - vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); + unlink_va(va, &vmap_area_root); - kfree_rcu(va, rcu_head); + /* + * Merge VA with its neighbors, otherwise just add it. + */ + merge_or_add_vmap_area(va, + &free_vmap_area_root, &free_vmap_area_list); } /* @@ -627,26 +1164,6 @@ static void unmap_vmap_area(struct vmap_area *va) vunmap_page_range(va->va_start, va->va_end); } -static void vmap_debug_free_range(unsigned long start, unsigned long end) -{ - /* - * Unmap page tables and force a TLB flush immediately if pagealloc - * debugging is enabled. This catches use after free bugs similarly to - * those in linear kernel virtual address space after a page has been - * freed. - * - * All the lazy freeing logic is still retained, in order to minimise - * intrusiveness of this debugging feature. - * - * This is going to be *slow* (linear kernel virtual address debugging - * doesn't do a broadcast TLB flush so it is a lot faster). - */ - if (debug_pagealloc_enabled()) { - vunmap_page_range(start, end); - flush_tlb_kernel_range(start, end); - } -} - /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. @@ -672,7 +1189,7 @@ static unsigned long lazy_max_pages(void) return log * (32UL * 1024 * 1024 / PAGE_SIZE); } -static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); +static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0); /* * Serialize vmap purging. There is no actual criticial section protected @@ -690,7 +1207,7 @@ static void purge_fragmented_blocks_allcpus(void); */ void set_iounmap_nonlazy(void) { - atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); + atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1); } /* @@ -698,34 +1215,40 @@ void set_iounmap_nonlazy(void) */ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) { + unsigned long resched_threshold; struct llist_node *valist; struct vmap_area *va; struct vmap_area *n_va; - bool do_free = false; lockdep_assert_held(&vmap_purge_lock); valist = llist_del_all(&vmap_purge_list); + if (unlikely(valist == NULL)) + return false; + + /* + * TODO: to calculate a flush range without looping. + * The list can be up to lazy_max_pages() elements. + */ llist_for_each_entry(va, valist, purge_list) { if (va->va_start < start) start = va->va_start; if (va->va_end > end) end = va->va_end; - do_free = true; } - if (!do_free) - return false; - flush_tlb_kernel_range(start, end); + resched_threshold = lazy_max_pages() << 1; spin_lock(&vmap_area_lock); llist_for_each_entry_safe(va, n_va, valist, purge_list) { - int nr = (va->va_end - va->va_start) >> PAGE_SHIFT; + unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; __free_vmap_area(va); - atomic_sub(nr, &vmap_lazy_nr); - cond_resched_lock(&vmap_area_lock); + atomic_long_sub(nr, &vmap_lazy_nr); + + if (atomic_long_read(&vmap_lazy_nr) < resched_threshold) + cond_resched_lock(&vmap_area_lock); } spin_unlock(&vmap_area_lock); return true; @@ -761,10 +1284,10 @@ static void purge_vmap_area_lazy(void) */ static void free_vmap_area_noflush(struct vmap_area *va) { - int nr_lazy; + unsigned long nr_lazy; - nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT, - &vmap_lazy_nr); + nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >> + PAGE_SHIFT, &vmap_lazy_nr); /* After this point, we may free va at any time */ llist_add(&va->purge_list, &vmap_purge_list); @@ -780,6 +1303,9 @@ static void free_unmap_vmap_area(struct vmap_area *va) { flush_cache_vunmap(va->va_start, va->va_end); unmap_vmap_area(va); + if (debug_pagealloc_enabled()) + flush_tlb_kernel_range(va->va_start, va->va_end); + free_vmap_area_noflush(va); } @@ -824,8 +1350,6 @@ static struct vmap_area *find_vmap_area(unsigned long addr) #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) -static bool vmap_initialized __read_mostly = false; - struct vmap_block_queue { spinlock_t lock; struct list_head free; @@ -1077,6 +1601,10 @@ static void vb_free(const void *addr, unsigned long size) vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); + if (debug_pagealloc_enabled()) + flush_tlb_kernel_range((unsigned long)addr, + (unsigned long)addr + size); + spin_lock(&vb->lock); /* Expand dirty range */ @@ -1092,24 +1620,9 @@ static void vb_free(const void *addr, unsigned long size) spin_unlock(&vb->lock); } -/** - * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer - * - * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily - * to amortize TLB flushing overheads. What this means is that any page you - * have now, may, in a former life, have been mapped into kernel virtual - * address by the vmap layer and so there might be some CPUs with TLB entries - * still referencing that page (additional to the regular 1:1 kernel mapping). - * - * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can - * be sure that none of the pages we have control over will have any aliases - * from the vmap layer. - */ -void vm_unmap_aliases(void) +static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) { - unsigned long start = ULONG_MAX, end = 0; int cpu; - int flush = 0; if (unlikely(!vmap_initialized)) return; @@ -1146,6 +1659,27 @@ void vm_unmap_aliases(void) flush_tlb_kernel_range(start, end); mutex_unlock(&vmap_purge_lock); } + +/** + * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer + * + * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily + * to amortize TLB flushing overheads. What this means is that any page you + * have now, may, in a former life, have been mapped into kernel virtual + * address by the vmap layer and so there might be some CPUs with TLB entries + * still referencing that page (additional to the regular 1:1 kernel mapping). + * + * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can + * be sure that none of the pages we have control over will have any aliases + * from the vmap layer. + */ +void vm_unmap_aliases(void) +{ + unsigned long start = ULONG_MAX, end = 0; + int flush = 0; + + _vm_unmap_aliases(start, end, flush); +} EXPORT_SYMBOL_GPL(vm_unmap_aliases); /** @@ -1165,16 +1699,16 @@ void vm_unmap_ram(const void *mem, unsigned int count) BUG_ON(addr > VMALLOC_END); BUG_ON(!PAGE_ALIGNED(addr)); - debug_check_no_locks_freed(mem, size); - vmap_debug_free_range(addr, addr+size); - if (likely(count <= VMAP_MAX_ALLOC)) { + debug_check_no_locks_freed(mem, size); vb_free(mem, size); return; } va = find_vmap_area(addr); BUG_ON(!va); + debug_check_no_locks_freed((void *)va->va_start, + (va->va_end - va->va_start)); free_unmap_vmap_area(va); } EXPORT_SYMBOL(vm_unmap_ram); @@ -1224,33 +1758,6 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro EXPORT_SYMBOL(vm_map_ram); static struct vm_struct *vmlist __initdata; - -/** - * vm_area_check_early - check if vmap area is already mapped - * @vm: vm_struct to be checked - * - * This function is used to check if the vmap area has been - * mapped already. @vm->addr, @vm->size and @vm->flags should - * contain proper values. - * - */ -int __init vm_area_check_early(struct vm_struct *vm) -{ - struct vm_struct *tmp, **p; - - BUG_ON(vmap_initialized); - for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { - if (tmp->addr >= vm->addr) { - if (tmp->addr < vm->addr + vm->size) - return 1; - } else { - if (tmp->addr + tmp->size > vm->addr) - return 1; - } - } - return 0; -} - /** * vm_area_add_early - add vmap area early during boot * @vm: vm_struct to add @@ -1302,12 +1809,58 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align) vm_area_add_early(vm); } +static void vmap_init_free_space(void) +{ + unsigned long vmap_start = 1; + const unsigned long vmap_end = ULONG_MAX; + struct vmap_area *busy, *free; + + /* + * B F B B B F + * -|-----|.....|-----|-----|-----|.....|- + * | The KVA space | + * |<--------------------------------->| + */ + list_for_each_entry(busy, &vmap_area_list, list) { + if (busy->va_start - vmap_start > 0) { + free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); + if (!WARN_ON_ONCE(!free)) { + free->va_start = vmap_start; + free->va_end = busy->va_start; + + insert_vmap_area_augment(free, NULL, + &free_vmap_area_root, + &free_vmap_area_list); + } + } + + vmap_start = busy->va_end; + } + + if (vmap_end - vmap_start > 0) { + free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); + if (!WARN_ON_ONCE(!free)) { + free->va_start = vmap_start; + free->va_end = vmap_end; + + insert_vmap_area_augment(free, NULL, + &free_vmap_area_root, + &free_vmap_area_list); + } + } +} + void __init vmalloc_init(void) { struct vmap_area *va; struct vm_struct *tmp; int i; + /* + * Create the cache for vmap_area objects. + */ + vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC); + for_each_possible_cpu(i) { struct vmap_block_queue *vbq; struct vfree_deferred *p; @@ -1322,16 +1875,21 @@ void __init vmalloc_init(void) /* Import existing vmlist entries. */ for (tmp = vmlist; tmp; tmp = tmp->next) { - va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); + va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); + if (WARN_ON_ONCE(!va)) + continue; + va->flags = VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; va->vm = tmp; - __insert_vmap_area(va); + insert_vmap_area(va, &vmap_area_root, &vmap_area_list); } - vmap_area_pcpu_hole = VMALLOC_END; - calc_total_vmalloc_size(); + /* + * Now we can initialize a free vmap space. + */ + vmap_init_free_space(); vmap_initialized = true; } @@ -1558,9 +2116,9 @@ struct vm_struct *remove_vm_area(const void *addr) spin_lock(&vmap_area_lock); va->vm = NULL; va->flags &= ~VM_VM_AREA; + va->flags |= VM_LAZY_FREE; spin_unlock(&vmap_area_lock); - vmap_debug_free_range(va->va_start, va->va_end); kasan_free_shadow(vm); free_unmap_vmap_area(va); @@ -1569,6 +2127,72 @@ struct vm_struct *remove_vm_area(const void *addr) return NULL; } +static inline void set_area_direct_map(const struct vm_struct *area, + int (*set_direct_map)(struct page *page)) +{ + int i; + + for (i = 0; i < area->nr_pages; i++) + if (page_address(area->pages[i])) + set_direct_map(area->pages[i]); +} + +/* Handle removing and resetting vm mappings related to the vm_struct. */ +static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) +{ + unsigned long addr = (unsigned long)area->addr; + unsigned long start = ULONG_MAX, end = 0; + int flush_reset = area->flags & VM_FLUSH_RESET_PERMS; + int i; + + /* + * The below block can be removed when all architectures that have + * direct map permissions also have set_direct_map_() implementations. + * This is concerned with resetting the direct map any an vm alias with + * execute permissions, without leaving a RW+X window. + */ + if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { + set_memory_nx(addr, area->nr_pages); + set_memory_rw(addr, area->nr_pages); + } + + remove_vm_area(area->addr); + + /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */ + if (!flush_reset) + return; + + /* + * If not deallocating pages, just do the flush of the VM area and + * return. + */ + if (!deallocate_pages) { + vm_unmap_aliases(); + return; + } + + /* + * If execution gets here, flush the vm mapping and reset the direct + * map. Find the start and end range of the direct mappings to make sure + * the vm_unmap_aliases() flush includes the direct map. + */ + for (i = 0; i < area->nr_pages; i++) { + if (page_address(area->pages[i])) { + start = min(addr, start); + end = max(addr, end); + } + } + + /* + * Set direct map to something invalid so that it won't be cached if + * there are any accesses after the TLB flush, then flush the TLB and + * reset the direct map permissions to the default. + */ + set_area_direct_map(area, set_direct_map_invalid_noflush); + _vm_unmap_aliases(start, end, 1); + set_area_direct_map(area, set_direct_map_default_noflush); +} + static void __vunmap(const void *addr, int deallocate_pages) { struct vm_struct *area; @@ -1587,10 +2211,11 @@ static void __vunmap(const void *addr, int deallocate_pages) return; } - debug_check_no_locks_freed(addr, get_vm_area_size(area)); - debug_check_no_obj_freed(addr, get_vm_area_size(area)); + debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); + debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); + + vm_remove_mappings(area, deallocate_pages); - remove_vm_area(addr); if (deallocate_pages) { int i; @@ -1641,6 +2266,14 @@ void vfree_atomic(const void *addr) __vfree_deferred(addr); } +static void __vfree(const void *addr) +{ + if (unlikely(in_interrupt())) + __vfree_deferred(addr); + else + __vunmap(addr, 1); +} + /** * vfree - release memory allocated by vmalloc() * @addr: memory base address @@ -1661,12 +2294,12 @@ void vfree(const void *addr) kmemleak_free(addr); + might_sleep_if(!in_interrupt()); + if (!addr) return; - if (unlikely(in_interrupt())) - __vfree_deferred(addr); - else - __vunmap(addr, 1); + + __vfree(addr); } EXPORT_SYMBOL(vfree); @@ -1706,7 +2339,7 @@ void *vmap(struct page **pages, unsigned int count, might_sleep(); - if (count > totalram_pages) + if (count > totalram_pages()) return NULL; size = (unsigned long)count << PAGE_SHIFT; @@ -1733,6 +2366,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, unsigned int nr_pages, array_size, i; const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; + const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ? + 0 : + __GFP_HIGHMEM; nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1740,7 +2376,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { - pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, + pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, PAGE_KERNEL, node, area->caller); } else { pages = kmalloc_node(array_size, nested_gfp, node); @@ -1756,9 +2392,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, struct page *page; if (node == NUMA_NO_NODE) - page = alloc_page(alloc_mask); + page = alloc_page(alloc_mask|highmem_mask); else - page = alloc_pages_node(node, alloc_mask, 0); + page = alloc_pages_node(node, alloc_mask|highmem_mask, 0); if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vunmap() */ @@ -1767,7 +2403,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, goto fail; } area->pages[i] = page; - if (gfpflags_allow_blocking(gfp_mask)) + if (gfpflags_allow_blocking(gfp_mask|highmem_mask)) cond_resched(); } atomic_long_add(area->nr_pages, &nr_vmalloc_pages); @@ -1780,7 +2416,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, warn_alloc(gfp_mask, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); - vfree(area->addr); + __vfree(area->addr); return NULL; } @@ -1810,7 +2446,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long real_size = size; size = PAGE_ALIGN(size); - if (!size || (size >> PAGE_SHIFT) > totalram_pages) + if (!size || (size >> PAGE_SHIFT) > totalram_pages()) goto fail; area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | @@ -1835,12 +2471,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, */ clear_vm_uninitialized_flag(area); - /* - * A ref_count = 2 is needed because vm_struct allocated in - * __get_vm_area_node() contains a reference to the virtual address of - * the vmalloc'ed block. - */ - kmemleak_alloc(addr, real_size, 2, gfp_mask); + kmemleak_vmalloc(area, size, gfp_mask); return addr; @@ -1850,6 +2481,15 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return NULL; } +/* + * This is only for performance analysis of vmalloc and stress purpose. + * It is required by vmalloc test module, therefore do not use it other + * than that. + */ +#ifdef CONFIG_TEST_VMALLOC_MODULE +EXPORT_SYMBOL_GPL(__vmalloc_node_range); +#endif + /** * __vmalloc_node - allocate virtually contiguous memory * @size: allocation size @@ -1862,6 +2502,13 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, * Allocate enough pages to cover @size from the page level * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. + * + * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT + * and __GFP_NOFAIL are not supported + * + * Any use of gfp flags outside of GFP_KERNEL should be consulted + * with mm people. + * */ static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, @@ -1885,6 +2532,13 @@ static inline void *__vmalloc_node_flags(unsigned long size, node, __builtin_return_address(0)); } + +void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, + void *caller) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); +} + /** * vmalloc - allocate virtually contiguous memory * @size: allocation size @@ -1897,7 +2551,7 @@ static inline void *__vmalloc_node_flags(unsigned long size, void *vmalloc(unsigned long size) { return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_HIGHMEM); + GFP_KERNEL); } EXPORT_SYMBOL(vmalloc); @@ -1914,7 +2568,7 @@ EXPORT_SYMBOL(vmalloc); void *vzalloc(unsigned long size) { return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc); @@ -1927,18 +2581,10 @@ EXPORT_SYMBOL(vzalloc); */ void *vmalloc_user(unsigned long size) { - struct vm_struct *area; - void *ret; - - ret = __vmalloc_node(size, SHMLBA, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL, NUMA_NO_NODE, - __builtin_return_address(0)); - if (ret) { - area = find_vm_area(ret); - area->flags |= VM_USERMAP; - } - return ret; + return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, + VM_USERMAP, NUMA_NO_NODE, + __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_user); @@ -1955,7 +2601,7 @@ EXPORT_SYMBOL(vmalloc_user); */ void *vmalloc_node(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, node, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_node); @@ -1975,14 +2621,10 @@ EXPORT_SYMBOL(vmalloc_node); void *vzalloc_node(unsigned long size, int node) { return __vmalloc_node_flags(size, node, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc_node); -#ifndef PAGE_KERNEL_EXEC -# define PAGE_KERNEL_EXEC PAGE_KERNEL -#endif - /** * vmalloc_exec - allocate virtually contiguous, executable memory * @size: allocation size @@ -1997,16 +2639,21 @@ EXPORT_SYMBOL(vzalloc_node); void *vmalloc_exec(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, - NUMA_NO_NODE, __builtin_return_address(0)); + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) -#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL +#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) -#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL +#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL) #else -#define GFP_VMALLOC32 GFP_KERNEL +/* + * 64b systems should always have either DMA or DMA32 zones. For others + * GFP_DMA32 should do the right thing and use the normal zone. + */ +#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL #endif /** @@ -2032,16 +2679,10 @@ EXPORT_SYMBOL(vmalloc_32); */ void *vmalloc_32_user(unsigned long size) { - struct vm_struct *area; - void *ret; - - ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, - NUMA_NO_NODE, __builtin_return_address(0)); - if (ret) { - area = find_vm_area(ret); - area->flags |= VM_USERMAP; - } - return ret; + return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, + GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, + VM_USERMAP, NUMA_NO_NODE, + __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32_user); @@ -2449,81 +3090,64 @@ static struct vmap_area *node_to_va(struct rb_node *n) } /** - * pvm_find_next_prev - find the next and prev vmap_area surrounding @end - * @end: target address - * @pnext: out arg for the next vmap_area - * @pprev: out arg for the previous vmap_area - * - * Returns: %true if either or both of next and prev are found, - * %false if no vmap_area exists + * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to + * @addr: target address * - * Find vmap_areas end addresses of which enclose @end. ie. if not - * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. + * Returns: vmap_area if it is found. If there is no such area + * the first highest(reverse order) vmap_area is returned + * i.e. va->va_start < addr && va->va_end < addr or NULL + * if there are no any areas before @addr. */ -static bool pvm_find_next_prev(unsigned long end, - struct vmap_area **pnext, - struct vmap_area **pprev) +static struct vmap_area * +pvm_find_va_enclose_addr(unsigned long addr) { - struct rb_node *n = vmap_area_root.rb_node; - struct vmap_area *va = NULL; + struct vmap_area *va, *tmp; + struct rb_node *n; + + n = free_vmap_area_root.rb_node; + va = NULL; while (n) { - va = rb_entry(n, struct vmap_area, rb_node); - if (end < va->va_end) - n = n->rb_left; - else if (end > va->va_end) + tmp = rb_entry(n, struct vmap_area, rb_node); + if (tmp->va_start <= addr) { + va = tmp; + if (tmp->va_end >= addr) + break; + n = n->rb_right; - else - break; + } else { + n = n->rb_left; + } } - if (!va) - return false; - - if (va->va_end > end) { - *pnext = va; - *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); - } else { - *pprev = va; - *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); - } - return true; + return va; } /** - * pvm_determine_end - find the highest aligned address between two vmap_areas - * @pnext: in/out arg for the next vmap_area - * @pprev: in/out arg for the previous vmap_area - * @align: alignment - * - * Returns: determined end address + * pvm_determine_end_from_reverse - find the highest aligned address + * of free block below VMALLOC_END + * @va: + * in - the VA we start the search(reverse order); + * out - the VA with the highest aligned end address. * - * Find the highest aligned address between *@pnext and *@pprev below - * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned - * down address is between the end addresses of the two vmap_areas. - * - * Please note that the address returned by this function may fall - * inside *@pnext vmap_area. The caller is responsible for checking - * that. + * Returns: determined end address within vmap_area */ -static unsigned long pvm_determine_end(struct vmap_area **pnext, - struct vmap_area **pprev, - unsigned long align) +static unsigned long +pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align) { - const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); unsigned long addr; - if (*pnext) - addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); - else - addr = vmalloc_end; - - while (*pprev && (*pprev)->va_end > addr) { - *pnext = *pprev; - *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + if (likely(*va)) { + list_for_each_entry_from_reverse((*va), + &free_vmap_area_list, list) { + addr = min((*va)->va_end & ~(align - 1), vmalloc_end); + if ((*va)->va_start < addr) + return addr; + } } - return addr; + return 0; } /** @@ -2543,12 +3167,12 @@ static unsigned long pvm_determine_end(struct vmap_area **pnext, * to gigabytes. To avoid interacting with regular vmallocs, these * areas are allocated from top. * - * Despite its complicated look, this allocator is rather simple. It - * does everything top-down and scans areas from the end looking for - * matching slot. While scanning, if any of the areas overlaps with - * existing vmap_area, the base address is pulled down to fit the - * area. Scanning is repeated till all the areas fit and then all - * necessary data structres are inserted and the result is returned. + * Despite its complicated look, this allocator is rather simple. It + * does everything top-down and scans free blocks from the end looking + * for matching base. While scanning, if any of the areas do not fit the + * base address is pulled down to fit the area. Scanning is repeated till + * all the areas fit and then all necessary data structures are inserted + * and the result is returned. */ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, @@ -2556,11 +3180,12 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, { const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); - struct vmap_area **vas, *prev, *next; + struct vmap_area **vas, *va; struct vm_struct **vms; int area, area2, last_area, term_area; - unsigned long base, start, end, last_end; + unsigned long base, start, size, end, last_end; bool purged = false; + enum fit_type type; /* verify parameters and allocate data structures */ BUG_ON(offset_in_page(align) || !is_power_of_2(align)); @@ -2576,15 +3201,11 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, if (start > offsets[last_area]) last_area = area; - for (area2 = 0; area2 < nr_vms; area2++) { + for (area2 = area + 1; area2 < nr_vms; area2++) { unsigned long start2 = offsets[area2]; unsigned long end2 = start2 + sizes[area2]; - if (area2 == area) - continue; - - BUG_ON(start2 >= start && start2 < end); - BUG_ON(end2 <= end && end2 > start); + BUG_ON(start2 < end && start < end2); } } last_end = offsets[last_area] + sizes[last_area]; @@ -2600,7 +3221,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, goto err_free2; for (area = 0; area < nr_vms; area++) { - vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL); + vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL); vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL); if (!vas[area] || !vms[area]) goto err_free; @@ -2613,49 +3234,29 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, start = offsets[area]; end = start + sizes[area]; - if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { - base = vmalloc_end - last_end; - goto found; - } - base = pvm_determine_end(&next, &prev, align) - end; + va = pvm_find_va_enclose_addr(vmalloc_end); + base = pvm_determine_end_from_reverse(&va, align) - end; while (true) { - BUG_ON(next && next->va_end <= base + end); - BUG_ON(prev && prev->va_end > base + end); - /* * base might have underflowed, add last_end before * comparing. */ - if (base + last_end < vmalloc_start + last_end) { - spin_unlock(&vmap_area_lock); - if (!purged) { - purge_vmap_area_lazy(); - purged = true; - goto retry; - } - goto err_free; - } + if (base + last_end < vmalloc_start + last_end) + goto overflow; /* - * If next overlaps, move base downwards so that it's - * right below next and then recheck. + * Fitting base has not been found. */ - if (next && next->va_start < base + end) { - base = pvm_determine_end(&next, &prev, align) - end; - term_area = area; - continue; - } + if (va == NULL) + goto overflow; /* - * If prev overlaps, shift down next and prev and move - * base so that it's right below new next and then - * recheck. + * If this VA does not fit, move base downwards and recheck. */ - if (prev && prev->va_end > base + start) { - next = prev; - prev = node_to_va(rb_prev(&next->rb_node)); - base = pvm_determine_end(&next, &prev, align) - end; + if (base + start < va->va_start || base + end > va->va_end) { + va = node_to_va(rb_prev(&va->rb_node)); + base = pvm_determine_end_from_reverse(&va, align) - end; term_area = area; continue; } @@ -2667,21 +3268,40 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, area = (area + nr_vms - 1) % nr_vms; if (area == term_area) break; + start = offsets[area]; end = start + sizes[area]; - pvm_find_next_prev(base + end, &next, &prev); + va = pvm_find_va_enclose_addr(base + end); } -found: + /* we've found a fitting base, insert all va's */ for (area = 0; area < nr_vms; area++) { - struct vmap_area *va = vas[area]; + int ret; - va->va_start = base + offsets[area]; - va->va_end = va->va_start + sizes[area]; - __insert_vmap_area(va); - } + start = base + offsets[area]; + size = sizes[area]; + + va = pvm_find_va_enclose_addr(start); + if (WARN_ON_ONCE(va == NULL)) + /* It is a BUG(), but trigger recovery instead. */ + goto recovery; + + type = classify_va_fit_type(va, start, size); + if (WARN_ON_ONCE(type == NOTHING_FIT)) + /* It is a BUG(), but trigger recovery instead. */ + goto recovery; + + ret = adjust_va_to_fit_type(va, start, size, type); + if (unlikely(ret)) + goto recovery; - vmap_area_pcpu_hole = base + offsets[last_area]; + /* Allocated area. */ + va = vas[area]; + va->va_start = start; + va->va_end = start + size; + + insert_vmap_area(va, &vmap_area_root, &vmap_area_list); + } spin_unlock(&vmap_area_lock); @@ -2693,9 +3313,38 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, kfree(vas); return vms; +recovery: + /* Remove previously inserted areas. */ + while (area--) { + __free_vmap_area(vas[area]); + vas[area] = NULL; + } + +overflow: + spin_unlock(&vmap_area_lock); + if (!purged) { + purge_vmap_area_lazy(); + purged = true; + + /* Before "retry", check if we recover. */ + for (area = 0; area < nr_vms; area++) { + if (vas[area]) + continue; + + vas[area] = kmem_cache_zalloc( + vmap_area_cachep, GFP_KERNEL); + if (!vas[area]) + goto err_free; + } + + goto retry; + } + err_free: for (area = 0; area < nr_vms; area++) { - kfree(vas[area]); + if (vas[area]) + kmem_cache_free(vmap_area_cachep, vas[area]); + kfree(vms[area]); } err_free2: @@ -2792,8 +3441,14 @@ static int s_show(struct seq_file *m, void *p) * s_show can encounter race with remove_vm_area, !VM_VM_AREA on * behalf of vmap area is being tear down or vm_map_ram allocation. */ - if (!(va->flags & VM_VM_AREA)) + if (!(va->flags & VM_VM_AREA)) { + seq_printf(m, "0x%pK-0x%pK %7ld %s\n", + (void *)va->va_start, (void *)va->va_end, + va->va_end - va->va_start, + va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram"); + return 0; + } v = va->vm; @@ -2801,7 +3456,7 @@ static int s_show(struct seq_file *m, void *p) v->addr, v->addr + v->size, v->size); if (v->caller) - seq_printf(m, " %pS", v->caller); + seq_printf(m, " dummy+0x0/0x0"); if (v->nr_pages) seq_printf(m, " pages=%d", v->nr_pages); @@ -2824,9 +3479,6 @@ static int s_show(struct seq_file *m, void *p) if (is_vmalloc_addr(v->pages)) seq_puts(m, " vpages"); - if (v->flags & VM_LOWMEM) - seq_puts(m, " lowmem"); - show_numa_info(m, v); seq_putc(m, '\n'); return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 250cd44699a1..129c0a83f55e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1564,7 +1564,7 @@ static const struct file_operations proc_vmstat_file_operations = { #ifdef CONFIG_SMP static struct workqueue_struct *vmstat_wq; static DEFINE_PER_CPU(struct delayed_work, vmstat_work); -int sysctl_stat_interval __read_mostly = HZ; +int sysctl_stat_interval __read_mostly = CONFIG_VMSTAT_INTERVAL * HZ; #ifdef CONFIG_PROC_FS static void refresh_vm_stats(struct work_struct *work) diff --git a/mm/workingset.c b/mm/workingset.c index a6976119b4a4..522f8e66d957 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -511,10 +511,10 @@ static int __init workingset_init(void) * actionable refault distance, which is currently half of * memory (totalram_pages/2). However, memory hotplug may add * some more pages at runtime, so keep working with up to - * double the initial memory by using totalram_pages as-is. + * double the initial memory by using totalram_pages() as-is. */ timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; - max_order = fls_long(totalram_pages - 1); + max_order = fls_long(totalram_pages() - 1); if (max_order > timestamp_bits) bucket_order = max_order - timestamp_bits; pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", diff --git a/mm/zswap.c b/mm/zswap.c index c2b5435fe617..d1146c76f3d3 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -203,8 +203,8 @@ static const struct zpool_ops zswap_zpool_ops = { static bool zswap_is_full(void) { - return totalram_pages * zswap_max_pool_percent / 100 < - DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); + return totalram_pages() * zswap_max_pool_percent / 100 < + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); } static void zswap_update_total_size(void) diff --git a/net/Kconfig b/net/Kconfig index 0b8c2558df6e..31734d1790a9 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -83,6 +83,7 @@ config INET Short answer: say Y. if INET +source "net/wireguard/Kconfig" source "net/ipv4/Kconfig" source "net/ipv6/Kconfig" source "net/netlabel/Kconfig" diff --git a/net/Makefile b/net/Makefile index 83e91dcafec0..0c4a0f913b1a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_NET) += $(tmp-y) obj-$(CONFIG_LLC) += llc/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_WIREGUARD) += wireguard/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 18c4b34bd6e0..696f13b58e65 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -174,7 +174,7 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return ptr; } - return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, flags, PAGE_KERNEL); } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 5f3a627afcc6..fa7240c9000b 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -43,9 +43,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf) goto fail; } - /* crypto_alloc_skcipher() allocates with GFP_KERNEL */ + /* crypto_alloc_sync_skcipher() allocates with GFP_KERNEL */ noio_flag = memalloc_noio_save(); - key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + key->tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0); memalloc_noio_restore(noio_flag); if (IS_ERR(key->tfm)) { ret = PTR_ERR(key->tfm); @@ -53,7 +53,7 @@ static int set_secret(struct ceph_crypto_key *key, void *buf) goto fail; } - ret = crypto_skcipher_setkey(key->tfm, key->key, key->len); + ret = crypto_sync_skcipher_setkey(key->tfm, key->key, key->len); if (ret) goto fail; @@ -133,7 +133,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key) if (key) { kfree(key->key); key->key = NULL; - crypto_free_skcipher(key->tfm); + crypto_free_sync_skcipher(key->tfm); key->tfm = NULL; } } @@ -213,7 +213,7 @@ static void teardown_sgtable(struct sg_table *sgt) static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { - SKCIPHER_REQUEST_ON_STACK(req, key->tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm); struct sg_table sgt; struct scatterlist prealloc_sg; char iv[AES_BLOCK_SIZE] __aligned(8); @@ -229,7 +229,7 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, return ret; memcpy(iv, aes_iv, AES_BLOCK_SIZE); - skcipher_request_set_tfm(req, key->tfm); + skcipher_request_set_sync_tfm(req, key->tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 58d83aa7740f..ac444db0ad26 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -12,7 +12,7 @@ struct ceph_crypto_key { struct ceph_timespec created; int len; void *key; - struct crypto_skcipher *tfm; + struct crypto_sync_skcipher *tfm; }; int ceph_crypto_key_clone(struct ceph_crypto_key *dst, diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 5473af8cb4c9..82d9a8cbdcfc 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -206,6 +206,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } +static int nla_get_port_range(struct nlattr *pattr, + struct fib_rule_port_range *port_range) +{ + const struct fib_rule_port_range *pr = nla_data(pattr); + + if (!fib_rule_port_range_valid(pr)) + return -EINVAL; + + port_range->start = pr->start; + port_range->end = pr->end; + + return 0; +} + +static int nla_put_port_range(struct sk_buff *skb, int attrtype, + struct fib_rule_port_range *range) +{ + return nla_put(skb, attrtype, sizeof(*range), range); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -551,6 +571,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rule_port_range sprange = {0, 0}; + struct fib_rule_port_range dprange = {0, 0}; struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; @@ -584,6 +606,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) range = fib_kuid_range_unset; } + if (tb[FRA_SPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_SPORT_RANGE], + &sprange); + if (err) + goto errout; + } + + if (tb[FRA_DPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_DPORT_RANGE], + &dprange); + if (err) + goto errout; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -625,6 +661,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) !uid_eq(rule->uid_range.end, range.end))) continue; + if (tb[FRA_IP_PROTO] && + (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO]))) + continue; + + if (fib_rule_port_range_set(&sprange) && + !fib_rule_port_range_compare(&rule->sport_range, &sprange)) + continue; + + if (fib_rule_port_range_set(&dprange) && + !fib_rule_port_range_compare(&rule->dport_range, &dprange)) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -694,6 +742,12 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ + nla_total_size(sizeof(struct fib_kuid_range)); + + nla_total_size(8); /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)) + + nla_total_size(1) /* FRA_PROTOCOL */ + + nla_total_size(1) /* FRA_IP_PROTO */ + + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ + + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -755,7 +809,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->l3mdev && nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || (uid_range_set(&rule->uid_range) && - nla_put_uid_range(skb, &rule->uid_range))) + nla_put_uid_range(skb, &rule->uid_range)) || + (fib_rule_port_range_set(&rule->sport_range) && + nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || + (fib_rule_port_range_set(&rule->dport_range) && + nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || + (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d5cad076daf5..e6224962851d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -57,6 +57,9 @@ #include #include +#define RTNL_MAX_TYPE 48 +#define RTNL_SLAVE_MAX_TYPE 36 + struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; @@ -348,6 +351,11 @@ int rtnl_link_register(struct rtnl_link_ops *ops) { int err; + /* Sanity-check max sizes to avoid stack buffer overflow. */ + if (WARN_ON(ops->maxtype > RTNL_MAX_TYPE || + ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)) + return -EINVAL; + rtnl_lock(); err = __rtnl_link_register(ops); rtnl_unlock(); @@ -2516,13 +2524,16 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) } if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 1]; - struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1]; + struct nlattr *attr[RTNL_MAX_TYPE + 1]; + struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; struct net *dest_net, *link_net = NULL; if (ops) { + if (ops->maxtype > RTNL_MAX_TYPE) + return -EINVAL; + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { err = nla_parse_nested(attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], @@ -2539,6 +2550,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) } if (m_ops) { + if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) + return -EINVAL; + if (m_ops->slave_maxtype && linkinfo[IFLA_INFO_SLAVE_DATA]) { err = nla_parse_nested(slave_attr, diff --git a/net/core/sock.c b/net/core/sock.c index 0410b74e72c8..58a8607a4bfd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -989,6 +989,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, #endif case SO_MAX_PACING_RATE: + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 161dfcf86126..fc31c02d616c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -248,7 +248,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - const u32 now = ccid2_time_stamp; + const u32 now = ccid2_jiffies32; struct ccid2_seq *next; /* slow-start after idle periods (RFC 2581, RFC 2861) */ @@ -481,7 +481,7 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, * The cleanest solution is to not use the ccid2s_sent field at all * and instead use DCCP timestamps: requires changes in other places. */ - ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent); + ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent); } static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) @@ -493,7 +493,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) return; } - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = ccid2_jiffies32; hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); @@ -746,7 +746,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 18c97543e522..6e50ef2898fb 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -27,7 +27,7 @@ * CCID-2 timestamping faces the same issues as TCP timestamping. * Hence we reuse/share as much of the code as possible. */ -#define ccid2_time_stamp tcp_time_stamp +#define ccid2_jiffies32 ((u32)jiffies) /* NUMDUPACK parameter from RFC 4341, p. 6 */ #define NUMDUPACK 3 diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 936dab12f99f..21db0a88b158 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1149,10 +1149,10 @@ static int __init dccp_init(void) * * The methodology is similar to that of the buffer cache. */ - if (totalram_pages >= (128 * 1024)) - goal = totalram_pages >> (21 - PAGE_SHIFT); + if (totalram_pages() >= (128 * 1024)) + goal = totalram_pages() >> (21 - PAGE_SHIFT); else - goal = totalram_pages >> (23 - PAGE_SHIFT); + goal = totalram_pages() >> (23 - PAGE_SHIFT); if (thash_entries) goal = (thash_entries * diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 403593bd2b83..86b7eec8470c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1877,7 +1877,7 @@ void __init dn_route_init(void) dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; add_timer(&dn_route_timer); - goal = totalram_pages >> (26 - PAGE_SHIFT); + goal = totalram_pages() >> (26 - PAGE_SHIFT); for(order = 0; (1UL << order) < goal; order++) /* NOTHING */; diff --git a/net/ipc_router/ipc_router_core.c b/net/ipc_router/ipc_router_core.c index 913089f83091..00ced3107cde 100644 --- a/net/ipc_router/ipc_router_core.c +++ b/net/ipc_router/ipc_router_core.c @@ -2781,8 +2781,8 @@ static void do_read_data(struct kthread_work *work) port_ptr = ipc_router_get_port_ref(hdr->dst_port_id); if (!port_ptr) { - IPC_RTR_ERR("%s: No local port id %08x\n", __func__, - hdr->dst_port_id); +// IPC_RTR_ERR("%s: No local port id %08x\n", __func__, +// hdr->dst_port_id); goto read_next_pkt1; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3096468a115d..32c841146c31 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -151,6 +151,7 @@ static const char *const comments[] = { [NF_IP_TRACE_COMMENT_POLICY] = "policy", }; +#ifdef CONFIG_DEBUG_KERNEL static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -160,6 +161,7 @@ static struct nf_loginfo trace_loginfo = { }, }, }; +#endif /* Mildly perf critical (only if packet tracing is on) */ static inline int @@ -216,9 +218,11 @@ static void trace_packet(struct net *net, &chainname, &comment, &rulenum) != 0) break; +#ifdef CONFIG_DEBUG_KERNEL nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); +#endif } #endif diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a8eb7f56d8c4..67d123e5e6b4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -318,7 +318,7 @@ static struct ctl_table ipv4_table[] = { .procname = "tcp_fastopen", .data = &sysctl_tcp_fastopen, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = proc_dointvec, }, { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 22d204fc4c20..91691a4b2873 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -393,7 +393,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); + minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -677,9 +677,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, if (tcp_should_autocork(sk, skb, size_goal)) { /* avoid atomic op if TSQ_THROTTLED bit is already set */ - if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { + if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); - set_bit(TSQ_THROTTLED, &tp->tsq_flags); + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. @@ -2507,6 +2507,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, lock_sock(sk); + /* Hack optname to use TCP_NODELAY for everything */ + optname=TCP_NODELAY; + switch (optname) { case TCP_MAXSEG: /* Values greater than interface MTU won't take effect. However @@ -3440,7 +3443,7 @@ void __init tcp_init(void) max_rshare = min(6UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_wmem[1] = 16*1024; + sysctl_tcp_wmem[1] = 20*1024; sysctl_tcp_wmem[2] = max(64*1024, max_wshare); sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 47f40e105044..704b321d37e9 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -22,10 +22,9 @@ * There is a public e-mail list for discussing BBR development and testing: * https://groups.google.com/forum/#!forum/bbr-dev * - * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, - * since pacing is integral to the BBR design and implementation. - * BBR without pacing would not function properly, and may incur unnecessary - * high packet loss rates. + * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, + * otherwise TCP stack falls back to an internal pacing using one high + * resolution timer per TCP socket and may use more resources. */ #include #include @@ -66,12 +65,10 @@ struct bbr { u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? */ - restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ - tso_segs_goal:7, /* segments we want in each skb we send */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:5, + unused:13, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -147,6 +144,8 @@ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; +static void bbr_check_probe_rtt_done(struct sock *sk); + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -177,7 +176,11 @@ static u32 bbr_bw(const struct sock *sk) */ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) { - rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache); + unsigned int mss = tcp_sk(sk)->mss_cache; + + if (!tcp_needs_internal_pacing(sk)) + mss = tcp_mss_to_mtu(sk, mss); + rate *= mss; rate *= gain; rate >>= BBR_SCALE; rate *= USEC_PER_SEC; @@ -232,23 +235,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* Return count of segments we want in the skbs we send, or 0 for default. */ -static u32 bbr_tso_segs_goal(struct sock *sk) +/* override sysctl_tcp_min_tso_segs */ +static u32 bbr_min_tso_segs(struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); - - return bbr->tso_segs_goal; + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } -static void bbr_set_tso_segs_goal(struct sock *sk) +static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 min_segs; + u32 segs, bytes; - min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; - bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), - 0x7FU); + /* Sort of tcp_tso_autosize() but ignoring + * driver provided sk_gso_max_size. + */ + bytes = min_t(u32, sk->sk_pacing_rate >> 10, + GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); + + return min(segs, 0x7FU); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -275,33 +280,24 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) */ if (bbr->mode == BBR_PROBE_BW) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); } } -/* Find target cwnd. Right-size the cwnd based on min RTT and the - * estimated bottleneck bandwidth: +/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: * - * cwnd = bw * min_rtt * gain = BDP * gain + * bdp = ceil(bw * min_rtt * gain) * * The key factor, gain, controls the amount of queue. While a small gain * builds a smaller queue, it becomes more vulnerable to noise in RTT * measurements (e.g., delayed ACKs or other ACK compression effects). This * noise may cause BBR to under-estimate the rate. - * - * To achieve full performance in high-speed paths, we budget enough cwnd to - * fit full-sized skbs in-flight on both end hosts to fully utilize the path: - * - one skb in sending host Qdisc, - * - one skb in sending host TSO/GSO engine - * - one skb being received by receiver host LRO/GRO/delayed-ACK engine - * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because - * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, - * which allows 2 outstanding 2-packet sequences, to try to keep pipe - * full even with ACK-every-other-packet delayed ACKs. */ -static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) { struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd; + u32 bdp; u64 w; /* If we've never had a valid RTT sample, cap cwnd at the initial @@ -315,22 +311,52 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) w = (u64)bw * bbr->min_rtt_us; - /* Apply a gain to the given value, then remove the BW_SCALE shift. */ - cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + /* Apply a gain to the given value, remove the BW_SCALE shift, and + * round the value up to avoid a negative feedback loop. + */ + bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + return bdp; +} + +/* To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) +{ + struct bbr *bbr = inet_csk_ca(sk); /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr->tso_segs_goal; + cwnd += 3 * bbr_tso_segs_goal(sk); /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ - if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT) + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) cwnd += 2; return cwnd; } +/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ +static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) +{ + u32 inflight; + + inflight = bbr_bdp(sk, bw, gain); + inflight = bbr_quantization_budget(sk, inflight); + + return inflight; +} + /* An optimization in BBR to reduce losses: On the first round of recovery, we * follow the packet conservation principle: send P packets per P packets acked. * After that, we slow-start and send at most 2*P packets per P packets acked. @@ -362,17 +388,11 @@ static bool bbr_set_cwnd_to_recover_or_restore( cwnd = tcp_packets_in_flight(tp) + acked; } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { /* Exiting loss recovery; restore cwnd saved before recovery. */ - bbr->restore_cwnd = 1; + cwnd = max(cwnd, bbr->prior_cwnd); bbr->packet_conservation = 0; } bbr->prev_ca_state = state; - if (bbr->restore_cwnd) { - /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->restore_cwnd = 0; - } - if (bbr->packet_conservation) { *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); return true; /* yes, using packet conservation */ @@ -389,16 +409,17 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = 0, target_cwnd = 0; + u32 cwnd = tp->snd_cwnd, target_cwnd = 0; if (!acked) - return; + goto done; /* no packet fully ACKed; just apply caps */ if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; /* If we're below target cwnd, slow start cwnd toward target cwnd. */ - target_cwnd = bbr_target_cwnd(sk, bw, gain); + target_cwnd = bbr_bdp(sk, bw, gain); + target_cwnd = bbr_quantization_budget(sk, target_cwnd); if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ cwnd = min(cwnd + acked, target_cwnd); else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) @@ -439,14 +460,14 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, if (bbr->pacing_gain > BBR_UNIT) return is_full_length && (rs->losses || /* perhaps pacing_gain*BDP won't fit */ - inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); /* A pacing_gain < 1.0 tries to drain extra queue we added if bw * probing didn't find more bw. If inflight falls to match BDP then we * estimate queue is drained; persisting would underutilize the pipe. */ return is_full_length || - inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); + inflight <= bbr_inflight(sk, bw, BBR_UNIT); } static void bbr_advance_cycle_phase(struct sock *sk) @@ -456,8 +477,6 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : - bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -475,8 +494,6 @@ static void bbr_reset_startup_mode(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_STARTUP; - bbr->pacing_gain = bbr_high_gain; - bbr->cwnd_gain = bbr_high_gain; } static void bbr_reset_probe_bw_mode(struct sock *sk) @@ -484,8 +501,6 @@ static void bbr_reset_probe_bw_mode(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_PROBE_BW; - bbr->pacing_gain = BBR_UNIT; - bbr->cwnd_gain = bbr_cwnd_gain; bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ } @@ -702,15 +717,29 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ - bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ - bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + tcp_sk(sk)->snd_ssthresh = + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && tcp_packets_in_flight(tcp_sk(sk)) <= - bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr_reset_mode(sk); +} + /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and * periodically drain the bottleneck queue, to converge to measure the true * min_rtt (unloaded propagation delay). This allows the flows to keep queues @@ -737,19 +766,18 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) bool filter_expired; /* Track min RTT seen in the min_rtt_win_sec filter window: */ - filter_expired = after(tcp_time_stamp, + filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us < bbr->min_rtt_us || filter_expired)) { + (rs->rtt_us < bbr->min_rtt_us || + (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; - bbr->min_rtt_stamp = tcp_time_stamp; + bbr->min_rtt_stamp = tcp_jiffies32; } if (bbr_probe_rtt_mode_ms > 0 && filter_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ - bbr->pacing_gain = BBR_UNIT; - bbr->cwnd_gain = BBR_UNIT; bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; } @@ -761,19 +789,15 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) /* Maintain min packets in flight for max(200 ms, 1 round). */ if (!bbr->probe_rtt_done_stamp && tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { - bbr->probe_rtt_done_stamp = tcp_time_stamp + + bbr->probe_rtt_done_stamp = tcp_jiffies32 + msecs_to_jiffies(bbr_probe_rtt_mode_ms); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; } else if (bbr->probe_rtt_done_stamp) { if (bbr->round_start) bbr->probe_rtt_round_done = 1; - if (bbr->probe_rtt_round_done && - after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { - bbr->min_rtt_stamp = tcp_time_stamp; - bbr->restore_cwnd = 1; /* snap to prior_cwnd */ - bbr_reset_mode(sk); - } + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); } } /* Restart after idle ends only once we process a new S/ACK for data */ @@ -781,6 +805,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) bbr->idle_restart = 0; } +static void bbr_update_gains(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + switch (bbr->mode) { + case BBR_STARTUP: + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; + break; + case BBR_DRAIN: + bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ + bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ + break; + case BBR_PROBE_BW: + bbr->pacing_gain = (bbr->lt_use_bw ? + BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]); + bbr->cwnd_gain = bbr_cwnd_gain; + break; + case BBR_PROBE_RTT: + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + break; + default: + WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); + break; + } +} + static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) { bbr_update_bw(sk, rs); @@ -788,6 +841,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) bbr_check_full_bw_reached(sk, rs); bbr_check_drain(sk, rs); bbr_update_min_rtt(sk, rs); + bbr_update_gains(sk); } static void bbr_main(struct sock *sk, const struct rate_sample *rs) @@ -799,7 +853,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs) bw = bbr_bw(sk); bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_tso_segs_goal(sk); bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); } @@ -809,7 +862,7 @@ static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; - bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; bbr->rtt_cnt = 0; bbr->next_rtt_delivered = tp->delivered; bbr->prev_ca_state = TCP_CA_Open; @@ -818,14 +871,13 @@ static void bbr_init(struct sock *sk) bbr->probe_rtt_done_stamp = 0; bbr->probe_rtt_round_done = 0; bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_time_stamp; + bbr->min_rtt_stamp = tcp_jiffies32; minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); - bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; bbr->full_bw_reached = 0; @@ -835,6 +887,8 @@ static void bbr_init(struct sock *sk) bbr->cycle_idx = 0; bbr_reset_lt_bw_sampling(sk); bbr_reset_startup_mode(sk); + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } static u32 bbr_sndbuf_expand(struct sock *sk) @@ -860,7 +914,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) static u32 bbr_ssthresh(struct sock *sk) { bbr_save_cwnd(sk); - return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ + return tcp_sk(sk)->snd_ssthresh; } static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, @@ -909,7 +963,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .tso_segs_goal = bbr_tso_segs_goal, + .min_tso_segs = bbr_min_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 36087bca9f48..609965f0e298 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -84,14 +84,14 @@ static void bictcp_init(struct sock *sk) static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { if (ca->last_cwnd == cwnd && - (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; ca->last_cwnd = cwnd; - ca->last_time = tcp_time_stamp; + ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) /* record the beginning of an epoch */ - ca->epoch_start = tcp_time_stamp; + ca->epoch_start = tcp_jiffies32; /* start off normal */ if (cwnd <= low_window) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 05643dd7b15d..254012eaa19b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) { int ret = 0; - /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { + /* all algorithms must implement these */ + if (!ca->ssthresh || !ca->undo_cwnd || + !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } @@ -439,10 +440,19 @@ u32 tcp_reno_ssthresh(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); +u32 tcp_reno_undo_cwnd(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return max(tp->snd_cwnd, tp->snd_ssthresh << 1); +} +EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); + struct tcp_congestion_ops tcp_reno = { .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = tcp_reno_undo_cwnd, }; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index d710c519a035..0c64cd9fc3d3 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -155,7 +155,7 @@ static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_jiffies32; s32 delta; delta = now - tcp_sk(sk)->lsndtime; @@ -231,21 +231,21 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && - (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) + (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; /* The CUBIC function can update ca->cnt at most once per jiffy. * On all cwnd reduction events, ca->epoch_start is set to 0, * which will force a recalculation of ca->cnt. */ - if (ca->epoch_start && tcp_time_stamp == ca->last_time) + if (ca->epoch_start && tcp_jiffies32 == ca->last_time) goto tcp_friendliness; ca->last_cwnd = cwnd; - ca->last_time = tcp_time_stamp; + ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) { - ca->epoch_start = tcp_time_stamp; /* record beginning */ + ca->epoch_start = tcp_jiffies32; /* record beginning */ ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ @@ -276,7 +276,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) * if the cwnd < 1 million packets !!! */ - t = (s32)(tcp_time_stamp - ca->epoch_start); + t = (s32)(tcp_jiffies32 - ca->epoch_start); t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ t <<= BICTCP_HZ; @@ -451,7 +451,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) return; /* Discard delay samples right after fast recovery */ - if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) return; delay = (sample->rtt_us << 3) / USEC_PER_MSEC; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 910ef01759e7..f0de9fb92f0d 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -292,6 +292,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = { static struct tcp_congestion_ops dctcp_reno __read_mostly = { .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = tcp_reno_undo_cwnd, .get_info = dctcp_get_info, .owner = THIS_MODULE, .name = "dctcp-reno", diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8ea4e9787f82..20d2d70e30cf 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -9,7 +9,11 @@ #include #include -int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE; +int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE | + TFO_SERVER_ENABLE | + TFO_CLIENT_NO_COOKIE | + TFO_SERVER_COOKIE_NOT_REQD | + TFO_SERVER_WO_SOCKOPT1; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a4d8e76738f..3eb78cde6ff0 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -104,7 +104,7 @@ static void measure_achieved_throughput(struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_jiffies32; if (icsk->icsk_ca_state == TCP_CA_Open) ca->pkts_acked = sample->pkts_acked; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 083831e359df..0f7175c3338e 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) static struct tcp_congestion_ops tcp_hybla __read_mostly = { .init = hybla_init, .ssthresh = tcp_reno_ssthresh, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = hybla_cong_avoid, .set_state = hybla_state, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d15cb57e66f..92e4b80a0702 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -116,6 +116,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ +#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -471,7 +472,7 @@ void tcp_init_buffer_space(struct sock *sk) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } /* 5. Recalculate window clamp after socket hit its memory bounds. */ @@ -677,7 +678,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_rcv_rtt_measure(tp); - now = tcp_time_stamp; + now = tcp_jiffies32; if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize @@ -1984,7 +1985,7 @@ void tcp_enter_loss(struct sock *sk) } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->retrans_out = 0; tp->lost_out = 0; @@ -2450,17 +2451,14 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_ca_ops->undo_cwnd) - tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); - else - tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); + tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; tcp_ecn_withdraw_cwr(tp); } } - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; } @@ -2612,7 +2610,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } @@ -2685,7 +2683,7 @@ static void tcp_mtup_probe_success(struct sock *sk) tp->snd_cwnd = max_t(u32, 1U, val); tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size; @@ -2987,12 +2985,19 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, *rexmit = REXMIT_LOST; } -static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) +static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { struct tcp_sock *tp = tcp_sk(sk); u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; - minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { + /* If the remote keeps returning delayed ACKs, eventually + * the min filter would pick it up and overestimate the + * prop. delay when it expires. Skip suspected delayed ACKs. + */ + return; + } + minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? : jiffies_to_usecs(1)); } @@ -3027,7 +3032,7 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * always taken together with ACK, SACK, or TS-opts. Any negative * values will be skipped with the seq_rtt_us < 0 check above. */ - tcp_update_rtt_min(sk, ca_rtt_us); + tcp_update_rtt_min(sk, ca_rtt_us, flag); tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); @@ -3057,7 +3062,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) const struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); - tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; + tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32; } /* Restart timer after forward progress on connection. @@ -3262,6 +3267,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); + + if (pkts_acked == 1 && last_in_flight < tp->mss_cache && + last_in_flight && !prior_sacked && fully_acked && + sack->rate->prior_delivered + 1 == tp->delivered && + !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) { + /* Conservatively mark a delayed ACK. It's typically + * from a lone runt packet over the round trip to + * a receiver w/o out-of-order or CE events. + */ + flag |= FLAG_ACK_MAYBE_DELAYED; + } } if (sack->first_sackt.v64) { sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); @@ -3497,7 +3513,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, u32 *last_oow_ack_time) { if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { NET_INC_STATS(net, mib_idx); @@ -3505,7 +3521,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, } } - *last_oow_ack_time = tcp_time_stamp; + *last_oow_ack_time = tcp_jiffies32; return false; /* not rate-limited: go ahead, send dupack now! */ } @@ -3750,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) */ sk->sk_err_soft = 0; icsk->icsk_probes_out = 0; - tp->rcv_tstamp = tcp_time_stamp; + tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) goto no_queue; @@ -3775,6 +3791,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ + rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs); tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); @@ -5168,7 +5185,7 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { tcp_sndbuf_expand(sk); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } sk->sk_write_space(sk); @@ -5682,7 +5699,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; + icsk->icsk_ack.lrcvtime = tcp_jiffies32; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5699,7 +5716,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) /* Prevent spurious tcp_cwnd_restart() on first data * packet. */ - tp->lsndtime = tcp_time_stamp; + tp->lsndtime = tcp_jiffies32; tcp_init_buffer_space(sk); @@ -6140,7 +6157,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ - tp->lsndtime = tcp_time_stamp; + tp->lsndtime = tcp_jiffies32; tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 845d360f4b9f..656c7bfd1b7a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -451,7 +451,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); } else { - if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } goto out; @@ -2480,7 +2480,7 @@ static int __net_init tcp_sk_init(struct net *net) *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } - net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_ecn = 1; net->ipv4.sysctl_tcp_ecn_fallback = 1; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 7d86fc505397..d6fb6c067af4 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -318,6 +318,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) static struct tcp_congestion_ops tcp_lp __read_mostly = { .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_lp_cong_avoid, .pkts_acked = tcp_lp_pkts_acked, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index bf1f3b2b29d1..0e4a4f897016 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -574,7 +574,7 @@ void tcp_init_metrics(struct sock *sk) tp->snd_cwnd = 1; else tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, @@ -1142,7 +1142,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = tcpmhash_entries; if (!slots) { - if (totalram_pages >= 128 * 1024) + if (totalram_pages() >= 128 * 1024) slots = 16 * 1024; else slots = 8 * 1024; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 25d028a5577c..d9fa9e29448d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -471,9 +471,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); + minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; - newicsk->icsk_ack.lrcvtime = tcp_time_stamp; + newicsk->icsk_ack.lrcvtime = tcp_jiffies32; newtp->packets_out = 0; newtp->retrans_out = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 67b391620d1e..7e3c91b13fe7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -60,7 +60,7 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 262144; int sysctl_tcp_tso_win_divisor __read_mostly = 3; /* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +int sysctl_tcp_slow_start_after_idle __read_mostly; static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_cwnd_used = 0; } @@ -160,7 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - const u32 now = tcp_time_stamp; + const u32 now = tcp_jiffies32; if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); @@ -784,19 +784,19 @@ static void tcp_tasklet_func(unsigned long data) tcp_tsq_handler(sk); } else { /* defer the work to tcp_release_cb() */ - set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); + set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags); } bh_unlock_sock(sk); - clear_bit(TSQ_QUEUED, &tp->tsq_flags); + clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags); sk_free(sk); } } -#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ - (1UL << TCP_WRITE_TIMER_DEFERRED) | \ - (1UL << TCP_DELACK_TIMER_DEFERRED) | \ - (1UL << TCP_MTU_REDUCED_DEFERRED)) +#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ + TCPF_WRITE_TIMER_DEFERRED | \ + TCPF_DELACK_TIMER_DEFERRED | \ + TCPF_MTU_REDUCED_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -806,18 +806,17 @@ static void tcp_tasklet_func(unsigned long data) */ void tcp_release_cb(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nflags; /* perform an atomic operation only if at least one flag is set */ do { - flags = tp->tsq_flags; + flags = sk->sk_tsq_flags; if (!(flags & TCP_DEFERRED_ALL)) return; nflags = flags & ~TCP_DEFERRED_ALL; - } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); + } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); - if (flags & (1UL << TCP_TSQ_DEFERRED)) + if (flags & TCPF_TSQ_DEFERRED) tcp_tsq_handler(sk); /* Here begins the tricky part : @@ -831,15 +830,15 @@ void tcp_release_cb(struct sock *sk) */ sock_release_ownership(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { + if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); __sock_put(sk); } - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { + if (flags & TCPF_DELACK_TIMER_DEFERRED) { tcp_delack_timer_handler(sk); __sock_put(sk); } - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { + if (flags & TCPF_MTU_REDUCED_DEFERRED) { inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } @@ -886,8 +885,8 @@ void tcp_wfree(struct sk_buff *skb) if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; - if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && - !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { + if (test_and_clear_bit(TSQ_THROTTLED, &sk->sk_tsq_flags) && + !test_and_set_bit(TSQ_QUEUED, &sk->sk_tsq_flags)) { unsigned long flags; struct tsq_tasklet *tsq; @@ -903,6 +902,58 @@ void tcp_wfree(struct sk_buff *skb) sk_free(sk); } +/* Note: Called under hard irq. + * We can not call TCP stack right away. + */ +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) +{ + struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer); + struct sock *sk = (struct sock *)tp; + unsigned long nval, oval; + + for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { + struct tsq_tasklet *tsq; + bool empty; + + if (oval & TSQF_QUEUED) + break; + + nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED; + nval = cmpxchg(&sk->sk_tsq_flags, oval, nval); + if (nval != oval) + continue; + + if (!atomic_inc_not_zero(&sk->sk_wmem_alloc)) + break; + /* queue this socket to tasklet queue */ + tsq = this_cpu_ptr(&tsq_tasklet); + empty = list_empty(&tsq->head); + list_add(&tp->tsq_node, &tsq->head); + if (empty) + tasklet_schedule(&tsq->tasklet); + break; + } + return HRTIMER_NORESTART; +} + +static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) +{ + u64 len_ns; + u32 rate; + + if (!tcp_needs_internal_pacing(sk)) + return; + rate = sk->sk_pacing_rate; + if (!rate || rate == ~0U) + return; + + len_ns = (u64)skb->len * NSEC_PER_SEC; + do_div(len_ns, rate); + hrtimer_start(&tcp_sk(sk)->pacing_timer, + ktime_add_ns(ktime_get(), len_ns), + HRTIMER_MODE_ABS_PINNED); +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -1024,6 +1075,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); tp->data_segs_out += tcp_skb_pcount(skb); + tcp_internal_pacing(sk, skb); } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) @@ -1422,7 +1474,7 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1523,7 +1575,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) } tp->snd_cwnd_used = 0; } - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) @@ -1549,14 +1601,14 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd_stamp = tcp_jiffies32; } else { /* Network starves. */ if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; if (sysctl_tcp_slow_start_after_idle && - (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) + (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) tcp_cwnd_application_limited(sk); } } @@ -1601,8 +1653,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs) +static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1618,7 +1670,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, return segs; } -EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1626,11 +1677,13 @@ EXPORT_SYMBOL(tcp_tso_autosize); static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + u32 min_tso, tso_segs; - if (!tso_segs) - tso_segs = tcp_tso_autosize(sk, mss_now, - sysctl_tcp_min_tso_segs); + min_tso = ca_ops->min_tso_segs ? + ca_ops->min_tso_segs(sk) : + sysctl_tcp_min_tso_segs; + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -1860,7 +1913,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, /* Avoid bursty behavior by allowing defer * only if the last write was recent. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1929,7 +1982,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) s32 delta; interval = net->ipv4.sysctl_tcp_probe_interval; - delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -1941,7 +1994,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); /* Update probe time stamp */ - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } } @@ -2121,6 +2174,12 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +static bool tcp_pacing_check(const struct sock *sk) +{ + return tcp_needs_internal_pacing(sk) && + hrtimer_active(&tcp_sk(sk)->pacing_timer); +} + /* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. * (These limits are doubled for retransmits) @@ -2142,7 +2201,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit <<= factor; if (atomic_read(&sk->sk_wmem_alloc) > limit) { - set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); /* It is possible TX completion already happened * before we set TSQ_THROTTLED, so we must * test again the condition. @@ -2195,6 +2254,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; + if (tcp_pacing_check(sk)) + break; + tso_segs = tcp_init_tso_segs(skb, mss_now); BUG_ON(!tso_segs); @@ -2308,14 +2370,12 @@ bool tcp_schedule_loss_probe(struct sock *sk) return false; /* Schedule a loss probe in 2*RTT for SACK capable connections - * in Open state, that are either limited by cwnd or application. + * not in loss recovery, that are either limited by cwnd or application. */ if (sysctl_tcp_early_retrans < 3 || !tp->packets_out || - !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) - return false; - - if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && - tcp_send_head(sk)) + !tcp_is_sack(tp) || + (inet_csk(sk)->icsk_ca_state != TCP_CA_Open && + inet_csk(sk)->icsk_ca_state != TCP_CA_CWR)) return false; /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account @@ -2329,10 +2389,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) timeout = max_t(u32, timeout, msecs_to_jiffies(10)); /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_time_stamp + timeout; + tlp_time_stamp = tcp_jiffies32 + timeout; rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_time_stamp; + s32 delta = rto_time_stamp - tcp_jiffies32; if (delta > 0) timeout = delta; } @@ -2917,6 +2977,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == tcp_send_head(sk)) break; + + if (tcp_pacing_check(sk)) + break; + /* we could do better than to assign each time */ if (!hole) tp->retransmit_skb_hint = skb; @@ -3302,7 +3366,7 @@ static void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; else - tp->rcv_tstamp = tcp_time_stamp; + tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; @@ -3552,8 +3616,6 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) /* We do not want pure acks influencing TCP Small Queues or fq/pacing * too much. * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 - * We also avoid tcp_wfree() overhead (cache line miss accessing - * tp->tsq_flags) by using regular sock_wfree() */ skb_set_tcp_pure_ack(buff); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index fb75b60d53f8..dfdf4b0c55e9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -103,7 +103,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ @@ -113,7 +113,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) if (tcp_check_oom(sk, shift)) { /* Catch exceptional cases, when connection requires reset. * 1. Last segment was sent recently. */ - if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || + if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. */ (!tp->snd_wnd && !tp->packets_out)) do_reset = true; @@ -162,7 +162,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; - icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct net *net = sock_net(sk); @@ -359,7 +359,7 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); @@ -500,7 +500,7 @@ void tcp_retransmit_timer(struct sock *sk) tp->snd_una, tp->snd_nxt); } #endif - if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { + if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) { tcp_write_err(sk); goto out; } @@ -642,7 +642,7 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); @@ -759,4 +759,7 @@ void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); + hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED); + tcp_sk(sk)->pacing_timer.function = tcp_pace_kick; } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 3ecb61ee42fb..ee113ff15fd0 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_vegas_cong_avoid, .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 4b03a2e2a050..ecccb8bbdce2 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -43,8 +43,12 @@ struct westwood { }; /* TCP Westwood functions and constants */ -#define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ -#define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ +int tcp_westwood_rtt_min = 30; +int tcp_westwood_init_rtt = 200; + +/* Let's make them tunable */ +module_param_named(rtt_min, tcp_westwood_rtt_min, int, 0644); +module_param_named(rtt_init, tcp_westwood_init_rtt, int, 0644); /* * @tcp_westwood_create @@ -67,8 +71,8 @@ static void tcp_westwood_init(struct sock *sk) w->accounted = 0; w->cumul_ack = 0; w->reset_rtt_min = 1; - w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; - w->rtt_win_sx = tcp_time_stamp; + w->rtt_min = w->rtt = msecs_to_jiffies(tcp_westwood_init_rtt); + w->rtt_win_sx = tcp_jiffies32; w->snd_una = tcp_sk(sk)->snd_una; w->first_ack = 1; } @@ -116,7 +120,7 @@ static void tcp_westwood_pkts_acked(struct sock *sk, static void westwood_update_window(struct sock *sk) { struct westwood *w = inet_csk_ca(sk); - s32 delta = tcp_time_stamp - w->rtt_win_sx; + s32 delta = tcp_jiffies32 - w->rtt_win_sx; /* Initialize w->snd_una with the first acked sequence number in order * to fix mismatch between tp->snd_una and w->snd_una for the first @@ -136,11 +140,11 @@ static void westwood_update_window(struct sock *sk) * Obviously on a LAN we reasonably will always have * right_bound = left_bound + WESTWOOD_RTT_MIN */ - if (w->rtt && delta > max_t(u32, w->rtt, TCP_WESTWOOD_RTT_MIN)) { + if (w->rtt && delta > max_t(u32, w->rtt, msecs_to_jiffies(tcp_westwood_rtt_min))) { westwood_filter(w, delta); w->bk = 0; - w->rtt_win_sx = tcp_time_stamp; + w->rtt_win_sx = tcp_jiffies32; } } @@ -265,8 +269,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr, if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = 1; info->vegas.tcpv_rttcnt = 0; - info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt), - info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), + info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt); + info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); @@ -278,6 +282,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = tcp_westwood_event, .in_ack_event = tcp_westwood_ack, .get_info = tcp_westwood_info, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2f2042c7307c..c1cf09f2f2eb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1591,6 +1591,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } + prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 509610934110..ed180e7a6e14 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -194,6 +194,7 @@ static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_POLICY] = "policy", }; +#ifdef CONFIG_DEBUG_KERNEL static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -203,6 +204,7 @@ static struct nf_loginfo trace_loginfo = { }, }, }; +#endif /* Mildly perf critical (only if packet tracing is on) */ static inline int @@ -259,9 +261,11 @@ static void trace_packet(struct net *net, &chainname, &comment, &rulenum) != 0) break; +#ifdef CONFIG_DEBUG_KERNEL nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); +#endif } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ce3484d1c22..b84ba8725cc2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -424,7 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, - &tp->tsq_flags)) + &sk->sk_tsq_flags)) sock_hold(sk); goto out; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a1870910ea00..5c84937b04c3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -612,6 +612,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } + prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 8a1f9c0d2a13..d369fefec9b0 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -59,11 +59,9 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, static int __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass) { - int i; + int count[XFRM_MAX_DEPTH] = { }; int class[XFRM_MAX_DEPTH]; - int count[maxclass]; - - memset(count, 0, sizeof(count)); + int i; for (i = 0; i < n; i++) { int c; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ba46d5fa646b..ca54273657bd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2002,11 +2002,11 @@ int nf_conntrack_init_start(void) * >= 4GB machines have 65536 buckets. */ nf_conntrack_htable_size - = (((totalram_pages << PAGE_SHIFT) / 16384) + = (((totalram_pages() << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) + if (totalram_pages() > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) nf_conntrack_htable_size = 65536; - else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) + else if (totalram_pages() > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 16384; if (nf_conntrack_htable_size < 32) nf_conntrack_htable_size = 32; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9837a61cb3e3..a124cbd7408a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -39,6 +39,8 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; +#define NFNL_MAX_ATTR_COUNT 32 + static struct { struct mutex mutex; const struct nfnetlink_subsystem __rcu *subsys; @@ -78,6 +80,13 @@ EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { + u8 cb_id; + + /* Sanity-check attr_count size to avoid stack buffer overflow. */ + for (cb_id = 0; cb_id < n->cb_count; cb_id++) + if (WARN_ON(n->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT)) + return -EINVAL; + nfnl_lock(n->subsys_id); if (table[n->subsys_id].subsys) { nfnl_unlock(n->subsys_id); @@ -186,11 +195,17 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; __u8 subsys_id = NFNL_SUBSYS_ID(type); + /* Sanity-check NFNL_MAX_ATTR_COUNT */ + if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) { + rcu_read_unlock(); + return -ENOMEM; + } + err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); if (err < 0) { @@ -366,10 +381,16 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; + /* Sanity-check NFTA_MAX_ATTR */ + if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) { + err = -ENOMEM; + goto ack; + } + err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); if (err < 0) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2820fa6f399c..5c021ffcb51f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1038,7 +1038,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) return NULL; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((size >> PAGE_SHIFT) + 2 > totalram_pages) + if ((size >> PAGE_SHIFT) + 2 > totalram_pages()) return NULL; if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 97df6f9dbdde..d8cfddf61b30 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -253,9 +253,9 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, if (cfg->size) { size = cfg->size; } else { - size = (totalram_pages << PAGE_SHIFT) / 16384 / + size = (totalram_pages() << PAGE_SHIFT) / 16384 / sizeof(struct list_head); - if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE) + if (totalram_pages() > 1024 * 1024 * 1024 / PAGE_SIZE) size = 8192; if (size < 16) size = 16; diff --git a/net/rmnet_data/rmnet_data_main.c b/net/rmnet_data/rmnet_data_main.c index b5baa6f9410f..98eac8457d89 100644 --- a/net/rmnet_data/rmnet_data_main.c +++ b/net/rmnet_data/rmnet_data_main.c @@ -25,7 +25,7 @@ #include "rmnet_data_trace.h" /* Module Parameters */ -unsigned int rmnet_data_log_level = RMNET_LOG_LVL_ERR | RMNET_LOG_LVL_HI; +unsigned int rmnet_data_log_level; module_param(rmnet_data_log_level, uint, 0644); MODULE_PARM_DESC(log_level, "Logging level"); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d6a64771463f..c8b638710842 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -368,7 +368,7 @@ struct rxrpc_connection { struct sk_buff_head rx_queue; /* received conn-level packets */ const struct rxrpc_security *security; /* applied security module */ struct key *server_key; /* security for this service */ - struct crypto_skcipher *cipher; /* encryption handle */ + struct crypto_sync_skcipher *cipher; /* encryption handle */ struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long flags; unsigned long events; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 90df95e18ea4..0c8ee797bbf3 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -46,7 +46,7 @@ struct rxkad_level2_hdr { * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE * packets */ -static struct crypto_skcipher *rxkad_ci; +static struct crypto_sync_skcipher *rxkad_ci; static DEFINE_MUTEX(rxkad_ci_mutex); /* @@ -54,7 +54,7 @@ static DEFINE_MUTEX(rxkad_ci_mutex); */ static int rxkad_init_connection_security(struct rxrpc_connection *conn) { - struct crypto_skcipher *ci; + struct crypto_sync_skcipher *ci; struct rxrpc_key_token *token; int ret; @@ -63,14 +63,14 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) token = conn->params.key->payload.data[0]; conn->security_ix = token->security_index; - ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); + ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); if (IS_ERR(ci)) { _debug("no cipher"); ret = PTR_ERR(ci); goto error; } - if (crypto_skcipher_setkey(ci, token->kad->session_key, + if (crypto_sync_skcipher_setkey(ci, token->kad->session_key, sizeof(token->kad->session_key)) < 0) BUG(); @@ -104,7 +104,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) static int rxkad_prime_packet_security(struct rxrpc_connection *conn) { struct rxrpc_key_token *token; - SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); struct scatterlist sg; struct rxrpc_crypt iv; __be32 *tmpbuf; @@ -128,7 +128,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) tmpbuf[3] = htonl(conn->security_ix); sg_init_one(&sg, tmpbuf, tmpsize); - skcipher_request_set_tfm(req, conn->cipher); + skcipher_request_set_sync_tfm(req, conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); @@ -169,7 +169,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, memset(&iv, 0, sizeof(iv)); sg_init_one(&sg, sechdr, 8); - skcipher_request_set_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -214,7 +214,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, memcpy(&iv, token->kad->session_key, sizeof(iv)); sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); - skcipher_request_set_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x); crypto_skcipher_encrypt(req); @@ -252,7 +252,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, void *sechdr) { struct rxrpc_skb_priv *sp; - SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg; u32 x, y; @@ -281,7 +281,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); - skcipher_request_set_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -351,7 +351,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); - skcipher_request_set_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, 8, iv.x); crypto_skcipher_decrypt(req); @@ -444,7 +444,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, token = call->conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - skcipher_request_set_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); @@ -497,7 +497,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { - SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg; u16 cksum; @@ -519,7 +519,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); - skcipher_request_set_tfm(req, call->conn->cipher); + skcipher_request_set_sync_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); crypto_skcipher_encrypt(req); @@ -735,7 +735,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn, struct rxkad_response *resp, const struct rxkad_key *s2) { - SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg[1]; @@ -744,7 +744,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn, sg_init_table(sg, 1); sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); - skcipher_request_set_tfm(req, conn->cipher); + skcipher_request_set_sync_tfm(req, conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); @@ -989,7 +989,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, struct rxkad_response *resp, const struct rxrpc_crypt *session_key) { - SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci); struct scatterlist sg[1]; struct rxrpc_crypt iv; @@ -999,7 +999,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, ASSERT(rxkad_ci != NULL); mutex_lock(&rxkad_ci_mutex); - if (crypto_skcipher_setkey(rxkad_ci, session_key->x, + if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, sizeof(*session_key)) < 0) BUG(); @@ -1007,7 +1007,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, sg_init_table(sg, 1); sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); - skcipher_request_set_tfm(req, rxkad_ci); + skcipher_request_set_sync_tfm(req, rxkad_ci); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_decrypt(req); @@ -1159,7 +1159,7 @@ static void rxkad_clear(struct rxrpc_connection *conn) _enter(""); if (conn->cipher) - crypto_free_skcipher(conn->cipher); + crypto_free_sync_skcipher(conn->cipher); } /* @@ -1169,7 +1169,7 @@ static int rxkad_init(void) { /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ - rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); + rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); return PTR_ERR_OR_ZERO(rxkad_ci); } @@ -1179,7 +1179,7 @@ static int rxkad_init(void) static void rxkad_exit(void) { if (rxkad_ci) - crypto_free_skcipher(rxkad_ci); + crypto_free_sync_skcipher(rxkad_ci); } /* diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 9f53d4ec0e37..f37a1b8ae828 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -256,6 +256,17 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) */ sk = (struct sock *)((hash << 1) | 1UL); skb_orphan(skb); + } else if (sk->sk_state == TCP_CLOSE) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* + * Sockets in TCP_CLOSE are non connected. + * Typical use case is UDP sockets, they can send packets + * with sendto() to many different destinations. + * We probably could use a generic bit advertising + * non connected sockets, instead of sk_state == TCP_CLOSE, + * if we care enough. + */ + sk = (struct sock *)((hash << 1) | 1UL); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -276,7 +287,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) * It not, we need to refill credit with * initial quantum */ - if (unlikely(skb->sk && + if (unlikely(skb->sk == sk && f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; @@ -299,7 +310,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) } fq_flow_set_detached(f); f->sk = sk; - if (skb->sk) + if (skb->sk == sk) f->socket_hash = sk->sk_hash; f->credit = q->initial_quantum; @@ -405,9 +416,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stat_tcp_retrans++; qdisc_qstats_backlog_inc(sch, skb); if (fq_flow_is_detached(f)) { + struct sock *sk = skb->sk; + fq_flow_add_tail(&q->new_flows, f); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); + if (sk && q->rate_enable) { + if (unlikely(smp_load_acquire(&sk->sk_pacing_status) != + SK_PACING_FQ)) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); + } q->inactive_flows--; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0eabd3dd5450..4ce9efa0c6fd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -316,11 +316,8 @@ static void dev_watchdog(unsigned long arg) } } - if (some_queue_timedout) { - WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev), i); + if (some_queue_timedout) dev->netdev_ops->ndo_tx_timeout(dev); - } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 95fe75d441eb..c7485107a9cf 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1337,6 +1337,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; u64 rate64, ceil64; + int warn = 0; /* extract all subattrs from opt attr */ if (!opt) @@ -1490,13 +1491,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->quantum = min_t(u64, quantum, INT_MAX); if (!hopt->quantum && cl->quantum < 1000) { - pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n", - cl->common.classid); + warn = -1; cl->quantum = 1000; } if (!hopt->quantum && cl->quantum > 200000) { - pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n", - cl->common.classid); + warn = 1; cl->quantum = 200000; } if (hopt->quantum) @@ -1510,6 +1509,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_unlock(sch); + if (warn) + pr_debug("HTB: quantum of class %X is %s. Consider r2q change.\n", + cl->common.classid, (warn == -1 ? "small" : "big")); + qdisc_class_hash_grow(sch, &q->clhash); *arg = (unsigned long)cl; diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6c4f7496cec6..4d7b02b2907f 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -11,8 +11,9 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ - output.o input.o debug.o ssnmap.o auth.o \ - offload.o + output.o input.o ssnmap.o auth.o offload.o \ + +stcp-$(CONFIG_DEBUG_KERNEL) += debug.o sctp_probe-y := probe.o diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 02afbe571008..24321a504dc5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1433,10 +1433,10 @@ static __init int sctp_init(void) * The methodology is similar to that of the tcp hash tables. * Though not identical. Start by getting a goal size */ - if (totalram_pages >= (128 * 1024)) - goal = totalram_pages >> (22 - PAGE_SHIFT); + if (totalram_pages() >= (128 * 1024)) + goal = totalram_pages() >> (22 - PAGE_SHIFT); else - goal = totalram_pages >> (24 - PAGE_SHIFT); + goal = totalram_pages() >> (24 - PAGE_SHIFT); /* Then compute the page order for said goal */ order = get_order(goal); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index bad69e91fea3..dcee414b2473 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -53,7 +53,7 @@ u32 krb5_encrypt( - struct crypto_skcipher *tfm, + struct crypto_sync_skcipher *tfm, void * iv, void * in, void * out, @@ -62,24 +62,24 @@ krb5_encrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; - SKCIPHER_REQUEST_ON_STACK(req, tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - if (length % crypto_skcipher_blocksize(tfm) != 0) + if (length % crypto_sync_skcipher_blocksize(tfm) != 0) goto out; - if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { + if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", - crypto_skcipher_ivsize(tfm)); + crypto_sync_skcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm)); + memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm)); memcpy(out, in, length); sg_init_one(sg, out, length); - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_sync_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, length, local_iv); @@ -92,7 +92,7 @@ krb5_encrypt( u32 krb5_decrypt( - struct crypto_skcipher *tfm, + struct crypto_sync_skcipher *tfm, void * iv, void * in, void * out, @@ -101,23 +101,23 @@ krb5_decrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; - SKCIPHER_REQUEST_ON_STACK(req, tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - if (length % crypto_skcipher_blocksize(tfm) != 0) + if (length % crypto_sync_skcipher_blocksize(tfm) != 0) goto out; - if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { + if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", - crypto_skcipher_ivsize(tfm)); + crypto_sync_skcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm)); + memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm)); memcpy(out, in, length); sg_init_one(sg, out, length); - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_sync_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, length, local_iv); @@ -468,7 +468,8 @@ encryptor(struct scatterlist *sg, void *data) { struct encryptor_desc *desc = data; struct xdr_buf *outbuf = desc->outbuf; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req); + struct crypto_sync_skcipher *tfm = + crypto_sync_skcipher_reqtfm(desc->req); struct page *in_page; int thislen = desc->fraglen + sg->length; int fraglen, ret; @@ -494,7 +495,7 @@ encryptor(struct scatterlist *sg, void *data) desc->fraglen += sg->length; desc->pos += sg->length; - fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1); + fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1); thislen -= fraglen; if (thislen == 0) @@ -528,16 +529,16 @@ encryptor(struct scatterlist *sg, void *data) } int -gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf, +gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf, int offset, struct page **pages) { int ret; struct encryptor_desc desc; - SKCIPHER_REQUEST_ON_STACK(req, tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0); - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_sync_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); memset(desc.iv, 0, sizeof(desc.iv)); @@ -569,7 +570,8 @@ decryptor(struct scatterlist *sg, void *data) { struct decryptor_desc *desc = data; int thislen = desc->fraglen + sg->length; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req); + struct crypto_sync_skcipher *tfm = + crypto_sync_skcipher_reqtfm(desc->req); int fraglen, ret; /* Worst case is 4 fragments: head, end of page 1, start @@ -580,7 +582,7 @@ decryptor(struct scatterlist *sg, void *data) desc->fragno++; desc->fraglen += sg->length; - fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1); + fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1); thislen -= fraglen; if (thislen == 0) @@ -610,17 +612,17 @@ decryptor(struct scatterlist *sg, void *data) } int -gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf, +gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf, int offset) { int ret; struct decryptor_desc desc; - SKCIPHER_REQUEST_ON_STACK(req, tfm); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0); - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_sync_tfm(req, tfm); skcipher_request_set_callback(req, 0, NULL, NULL); memset(desc.iv, 0, sizeof(desc.iv)); @@ -674,12 +676,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen) } static u32 -gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, +gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf, u32 offset, u8 *iv, struct page **pages, int encrypt) { u32 ret; struct scatterlist sg[1]; - SKCIPHER_REQUEST_ON_STACK(req, cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, cipher); u8 *data; struct page **save_pages; u32 len = buf->len - offset; @@ -708,7 +710,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, sg_init_one(sg, data, len); - skcipher_request_set_tfm(req, cipher); + skcipher_request_set_sync_tfm(req, cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, len, iv); @@ -737,7 +739,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_netobj hmac; u8 *cksumkey; u8 *ecptr; - struct crypto_skcipher *cipher, *aux_cipher; + struct crypto_sync_skcipher *cipher, *aux_cipher; int blocksize; struct page **save_pages; int nblocks, nbytes; @@ -756,7 +758,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, cksumkey = kctx->acceptor_integ; usage = KG_USAGE_ACCEPTOR_SEAL; } - blocksize = crypto_skcipher_blocksize(cipher); + blocksize = crypto_sync_skcipher_blocksize(cipher); /* hide the gss token header and insert the confounder */ offset += GSS_KRB5_TOK_HDR_LEN; @@ -809,7 +811,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, memset(desc.iv, 0, sizeof(desc.iv)); if (cbcbytes) { - SKCIPHER_REQUEST_ON_STACK(req, aux_cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher); desc.pos = offset + GSS_KRB5_TOK_HDR_LEN; desc.fragno = 0; @@ -818,7 +820,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, desc.outbuf = buf; desc.req = req; - skcipher_request_set_tfm(req, aux_cipher); + skcipher_request_set_sync_tfm(req, aux_cipher); skcipher_request_set_callback(req, 0, NULL, NULL); sg_init_table(desc.infrags, 4); @@ -857,7 +859,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct xdr_buf subbuf; u32 ret = 0; u8 *cksum_key; - struct crypto_skcipher *cipher, *aux_cipher; + struct crypto_sync_skcipher *cipher, *aux_cipher; struct xdr_netobj our_hmac_obj; u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN]; u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN]; @@ -876,7 +878,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, cksum_key = kctx->initiator_integ; usage = KG_USAGE_INITIATOR_SEAL; } - blocksize = crypto_skcipher_blocksize(cipher); + blocksize = crypto_sync_skcipher_blocksize(cipher); /* create a segment skipping the header and leaving out the checksum */ @@ -893,13 +895,13 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, memset(desc.iv, 0, sizeof(desc.iv)); if (cbcbytes) { - SKCIPHER_REQUEST_ON_STACK(req, aux_cipher); + SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher); desc.fragno = 0; desc.fraglen = 0; desc.req = req; - skcipher_request_set_tfm(req, aux_cipher); + skcipher_request_set_sync_tfm(req, aux_cipher); skcipher_request_set_callback(req, 0, NULL, NULL); sg_init_table(desc.frags, 4); @@ -948,7 +950,8 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, * Set the key of the given cipher. */ int -krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, +krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, + struct crypto_sync_skcipher *cipher, unsigned char *cksum) { struct crypto_shash *hmac; @@ -996,7 +999,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, if (err) goto out_err; - err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); + err = crypto_sync_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); if (err) goto out_err; @@ -1014,7 +1017,8 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, * Set the key of cipher kctx->enc. */ int -krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, +krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, + struct crypto_sync_skcipher *cipher, s32 seqnum) { struct crypto_shash *hmac; @@ -1071,7 +1075,8 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, if (err) goto out_err; - err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength); + err = crypto_sync_skcipher_setkey(cipher, Kcrypt, + kctx->gk5e->keylength); if (err) goto out_err; diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 870133146026..817f2a418db5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, size_t blocksize, keybytes, keylength, n; unsigned char *inblockdata, *outblockdata, *rawkey; struct xdr_netobj inblock, outblock; - struct crypto_skcipher *cipher; + struct crypto_sync_skcipher *cipher; u32 ret = EINVAL; blocksize = gk5e->blocksize; @@ -157,11 +157,10 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, if ((inkey->len != keylength) || (outkey->len != keylength)) goto err_return; - cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0); if (IS_ERR(cipher)) goto err_return; - if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len)) + if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len)) goto err_return; /* allocate and set up buffers */ @@ -238,7 +237,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, memset(inblockdata, 0, blocksize); kfree(inblockdata); err_free_cipher: - crypto_free_skcipher(cipher); + crypto_free_sync_skcipher(cipher); err_return: return ret; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index ea2f6022b3d5..2912725d3da4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -191,7 +191,7 @@ get_gss_krb5_enctype(int etype) static inline const void * get_key(const void *p, const void *end, - struct krb5_ctx *ctx, struct crypto_skcipher **res) + struct krb5_ctx *ctx, struct crypto_sync_skcipher **res) { struct xdr_netobj key; int alg; @@ -219,15 +219,14 @@ get_key(const void *p, const void *end, if (IS_ERR(p)) goto out_err; - *res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + *res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0); if (IS_ERR(*res)) { printk(KERN_WARNING "gss_kerberos_mech: unable to initialize " "crypto algorithm %s\n", ctx->gk5e->encrypt_name); *res = NULL; goto out_err_free_key; } - if (crypto_skcipher_setkey(*res, key.data, key.len)) { + if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) { printk(KERN_WARNING "gss_kerberos_mech: error setting key for " "crypto algorithm %s\n", ctx->gk5e->encrypt_name); goto out_err_free_tfm; @@ -237,7 +236,7 @@ get_key(const void *p, const void *end, return p; out_err_free_tfm: - crypto_free_skcipher(*res); + crypto_free_sync_skcipher(*res); out_err_free_key: kfree(key.data); p = ERR_PTR(-EINVAL); @@ -309,30 +308,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) return 0; out_err_free_key2: - crypto_free_skcipher(ctx->seq); + crypto_free_sync_skcipher(ctx->seq); out_err_free_key1: - crypto_free_skcipher(ctx->enc); + crypto_free_sync_skcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); out_err: return PTR_ERR(p); } -static struct crypto_skcipher * +static struct crypto_sync_skcipher * context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) { - struct crypto_skcipher *cp; + struct crypto_sync_skcipher *cp; - cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC); + cp = crypto_alloc_sync_skcipher(cname, 0, 0); if (IS_ERR(cp)) { dprintk("gss_kerberos_mech: unable to initialize " "crypto algorithm %s\n", cname); return NULL; } - if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) { + if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) { dprintk("gss_kerberos_mech: error setting key for " "crypto algorithm %s\n", cname); - crypto_free_skcipher(cp); + crypto_free_sync_skcipher(cp); return NULL; } return cp; @@ -386,9 +385,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask) return 0; out_free_enc: - crypto_free_skcipher(ctx->enc); + crypto_free_sync_skcipher(ctx->enc); out_free_seq: - crypto_free_skcipher(ctx->seq); + crypto_free_sync_skcipher(ctx->seq); out_err: return -EINVAL; } @@ -443,17 +442,15 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) /* * allocate hash, and skciphers for data and seqnum encryption */ - ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + ctx->enc = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0); if (IS_ERR(ctx->enc)) { err = PTR_ERR(ctx->enc); goto out_err_free_hmac; } - ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + ctx->seq = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0); if (IS_ERR(ctx->seq)) { - crypto_free_skcipher(ctx->enc); + crypto_free_sync_skcipher(ctx->enc); err = PTR_ERR(ctx->seq); goto out_err_free_hmac; } @@ -565,7 +562,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask) context_v2_alloc_cipher(ctx, "cbc(aes)", ctx->acceptor_seal); if (ctx->acceptor_enc_aux == NULL) { - crypto_free_skcipher(ctx->initiator_enc_aux); + crypto_free_sync_skcipher(ctx->initiator_enc_aux); goto out_free_acceptor_enc; } } @@ -573,9 +570,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask) return 0; out_free_acceptor_enc: - crypto_free_skcipher(ctx->acceptor_enc); + crypto_free_sync_skcipher(ctx->acceptor_enc); out_free_initiator_enc: - crypto_free_skcipher(ctx->initiator_enc); + crypto_free_sync_skcipher(ctx->initiator_enc); out_err: return -EINVAL; } @@ -687,12 +684,12 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - crypto_free_skcipher(kctx->seq); - crypto_free_skcipher(kctx->enc); - crypto_free_skcipher(kctx->acceptor_enc); - crypto_free_skcipher(kctx->initiator_enc); - crypto_free_skcipher(kctx->acceptor_enc_aux); - crypto_free_skcipher(kctx->initiator_enc_aux); + crypto_free_sync_skcipher(kctx->seq); + crypto_free_sync_skcipher(kctx->enc); + crypto_free_sync_skcipher(kctx->acceptor_enc); + crypto_free_sync_skcipher(kctx->initiator_enc); + crypto_free_sync_skcipher(kctx->acceptor_enc_aux); + crypto_free_sync_skcipher(kctx->initiator_enc_aux); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c8b9082f4a9d..fb6656295204 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -43,13 +43,12 @@ static s32 krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) { - struct crypto_skcipher *cipher; + struct crypto_sync_skcipher *cipher; unsigned char plain[8]; s32 code; dprintk("RPC: %s:\n", __func__); - cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -68,12 +67,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, code = krb5_encrypt(cipher, cksum, plain, buf, 8); out: - crypto_free_skcipher(cipher); + crypto_free_sync_skcipher(cipher); return code; } s32 krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_skcipher *key, + struct crypto_sync_skcipher *key, int direction, u32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -101,13 +100,12 @@ static s32 krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, unsigned char *buf, int *direction, s32 *seqnum) { - struct crypto_skcipher *cipher; + struct crypto_sync_skcipher *cipher; unsigned char plain[8]; s32 code; dprintk("RPC: %s:\n", __func__); - cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -130,7 +128,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, *seqnum = ((plain[0] << 24) | (plain[1] << 16) | (plain[2] << 8) | (plain[3])); out: - crypto_free_skcipher(cipher); + crypto_free_sync_skcipher(cipher); return code; } @@ -142,7 +140,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx, { s32 code; unsigned char plain[8]; - struct crypto_skcipher *key = kctx->seq; + struct crypto_sync_skcipher *key = kctx->seq; dprintk("RPC: krb5_get_seq_num:\n"); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index a737c2da0837..7de4d5cb16a8 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, now = get_seconds(); - blocksize = crypto_skcipher_blocksize(kctx->enc); + blocksize = crypto_sync_skcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); plainlen = conflen + buf->len - offset; @@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, return GSS_S_FAILURE; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { - struct crypto_skcipher *cipher; + struct crypto_sync_skcipher *cipher; int err; - cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, + 0, 0); if (IS_ERR(cipher)) return GSS_S_FAILURE; @@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, err = gss_encrypt_xdr_buf(cipher, buf, offset + headlen - conflen, pages); - crypto_free_skcipher(cipher); + crypto_free_sync_skcipher(cipher); if (err) return GSS_S_FAILURE; } else { @@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) return GSS_S_BAD_SIG; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { - struct crypto_skcipher *cipher; + struct crypto_sync_skcipher *cipher; int err; - cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, + 0, 0); if (IS_ERR(cipher)) return GSS_S_FAILURE; krb5_rc4_setup_enc_key(kctx, cipher, seqnum); err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset); - crypto_free_skcipher(cipher); + crypto_free_sync_skcipher(cipher); if (err) return GSS_S_DEFECTIVE_TOKEN; } else { @@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ - blocksize = crypto_skcipher_blocksize(kctx->enc); + blocksize = crypto_sync_skcipher_blocksize(kctx->enc); data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + conflen; orig_start = buf->head[0].iov_base + offset; diff --git a/net/wireguard/Kbuild b/net/wireguard/Kbuild new file mode 100644 index 000000000000..2f7b634c6055 --- /dev/null +++ b/net/wireguard/Kbuild @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-y += -Wframe-larger-than=2048 +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g +ccflags-$(if $(WIREGUARD_VERSION),y,) += -D'WIREGUARD_VERSION="$(WIREGUARD_VERSION)"' + +wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o peerlookup.o allowedips.o ratelimiter.o cookie.o netlink.o + +include $(src)/crypto/Kbuild.include +include $(src)/compat/Kbuild.include + +obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o diff --git a/net/wireguard/Kconfig b/net/wireguard/Kconfig new file mode 100644 index 000000000000..06d19a3f4bc1 --- /dev/null +++ b/net/wireguard/Kconfig @@ -0,0 +1,33 @@ +config WIREGUARD + tristate "IP: WireGuard secure network tunnel" + depends on NET && INET + depends on IPV6 || !IPV6 + select NET_UDP_TUNNEL + select DST_CACHE + select CRYPTO + select CRYPTO_ALGAPI + select VFP + select VFPv3 if CPU_V7 + select NEON if CPU_V7 + select KERNEL_MODE_NEON if CPU_V7 + default y + help + WireGuard is a secure, fast, and easy to use replacement for IPsec + that uses modern cryptography and clever networking tricks. It's + designed to be fairly general purpose and abstract enough to fit most + use cases, while at the same time remaining extremely simple to + configure. See www.wireguard.com for more info. + + It's safe to say Y or M here, as the driver is very lightweight and + is only in use when an administrator chooses to add an interface. + +config WIREGUARD_DEBUG + bool "Debugging checks and verbose messages" + depends on WIREGUARD + help + This will write log messages for handshake and other events + that occur for a WireGuard interface. It will also perform some + extra validation checks and unit tests at various points. This is + only useful for debugging. + + Say N here unless you know what you're doing. diff --git a/net/wireguard/Makefile b/net/wireguard/Makefile new file mode 100644 index 000000000000..b35ddbbcff3e --- /dev/null +++ b/net/wireguard/Makefile @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +KERNELRELEASE ?= $(shell uname -r) +KERNELDIR ?= /lib/modules/$(KERNELRELEASE)/build +PREFIX ?= /usr +DESTDIR ?= +SRCDIR ?= $(PREFIX)/src +DKMSDIR ?= $(SRCDIR)/wireguard +DEPMOD ?= depmod +DEPMODBASEDIR ?= / + +PWD := $(shell pwd) + +all: module +debug: module-debug + +ifneq ($(V),1) +MAKEFLAGS += --no-print-directory +endif + +WIREGUARD_VERSION = $(patsubst v%,%,$(shell GIT_CEILING_DIRECTORIES="$(PWD)/../.." git describe --dirty 2>/dev/null)) + +module: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules + +module-debug: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) V=1 CONFIG_WIREGUARD_DEBUG=y WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules + +clean: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) clean + +module-install: + @$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules_install + $(DEPMOD) -b "$(DEPMODBASEDIR)" -a $(KERNELRELEASE) + +install: module-install + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +DKMS_SOURCES := version.h Makefile Kbuild Kconfig dkms.conf $(filter-out version.h wireguard.mod.c tests/%,$(call rwildcard,,*.c *.h *.S *.pl *.include)) +dkms-install: $(DKMS_SOURCES) + @$(foreach f,$(DKMS_SOURCES),install -v -m0644 -D $(f) $(DESTDIR)$(DKMSDIR)/$(f);) + +style: + $(KERNELDIR)/scripts/checkpatch.pl -f --max-line-length=4000 --codespell --color=always $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h) $(wildcard selftest/*.c) + +check: clean + scan-build --html-title=wireguard-linux-compat -maxloop 100 --view --keep-going $(MAKE) module CONFIG_WIREGUARD_DEBUG=y C=2 CF="-D__CHECK_ENDIAN__" + +coccicheck: clean + @$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_WIREGUARD_DEBUG=y coccicheck MODE=report + +cloc: + @cloc --skip-uniqueness --by-file --extract-with="$$(readlink -f ../kernel-tree-scripts/filter-compat-defines.sh) >FILE< > \$$(basename >FILE<)" $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h) + +-include tests/debug.mk + +.PHONY: all module module-debug module-install install dkms-install clean cloc check style diff --git a/net/wireguard/allowedips.c b/net/wireguard/allowedips.c new file mode 100644 index 000000000000..9a4c8ff32d9d --- /dev/null +++ b/net/wireguard/allowedips.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "allowedips.h" +#include "peer.h" + +static struct kmem_cache *node_cache; + +static void swap_endian(u8 *dst, const u8 *src, u8 bits) +{ + if (bits == 32) { + *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); + } else if (bits == 128) { + ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); + ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + } +} + +static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, + u8 cidr, u8 bits) +{ + node->cidr = cidr; + node->bit_at_a = cidr / 8U; +#ifdef __LITTLE_ENDIAN + node->bit_at_a ^= (bits / 8U - 1U) % 8U; +#endif + node->bit_at_b = 7U - (cidr % 8U); + node->bitlen = bits; + memcpy(node->bits, src, bits / 8U); +} + +static inline u8 choose(struct allowedips_node *node, const u8 *key) +{ + return (key[node->bit_at_a] >> node->bit_at_b) & 1; +} + +static void push_rcu(struct allowedips_node **stack, + struct allowedips_node __rcu *p, unsigned int *len) +{ + if (rcu_access_pointer(p)) { + WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); + stack[(*len)++] = rcu_dereference_raw(p); + } +} + +static void node_free_rcu(struct rcu_head *rcu) +{ + kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu)); +} + +static void root_free_rcu(struct rcu_head *rcu) +{ + struct allowedips_node *node, *stack[128] = { + container_of(rcu, struct allowedips_node, rcu) }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + kmem_cache_free(node_cache, node); + } +} + +static void root_remove_peer_lists(struct allowedips_node *root) +{ + struct allowedips_node *node, *stack[128] = { root }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + if (rcu_access_pointer(node->peer)) + list_del(&node->peer_list); + } +} + +static unsigned int fls128(u64 a, u64 b) +{ + return a ? fls64(a) + 64U : fls64(b); +} + +static u8 common_bits(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + if (bits == 32) + return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key); + else if (bits == 128) + return 128U - fls128( + *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0], + *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]); + return 0; +} + +static bool prefix_matches(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + /* This could be much faster if it actually just compared the common + * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and + * the rest, but it turns out that common_bits is already super fast on + * modern processors, even taking into account the unfortunate bswap. + * So, we just inline it like this instead. + */ + return common_bits(node, key, bits) >= node->cidr; +} + +static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, + const u8 *key) +{ + struct allowedips_node *node = trie, *found = NULL; + + while (node && prefix_matches(node, key, bits)) { + if (rcu_access_pointer(node->peer)) + found = node; + if (node->cidr == bits) + break; + node = rcu_dereference_bh(node->bit[choose(node, key)]); + } + return found; +} + +/* Returns a strong reference to a peer */ +static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits, + const void *be_ip) +{ + /* Aligned so it can be passed to fls/fls64 */ + u8 ip[16] __aligned(__alignof(u64)); + struct allowedips_node *node; + struct wg_peer *peer = NULL; + + swap_endian(ip, be_ip, bits); + + rcu_read_lock_bh(); +retry: + node = find_node(rcu_dereference_bh(root), bits, ip); + if (node) { + peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer)); + if (!peer) + goto retry; + } + rcu_read_unlock_bh(); + return peer; +} + +static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, + u8 cidr, u8 bits, struct allowedips_node **rnode, + struct mutex *lock) +{ + struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock)); + struct allowedips_node *parent = NULL; + bool exact = false; + + while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { + parent = node; + if (parent->cidr == cidr) { + exact = true; + break; + } + node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock)); + } + *rnode = parent; + return exact; +} + +static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node) +{ + node->parent_bit_packed = (unsigned long)parent | bit; + rcu_assign_pointer(*parent, node); +} + +static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node) +{ + u8 bit = choose(parent, node->bits); + connect_node(&parent->bit[bit], bit, node); +} + +static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + struct allowedips_node *node, *parent, *down, *newnode; + + if (unlikely(cidr > bits || !peer)) + return -EINVAL; + + if (!rcu_access_pointer(*trie)) { + node = kmem_cache_zalloc(node_cache, GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + RCU_INIT_POINTER(node->peer, peer); + list_add_tail(&node->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(node, key, cidr, bits); + connect_node(trie, 2, node); + return 0; + } + if (node_placement(*trie, key, cidr, bits, &node, lock)) { + rcu_assign_pointer(node->peer, peer); + list_move_tail(&node->peer_list, &peer->allowedips_list); + return 0; + } + + newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL); + if (unlikely(!newnode)) + return -ENOMEM; + RCU_INIT_POINTER(newnode->peer, peer); + list_add_tail(&newnode->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(newnode, key, cidr, bits); + + if (!node) { + down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); + } else { + const u8 bit = choose(node, key); + down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock)); + if (!down) { + connect_node(&node->bit[bit], bit, newnode); + return 0; + } + } + cidr = min(cidr, common_bits(down, key, bits)); + parent = node; + + if (newnode->cidr == cidr) { + choose_and_connect_node(newnode, down); + if (!parent) + connect_node(trie, 2, newnode); + else + choose_and_connect_node(parent, newnode); + return 0; + } + + node = kmem_cache_zalloc(node_cache, GFP_KERNEL); + if (unlikely(!node)) { + list_del(&newnode->peer_list); + kmem_cache_free(node_cache, newnode); + return -ENOMEM; + } + INIT_LIST_HEAD(&node->peer_list); + copy_and_assign_cidr(node, newnode->bits, cidr, bits); + + choose_and_connect_node(node, down); + choose_and_connect_node(node, newnode); + if (!parent) + connect_node(trie, 2, node); + else + choose_and_connect_node(parent, node); + return 0; +} + +void wg_allowedips_init(struct allowedips *table) +{ + table->root4 = table->root6 = NULL; + table->seq = 1; +} + +void wg_allowedips_free(struct allowedips *table, struct mutex *lock) +{ + struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; + + ++table->seq; + RCU_INIT_POINTER(table->root4, NULL); + RCU_INIT_POINTER(table->root6, NULL); + if (rcu_access_pointer(old4)) { + struct allowedips_node *node = rcu_dereference_protected(old4, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } + if (rcu_access_pointer(old6)) { + struct allowedips_node *node = rcu_dereference_protected(old6, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } +} + +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls */ + u8 key[4] __aligned(__alignof(u32)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 32); + return add(&table->root4, 32, key, cidr, peer, lock); +} + +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls64 */ + u8 key[16] __aligned(__alignof(u64)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 128); + return add(&table->root6, 128, key, cidr, peer, lock); +} + +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock) +{ + struct allowedips_node *node, *child, **parent_bit, *parent, *tmp; + bool free_parent; + + if (list_empty(&peer->allowedips_list)) + return; + ++table->seq; + list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) { + list_del_init(&node->peer_list); + RCU_INIT_POINTER(node->peer, NULL); + if (node->bit[0] && node->bit[1]) + continue; + child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], + lockdep_is_held(lock)); + if (child) + child->parent_bit_packed = node->parent_bit_packed; + parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); + *parent_bit = child; + parent = (void *)parent_bit - + offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]); + free_parent = !rcu_access_pointer(node->bit[0]) && + !rcu_access_pointer(node->bit[1]) && + (node->parent_bit_packed & 3) <= 1 && + !rcu_access_pointer(parent->peer); + if (free_parent) + child = rcu_dereference_protected( + parent->bit[!(node->parent_bit_packed & 1)], + lockdep_is_held(lock)); + call_rcu(&node->rcu, node_free_rcu); + if (!free_parent) + continue; + if (child) + child->parent_bit_packed = parent->parent_bit_packed; + *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; + call_rcu(&parent->rcu, node_free_rcu); + } +} + +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) +{ + const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); + swap_endian(ip, node->bits, node->bitlen); + memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); + if (node->cidr) + ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); + + *cidr = node->cidr; + return node->bitlen == 32 ? AF_INET : AF_INET6; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr); + return NULL; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr); + return NULL; +} + +int __init wg_allowedips_slab_init(void) +{ + node_cache = KMEM_CACHE(allowedips_node, 0); + return node_cache ? 0 : -ENOMEM; +} + +void wg_allowedips_slab_uninit(void) +{ + rcu_barrier(); + kmem_cache_destroy(node_cache); +} + +#include "selftest/allowedips.c" diff --git a/net/wireguard/allowedips.h b/net/wireguard/allowedips.h new file mode 100644 index 000000000000..2346c797eb4d --- /dev/null +++ b/net/wireguard/allowedips.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_ALLOWEDIPS_H +#define _WG_ALLOWEDIPS_H + +#include +#include +#include + +struct wg_peer; + +struct allowedips_node { + struct wg_peer __rcu *peer; + struct allowedips_node __rcu *bit[2]; + u8 cidr, bit_at_a, bit_at_b, bitlen; + u8 bits[16] __aligned(__alignof(u64)); + + /* Keep rarely used members at bottom to be beyond cache line. */ + unsigned long parent_bit_packed; + union { + struct list_head peer_list; + struct rcu_head rcu; + }; +}; + +struct allowedips { + struct allowedips_node __rcu *root4; + struct allowedips_node __rcu *root6; + u64 seq; +} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */ + +void wg_allowedips_init(struct allowedips *table); +void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock); +/* The ip input pointer should be __aligned(__alignof(u64))) */ +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr); + +/* These return a strong reference to a peer: */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb); +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb); + +#ifdef DEBUG +bool wg_allowedips_selftest(void); +#endif + +int wg_allowedips_slab_init(void); +void wg_allowedips_slab_uninit(void); + +#endif /* _WG_ALLOWEDIPS_H */ diff --git a/net/wireguard/compat/Kbuild.include b/net/wireguard/compat/Kbuild.include new file mode 100644 index 000000000000..0192ecdcbc50 --- /dev/null +++ b/net/wireguard/compat/Kbuild.include @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) + +ccflags-y += -include $(kbuild-dir)/compat/compat.h +asflags-y += -include $(kbuild-dir)/compat/compat-asm.h +LINUXINCLUDE := -DCOMPAT_VERSION=$(VERSION) -DCOMPAT_PATCHLEVEL=$(PATCHLEVEL) -DCOMPAT_SUBLEVEL=$(SUBLEVEL) -I$(kbuild-dir)/compat/version $(LINUXINCLUDE) + +ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),) +ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include +endif + +ifeq ($(wildcard $(srctree)/include/linux/skb_array.h),) +ccflags-y += -I$(kbuild-dir)/compat/skb_array/include +endif + +ifeq ($(wildcard $(srctree)/include/linux/siphash.h),) +ccflags-y += -I$(kbuild-dir)/compat/siphash/include +wireguard-y += compat/siphash/siphash.o +endif + +ifeq ($(wildcard $(srctree)/include/net/dst_cache.h),) +ccflags-y += -I$(kbuild-dir)/compat/dst_cache/include +wireguard-y += compat/dst_cache/dst_cache.o +endif + +ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h)$(CONFIG_X86),y) +ccflags-y += -I$(kbuild-dir)/compat/intel-family-x86/include +endif + +ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h)$(CONFIG_X86),y) +ccflags-y += -I$(kbuild-dir)/compat/fpu-x86/include +endif + +ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/simd.h)$(shell grep -s -F "generic-y += simd.h" "$(srctree)/arch/$(SRCARCH)/Kbuild" "$(srctree)/arch/$(SRCARCH)/Makefile"),) +ccflags-y += -I$(kbuild-dir)/compat/simd-asm/include +endif + +ifeq ($(wildcard $(srctree)/include/linux/simd.h),) +ccflags-y += -I$(kbuild-dir)/compat/simd/include +endif + +ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),) +ccflags-y += -I$(kbuild-dir)/compat/udp_tunnel/include +wireguard-y += compat/udp_tunnel/udp_tunnel.o +endif + +ifeq ($(shell grep -s -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"),) +ccflags-y += -include $(kbuild-dir)/compat/memneq/include.h +wireguard-y += compat/memneq/memneq.o +endif + +ifeq ($(shell grep -s -F "addr_gen_mode" "$(srctree)/include/linux/ipv6.h"),) +ccflags-y += -DCOMPAT_CANNOT_USE_DEV_CNF +endif + +ifdef CONFIG_HZ +ifeq ($(wildcard $(CURDIR)/include/generated/timeconst.h),) +ccflags-y += $(shell bash -c '((a=$(CONFIG_HZ), b=1000000)); while ((b > 0)); do ((t=b, b=a%b, a=t)); done; echo "-DHZ_TO_USEC_NUM=$$((1000000/a)) -DHZ_TO_USEC_DEN=$$(($(CONFIG_HZ)/a))";') +endif +endif + +ifeq ($(wildcard $(srctree)/arch/arm/include/asm/neon.h)$(CONFIG_ARM),y) +ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include +endif +ifeq ($(wildcard $(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y) +ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include +endif + +ifeq ($(wildcard $(srctree)/include/net/dst_metadata.h),) +ccflags-y += -I$(kbuild-dir)/compat/dstmetadata/include +endif + +ifeq ($(CONFIG_X86_64),y) + ifeq ($(ssse3_instr),) + ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) + ccflags-y += $(ssse3_instr) + asflags-y += $(ssse3_instr) + endif + ifeq ($(avx_instr),) + avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) + ccflags-y += $(avx_instr) + asflags-y += $(avx_instr) + endif + ifeq ($(avx2_instr),) + avx2_instr := $(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) + ccflags-y += $(avx2_instr) + asflags-y += $(avx2_instr) + endif + ifeq ($(avx512_instr),) + avx512_instr := $(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1) + ccflags-y += $(avx512_instr) + asflags-y += $(avx512_instr) + endif + ifeq ($(bmi2_instr),) + bmi2_instr :=$(call as-instr,mulx %rax$(comma)%rax$(comma)%rax,-DCONFIG_AS_BMI2=1) + ccflags-y += $(bmi2_instr) + asflags-y += $(bmi2_instr) + endif + ifeq ($(adx_instr),) + adx_instr :=$(call as-instr,adcx %rax$(comma)%rax,-DCONFIG_AS_ADX=1) + ccflags-y += $(adx_instr) + asflags-y += $(adx_instr) + endif +endif + +ifneq ($(shell grep -s -F "\#define LINUX_PACKAGE_ID \" Debian " "$(CURDIR)/include/generated/package.h"),) +ccflags-y += -DISDEBIAN +endif diff --git a/net/wireguard/compat/checksum/checksum_partial_compat.h b/net/wireguard/compat/checksum/checksum_partial_compat.h new file mode 100644 index 000000000000..3dfe5397a94f --- /dev/null +++ b/net/wireguard/compat/checksum/checksum_partial_compat.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include +#include + +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 +static inline int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max) +{ + if (skb_headlen(skb) >= len) + return 0; + if (max > skb->len) + max = skb->len; + if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) + return -ENOMEM; + if (skb_headlen(skb) < len) + return -EPROTO; + return 0; +} +#define MAX_IP_HDR_LEN 128 +static inline int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) +{ + unsigned int off; + bool fragment; + int err; + fragment = false; + err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN); + if (err < 0) + goto out; + if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + fragment = true; + off = ip_hdrlen(skb); + err = -EPROTO; + if (fragment) + goto out; + switch (ip_hdr(skb)->protocol) { + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); + break; + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + udp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); + break; + default: + goto out; + } + err = 0; +out: + return err; +} +#define MAX_IPV6_HDR_LEN 256 +#define OPT_HDR(type, skb, off) \ + (type *)(skb_network_header(skb) + (off)) +static inline int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) +{ + int err; + u8 nexthdr; + unsigned int off; + unsigned int len; + bool fragment; + bool done; + fragment = false; + done = false; + off = sizeof(struct ipv6hdr); + err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + nexthdr = ipv6_hdr(skb)->nexthdr; + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + while (off <= len && !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp; + + err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_FRAGMENT: { + struct frag_hdr *hp; + err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + hp = OPT_HDR(struct frag_hdr, skb, off); + if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) + fragment = true; + nexthdr = hp->nexthdr; + off += sizeof(struct frag_hdr); + break; + } + default: + done = true; + break; + } + } + err = -EPROTO; + if (!done || fragment) + goto out; + switch (nexthdr) { + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); + break; + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + udp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); + break; + default: + goto out; + } + err = 0; +out: + return err; +} +static inline int skb_checksum_setup(struct sk_buff *skb, bool recalculate) +{ + int err; + switch (skb->protocol) { + case htons(ETH_P_IP): + err = skb_checksum_setup_ip(skb, recalculate); + break; + + case htons(ETH_P_IPV6): + err = skb_checksum_setup_ipv6(skb, recalculate); + break; + default: + err = -EPROTO; + break; + } + return err; +} diff --git a/net/wireguard/compat/compat-asm.h b/net/wireguard/compat/compat-asm.h new file mode 100644 index 000000000000..345087bf0de8 --- /dev/null +++ b/net/wireguard/compat/compat-asm.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_COMPATASM_H +#define _WG_COMPATASM_H + +#include +#include +#include + +#ifdef RHEL_MAJOR +#if RHEL_MAJOR == 7 +#define ISRHEL7 +#elif RHEL_MAJOR == 8 +#define ISRHEL8 +#endif +#endif + +/* PaX compatibility */ +#if defined(RAP_PLUGIN) && defined(RAP_ENTRY) +#undef ENTRY +#define ENTRY RAP_ENTRY +#endif + +#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo + .macro ret\c, reg +#if __LINUX_ARM_ARCH__ < 6 + mov\c pc, \reg +#else + .ifeqs "\reg", "lr" + bx\c \reg + .else + mov\c pc, \reg + .endif +#endif + .endm + .endr +#endif + +#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) +#include +#define lspush push +#define lspull pull +#undef push +#undef pull +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 76) && !defined(ISRHEL8) && !defined(SYM_FUNC_START) +#define SYM_FUNC_START ENTRY +#define SYM_FUNC_END ENDPROC +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3 +#define blake2s_compress_avx512 zinc_blake2s_compress_avx512 +#define poly1305_init_arm zinc_poly1305_init_arm +#define poly1305_blocks_arm zinc_poly1305_blocks_arm +#define poly1305_emit_arm zinc_poly1305_emit_arm +#define poly1305_blocks_neon zinc_poly1305_blocks_neon +#define poly1305_emit_neon zinc_poly1305_emit_neon +#define poly1305_init_mips zinc_poly1305_init_mips +#define poly1305_blocks_mips zinc_poly1305_blocks_mips +#define poly1305_emit_mips zinc_poly1305_emit_mips +#define poly1305_init_x86_64 zinc_poly1305_init_x86_64 +#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64 +#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64 +#define poly1305_emit_avx zinc_poly1305_emit_avx +#define poly1305_blocks_avx zinc_poly1305_blocks_avx +#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2 +#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512 +#define curve25519_neon zinc_curve25519_neon +#define hchacha20_ssse3 zinc_hchacha20_ssse3 +#define chacha20_ssse3 zinc_chacha20_ssse3 +#define chacha20_avx2 zinc_chacha20_avx2 +#define chacha20_avx512 zinc_chacha20_avx512 +#define chacha20_avx512vl zinc_chacha20_avx512vl +#define chacha20_mips zinc_chacha20_mips +#define chacha20_arm zinc_chacha20_arm +#define hchacha20_arm zinc_hchacha20_arm +#define chacha20_neon zinc_chacha20_neon +#endif + +#endif /* _WG_COMPATASM_H */ diff --git a/net/wireguard/compat/compat.h b/net/wireguard/compat/compat.h new file mode 100644 index 000000000000..60550e7e57c5 --- /dev/null +++ b/net/wireguard/compat/compat.h @@ -0,0 +1,1195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_COMPAT_H +#define _WG_COMPAT_H + +#include +#include +#include +#include + +#ifdef RHEL_MAJOR +#if RHEL_MAJOR == 7 +#define ISRHEL7 +#elif RHEL_MAJOR == 8 +#define ISRHEL8 +#endif +#endif +#ifdef UTS_UBUNTU_RELEASE_ABI +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +#define ISUBUNTU1604 +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) +#define ISUBUNTU1804 +#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) +#define ISUBUNTU1904 +#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) +#define ISUBUNTU1910 +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) +#error "WireGuard requires Linux >= 3.10" +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +#error "WireGuard has been merged into Linux >= 5.6 and therefore this compatibility module is no longer required." +#endif + +#if defined(ISRHEL7) +#include +#define headers_end headers_start +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) +#define headers_start data +#define headers_end data +#endif + +#include +#include +#ifndef __ro_after_init +#define __ro_after_init __read_mostly +#endif + +#include +#ifndef READ_ONCE +#define READ_ONCE ACCESS_ONCE +#endif +#ifndef WRITE_ONCE +#ifdef ACCESS_ONCE_RW +#define WRITE_ONCE(p, v) (ACCESS_ONCE_RW(p) = (v)) +#else +#define WRITE_ONCE(p, v) (ACCESS_ONCE(p) = (v)) +#endif +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include "udp_tunnel/udp_tunnel_partial_compat.h" +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(DEBUG) && defined(net_dbg_ratelimited) +#undef net_dbg_ratelimited +#define net_dbg_ratelimited(fmt, ...) do { if (0) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#include +#ifndef RCU_LOCKDEP_WARN +#define RCU_LOCKDEP_WARN(cond, message) rcu_lockdep_assert(!(cond), message) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7) +#define ipv6_dst_lookup(a, b, c, d) ipv6_dst_lookup(b, c, d) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 83) +#define ipv6_dst_lookup_flow(a, b, c, d) ipv6_dst_lookup_flow(b, c, d) +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 18) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) && !defined(ISUBUNTU1904)) || (!defined(ISRHEL8) && !defined(ISDEBIAN) && !defined(ISUBUNTU1804) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 119) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 181) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 224) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 224) && !defined(ISUBUNTU1604) && !defined(ISRHEL7)) +#define ipv6_dst_lookup_flow(a, b, c, d) ipv6_dst_lookup(a, b, &dst, c) + (void *)0 ?: dst +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISRHEL7) +#include +struct ipv6_stub_type { + void *udpv6_encap_enable; + int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); +}; +static const struct ipv6_stub_type ipv6_stub_impl = { + .udpv6_encap_enable = (void *)1, + .ipv6_dst_lookup = ip6_dst_lookup +}; +static const struct ipv6_stub_type *ipv6_stub = &ipv6_stub_impl; +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISRHEL7) +#include +static inline bool ipv6_mod_enabled(void) +{ + return ipv6_stub != NULL && ipv6_stub->udpv6_encap_enable != NULL; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) && !defined(ISRHEL7) +#include +static inline void skb_reset_tc(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = 0; +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#include +#include +static inline u32 __compat_get_random_u32(void) +{ + static siphash_key_t key; + static u32 counter = 0; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + static bool has_seeded = false; + if (unlikely(!has_seeded)) { + get_random_bytes(&key, sizeof(key)); + has_seeded = true; + } +#else + get_random_once(&key, sizeof(key)); +#endif + return siphash_2u32(counter++, get_random_int(), &key); +} +#define get_random_u32 __compat_get_random_u32 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7) +static inline void netif_keep_dst(struct net_device *dev) +{ + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} +#define COMPAT_CANNOT_USE_CSUM_LEVEL +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) +#include +#ifndef netdev_alloc_pcpu_stats +#define pcpu_sw_netstats pcpu_tstats +#endif +#ifndef netdev_alloc_pcpu_stats +#define netdev_alloc_pcpu_stats alloc_percpu +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && !defined(ISRHEL7) +#include +#ifndef netdev_alloc_pcpu_stats +#define netdev_alloc_pcpu_stats(type) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) +#include "checksum/checksum_partial_compat.h" +static inline void *__compat_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +{ + if (tail != skb) { + skb->data_len += len; + skb->len += len; + } + return skb_put(tail, len); +} +#define pskb_put __compat_pskb_put +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7) +#include +static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet) +{ +#ifdef CONFIG_CAVIUM_OCTEON_IPFWD_OFFLOAD + memset(&skb->cvm_info, 0, sizeof(skb->cvm_info)); + skb->cvm_reserved = 0; +#endif + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb_dst_drop(skb); + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); + if (!xnet) + return; + skb_orphan(skb); + skb->mark = 0; +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) +#define skb_scrub_packet(a, b) skb_scrub_packet(a) +#endif + +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 63)) && !defined(ISRHEL7) +#include +static inline u32 __compat_prandom_u32_max(u32 ep_ro) +{ + return (u32)(((u64)prandom_u32() * ep_ro) >> 32); +} +#define prandom_u32_max __compat_prandom_u32_max +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#include +#ifndef U8_MAX +#define U8_MAX ((u8)~0U) +#endif +#ifndef S8_MAX +#define S8_MAX ((s8)(U8_MAX >> 1)) +#endif +#ifndef S8_MIN +#define S8_MIN ((s8)(-S8_MAX - 1)) +#endif +#ifndef U16_MAX +#define U16_MAX ((u16)~0U) +#endif +#ifndef S16_MAX +#define S16_MAX ((s16)(U16_MAX >> 1)) +#endif +#ifndef S16_MIN +#define S16_MIN ((s16)(-S16_MAX - 1)) +#endif +#ifndef U32_MAX +#define U32_MAX ((u32)~0U) +#endif +#ifndef S32_MAX +#define S32_MAX ((s32)(U32_MAX >> 1)) +#endif +#ifndef S32_MIN +#define S32_MIN ((s32)(-S32_MAX - 1)) +#endif +#ifndef U64_MAX +#define U64_MAX ((u64)~0ULL) +#endif +#ifndef S64_MAX +#define S64_MAX ((s64)(U64_MAX >> 1)) +#endif +#ifndef S64_MIN +#define S64_MIN ((s64)(-S64_MAX - 1)) +#endif +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 3) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 35) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 24) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 33) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7)) +static inline void memzero_explicit(void *s, size_t count) +{ + memset(s, 0, count); + barrier(); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(ISRHEL7) +static const struct in6_addr __compat_in6addr_any = IN6ADDR_ANY_INIT; +#define in6addr_any __compat_in6addr_any +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320)) +#include +#include +#include +struct rng_initializer { + struct completion done; + struct random_ready_callback cb; +}; +static inline void rng_initialized_callback(struct random_ready_callback *cb) +{ + complete(&container_of(cb, struct rng_initializer, cb)->done); +} +static inline int wait_for_random_bytes(void) +{ + static bool rng_is_initialized = false; + int ret; + if (unlikely(!rng_is_initialized)) { + struct rng_initializer rng = { + .done = COMPLETION_INITIALIZER(rng.done), + .cb = { .owner = THIS_MODULE, .func = rng_initialized_callback } + }; + ret = add_random_ready_callback(&rng.cb); + if (!ret) { + ret = wait_for_completion_interruptible(&rng.done); + if (ret) { + del_random_ready_callback(&rng.cb); + return ret; + } + } else if (ret != -EALREADY) + return ret; + rng_is_initialized = true; + } + return 0; +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) +/* This is a disaster. Without this API, we really have no way of + * knowing if it's initialized. We just return that it has and hope + * for the best... */ +static inline int wait_for_random_bytes(void) +{ + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 285)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320)) && !defined(ISRHEL8) +#include +#include +struct rng_is_initialized_callback { + struct random_ready_callback cb; + atomic_t *rng_state; +}; +static inline void rng_is_initialized_callback(struct random_ready_callback *cb) +{ + struct rng_is_initialized_callback *rdy = container_of(cb, struct rng_is_initialized_callback, cb); + atomic_set(rdy->rng_state, 2); + kfree(rdy); +} +static inline bool rng_is_initialized(void) +{ + static atomic_t rng_state = ATOMIC_INIT(0); + + if (atomic_read(&rng_state) == 2) + return true; + + if (atomic_cmpxchg(&rng_state, 0, 1) == 0) { + int ret; + struct rng_is_initialized_callback *rdy = kmalloc(sizeof(*rdy), GFP_ATOMIC); + if (!rdy) { + atomic_set(&rng_state, 0); + return false; + } + rdy->cb.owner = THIS_MODULE; + rdy->cb.func = rng_is_initialized_callback; + rdy->rng_state = &rng_state; + ret = add_random_ready_callback(&rdy->cb); + if (ret) + kfree(rdy); + if (ret == -EALREADY) { + atomic_set(&rng_state, 2); + return true; + } else if (ret) + atomic_set(&rng_state, 0); + return false; + } + return false; +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) +/* This is a disaster. Without this API, we really have no way of + * knowing if it's initialized. We just return that it has and hope + * for the best... */ +static inline bool rng_is_initialized(void) +{ + return true; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320)) +static inline int get_random_bytes_wait(void *buf, int nbytes) +{ + int ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + get_random_bytes(buf, nbytes); + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7) +#define system_power_efficient_wq system_unbound_wq +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#include +#ifndef ktime_get_real_ts64 +#define timespec64 timespec +#define ktime_get_real_ts64 ktime_get_real_ts +#endif +#else +#include +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +static inline u64 __compat_jiffies64_to_nsecs(u64 j) +{ +#if !(NSEC_PER_SEC % HZ) + return (NSEC_PER_SEC / HZ) * j; +#else + return div_u64(j * HZ_TO_USEC_NUM, HZ_TO_USEC_DEN) * 1000; +#endif +} +#define jiffies64_to_nsecs __compat_jiffies64_to_nsecs +#endif +static inline u64 ktime_get_coarse_boottime_ns(void) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + return ktime_to_ns(ktime_get_boottime()); +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 12) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 53) + return ktime_to_ns(ktime_mono_to_any(ns_to_ktime(jiffies64_to_nsecs(get_jiffies_64())), TK_OFFS_BOOT)); +#else + return ktime_to_ns(ktime_get_coarse_boottime()); +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#include +static inline __be32 __compat_confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) +{ + int same = 0; + __be32 addr = 0; + for_ifa(in_dev) { + if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) { + addr = ifa->ifa_local; + if (same) + break; + } + if (!same) { + same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa)); + if (same && addr) { + if (local || !dst) + break; + if (inet_ifa_match(addr, ifa)) + break; + if (ifa->ifa_scope <= scope) { + addr = ifa->ifa_local; + break; + } + same = 0; + } + } + } endfor_ifa(in_dev); + return same ? addr : 0; +} +static inline __be32 __compat_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) +{ + __be32 addr = 0; + struct net_device *dev; + if (in_dev) + return __compat_confirm_addr_indev(in_dev, dst, local, scope); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + in_dev = __in_dev_get_rcu(dev); + if (in_dev) { + addr = __compat_confirm_addr_indev(in_dev, dst, local, scope); + if (addr) + break; + } + } + rcu_read_unlock(); + return addr; +} +#define inet_confirm_addr __compat_inet_confirm_addr +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#include +#include +static inline void *__compat_kvmalloc(size_t size, gfp_t flags) +{ + gfp_t kmalloc_flags = flags; + void *ret; + if (size > PAGE_SIZE) { + kmalloc_flags |= __GFP_NOWARN; + if (!(kmalloc_flags & __GFP_REPEAT) || (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + kmalloc_flags |= __GFP_NORETRY; + } + ret = kmalloc(size, kmalloc_flags); + if (ret || size <= PAGE_SIZE) + return ret; + return __vmalloc(size, flags, PAGE_KERNEL); +} +static inline void *__compat_kvzalloc(size_t size, gfp_t flags) +{ + return __compat_kvmalloc(size, flags | __GFP_ZERO); +} +#define kvmalloc __compat_kvmalloc +#define kvzalloc __compat_kvzalloc +#endif + +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) +#include +#include +static inline void __compat_kvfree(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} +#define kvfree __compat_kvfree +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#include +static inline void *__compat_kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (n != 0 && SIZE_MAX / n < size) + return NULL; + return kvmalloc(n * size, flags); +} +#define kvmalloc_array __compat_kvmalloc_array +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) +#include +#include +static inline void *__compat_kvcalloc(size_t n, size_t size, gfp_t flags) +{ + return kvmalloc_array(n, size, flags | __GFP_ZERO); +} +#define kvcalloc __compat_kvcalloc +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9) +#include +#define priv_destructor destructor +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) +#define wg_newlink(a,b,c,d,e) wg_newlink(a,b,c,d) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#include +#define nlmsg_parse(a, b, c, d, e, f) nlmsg_parse(a, b, c, d, e) +#define nla_parse_nested(a, b, c, d, e) nla_parse_nested(a, b, c, d) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && !defined(ISRHEL7) +static inline struct nlattr **genl_family_attrbuf(const struct genl_family *family) +{ + return family->attrbuf; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +#define PTR_ERR_OR_ZERO(p) PTR_RET(p) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) +#include +#define nla_put_u64_64bit(a, b, c, d) nla_put_u64(a, b, c) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#include +#ifndef GENL_UNS_ADMIN_PERM +#define GENL_UNS_ADMIN_PERM GENL_ADMIN_PERM +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && !defined(ISRHEL7) +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) +#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops, ARRAY_SIZE(genl_ops)) +#define COMPAT_CANNOT_USE_CONST_GENL_OPS +#else +#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops) +#endif +#define COMPAT_CANNOT_USE_GENL_NOPS +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 2) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 16) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 65) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 101) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 84) +#define __COMPAT_NETLINK_DUMP_BLOCK { \ + int ret; \ + skb->end -= nlmsg_total_size(sizeof(int)); \ + ret = wg_get_device_dump_real(skb, cb); \ + skb->end += nlmsg_total_size(sizeof(int)); \ + return ret; \ +} +#define __COMPAT_NETLINK_DUMP_OVERRIDE +#else +#define __COMPAT_NETLINK_DUMP_BLOCK return wg_get_device_dump_real(skb, cb); +#endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 25) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 87) +#define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \ +static int wg_get_device_dump(a, b) { \ + struct wg_device *wg = (struct wg_device *)cb->args[0]; \ + if (!wg) { \ + int ret = wg_get_device_start(cb); \ + if (ret) \ + return ret; \ + } \ + __COMPAT_NETLINK_DUMP_BLOCK \ +} \ +static int wg_get_device_dump_real(a, b) +#define COMPAT_CANNOT_USE_NETLINK_START +#elif defined(__COMPAT_NETLINK_DUMP_OVERRIDE) +#define wg_get_device_dump(a, b) wg_get_device_dump_real(a, b); \ +static int wg_get_device_dump(a, b) { \ + __COMPAT_NETLINK_DUMP_BLOCK \ +} \ +static int wg_get_device_dump_real(a, b) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#define COMPAT_CANNOT_USE_IN6_DEV_GET +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#define COMPAT_CANNOT_USE_IFF_NO_QUEUE +#endif + +#if defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) +#include +#include +static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) +{ + return boot_cpu_has(X86_FEATURE_XSAVE) && xgetbv(XCR_XFEATURE_ENABLED_MASK) & xfeatures_needed; +} +#endif +#ifndef XFEATURE_MASK_YMM +#define XFEATURE_MASK_YMM XSTATE_YMM +#endif +#ifndef XFEATURE_MASK_SSE +#define XFEATURE_MASK_SSE XSTATE_SSE +#endif +#ifndef XSTATE_AVX512 +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +#endif +#ifndef XFEATURE_MASK_AVX512 +#define XFEATURE_MASK_AVX512 XSTATE_AVX512 +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && defined(CONFIG_X86_64) +/* This is incredibly dumb and reckless, but as it turns out, there's + * not really hardware Linux runs properly on that supports F but not BW + * and VL, so in practice this isn't so bad. Plus, this is compat layer, + * so the bar remains fairly low. + */ +#include +#ifndef X86_FEATURE_AVX512BW +#define X86_FEATURE_AVX512BW X86_FEATURE_AVX512F +#endif +#ifndef X86_FEATURE_AVX512VL +#define X86_FEATURE_AVX512VL X86_FEATURE_AVX512F +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) +struct __compat_dummy_container { char dev; }; +#define netdev_notifier_info net_device *)data); __attribute((unused)) char __compat_dummy_variable = ((struct __compat_dummy_container +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) +#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a)) +#define from_timer(var, callback_timer, timer_fieldname) container_of(callback_timer, typeof(*var), timer_fieldname) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 3) +#define COMPAT_CANNOT_USE_AVX512 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#define genl_dump_check_consistent(a, b) genl_dump_check_consistent(a, b, &genl_family) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && !defined(ISRHEL7) +static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) +{ + void *tmp = skb_put(skb, len); + memcpy(tmp, data, len); + return tmp; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(ISRHEL7) +#define napi_complete_done(n, work_done) napi_complete(n) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) +#include +/* NAPI_STATE_SCHED gets set by netif_napi_add anyway, so this is safe. + * Also, kernels without NAPI_STATE_NO_BUSY_POLL don't have a call to + * napi_hash_add inside of netif_napi_add. + */ +#define NAPI_STATE_NO_BUSY_POLL NAPI_STATE_SCHED +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#include +#ifndef atomic_read_acquire +#define atomic_read_acquire(v) ({ int __compat_p1 = atomic_read(v); smp_rmb(); __compat_p1; }) +#endif +#ifndef atomic_set_release +#define atomic_set_release(v, i) ({ smp_wmb(); atomic_set(v, i); }) +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#include +#ifndef atomic_read_acquire +#define atomic_read_acquire(v) smp_load_acquire(&(v)->counter) +#endif +#ifndef atomic_set_release +#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 285)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 320)) +static inline void le32_to_cpu_array(u32 *buf, unsigned int words) +{ + while (words--) { + __le32_to_cpus(buf); + buf++; + } +} +static inline void cpu_to_le32_array(u32 *buf, unsigned int words) +{ + while (words--) { + __cpu_to_le32s(buf); + buf++; + } +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) +#include +static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, + unsigned int size) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + __builtin_constant_p(size) && + (size % sizeof(unsigned long)) == 0) { + unsigned long *d = (unsigned long *)dst; + unsigned long *s1 = (unsigned long *)src1; + unsigned long *s2 = (unsigned long *)src2; + + while (size > 0) { + *d++ = *s1++ ^ *s2++; + size -= sizeof(unsigned long); + } + } else { + if (unlikely(dst != src1)) + memmove(dst, src1, size); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + crypto_xor(dst, src2, size); +#else + __crypto_xor(dst, src2, size); +#endif + } +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#define read_cpuid_part() read_cpuid_part_number() +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7) +#define hlist_add_behind(a, b) hlist_add_after(b, a) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) +struct __kernel_timespec { + int64_t tv_sec, tv_nsec; +}; +#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) +#include +#ifdef __kernel_timespec +#undef __kernel_timespec +struct __kernel_timespec { + int64_t tv_sec, tv_nsec; +}; +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) +#include +#ifndef ALIGN_DOWN +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL8) +#include +#define skb_probe_transport_header(a) skb_probe_transport_header(a, 0) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) && !defined(ISRHEL7) +#define ignore_df local_df +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL8) +/* Note that all intentional uses of the non-_bh variety need to explicitly + * undef these, conditionalized on COMPAT_CANNOT_DEPRECIATE_BH_RCU. + */ +#include +static __always_inline void old_synchronize_rcu(void) +{ + synchronize_rcu(); +} +static __always_inline void old_call_rcu(void *a, void *b) +{ + call_rcu(a, b); +} +static __always_inline void old_rcu_barrier(void) +{ + rcu_barrier(); +} +#ifdef synchronize_rcu +#undef synchronize_rcu +#endif +#ifdef call_rcu +#undef call_rcu +#endif +#ifdef rcu_barrier +#undef rcu_barrier +#endif +#define synchronize_rcu synchronize_rcu_bh +#define call_rcu call_rcu_bh +#define rcu_barrier rcu_barrier_bh +#define COMPAT_CANNOT_DEPRECIATE_BH_RCU +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 10) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) && !defined(ISRHEL8)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 217) +static inline void skb_mark_not_on_list(struct sk_buff *skb) +{ + skb->next = NULL; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && !defined(ISRHEL8) +#include +#ifndef NLA_POLICY_EXACT_LEN +#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_UNSPEC, .len = _len } +#endif +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && !defined(ISRHEL8) +#include +#ifndef NLA_POLICY_MIN_LEN +#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_UNSPEC, .len = _len } +#endif +#define COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) && defined(__aarch64__) +#define cpu_have_named_feature(name) (elf_hwcap & (HWCAP_ ## name)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) +#include +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0) && !defined(ISRHEL8) +#define genl_dumpit_info(cb) ({ \ + struct { struct nlattr **attrs; } *a = (void *)((u8 *)cb->args + offsetofend(struct dump_ctx, next_allowedip)); \ + BUILD_BUG_ON(sizeof(cb->args) < offsetofend(struct dump_ctx, next_allowedip) + sizeof(*a)); \ + a->attrs = genl_family_attrbuf(&genl_family); \ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, a->attrs, genl_family.maxattr, device_policy, NULL) < 0) \ + memset(a->attrs, 0, (genl_family.maxattr + 1) * sizeof(struct nlattr *)); \ + a; \ +}) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +#include +#ifndef skb_list_walk_safe +#define skb_list_walk_safe(first, skb, next) \ + for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next), (next) = (skb) ? (skb)->next : NULL) +#endif +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 200) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 249)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 285)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 320)) +#define blake2s_init zinc_blake2s_init +#define blake2s_init_key zinc_blake2s_init_key +#define blake2s_update zinc_blake2s_update +#define blake2s_final zinc_blake2s_final +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +#define blake2s_hmac zinc_blake2s_hmac +#define chacha20 zinc_chacha20 +#define hchacha20 zinc_hchacha20 +#define chacha20poly1305_encrypt zinc_chacha20poly1305_encrypt +#define chacha20poly1305_encrypt_sg_inplace zinc_chacha20poly1305_encrypt_sg_inplace +#define chacha20poly1305_decrypt zinc_chacha20poly1305_decrypt +#define chacha20poly1305_decrypt_sg_inplace zinc_chacha20poly1305_decrypt_sg_inplace +#define xchacha20poly1305_encrypt zinc_xchacha20poly1305_encrypt +#define xchacha20poly1305_decrypt zinc_xchacha20poly1305_decrypt +#define curve25519 zinc_curve25519 +#define curve25519_generate_secret zinc_curve25519_generate_secret +#define curve25519_generate_public zinc_curve25519_generate_public +#define poly1305_init zinc_poly1305_init +#define poly1305_update zinc_poly1305_update +#define poly1305_final zinc_poly1305_final +#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3 +#define blake2s_compress_avx512 zinc_blake2s_compress_avx512 +#define poly1305_init_arm zinc_poly1305_init_arm +#define poly1305_blocks_arm zinc_poly1305_blocks_arm +#define poly1305_emit_arm zinc_poly1305_emit_arm +#define poly1305_blocks_neon zinc_poly1305_blocks_neon +#define poly1305_emit_neon zinc_poly1305_emit_neon +#define poly1305_init_mips zinc_poly1305_init_mips +#define poly1305_blocks_mips zinc_poly1305_blocks_mips +#define poly1305_emit_mips zinc_poly1305_emit_mips +#define poly1305_init_x86_64 zinc_poly1305_init_x86_64 +#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64 +#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64 +#define poly1305_emit_avx zinc_poly1305_emit_avx +#define poly1305_blocks_avx zinc_poly1305_blocks_avx +#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2 +#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512 +#define curve25519_neon zinc_curve25519_neon +#define hchacha20_ssse3 zinc_hchacha20_ssse3 +#define chacha20_ssse3 zinc_chacha20_ssse3 +#define chacha20_avx2 zinc_chacha20_avx2 +#define chacha20_avx512 zinc_chacha20_avx512 +#define chacha20_avx512vl zinc_chacha20_avx512vl +#define chacha20_mips zinc_chacha20_mips +#define chacha20_arm zinc_chacha20_arm +#define hchacha20_arm zinc_hchacha20_arm +#define chacha20_neon zinc_chacha20_neon +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) && !defined(ISRHEL7) +#include +static inline int skb_ensure_writable(struct sk_buff *skb, int write_len) +{ + if (!pskb_may_pull(skb, write_len)) + return -ENOMEM; + + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) + return 0; + + return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 102) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 178) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 223) && LINUX_VERSION_CODE > KERNEL_VERSION(4, 10, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 259) || defined(ISRHEL8) || defined(ISUBUNTU1804) +#include +#include +#if IS_ENABLED(CONFIG_NF_NAT) +#include +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) && !defined(ISRHEL8) +#include +#endif +static inline void __compat_icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + __be32 orig_ip; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + memset(skb_in->cb, 0, sizeof(skb_in->cb)); + icmp_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct iphdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct iphdr)))) + goto out; + + orig_ip = ip_hdr(skb_in)->saddr; + ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + memset(skb_in->cb, 0, sizeof(skb_in->cb)); + icmp_send(skb_in, type, code, info); + ip_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +static inline void __compat_icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct in6_addr orig_ip; + struct nf_conn *ct; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + memset(skb_in->cb, 0, sizeof(skb_in->cb)); + icmpv6_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct ipv6hdr)))) + goto out; + + orig_ip = ipv6_hdr(skb_in)->saddr; + ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + memset(skb_in->cb, 0, sizeof(skb_in->cb)); + icmpv6_send(skb_in, type, code, info); + ipv6_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +#else +static inline void __compat_icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + memset(skb_in->cb, 0, sizeof(skb_in->cb)); + icmp_send(skb_in, type, code, info); +} +static inline void __compat_icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + memset(skb_in->cb, 0, sizeof(skb_in->cb)); + icmpv6_send(skb_in, type, code, info); +} +#endif +#define icmp_ndo_send __compat_icmp_ndo_send +#define icmpv6_ndo_send __compat_icmpv6_ndo_send +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#define COMPAT_CANNOT_USE_MAX_MTU +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 14) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 29) && !defined(ISUBUNTU1910) && !defined(ISUBUNTU1904) && !defined(ISRHEL8)) +#include +#include +static inline void skb_reset_redirect(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_SCHED + skb_reset_tc(skb); +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) +#define skb_get_hash skb_get_rxhash +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && !defined(ISRHEL7) +#define hash rxhash +#define l4_hash l4_rxhash +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7) +#define sw_hash ignore_df = 0; skb->nf_trace = skb->ooo_okay +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) +#define pre_exit exit +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0) +#include +#include +#include +static inline __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0) || defined(ISRHEL8) +static const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; +#else +#define header_ops hard_header_len +#define ip_tunnel_header_ops *(char *)0 - (char *)0 +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0) +#define kfree_sensitive(a) kzfree(a) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7) +#define xchg_release xchg +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7) +#include +#ifndef smp_load_acquire +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) +#include +struct dst_cache_pcpu { + unsigned long refresh_ts; + struct dst_entry *dst; + u32 cookie; + union { + struct in_addr in_saddr; + struct in6_addr in6_saddr; + }; +}; +#define COMPAT_HAS_DEFINED_DST_CACHE_PCPU +static inline void dst_cache_reset_now(struct dst_cache *dst_cache) +{ + int i; + + if (!dst_cache->cache) + return; + + dst_cache->reset_ts = jiffies; + for_each_possible_cpu(i) { + struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i); + struct dst_entry *dst = idst->dst; + + idst->cookie = 0; + idst->dst = NULL; + dst_release(dst); + } +} +#endif + +#if defined(ISUBUNTU1604) || defined(ISRHEL7) +#include +#ifndef _WG_LINUX_SIPHASH_H +#define hsiphash_1u32 siphash_1u32 +#define hsiphash_2u32 siphash_2u32 +#define hsiphash_3u32 siphash_3u32 +#define hsiphash_key_t siphash_key_t +#endif +#endif + +#ifdef CONFIG_VE +#include +#ifdef NETIF_F_VIRTUAL +#undef NETIF_F_LLTX +#define NETIF_F_LLTX (__NETIF_F(LLTX) | __NETIF_F(VIRTUAL)) +#endif +#endif + +/* https://github.com/ClangBuiltLinux/linux/issues/7 */ +#if defined( __clang__) && (!defined(CONFIG_CLANG_VERSION) || CONFIG_CLANG_VERSION < 80000) +#include +#undef BUILD_BUG_ON +#define BUILD_BUG_ON(x) +#endif + +/* PaX compatibility */ +#ifdef CONSTIFY_PLUGIN +#include +#undef __read_mostly +#define __read_mostly +#endif +#if (defined(CONFIG_PAX) || defined(CONFIG_CFI_CLANG)) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +#include +#define wg_expired_retransmit_handshake(a) wg_expired_retransmit_handshake(unsigned long timer) +#define wg_expired_send_keepalive(a) wg_expired_send_keepalive(unsigned long timer) +#define wg_expired_new_handshake(a) wg_expired_new_handshake(unsigned long timer) +#define wg_expired_zero_key_material(a) wg_expired_zero_key_material(unsigned long timer) +#define wg_expired_send_persistent_keepalive(a) wg_expired_send_persistent_keepalive(unsigned long timer) +#undef timer_setup +#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a)) +#undef from_timer +#define from_timer(var, callback_timer, timer_fieldname) container_of((struct timer_list *)callback_timer, typeof(*var), timer_fieldname) +#endif + +#endif /* _WG_COMPAT_H */ diff --git a/net/wireguard/compat/dst_cache/dst_cache.c b/net/wireguard/compat/dst_cache/dst_cache.c new file mode 100644 index 000000000000..f74c43c550eb --- /dev/null +++ b/net/wireguard/compat/dst_cache/dst_cache.c @@ -0,0 +1,177 @@ +/* + * net/core/dst_cache.c - dst entry cache + * + * Copyright (c) 2016 Paolo Abeni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 50) +static inline u32 rt6_get_cookie(const struct rt6_info *rt) +{ + if ((unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) + rt = (struct rt6_info *)(rt->dst.from); + + return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; +} +#endif +#endif +#include + +#ifndef COMPAT_HAS_DEFINED_DST_CACHE_PCPU +struct dst_cache_pcpu { + unsigned long refresh_ts; + struct dst_entry *dst; + u32 cookie; + union { + struct in_addr in_saddr; + struct in6_addr in6_saddr; + }; +}; +#endif + +static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache, + struct dst_entry *dst, u32 cookie) +{ + dst_release(dst_cache->dst); + if (dst) + dst_hold(dst); + + dst_cache->cookie = cookie; + dst_cache->dst = dst; +} + +static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache, + struct dst_cache_pcpu *idst) +{ + struct dst_entry *dst; + + dst = idst->dst; + if (!dst) + goto fail; + + /* the cache already hold a dst reference; it can't go away */ + dst_hold(dst); + + if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) || + (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) { + dst_cache_per_cpu_dst_set(idst, NULL, 0); + dst_release(dst); + goto fail; + } + return dst; + +fail: + idst->refresh_ts = jiffies; + return NULL; +} + +struct dst_entry *dst_cache_get(struct dst_cache *dst_cache) +{ + if (!dst_cache->cache) + return NULL; + + return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); +} + +struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) +{ + struct dst_cache_pcpu *idst; + struct dst_entry *dst; + + if (!dst_cache->cache) + return NULL; + + idst = this_cpu_ptr(dst_cache->cache); + dst = dst_cache_per_cpu_get(dst_cache, idst); + if (!dst) + return NULL; + + *saddr = idst->in_saddr.s_addr; + return container_of(dst, struct rtable, dst); +} + +void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, + __be32 saddr) +{ + struct dst_cache_pcpu *idst; + + if (!dst_cache->cache) + return; + + idst = this_cpu_ptr(dst_cache->cache); + dst_cache_per_cpu_dst_set(idst, dst, 0); + idst->in_saddr.s_addr = saddr; +} + +#if IS_ENABLED(CONFIG_IPV6) +void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, + const struct in6_addr *addr) +{ + struct dst_cache_pcpu *idst; + + if (!dst_cache->cache) + return; + + idst = this_cpu_ptr(dst_cache->cache); + dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, + rt6_get_cookie((struct rt6_info *)dst)); + idst->in6_saddr = *addr; +} + +struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, + struct in6_addr *saddr) +{ + struct dst_cache_pcpu *idst; + struct dst_entry *dst; + + if (!dst_cache->cache) + return NULL; + + idst = this_cpu_ptr(dst_cache->cache); + dst = dst_cache_per_cpu_get(dst_cache, idst); + if (!dst) + return NULL; + + *saddr = idst->in6_saddr; + return dst; +} +#endif + +int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) + BUG_ON(gfp & GFP_ATOMIC); + dst_cache->cache = alloc_percpu(struct dst_cache_pcpu); +#else + dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu, + gfp | __GFP_ZERO); +#endif + if (!dst_cache->cache) + return -ENOMEM; + + dst_cache_reset(dst_cache); + return 0; +} + +void dst_cache_destroy(struct dst_cache *dst_cache) +{ + int i; + + if (!dst_cache->cache) + return; + + for_each_possible_cpu(i) + dst_release(per_cpu_ptr(dst_cache->cache, i)->dst); + + free_percpu(dst_cache->cache); +} diff --git a/net/wireguard/compat/dst_cache/include/net/dst_cache.h b/net/wireguard/compat/dst_cache/include/net/dst_cache.h new file mode 100644 index 000000000000..48021c0d6be1 --- /dev/null +++ b/net/wireguard/compat/dst_cache/include/net/dst_cache.h @@ -0,0 +1,97 @@ +#ifndef _WG_NET_DST_CACHE_H +#define _WG_NET_DST_CACHE_H + +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif + +struct dst_cache { + struct dst_cache_pcpu __percpu *cache; + unsigned long reset_ts; +}; + +/** + * dst_cache_get - perform cache lookup + * @dst_cache: the cache + * + * The caller should use dst_cache_get_ip4() if it need to retrieve the + * source address to be used when xmitting to the cached dst. + * local BH must be disabled. + */ +struct dst_entry *dst_cache_get(struct dst_cache *dst_cache); + +/** + * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address + * @dst_cache: the cache + * @saddr: return value for the retrieved source address + * + * local BH must be disabled. + */ +struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr); + +/** + * dst_cache_set_ip4 - store the ipv4 dst into the cache + * @dst_cache: the cache + * @dst: the entry to be cached + * @saddr: the source address to be stored inside the cache + * + * local BH must be disabled. + */ +void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, + __be32 saddr); + +#if IS_ENABLED(CONFIG_IPV6) + +/** + * dst_cache_set_ip6 - store the ipv6 dst into the cache + * @dst_cache: the cache + * @dst: the entry to be cached + * @saddr: the source address to be stored inside the cache + * + * local BH must be disabled. + */ +void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, + const struct in6_addr *addr); + +/** + * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address + * @dst_cache: the cache + * @saddr: return value for the retrieved source address + * + * local BH must be disabled. + */ +struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, + struct in6_addr *saddr); +#endif + +/** + * dst_cache_reset - invalidate the cache contents + * @dst_cache: the cache + * + * This do not free the cached dst to avoid races and contentions. + * the dst will be freed on later cache lookup. + */ +static inline void dst_cache_reset(struct dst_cache *dst_cache) +{ + dst_cache->reset_ts = jiffies; +} + +/** + * dst_cache_init - initialize the cache, allocating the required storage + * @dst_cache: the cache + * @gfp: allocation flags + */ +int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); + +/** + * dst_cache_destroy - empty the cache and free the allocated storage + * @dst_cache: the cache + * + * No synchronization is enforced: it must be called only when the cache + * is unused. + */ +void dst_cache_destroy(struct dst_cache *dst_cache); + +#endif /* _WG_NET_DST_CACHE_H */ diff --git a/net/wireguard/compat/dstmetadata/include/net/dst_metadata.h b/net/wireguard/compat/dstmetadata/include/net/dst_metadata.h new file mode 100644 index 000000000000..995094d4f099 --- /dev/null +++ b/net/wireguard/compat/dstmetadata/include/net/dst_metadata.h @@ -0,0 +1,3 @@ +#ifndef skb_valid_dst +#define skb_valid_dst(skb) (!!skb_dst(skb)) +#endif diff --git a/net/wireguard/compat/fpu-x86/include/asm/fpu/api.h b/net/wireguard/compat/fpu-x86/include/asm/fpu/api.h new file mode 100644 index 000000000000..f3f9117bcb88 --- /dev/null +++ b/net/wireguard/compat/fpu-x86/include/asm/fpu/api.h @@ -0,0 +1 @@ +#include diff --git a/net/wireguard/compat/intel-family-x86/include/asm/intel-family.h b/net/wireguard/compat/intel-family-x86/include/asm/intel-family.h new file mode 100644 index 000000000000..35a6bc4da8ad --- /dev/null +++ b/net/wireguard/compat/intel-family-x86/include/asm/intel-family.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_FAMILY_H +#define _ASM_X86_INTEL_FAMILY_H + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * The "_X" parts are generally the EP and EX Xeons, or the + * "Extreme" ones, like Broadwell-E. + * + * Things ending in "2" are usually because we have no better + * name for them. There's no processor called "SILVERMONT2". + */ + +#define INTEL_FAM6_CORE_YONAH 0x0E + +#define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D + +#define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ +#define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_FAM6_NEHALEM_EX 0x2E + +#define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_FAM6_WESTMERE_EX 0x2F + +#define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_FAM6_IVYBRIDGE_X 0x3E + +#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_FAM6_HASWELL_ULT 0x45 +#define INTEL_FAM6_HASWELL_GT3E 0x46 + +#define INTEL_FAM6_BROADWELL_CORE 0x3D +#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 + +#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E +#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_X 0x55 +#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E +#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E + +/* "Small Core" Processors (Atom) */ + +#define INTEL_FAM6_ATOM_PINEVIEW 0x1C +#define INTEL_FAM6_ATOM_LINCROFT 0x26 +#define INTEL_FAM6_ATOM_PENWELL 0x27 +#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 +#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 +#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ +#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A + +/* Xeon Phi */ + +#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ + +#endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/net/wireguard/compat/memneq/include.h b/net/wireguard/compat/memneq/include.h new file mode 100644 index 000000000000..2d18acd9b6c8 --- /dev/null +++ b/net/wireguard/compat/memneq/include.h @@ -0,0 +1,5 @@ +extern noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size); +static inline int crypto_memneq(const void *a, const void *b, size_t size) +{ + return __crypto_memneq(a, b, size) != 0UL ? 1 : 0; +} diff --git a/net/wireguard/compat/memneq/memneq.c b/net/wireguard/compat/memneq/memneq.c new file mode 100644 index 000000000000..1c427d405537 --- /dev/null +++ b/net/wireguard/compat/memneq/memneq.c @@ -0,0 +1,170 @@ +/* + * Constant-time equality testing of memory regions. + * + * Authors: + * + * James Yonan + * Daniel Borkmann + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * BSD LICENSE + * + * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of OpenVPN Technologies nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +/* Make the optimizer believe the variable can be manipulated arbitrarily. */ +#define COMPILER_OPTIMIZER_HIDE_VAR(var) asm("" : "=r" (var) : "0" (var)) + +#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ + +/* Generic path for arbitrary size */ +static inline unsigned long +__crypto_memneq_generic(const void *a, const void *b, size_t size) +{ + unsigned long neq = 0; + +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + while (size >= sizeof(unsigned long)) { + neq |= *(unsigned long *)a ^ *(unsigned long *)b; + COMPILER_OPTIMIZER_HIDE_VAR(neq); + a += sizeof(unsigned long); + b += sizeof(unsigned long); + size -= sizeof(unsigned long); + } +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + while (size > 0) { + neq |= *(unsigned char *)a ^ *(unsigned char *)b; + COMPILER_OPTIMIZER_HIDE_VAR(neq); + a += 1; + b += 1; + size -= 1; + } + return neq; +} + +/* Loop-free fast-path for frequently used 16-byte size */ +static inline unsigned long __crypto_memneq_16(const void *a, const void *b) +{ + unsigned long neq = 0; + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (sizeof(unsigned long) == 8) { + neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + } else if (sizeof(unsigned int) == 4) { + neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + } else +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + { + neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15); + COMPILER_OPTIMIZER_HIDE_VAR(neq); + } + + return neq; +} + +/* Compare two areas of memory without leaking timing information, + * and with special optimizations for common sizes. Users should + * not call this function directly, but should instead use + * crypto_memneq defined in crypto/algapi.h. + */ +noinline unsigned long __crypto_memneq(const void *a, const void *b, + size_t size) +{ + switch (size) { + case 16: + return __crypto_memneq_16(a, b); + default: + return __crypto_memneq_generic(a, b, size); + } +} + +#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */ diff --git a/net/wireguard/compat/neon-arm/include/asm/neon.h b/net/wireguard/compat/neon-arm/include/asm/neon.h new file mode 100644 index 000000000000..980d831e201a --- /dev/null +++ b/net/wireguard/compat/neon-arm/include/asm/neon.h @@ -0,0 +1,7 @@ +#ifndef _ARCH_ARM_ASM_NEON +#define _ARCH_ARM_ASM_NEON +#define kernel_neon_begin() \ + BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON") +#define kernel_neon_end() \ + BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON") +#endif diff --git a/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h b/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h new file mode 100644 index 000000000000..9d514bac1388 --- /dev/null +++ b/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h @@ -0,0 +1,674 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Definitions for the 'struct ptr_ring' datastructure. + * + * Author: + * Michael S. Tsirkin + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This is a limited-size FIFO maintaining pointers in FIFO order, with + * one CPU producing entries and another consuming entries from a FIFO. + * + * This implementation tries to minimize cache-contention when there is a + * single producer and a single consumer CPU. + */ + +#ifndef _LINUX_PTR_RING_H +#define _LINUX_PTR_RING_H 1 + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include +#endif + +struct ptr_ring { + int producer ____cacheline_aligned_in_smp; + spinlock_t producer_lock; + int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ + int consumer_tail; /* next entry to invalidate */ + spinlock_t consumer_lock; + /* Shared consumer/producer data */ + /* Read-only by both the producer and the consumer */ + int size ____cacheline_aligned_in_smp; /* max entries in queue */ + int batch; /* number of entries to consume in a batch */ + void **queue; +}; + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + * + * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: + * see e.g. ptr_ring_full. + */ +static inline bool __ptr_ring_full(struct ptr_ring *r) +{ + return r->queue[r->producer]; +} + +static inline bool ptr_ring_full(struct ptr_ring *r) +{ + bool ret; + + spin_lock(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock(&r->producer_lock); + + return ret; +} + +static inline bool ptr_ring_full_irq(struct ptr_ring *r) +{ + bool ret; + + spin_lock_irq(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock_irq(&r->producer_lock); + + return ret; +} + +static inline bool ptr_ring_full_any(struct ptr_ring *r) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->producer_lock, flags); + ret = __ptr_ring_full(r); + spin_unlock_irqrestore(&r->producer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_full_bh(struct ptr_ring *r) +{ + bool ret; + + spin_lock_bh(&r->producer_lock); + ret = __ptr_ring_full(r); + spin_unlock_bh(&r->producer_lock); + + return ret; +} + +/* Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). Callers must hold producer_lock. + * Callers are responsible for making sure pointer that is being queued + * points to a valid data. + */ +static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) +{ + if (unlikely(!r->size) || r->queue[r->producer]) + return -ENOSPC; + + /* Make sure the pointer we are storing points to a valid data. */ + /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + smp_wmb(); + + WRITE_ONCE(r->queue[r->producer++], ptr); + if (unlikely(r->producer >= r->size)) + r->producer = 0; + return 0; +} + +/* + * Note: resize (below) nests producer lock within consumer lock, so if you + * consume in interrupt or BH context, you must disable interrupts/BH when + * calling this. + */ +static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock(&r->producer_lock); + + return ret; +} + +static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock_irq(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_irq(&r->producer_lock); + + return ret; +} + +static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->producer_lock, flags); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_irqrestore(&r->producer_lock, flags); + + return ret; +} + +static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) +{ + int ret; + + spin_lock_bh(&r->producer_lock); + ret = __ptr_ring_produce(r, ptr); + spin_unlock_bh(&r->producer_lock); + + return ret; +} + +static inline void *__ptr_ring_peek(struct ptr_ring *r) +{ + if (likely(r->size)) + return READ_ONCE(r->queue[r->consumer_head]); + return NULL; +} + +/* + * Test ring empty status without taking any locks. + * + * NB: This is only safe to call if ring is never resized. + * + * However, if some other CPU consumes ring entries at the same time, the value + * returned is not guaranteed to be correct. + * + * In this case - to avoid incorrectly detecting the ring + * as empty - the CPU consuming the ring entries is responsible + * for either consuming all ring entries until the ring is empty, + * or synchronizing with some other CPU and causing it to + * re-test __ptr_ring_empty and/or consume the ring enteries + * after the synchronization point. + * + * Note: callers invoking this in a loop must use a compiler barrier, + * for example cpu_relax(). + */ +static inline bool __ptr_ring_empty(struct ptr_ring *r) +{ + if (likely(r->size)) + return !r->queue[READ_ONCE(r->consumer_head)]; + return true; +} + +static inline bool ptr_ring_empty(struct ptr_ring *r) +{ + bool ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_empty_irq(struct ptr_ring *r) +{ + bool ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_empty_any(struct ptr_ring *r) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_empty(r); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_empty_bh(struct ptr_ring *r) +{ + bool ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_empty(r); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + +/* Must only be called after __ptr_ring_peek returned !NULL */ +static inline void __ptr_ring_discard_one(struct ptr_ring *r) +{ + /* Fundamentally, what we want to do is update consumer + * index and zero out the entry so producer can reuse it. + * Doing it naively at each consume would be as simple as: + * consumer = r->consumer; + * r->queue[consumer++] = NULL; + * if (unlikely(consumer >= r->size)) + * consumer = 0; + * r->consumer = consumer; + * but that is suboptimal when the ring is full as producer is writing + * out new entries in the same cache line. Defer these updates until a + * batch of entries has been consumed. + */ + /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty + * to work correctly. + */ + int consumer_head = r->consumer_head; + int head = consumer_head++; + + /* Once we have processed enough entries invalidate them in + * the ring all at once so producer can reuse their space in the ring. + * We also do this when we reach end of the ring - not mandatory + * but helps keep the implementation simple. + */ + if (unlikely(consumer_head - r->consumer_tail >= r->batch || + consumer_head >= r->size)) { + /* Zero out entries in the reverse order: this way we touch the + * cache line that producer might currently be reading the last; + * producer won't make progress and touch other cache lines + * besides the first one until we write out all entries. + */ + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = consumer_head; + } + if (unlikely(consumer_head >= r->size)) { + consumer_head = 0; + r->consumer_tail = 0; + } + /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ + WRITE_ONCE(r->consumer_head, consumer_head); +} + +static inline void *__ptr_ring_consume(struct ptr_ring *r) +{ + void *ptr; + + ptr = __ptr_ring_peek(r); + if (ptr) + __ptr_ring_discard_one(r); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) + /* The READ_ONCE in __ptr_ring_peek doesn't imply a barrier on old kernels. */ + smp_read_barrier_depends(); +#endif + + return ptr; +} + +static inline int __ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + void *ptr; + int i; + + for (i = 0; i < n; i++) { + ptr = __ptr_ring_consume(r); + if (!ptr) + break; + array[i] = ptr; + } + + return i; +} + +/* + * Note: resize (below) nests producer lock within consumer lock, so if you + * call this in interrupt or BH context, you must disable interrupts/BH when + * producing. + */ +static inline void *ptr_ring_consume(struct ptr_ring *r) +{ + void *ptr; + + spin_lock(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock(&r->consumer_lock); + + return ptr; +} + +static inline void *ptr_ring_consume_irq(struct ptr_ring *r) +{ + void *ptr; + + spin_lock_irq(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock_irq(&r->consumer_lock); + + return ptr; +} + +static inline void *ptr_ring_consume_any(struct ptr_ring *r) +{ + unsigned long flags; + void *ptr; + + spin_lock_irqsave(&r->consumer_lock, flags); + ptr = __ptr_ring_consume(r); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ptr; +} + +static inline void *ptr_ring_consume_bh(struct ptr_ring *r) +{ + void *ptr; + + spin_lock_bh(&r->consumer_lock); + ptr = __ptr_ring_consume(r); + spin_unlock_bh(&r->consumer_lock); + + return ptr; +} + +static inline int ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, + void **array, int n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + +/* Cast to structure type and call a function without discarding from FIFO. + * Function must return a value. + * Callers must take consumer_lock. + */ +#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) + +#define PTR_RING_PEEK_CALL(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock_irq(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_irq(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_BH(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + \ + spin_lock_bh(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_bh(&(r)->consumer_lock); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ + typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ + unsigned long __PTR_RING_PEEK_CALL_f;\ + \ + spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ + __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ + spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ + __PTR_RING_PEEK_CALL_v; \ +}) + +/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See + * documentation for vmalloc for which of them are legal. + */ +static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) +{ + if (size > KMALLOC_MAX_SIZE / sizeof(void *)) + return NULL; + return kvmalloc(size * sizeof(void *), gfp | __GFP_ZERO); +} + +static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) +{ + r->size = size; + r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); + /* We need to set batch at least to 1 to make logic + * in __ptr_ring_discard_one work correctly. + * Batching too much (because ring is small) would cause a lot of + * burstiness. Needs tuning, for now disable batching. + */ + if (r->batch > r->size / 2 || !r->batch) + r->batch = 1; +} + +static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +{ + r->queue = __ptr_ring_init_queue_alloc(size, gfp); + if (!r->queue) + return -ENOMEM; + + __ptr_ring_set_size(r, size); + r->producer = r->consumer_head = r->consumer_tail = 0; + spin_lock_init(&r->producer_lock); + spin_lock_init(&r->consumer_lock); + + return 0; +} + +/* + * Return entries into ring. Destroy entries that don't fit. + * + * Note: this is expected to be a rare slow path operation. + * + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ +static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, + void (*destroy)(void *)) +{ + unsigned long flags; + int head; + + spin_lock_irqsave(&r->consumer_lock, flags); + spin_lock(&r->producer_lock); + + if (!r->size) + goto done; + + /* + * Clean out buffered entries (for simplicity). This way following code + * can test entries for NULL and if not assume they are valid. + */ + head = r->consumer_head - 1; + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + + /* + * Go over entries in batch, start moving head back and copy entries. + * Stop when we run into previously unconsumed entries. + */ + while (n) { + head = r->consumer_head - 1; + if (head < 0) + head = r->size - 1; + if (r->queue[head]) { + /* This batch entry will have to be destroyed. */ + goto done; + } + r->queue[head] = batch[--n]; + r->consumer_tail = head; + /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ + WRITE_ONCE(r->consumer_head, head); + } + +done: + /* Destroy all entries left in the batch. */ + while (n) + destroy(batch[--n]); + spin_unlock(&r->producer_lock); + spin_unlock_irqrestore(&r->consumer_lock, flags); +} + +static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, + int size, gfp_t gfp, + void (*destroy)(void *)) +{ + int producer = 0; + void **old; + void *ptr; + + while ((ptr = __ptr_ring_consume(r))) + if (producer < size) + queue[producer++] = ptr; + else if (destroy) + destroy(ptr); + + if (producer >= size) + producer = 0; + __ptr_ring_set_size(r, size); + r->producer = producer; + r->consumer_head = 0; + r->consumer_tail = 0; + old = r->queue; + r->queue = queue; + + return old; +} + +/* + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ +static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, + void (*destroy)(void *)) +{ + unsigned long flags; + void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **old; + + if (!queue) + return -ENOMEM; + + spin_lock_irqsave(&(r)->consumer_lock, flags); + spin_lock(&(r)->producer_lock); + + old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); + + spin_unlock(&(r)->producer_lock); + spin_unlock_irqrestore(&(r)->consumer_lock, flags); + + kvfree(old); + + return 0; +} + +/* + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, + unsigned int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) +{ + unsigned long flags; + void ***queues; + int i; + + queues = kmalloc_array(nrings, sizeof(*queues), gfp); + if (!queues) + goto noqueues; + + for (i = 0; i < nrings; ++i) { + queues[i] = __ptr_ring_init_queue_alloc(size, gfp); + if (!queues[i]) + goto nomem; + } + + for (i = 0; i < nrings; ++i) { + spin_lock_irqsave(&(rings[i])->consumer_lock, flags); + spin_lock(&(rings[i])->producer_lock); + queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], + size, gfp, destroy); + spin_unlock(&(rings[i])->producer_lock); + spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); + } + + for (i = 0; i < nrings; ++i) + kvfree(queues[i]); + + kfree(queues); + + return 0; + +nomem: + while (--i >= 0) + kvfree(queues[i]); + + kfree(queues); + +noqueues: + return -ENOMEM; +} + +static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) +{ + void *ptr; + + if (destroy) + while ((ptr = ptr_ring_consume(r))) + destroy(ptr); + kvfree(r->queue); +} + +#endif /* _LINUX_PTR_RING_H */ diff --git a/net/wireguard/compat/simd-asm/include/asm/simd.h b/net/wireguard/compat/simd-asm/include/asm/simd.h new file mode 100644 index 000000000000..a975b38b5578 --- /dev/null +++ b/net/wireguard/compat/simd-asm/include/asm/simd.h @@ -0,0 +1,21 @@ +#ifndef _COMPAT_ASM_SIMD_H +#define _COMPAT_ASM_SIMD_H + +#if defined(CONFIG_X86_64) +#include +#endif + +static __must_check inline bool may_use_simd(void) +{ +#if defined(CONFIG_X86_64) + return irq_fpu_usable(); +#elif defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON) + return true; +#elif defined(CONFIG_ARM) && defined(CONFIG_KERNEL_MODE_NEON) + return !in_nmi() && !in_irq() && !in_serving_softirq(); +#else + return false; +#endif +} + +#endif diff --git a/net/wireguard/compat/simd/include/linux/simd.h b/net/wireguard/compat/simd/include/linux/simd.h new file mode 100644 index 000000000000..e7f2550320c7 --- /dev/null +++ b/net/wireguard/compat/simd/include/linux/simd.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_SIMD_H +#define _WG_SIMD_H + +#include +#include +#if defined(CONFIG_X86_64) +#include +#elif defined(CONFIG_KERNEL_MODE_NEON) +#include +#endif + +typedef enum { + HAVE_NO_SIMD = 1 << 0, + HAVE_FULL_SIMD = 1 << 1, + HAVE_SIMD_IN_USE = 1 << 31 +} simd_context_t; + +#define DONT_USE_SIMD ((simd_context_t []){ HAVE_NO_SIMD }) + +static inline void simd_get(simd_context_t *ctx) +{ + *ctx = !IS_ENABLED(CONFIG_PREEMPT_RT) && !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD; +} + +static inline void simd_put(simd_context_t *ctx) +{ +#if defined(CONFIG_X86_64) + if (*ctx & HAVE_SIMD_IN_USE) + kernel_fpu_end(); +#elif defined(CONFIG_KERNEL_MODE_NEON) + if (*ctx & HAVE_SIMD_IN_USE) + kernel_neon_end(); +#endif + *ctx = HAVE_NO_SIMD; +} + +static inline bool simd_relax(simd_context_t *ctx) +{ +#ifdef CONFIG_PREEMPT + if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) { + simd_put(ctx); + simd_get(ctx); + return true; + } +#endif + return false; +} + +static __must_check inline bool simd_use(simd_context_t *ctx) +{ + if (!(*ctx & HAVE_FULL_SIMD)) + return false; + if (*ctx & HAVE_SIMD_IN_USE) + return true; +#if defined(CONFIG_X86_64) + kernel_fpu_begin(); +#elif defined(CONFIG_KERNEL_MODE_NEON) + kernel_neon_begin(); +#endif + *ctx |= HAVE_SIMD_IN_USE; + return true; +} + +#endif /* _WG_SIMD_H */ diff --git a/net/wireguard/compat/siphash/include/linux/siphash.h b/net/wireguard/compat/siphash/include/linux/siphash.h new file mode 100644 index 000000000000..3b30b3c47778 --- /dev/null +++ b/net/wireguard/compat/siphash/include/linux/siphash.h @@ -0,0 +1,134 @@ +/* Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. + */ + +#ifndef _WG_LINUX_SIPHASH_H +#define _WG_LINUX_SIPHASH_H + +#include +#include + +#define SIPHASH_ALIGNMENT __alignof__(u64) +typedef struct { + u64 key[2]; +} siphash_key_t; + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); + +u64 siphash_1u64(const u64 a, const siphash_key_t *key); +u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); +u64 siphash_3u64(const u64 a, const u64 b, const u64 c, + const siphash_key_t *key); +u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, + const siphash_key_t *key); +u64 siphash_1u32(const u32 a, const siphash_key_t *key); +u64 siphash_3u32(const u32 a, const u32 b, const u32 c, + const siphash_key_t *key); + +static inline u64 siphash_2u32(const u32 a, const u32 b, + const siphash_key_t *key) +{ + return siphash_1u64((u64)b << 32 | a, key); +} +static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, + const u32 d, const siphash_key_t *key) +{ + return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); +} + + +static inline u64 ___siphash_aligned(const __le64 *data, size_t len, + const siphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return siphash_1u32(le32_to_cpup((const __le32 *)data), key); + if (__builtin_constant_p(len) && len == 8) + return siphash_1u64(le64_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 16) + return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 24) + return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 32) + return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), le64_to_cpu(data[3]), + key); + return __siphash_aligned(data, len, key); +} + +/** + * siphash - compute 64-bit siphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the siphash key + */ +static inline u64 siphash(const void *data, size_t len, + const siphash_key_t *key) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + return __siphash_unaligned(data, len, key); + return ___siphash_aligned(data, len, key); +} + +#define HSIPHASH_ALIGNMENT __alignof__(unsigned long) +typedef struct { + unsigned long key[2]; +} hsiphash_key_t; + +u32 __hsiphash_aligned(const void *data, size_t len, + const hsiphash_key_t *key); +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key); + +u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); +u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); +u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, + const hsiphash_key_t *key); +u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, + const hsiphash_key_t *key); + +static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, + const hsiphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return hsiphash_1u32(le32_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 8) + return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 12) + return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 16) + return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), le32_to_cpu(data[3]), + key); + return __hsiphash_aligned(data, len, key); +} + +/** + * hsiphash - compute 32-bit hsiphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the hsiphash key + */ +static inline u32 hsiphash(const void *data, size_t len, + const hsiphash_key_t *key) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + return __hsiphash_unaligned(data, len, key); + return ___hsiphash_aligned(data, len, key); +} + +#endif /* _WG_LINUX_SIPHASH_H */ diff --git a/net/wireguard/compat/siphash/siphash.c b/net/wireguard/compat/siphash/siphash.c new file mode 100644 index 000000000000..7dc72cb4a710 --- /dev/null +++ b/net/wireguard/compat/siphash/siphash.c @@ -0,0 +1,539 @@ +/* Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. + */ + +#include +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#ifdef __LITTLE_ENDIAN +#define bytemask_from_count(cnt) (~(~0ul << (cnt)*8)) +#else +#define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8)) +#endif +#endif + +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 +#include +#include +#endif + +#define SIPROUND \ + do { \ + v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \ + v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \ + v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \ + v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \ + } while (0) + +#define PREAMBLE(len) \ + u64 v0 = 0x736f6d6570736575ULL; \ + u64 v1 = 0x646f72616e646f6dULL; \ + u64 v2 = 0x6c7967656e657261ULL; \ + u64 v3 = 0x7465646279746573ULL; \ + u64 b = ((u64)(len)) << 56; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define POSTAMBLE \ + v3 ^= b; \ + SIPROUND; \ + SIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + PREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = le64_to_cpup(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; + case 4: b |= le32_to_cpup(data); break; + case 3: b |= ((u64)end[2]) << 16; fallthrough; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } +#endif + POSTAMBLE +} +#endif + +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + PREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = get_unaligned_le64(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; + case 4: b |= get_unaligned_le32(end); break; + case 3: b |= ((u64)end[2]) << 16; fallthrough; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } +#endif + POSTAMBLE +} + +/** + * siphash_1u64 - compute 64-bit siphash PRF value of a u64 + * @first: first u64 + * @key: the siphash key + */ +u64 siphash_1u64(const u64 first, const siphash_key_t *key) +{ + PREAMBLE(8) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + POSTAMBLE +} + +/** + * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64 + * @first: first u64 + * @second: second u64 + * @key: the siphash key + */ +u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) +{ + PREAMBLE(16) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + POSTAMBLE +} + +/** + * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64 + * @first: first u64 + * @second: second u64 + * @third: third u64 + * @key: the siphash key + */ +u64 siphash_3u64(const u64 first, const u64 second, const u64 third, + const siphash_key_t *key) +{ + PREAMBLE(24) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + v3 ^= third; + SIPROUND; + SIPROUND; + v0 ^= third; + POSTAMBLE +} + +/** + * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64 + * @first: first u64 + * @second: second u64 + * @third: third u64 + * @forth: forth u64 + * @key: the siphash key + */ +u64 siphash_4u64(const u64 first, const u64 second, const u64 third, + const u64 forth, const siphash_key_t *key) +{ + PREAMBLE(32) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + v3 ^= third; + SIPROUND; + SIPROUND; + v0 ^= third; + v3 ^= forth; + SIPROUND; + SIPROUND; + v0 ^= forth; + POSTAMBLE +} + +u64 siphash_1u32(const u32 first, const siphash_key_t *key) +{ + PREAMBLE(4) + b |= first; + POSTAMBLE +} + +u64 siphash_3u32(const u32 first, const u32 second, const u32 third, + const siphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + PREAMBLE(12) + v3 ^= combined; + SIPROUND; + SIPROUND; + v0 ^= combined; + b |= third; + POSTAMBLE +} + +#if BITS_PER_LONG == 64 +/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for + * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3. + */ + +#define HSIPROUND SIPROUND +#define HPREAMBLE(len) PREAMBLE(len) +#define HPOSTAMBLE \ + v3 ^= b; \ + HSIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + HSIPROUND; \ + HSIPROUND; \ + HSIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = le64_to_cpup(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; + case 4: b |= le32_to_cpup(data); break; + case 3: b |= ((u64)end[2]) << 16; fallthrough; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } +#endif + HPOSTAMBLE +} +#endif + +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = get_unaligned_le64(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; fallthrough; + case 6: b |= ((u64)end[5]) << 40; fallthrough; + case 5: b |= ((u64)end[4]) << 32; fallthrough; + case 4: b |= get_unaligned_le32(end); break; + case 3: b |= ((u64)end[2]) << 16; fallthrough; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } +#endif + HPOSTAMBLE +} + +/** + * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 + * @first: first u32 + * @key: the hsiphash key + */ +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) +{ + HPREAMBLE(4) + b |= first; + HPOSTAMBLE +} + +/** + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 + * @first: first u32 + * @second: second u32 + * @key: the hsiphash key + */ +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(8) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + HPOSTAMBLE +} + +/** + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @key: the hsiphash key + */ +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, + const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(12) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + b |= third; + HPOSTAMBLE +} + +/** + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @forth: forth u32 + * @key: the hsiphash key + */ +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, + const u32 forth, const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(16) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + combined = (u64)forth << 32 | third; + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + HPOSTAMBLE +} +#else +#define HSIPROUND \ + do { \ + v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \ + v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \ + v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \ + v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \ + } while (0) + +#define HPREAMBLE(len) \ + u32 v0 = 0; \ + u32 v1 = 0; \ + u32 v2 = 0x6c796765U; \ + u32 v3 = 0x74656462U; \ + u32 b = ((u32)(len)) << 24; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define HPOSTAMBLE \ + v3 ^= b; \ + HSIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + HSIPROUND; \ + HSIPROUND; \ + HSIPROUND; \ + return v1 ^ v3; + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u32)); + const u8 left = len & (sizeof(u32) - 1); + u32 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u32)) { + m = le32_to_cpup(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } + switch (left) { + case 3: b |= ((u32)end[2]) << 16; fallthrough; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } + HPOSTAMBLE +} +#endif + +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u32)); + const u8 left = len & (sizeof(u32) - 1); + u32 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u32)) { + m = get_unaligned_le32(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } + switch (left) { + case 3: b |= ((u32)end[2]) << 16; fallthrough; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } + HPOSTAMBLE +} + +/** + * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 + * @first: first u32 + * @key: the hsiphash key + */ +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) +{ + HPREAMBLE(4) + v3 ^= first; + HSIPROUND; + v0 ^= first; + HPOSTAMBLE +} + +/** + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 + * @first: first u32 + * @second: second u32 + * @key: the hsiphash key + */ +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) +{ + HPREAMBLE(8) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + HPOSTAMBLE +} + +/** + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @key: the hsiphash key + */ +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, + const hsiphash_key_t *key) +{ + HPREAMBLE(12) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + v3 ^= third; + HSIPROUND; + v0 ^= third; + HPOSTAMBLE +} + +/** + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @forth: forth u32 + * @key: the hsiphash key + */ +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, + const u32 forth, const hsiphash_key_t *key) +{ + HPREAMBLE(16) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + v3 ^= third; + HSIPROUND; + v0 ^= third; + v3 ^= forth; + HSIPROUND; + v0 ^= forth; + HPOSTAMBLE +} +#endif diff --git a/net/wireguard/compat/skb_array/include/linux/skb_array.h b/net/wireguard/compat/skb_array/include/linux/skb_array.h new file mode 100644 index 000000000000..c91fedcdbfc6 --- /dev/null +++ b/net/wireguard/compat/skb_array/include/linux/skb_array.h @@ -0,0 +1,11 @@ +#ifndef _WG_SKB_ARRAY_H +#define _WG_SKB_ARRAY_H + +#include + +static void __skb_array_destroy_skb(void *ptr) +{ + kfree_skb(ptr); +} + +#endif diff --git a/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h new file mode 100644 index 000000000000..8999527d6952 --- /dev/null +++ b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h @@ -0,0 +1,94 @@ +#ifndef _WG_NET_UDP_TUNNEL_H +#define _WG_NET_UDP_TUNNEL_H + +#include +#include + +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#endif + +struct udp_port_cfg { + u8 family; + + /* Used only for kernel-created sockets */ + union { + struct in_addr local_ip; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr local_ip6; +#endif + }; + + union { + struct in_addr peer_ip; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr peer_ip6; +#endif + }; + + __be16 local_udp_port; + __be16 peer_udp_port; + unsigned int use_udp_checksums:1, + use_udp6_tx_checksums:1, + use_udp6_rx_checksums:1, + ipv6_v6only:1; +}; + +int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp); + +#if IS_ENABLED(CONFIG_IPV6) +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp); +#else +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + return 0; +} +#endif + +static inline int udp_sock_create(struct net *net, + struct udp_port_cfg *cfg, + struct socket **sockp) +{ + if (cfg->family == AF_INET) + return udp_sock_create4(net, cfg, sockp); + + if (cfg->family == AF_INET6) + return udp_sock_create6(net, cfg, sockp); + + return -EPFNOSUPPORT; +} + +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); + +struct udp_tunnel_sock_cfg { + void *sk_user_data; + __u8 encap_type; + udp_tunnel_encap_rcv_t encap_rcv; +}; + +/* Setup the given (UDP) sock to receive UDP encapsulated packets */ +void setup_udp_tunnel_sock(struct net *net, struct socket *sock, + struct udp_tunnel_sock_cfg *sock_cfg); + +/* Transmit the skb using UDP encapsulation. */ +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck); + +#if IS_ENABLED(CONFIG_IPV6) +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck); +#endif + +void udp_tunnel_sock_release(struct socket *sock); + +#endif /* _WG_NET_UDP_TUNNEL_H */ diff --git a/net/wireguard/compat/udp_tunnel/udp_tunnel.c b/net/wireguard/compat/udp_tunnel/udp_tunnel.c new file mode 100644 index 000000000000..d287b917be84 --- /dev/null +++ b/net/wireguard/compat/udp_tunnel/udp_tunnel.c @@ -0,0 +1,396 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) +#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) +#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr) +#endif + +/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */ +static udp_tunnel_encap_rcv_t encap_rcv = NULL; +static void __compat_sk_data_ready(struct sock *sk +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) + ,int unused_vulnerable_length_param +#endif + ) +{ + struct sk_buff *skb; + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + skb_orphan(skb); + sk_mem_reclaim(sk); + encap_rcv(sk, skb); + } +} + +int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + int err; + struct socket *sock = NULL; + struct sockaddr_in udp_addr; + + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + sk_change_net(sock->sk, net); + + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->local_ip; + udp_addr.sin_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp_addr, + sizeof(udp_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->peer_ip; + udp_addr.sin_port = cfg->peer_udp_port; + err = kernel_connect(sock, (struct sockaddr *)&udp_addr, + sizeof(udp_addr), 0); + if (err < 0) + goto error; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) + sock->sk->sk_no_check = !cfg->use_udp_checksums; +#else + sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; +#endif + + *sockp = sock; + return 0; + +error: + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); + } + *sockp = NULL; + return err; +} + +void setup_udp_tunnel_sock(struct net *net, struct socket *sock, + struct udp_tunnel_sock_cfg *cfg) +{ + inet_sk(sock->sk)->mc_loop = 0; + encap_rcv = cfg->encap_rcv; + rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data); + /* We force the cast in this awful way, due to various Android kernels + * backporting things stupidly. */ + *(void **)&sock->sk->sk_data_ready = (void *)__compat_sk_data_ready; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) +static inline __sum16 udp_v4_check(int len, __be32 saddr, + __be32 daddr, __wsum base) +{ + return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base); +} + +static void udp_set_csum(bool nocheck, struct sk_buff *skb, + __be32 saddr, __be32 daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v4_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} + +#endif + +static void __compat_fake_destructor(struct sk_buff *skb) +{ +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) +static void __compat_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) +{ + struct iphdr *iph; + struct pcpu_tstats *tstats = this_cpu_ptr(skb->dev->tstats); + + skb_scrub_packet(skb, xnet); + + skb->rxhash = 0; + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = dst; + iph->saddr = src; + iph->ttl = ttl; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 53) + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); +#else + __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); +#endif + + iptunnel_xmit(skb, skb->dev); + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes -= 8; + u64_stats_update_end(&tstats->syncp); +} +#define iptunnel_xmit __compat_iptunnel_xmit +#endif + +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) +{ + struct udphdr *uh; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) + struct net_device *dev = skb->dev; + int ret; +#endif + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + uh->len = htons(skb->len); + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + udp_set_csum(nocheck, skb, src, dst, skb->len); + + if (!skb->sk) + skb->sk = sk; + if (!skb->destructor) + skb->destructor = __compat_fake_destructor; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) + ret = +#endif + iptunnel_xmit( +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0) + sk, +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) + dev_net(dev), +#endif + rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) + , xnet +#endif + ); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0) + if (ret) + iptunnel_xmit_stats(ret - 8, &dev->stats, dev->tstats); +#endif +} + +void udp_tunnel_sock_release(struct socket *sock) +{ + rcu_assign_sk_user_data(sock->sk, NULL); + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); +} + +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + struct sockaddr_in6 udp6_addr; + int err; + struct socket *sock = NULL; + + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + sk_change_net(sock->sk, net); + + if (cfg->ipv6_v6only) { + int val = 1; + + err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, + (char *) &val, sizeof(val)); + if (err < 0) + goto error; + } + + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->peer_udp_port; + err = kernel_connect(sock, + (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr), 0); + } + if (err < 0) + goto error; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) + sock->sk->sk_no_check = !cfg->use_udp_checksums; +#else + udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums); + udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums); +#endif + + *sockp = sock; + return 0; + +error: + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); + } + *sockp = NULL; + return err; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) +static inline __sum16 udp_v6_check(int len, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __wsum base) +{ + return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base); +} +static void udp6_set_csum(bool nocheck, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v6_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} +#endif + +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck) +{ + struct udphdr *uh; + struct ipv6hdr *ip6h; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + + uh->len = htons(skb->len); + + skb_dst_set(skb, dst); + + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6_flow_hdr(ip6h, prio, label); + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + if (!skb->sk) + skb->sk = sk; + if (!skb->destructor) + skb->destructor = __compat_fake_destructor; + + ip6tunnel_xmit(skb, dev); + return 0; +} +#endif diff --git a/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h new file mode 100644 index 000000000000..0605896e902f --- /dev/null +++ b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) +#define udp_sock_create4 udp_sock_create +#define udp_sock_create6 udp_sock_create +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#include +#include +#endif +static inline void __compat_fake_destructor(struct sk_buff *skb) +{ +} +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); +struct udp_tunnel_sock_cfg { + void *sk_user_data; + __u8 encap_type; + udp_tunnel_encap_rcv_t encap_rcv; +}; +/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */ +static udp_tunnel_encap_rcv_t encap_rcv = NULL; +static void __compat_sk_data_ready(struct sock *sk) +{ + struct sk_buff *skb; + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + skb_orphan(skb); + sk_mem_reclaim(sk); + encap_rcv(sk, skb); + } +} +static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock, + struct udp_tunnel_sock_cfg *cfg) +{ + struct sock *sk = sock->sk; + inet_sk(sk)->mc_loop = 0; + encap_rcv = cfg->encap_rcv; + rcu_assign_sk_user_data(sk, cfg->sk_user_data); + sk->sk_data_ready = __compat_sk_data_ready; +} +static inline void udp_tunnel_sock_release(struct socket *sock) +{ + rcu_assign_sk_user_data(sock->sk, NULL); + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); +} +static inline int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, + struct sk_buff *skb, __be32 src, __be32 dst, + __u8 tos, __u8 ttl, __be16 df, __be16 src_port, + __be16 dst_port, bool xnet) +{ + struct udphdr *uh; + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->dest = dst_port; + uh->source = src_port; + uh->len = htons(skb->len); + udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + tos, ttl, df, xnet); +} +#if IS_ENABLED(CONFIG_IPV6) +static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, + struct sk_buff *skb, struct net_device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port) +{ + struct udphdr *uh; + struct ipv6hdr *ip6h; + struct sock *sk = sock->sk; + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->dest = dst_port; + uh->source = src_port; + uh->len = htons(skb->len); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED + | IPSKB_REROUTED); + skb_dst_set(skb, dst); + udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, + &sk->sk_v6_daddr, skb->len); + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + ip6tunnel_xmit(skb, dev); + return 0; +} +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include +#include +#include +#include +#include +#include +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; ret__ = udp_tunnel_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, i, j, k); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0) +#if IS_ENABLED(CONFIG_IPV6) +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, j, k); +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0) +static inline void __compat_fake_destructor(struct sk_buff *skb) +{ +} +#endif +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0) +#if IS_ENABLED(CONFIG_IPV6) +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0) +#endif +#else + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include +#include +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) +#include +#include +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include +#include +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, j, k, l) +#endif + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include +#include +#include +struct __compat_udp_port_cfg { + u8 family; + union { + struct in_addr local_ip; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr local_ip6; +#endif + }; + union { + struct in_addr peer_ip; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr peer_ip6; +#endif + }; + __be16 local_udp_port; + __be16 peer_udp_port; + unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1; +}; +static inline int __maybe_unused __compat_udp_sock_create(struct net *net, struct __compat_udp_port_cfg *cfg, struct socket **sockp) +{ + struct udp_port_cfg old_cfg = { + .family = cfg->family, + .local_ip = cfg->local_ip, +#if IS_ENABLED(CONFIG_IPV6) + .local_ip6 = cfg->local_ip6, +#endif + .peer_ip = cfg->peer_ip, +#if IS_ENABLED(CONFIG_IPV6) + .peer_ip6 = cfg->peer_ip6, +#endif + .local_udp_port = cfg->local_udp_port, + .peer_udp_port = cfg->peer_udp_port, + .use_udp_checksums = cfg->use_udp_checksums, + .use_udp6_tx_checksums = cfg->use_udp6_tx_checksums, + .use_udp6_rx_checksums = cfg->use_udp6_rx_checksums + }; + if (cfg->family == AF_INET) + return udp_sock_create4(net, &old_cfg, sockp); + +#if IS_ENABLED(CONFIG_IPV6) + if (cfg->family == AF_INET6) { + int ret; + int old_bindv6only; + struct net *nobns; + + if (cfg->ipv6_v6only) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) + nobns = &init_net; +#else + nobns = net; +#endif + /* Since udp_port_cfg only learned of ipv6_v6only in 4.3, we do this horrible + * hack here and set the sysctl variable temporarily to something that will + * set the right option for us in sock_create. It's super racey! */ + old_bindv6only = nobns->ipv6.sysctl.bindv6only; + nobns->ipv6.sysctl.bindv6only = 1; + } + ret = udp_sock_create6(net, &old_cfg, sockp); + if (cfg->ipv6_v6only) + nobns->ipv6.sysctl.bindv6only = old_bindv6only; + return ret; + } +#endif + return -EPFNOSUPPORT; +} +#define udp_port_cfg __compat_udp_port_cfg +#define udp_sock_create(a, b, c) __compat_udp_sock_create(a, b, c) +#endif diff --git a/net/wireguard/compat/version/linux/version.h b/net/wireguard/compat/version/linux/version.h new file mode 100644 index 000000000000..90988b37aed6 --- /dev/null +++ b/net/wireguard/compat/version/linux/version.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2021 Jason A. Donenfeld . All Rights Reserved. + */ + +#include_next +#undef KERNEL_VERSION +#define KERNEL_VERSION(a, b, c) (((a) << 24) + ((b) << 16) + (c)) +#undef LINUX_VERSION_CODE +#define LINUX_VERSION_CODE KERNEL_VERSION(COMPAT_VERSION, COMPAT_PATCHLEVEL, COMPAT_SUBLEVEL) diff --git a/net/wireguard/cookie.c b/net/wireguard/cookie.c new file mode 100644 index 000000000000..8b7d1fe0cdf4 --- /dev/null +++ b/net/wireguard/cookie.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "cookie.h" +#include "peer.h" +#include "device.h" +#include "messages.h" +#include "ratelimiter.h" +#include "timers.h" + +#include +#include + +#include +#include + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg) +{ + init_rwsem(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + checker->device = wg; +} + +enum { COOKIE_KEY_LABEL_LEN = 8 }; +static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; +static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; + +static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 pubkey[NOISE_PUBLIC_KEY_LEN], + const u8 label[COOKIE_KEY_LABEL_LEN]) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); + blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN); + blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN); + blake2s_final(&blake, key); +} + +/* Must hold peer->handshake.static_identity->lock */ +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) +{ + if (likely(checker->device->static_identity.has_identity)) { + precompute_key(checker->cookie_encryption_key, + checker->device->static_identity.static_public, + cookie_key_label); + precompute_key(checker->message_mac1_key, + checker->device->static_identity.static_public, + mac1_key_label); + } else { + memset(checker->cookie_encryption_key, 0, + NOISE_SYMMETRIC_KEY_LEN); + memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); + } +} + +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) +{ + precompute_key(peer->latest_cookie.cookie_decryption_key, + peer->handshake.remote_static, cookie_key_label); + precompute_key(peer->latest_cookie.message_mac1_key, + peer->handshake.remote_static, mac1_key_label); +} + +void wg_cookie_init(struct cookie *cookie) +{ + memset(cookie, 0, sizeof(*cookie)); + init_rwsem(&cookie->lock); +} + +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, + const u8 key[NOISE_SYMMETRIC_KEY_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac1); + blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN); +} + +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, + const u8 cookie[COOKIE_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac2); + blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); +} + +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, + struct cookie_checker *checker) +{ + struct blake2s_state state; + + if (wg_birthdate_has_expired(checker->secret_birthdate, + COOKIE_SECRET_MAX_AGE)) { + down_write(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + up_write(&checker->secret_lock); + } + + down_read(&checker->secret_lock); + + blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); + if (skb->protocol == htons(ETH_P_IP)) + blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, + sizeof(struct in_addr)); + else if (skb->protocol == htons(ETH_P_IPV6)) + blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); + blake2s_final(&state, cookie); + + up_read(&checker->secret_lock); +} + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, + struct sk_buff *skb, + bool check_cookie) +{ + struct message_macs *macs = (struct message_macs *) + (skb->data + skb->len - sizeof(*macs)); + enum cookie_mac_state ret; + u8 computed_mac[COOKIE_LEN]; + u8 cookie[COOKIE_LEN]; + + ret = INVALID_MAC; + compute_mac1(computed_mac, skb->data, skb->len, + checker->message_mac1_key); + if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_BUT_NO_COOKIE; + + if (!check_cookie) + goto out; + + make_cookie(cookie, skb, checker); + + compute_mac2(computed_mac, skb->data, skb->len, cookie); + if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; + if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) + goto out; + + ret = VALID_MAC_WITH_COOKIE; + +out: + return ret; +} + +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)message + len - sizeof(*macs)); + + down_write(&peer->latest_cookie.lock); + compute_mac1(macs->mac1, message, len, + peer->latest_cookie.message_mac1_key); + memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); + peer->latest_cookie.have_sent_mac1 = true; + up_write(&peer->latest_cookie.lock); + + down_read(&peer->latest_cookie.lock); + if (peer->latest_cookie.is_valid && + !wg_birthdate_has_expired(peer->latest_cookie.birthdate, + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) + compute_mac2(macs->mac2, message, len, + peer->latest_cookie.cookie); + else + memset(macs->mac2, 0, COOKIE_LEN); + up_read(&peer->latest_cookie.lock); +} + +void wg_cookie_message_create(struct message_handshake_cookie *dst, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)skb->data + skb->len - sizeof(*macs)); + u8 cookie[COOKIE_LEN]; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); + dst->receiver_index = index; + get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); + + make_cookie(cookie, skb, checker); + xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, + macs->mac1, COOKIE_LEN, dst->nonce, + checker->cookie_encryption_key); +} + +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL; + u8 cookie[COOKIE_LEN]; + bool ret; + + if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, + INDEX_HASHTABLE_HANDSHAKE | + INDEX_HASHTABLE_KEYPAIR, + src->receiver_index, &peer))) + return; + + down_read(&peer->latest_cookie.lock); + if (unlikely(!peer->latest_cookie.have_sent_mac1)) { + up_read(&peer->latest_cookie.lock); + goto out; + } + ret = xchacha20poly1305_decrypt( + cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), + peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, + peer->latest_cookie.cookie_decryption_key); + up_read(&peer->latest_cookie.lock); + + if (ret) { + down_write(&peer->latest_cookie.lock); + memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); + peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); + peer->latest_cookie.is_valid = true; + peer->latest_cookie.have_sent_mac1 = false; + up_write(&peer->latest_cookie.lock); + } else { + net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", + wg->dev->name); + } + +out: + wg_peer_put(peer); +} diff --git a/net/wireguard/cookie.h b/net/wireguard/cookie.h new file mode 100644 index 000000000000..c4bd61ca03f2 --- /dev/null +++ b/net/wireguard/cookie.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_COOKIE_H +#define _WG_COOKIE_H + +#include "messages.h" +#include + +struct wg_peer; + +struct cookie_checker { + u8 secret[NOISE_HASH_LEN]; + u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN]; + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; + u64 secret_birthdate; + struct rw_semaphore secret_lock; + struct wg_device *device; +}; + +struct cookie { + u64 birthdate; + bool is_valid; + u8 cookie[COOKIE_LEN]; + bool have_sent_mac1; + u8 last_mac1_sent[COOKIE_LEN]; + u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN]; + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; + struct rw_semaphore lock; +}; + +enum cookie_mac_state { + INVALID_MAC, + VALID_MAC_BUT_NO_COOKIE, + VALID_MAC_WITH_COOKIE_BUT_RATELIMITED, + VALID_MAC_WITH_COOKIE +}; + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg); +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker); +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer); +void wg_cookie_init(struct cookie *cookie); + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, + struct sk_buff *skb, + bool check_cookie); +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer); + +void wg_cookie_message_create(struct message_handshake_cookie *src, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker); +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg); + +#endif /* _WG_COOKIE_H */ diff --git a/net/wireguard/crypto/Kbuild.include b/net/wireguard/crypto/Kbuild.include new file mode 100644 index 000000000000..f2a312e96d88 --- /dev/null +++ b/net/wireguard/crypto/Kbuild.include @@ -0,0 +1,57 @@ +ifeq ($(CONFIG_X86_64)$(if $(CONFIG_UML),y,n),yn) +CONFIG_ZINC_ARCH_X86_64 := y +endif +ifeq ($(CONFIG_ARM)$(if $(CONFIG_CPU_32v3),y,n),yn) +CONFIG_ZINC_ARCH_ARM := y +endif +ifeq ($(CONFIG_ARM64),y) +CONFIG_ZINC_ARCH_ARM64 := y +endif +ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy) +CONFIG_ZINC_ARCH_MIPS := y +endif +ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy) +CONFIG_ZINC_ARCH_MIPS64 := y +endif + +zinc-y += chacha20/chacha20.o +zinc-$(CONFIG_ZINC_ARCH_X86_64) += chacha20/chacha20-x86_64.o +zinc-$(CONFIG_ZINC_ARCH_ARM) += chacha20/chacha20-arm.o chacha20/chacha20-unrolled-arm.o +zinc-$(CONFIG_ZINC_ARCH_ARM64) += chacha20/chacha20-arm64.o +zinc-$(CONFIG_ZINC_ARCH_MIPS) += chacha20/chacha20-mips.o +AFLAGS_chacha20-mips.o += -O2 # This is required to fill the branch delay slots + +zinc-y += poly1305/poly1305.o +zinc-$(CONFIG_ZINC_ARCH_X86_64) += poly1305/poly1305-x86_64.o +zinc-$(CONFIG_ZINC_ARCH_ARM) += poly1305/poly1305-arm.o +zinc-$(CONFIG_ZINC_ARCH_ARM64) += poly1305/poly1305-arm64.o +zinc-$(CONFIG_ZINC_ARCH_MIPS) += poly1305/poly1305-mips.o +AFLAGS_poly1305-mips.o += -O2 # This is required to fill the branch delay slots +zinc-$(CONFIG_ZINC_ARCH_MIPS64) += poly1305/poly1305-mips64.o + +zinc-y += chacha20poly1305.o + +zinc-y += blake2s/blake2s.o +zinc-$(CONFIG_ZINC_ARCH_X86_64) += blake2s/blake2s-x86_64.o + +zinc-y += curve25519/curve25519.o +zinc-$(CONFIG_ZINC_ARCH_ARM) += curve25519/curve25519-arm.o + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $< > $@ +$(obj)/%.S: $(src)/%.pl FORCE + $(call if_changed,perlasm) +kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) +targets := $(patsubst $(kbuild-dir)/%.pl,%.S,$(wildcard $(patsubst %.o,$(kbuild-dir)/crypto/zinc/%.pl,$(zinc-y) $(zinc-m) $(zinc-)))) + +# Old kernels don't set this, which causes trouble. +.SECONDARY: + +wireguard-y += $(addprefix crypto/zinc/,$(zinc-y)) +ccflags-y += -I$(kbuild-dir)/crypto/include +ccflags-$(CONFIG_ZINC_ARCH_X86_64) += -DCONFIG_ZINC_ARCH_X86_64 +ccflags-$(CONFIG_ZINC_ARCH_ARM) += -DCONFIG_ZINC_ARCH_ARM +ccflags-$(CONFIG_ZINC_ARCH_ARM64) += -DCONFIG_ZINC_ARCH_ARM64 +ccflags-$(CONFIG_ZINC_ARCH_MIPS) += -DCONFIG_ZINC_ARCH_MIPS +ccflags-$(CONFIG_ZINC_ARCH_MIPS64) += -DCONFIG_ZINC_ARCH_MIPS64 +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DCONFIG_ZINC_SELFTEST diff --git a/net/wireguard/crypto/include/zinc/blake2s.h b/net/wireguard/crypto/include/zinc/blake2s.h new file mode 100644 index 000000000000..2ca0bc30750d --- /dev/null +++ b/net/wireguard/crypto/include/zinc/blake2s.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _ZINC_BLAKE2S_H +#define _ZINC_BLAKE2S_H + +#include +#include +#include + +enum blake2s_lengths { + BLAKE2S_BLOCK_SIZE = 64, + BLAKE2S_HASH_SIZE = 32, + BLAKE2S_KEY_SIZE = 32 +}; + +struct blake2s_state { + u32 h[8]; + u32 t[2]; + u32 f[2]; + u8 buf[BLAKE2S_BLOCK_SIZE]; + unsigned int buflen; + unsigned int outlen; +}; + +void blake2s_init(struct blake2s_state *state, const size_t outlen); +void blake2s_init_key(struct blake2s_state *state, const size_t outlen, + const void *key, const size_t keylen); +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen); +void blake2s_final(struct blake2s_state *state, u8 *out); + +static inline void blake2s(u8 *out, const u8 *in, const u8 *key, + const size_t outlen, const size_t inlen, + const size_t keylen) +{ + struct blake2s_state state; + + WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen || + outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE || + (!key && keylen))); + + if (keylen) + blake2s_init_key(&state, outlen, key, keylen); + else + blake2s_init(&state, outlen); + + blake2s_update(&state, in, inlen); + blake2s_final(&state, out); +} + +void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen, + const size_t inlen, const size_t keylen); + +#endif /* _ZINC_BLAKE2S_H */ diff --git a/net/wireguard/crypto/include/zinc/chacha20.h b/net/wireguard/crypto/include/zinc/chacha20.h new file mode 100644 index 000000000000..1b0083d871fb --- /dev/null +++ b/net/wireguard/crypto/include/zinc/chacha20.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _ZINC_CHACHA20_H +#define _ZINC_CHACHA20_H + +#include +#include +#include +#include + +enum chacha20_lengths { + CHACHA20_NONCE_SIZE = 16, + CHACHA20_KEY_SIZE = 32, + CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32), + CHACHA20_BLOCK_SIZE = 64, + CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32), + HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE, + HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE +}; + +enum chacha20_constants { /* expand 32-byte k */ + CHACHA20_CONSTANT_EXPA = 0x61707865U, + CHACHA20_CONSTANT_ND_3 = 0x3320646eU, + CHACHA20_CONSTANT_2_BY = 0x79622d32U, + CHACHA20_CONSTANT_TE_K = 0x6b206574U +}; + +struct chacha20_ctx { + union { + u32 state[16]; + struct { + u32 constant[4]; + u32 key[8]; + u32 counter[4]; + }; + }; +}; + +static inline void chacha20_init(struct chacha20_ctx *ctx, + const u8 key[CHACHA20_KEY_SIZE], + const u64 nonce) +{ + ctx->constant[0] = CHACHA20_CONSTANT_EXPA; + ctx->constant[1] = CHACHA20_CONSTANT_ND_3; + ctx->constant[2] = CHACHA20_CONSTANT_2_BY; + ctx->constant[3] = CHACHA20_CONSTANT_TE_K; + ctx->key[0] = get_unaligned_le32(key + 0); + ctx->key[1] = get_unaligned_le32(key + 4); + ctx->key[2] = get_unaligned_le32(key + 8); + ctx->key[3] = get_unaligned_le32(key + 12); + ctx->key[4] = get_unaligned_le32(key + 16); + ctx->key[5] = get_unaligned_le32(key + 20); + ctx->key[6] = get_unaligned_le32(key + 24); + ctx->key[7] = get_unaligned_le32(key + 28); + ctx->counter[0] = 0; + ctx->counter[1] = 0; + ctx->counter[2] = nonce & U32_MAX; + ctx->counter[3] = nonce >> 32; +} +void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len, + simd_context_t *simd_context); + +void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context); + +#endif /* _ZINC_CHACHA20_H */ diff --git a/net/wireguard/crypto/include/zinc/chacha20poly1305.h b/net/wireguard/crypto/include/zinc/chacha20poly1305.h new file mode 100644 index 000000000000..e3339f02996e --- /dev/null +++ b/net/wireguard/crypto/include/zinc/chacha20poly1305.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _ZINC_CHACHA20POLY1305_H +#define _ZINC_CHACHA20POLY1305_H + +#include +#include + +struct scatterlist; + +enum chacha20poly1305_lengths { + XCHACHA20POLY1305_NONCE_SIZE = 24, + CHACHA20POLY1305_KEY_SIZE = 32, + CHACHA20POLY1305_AUTHTAG_SIZE = 16 +}; + +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE]); + +bool __must_check chacha20poly1305_encrypt_sg_inplace( + struct scatterlist *src, const size_t src_len, const u8 *ad, + const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context); + +bool __must_check +chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE]); + +bool __must_check chacha20poly1305_decrypt_sg_inplace( + struct scatterlist *src, size_t src_len, const u8 *ad, + const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context); + +void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], + const u8 key[CHACHA20POLY1305_KEY_SIZE]); + +bool __must_check xchacha20poly1305_decrypt( + u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, + const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], + const u8 key[CHACHA20POLY1305_KEY_SIZE]); + +#endif /* _ZINC_CHACHA20POLY1305_H */ diff --git a/net/wireguard/crypto/include/zinc/curve25519.h b/net/wireguard/crypto/include/zinc/curve25519.h new file mode 100644 index 000000000000..127d8a3a1c82 --- /dev/null +++ b/net/wireguard/crypto/include/zinc/curve25519.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _ZINC_CURVE25519_H +#define _ZINC_CURVE25519_H + +#include + +enum curve25519_lengths { + CURVE25519_KEY_SIZE = 32 +}; + +bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); +void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]); +bool __must_check curve25519_generate_public( + u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]); + +static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE]) +{ + secret[0] &= 248; + secret[31] = (secret[31] & 127) | 64; +} + +#endif /* _ZINC_CURVE25519_H */ diff --git a/net/wireguard/crypto/include/zinc/poly1305.h b/net/wireguard/crypto/include/zinc/poly1305.h new file mode 100644 index 000000000000..8a16d19f8177 --- /dev/null +++ b/net/wireguard/crypto/include/zinc/poly1305.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _ZINC_POLY1305_H +#define _ZINC_POLY1305_H + +#include +#include + +enum poly1305_lengths { + POLY1305_BLOCK_SIZE = 16, + POLY1305_KEY_SIZE = 32, + POLY1305_MAC_SIZE = 16 +}; + +struct poly1305_ctx { + u8 opaque[24 * sizeof(u64)]; + u32 nonce[4]; + u8 data[POLY1305_BLOCK_SIZE]; + size_t num; +} __aligned(8); + +void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]); +void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len, + simd_context_t *simd_context); +void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], + simd_context_t *simd_context); + +#endif /* _ZINC_POLY1305_H */ diff --git a/net/wireguard/crypto/zinc.h b/net/wireguard/crypto/zinc.h new file mode 100644 index 000000000000..9aa1e8d59bf5 --- /dev/null +++ b/net/wireguard/crypto/zinc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_ZINC_H +#define _WG_ZINC_H + +int chacha20_mod_init(void); +int poly1305_mod_init(void); +int chacha20poly1305_mod_init(void); +int blake2s_mod_init(void); +int curve25519_mod_init(void); + +#endif diff --git a/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64-glue.c b/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64-glue.c new file mode 100644 index 000000000000..f8cda59bf297 --- /dev/null +++ b/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64-glue.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static bool blake2s_use_ssse3 __ro_after_init; +static bool blake2s_use_avx512 __ro_after_init; +static bool *const blake2s_nobs[] __initconst = { &blake2s_use_ssse3, + &blake2s_use_avx512 }; + +static void __init blake2s_fpu_init(void) +{ + blake2s_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); +#ifndef COMPAT_CANNOT_USE_AVX512 + blake2s_use_avx512 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL); +#endif +} + +static inline bool blake2s_compress_arch(struct blake2s_state *state, + const u8 *block, size_t nblocks, + const u32 inc) +{ + simd_context_t simd_context; + bool used_arch = false; + + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); + + simd_get(&simd_context); + + if (!IS_ENABLED(CONFIG_AS_SSSE3) || !blake2s_use_ssse3 || + !simd_use(&simd_context)) + goto out; + used_arch = true; + + for (;;) { + const size_t blocks = min_t(size_t, nblocks, + PAGE_SIZE / BLAKE2S_BLOCK_SIZE); + + if (IS_ENABLED(CONFIG_AS_AVX512) && blake2s_use_avx512) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + + nblocks -= blocks; + if (!nblocks) + break; + block += blocks * BLAKE2S_BLOCK_SIZE; + simd_relax(&simd_context); + } +out: + simd_put(&simd_context); + return used_arch; +} diff --git a/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64.S b/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64.S new file mode 100644 index 000000000000..24910b766bdd --- /dev/null +++ b/net/wireguard/crypto/zinc/blake2s/blake2s-x86_64.S @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * Copyright (C) 2017-2019 Samuel Neves . All Rights Reserved. + */ + +#include + +.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 +.align 32 +IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 + .octa 0x5BE0CD191F83D9AB9B05688C510E527F +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 +ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 +.section .rodata.cst16.ROR328, "aM", @progbits, 16 +.align 16 +ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 +.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 +.align 64 +SIGMA: +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 +.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 +.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 +.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 +.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 +.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 +.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 +.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 +.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 +#ifdef CONFIG_AS_AVX512 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.align 64 +SIGMA2: +.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 +#endif /* CONFIG_AS_AVX512 */ + +.text +#ifdef CONFIG_AS_SSSE3 +SYM_FUNC_START(blake2s_compress_ssse3) + testq %rdx,%rdx + je .Lendofloop + movdqu (%rdi),%xmm0 + movdqu 0x10(%rdi),%xmm1 + movdqa ROT16(%rip),%xmm12 + movdqa ROR328(%rip),%xmm13 + movdqu 0x20(%rdi),%xmm14 + movq %rcx,%xmm15 + leaq SIGMA+0xa0(%rip),%r8 + jmp .Lbeginofloop + .align 32 +.Lbeginofloop: + movdqa %xmm0,%xmm10 + movdqa %xmm1,%xmm11 + paddq %xmm15,%xmm14 + movdqa IV(%rip),%xmm2 + movdqa %xmm14,%xmm3 + pxor IV+0x10(%rip),%xmm3 + leaq SIGMA(%rip),%rcx +.Lroundloop: + movzbl (%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0x1(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x2(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x3(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + punpckldq %xmm5,%xmm4 + punpckldq %xmm7,%xmm6 + punpcklqdq %xmm6,%xmm4 + paddd %xmm4,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0x4(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0x5(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x6(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0x7(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + punpckldq %xmm6,%xmm5 + punpckldq %xmm4,%xmm7 + punpcklqdq %xmm7,%xmm5 + paddd %xmm5,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x93,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x39,%xmm2,%xmm2 + movzbl 0x8(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + movzbl 0x9(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xa(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xb(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + punpckldq %xmm7,%xmm6 + punpckldq %xmm5,%xmm4 + punpcklqdq %xmm4,%xmm6 + paddd %xmm6,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm12,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0xc,%xmm1 + pslld $0x14,%xmm8 + por %xmm8,%xmm1 + movzbl 0xc(%rcx),%eax + movd (%rsi,%rax,4),%xmm7 + movzbl 0xd(%rcx),%eax + movd (%rsi,%rax,4),%xmm4 + movzbl 0xe(%rcx),%eax + movd (%rsi,%rax,4),%xmm5 + movzbl 0xf(%rcx),%eax + movd (%rsi,%rax,4),%xmm6 + punpckldq %xmm4,%xmm7 + punpckldq %xmm6,%xmm5 + punpcklqdq %xmm5,%xmm7 + paddd %xmm7,%xmm0 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 + pshufb %xmm13,%xmm3 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm8 + psrld $0x7,%xmm1 + pslld $0x19,%xmm8 + por %xmm8,%xmm1 + pshufd $0x39,%xmm0,%xmm0 + pshufd $0x4e,%xmm3,%xmm3 + pshufd $0x93,%xmm2,%xmm2 + addq $0x10,%rcx + cmpq %r8,%rcx + jnz .Lroundloop + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm1 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm1 + addq $0x40,%rsi + decq %rdx + jnz .Lbeginofloop + movdqu %xmm0,(%rdi) + movdqu %xmm1,0x10(%rdi) + movdqu %xmm14,0x20(%rdi) +.Lendofloop: + ret +SYM_FUNC_END(blake2s_compress_ssse3) +#endif /* CONFIG_AS_SSSE3 */ + +#ifdef CONFIG_AS_AVX512 +SYM_FUNC_START(blake2s_compress_avx512) + vmovdqu (%rdi),%xmm0 + vmovdqu 0x10(%rdi),%xmm1 + vmovdqu 0x20(%rdi),%xmm4 + vmovq %rcx,%xmm5 + vmovdqa IV(%rip),%xmm14 + vmovdqa IV+16(%rip),%xmm15 + jmp .Lblake2s_compress_avx512_mainloop +.align 32 +.Lblake2s_compress_avx512_mainloop: + vmovdqa %xmm0,%xmm10 + vmovdqa %xmm1,%xmm11 + vpaddq %xmm5,%xmm4,%xmm4 + vmovdqa %xmm14,%xmm2 + vpxor %xmm15,%xmm4,%xmm3 + vmovdqu (%rsi),%ymm6 + vmovdqu 0x20(%rsi),%ymm7 + addq $0x40,%rsi + leaq SIGMA2(%rip),%rax + movb $0xa,%cl +.Lblake2s_compress_avx512_roundloop: + addq $0x40,%rax + vmovdqa -0x40(%rax),%ymm8 + vmovdqa -0x20(%rax),%ymm9 + vpermi2d %ymm7,%ymm6,%ymm8 + vpermi2d %ymm7,%ymm6,%ymm9 + vmovdqa %ymm8,%ymm6 + vmovdqa %ymm9,%ymm7 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm8,%xmm8 + vpaddd %xmm8,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x93,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x39,%xmm2,%xmm2 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x10,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0xc,%xmm1,%xmm1 + vextracti128 $0x1,%ymm9,%xmm9 + vpaddd %xmm9,%xmm0,%xmm0 + vpaddd %xmm1,%xmm0,%xmm0 + vpxor %xmm0,%xmm3,%xmm3 + vprord $0x8,%xmm3,%xmm3 + vpaddd %xmm3,%xmm2,%xmm2 + vpxor %xmm2,%xmm1,%xmm1 + vprord $0x7,%xmm1,%xmm1 + vpshufd $0x39,%xmm0,%xmm0 + vpshufd $0x4e,%xmm3,%xmm3 + vpshufd $0x93,%xmm2,%xmm2 + decb %cl + jne .Lblake2s_compress_avx512_roundloop + vpxor %xmm10,%xmm0,%xmm0 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm2,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + decq %rdx + jne .Lblake2s_compress_avx512_mainloop + vmovdqu %xmm0,(%rdi) + vmovdqu %xmm1,0x10(%rdi) + vmovdqu %xmm4,0x20(%rdi) + vzeroupper + retq +SYM_FUNC_END(blake2s_compress_avx512) +#endif /* CONFIG_AS_AVX512 */ diff --git a/net/wireguard/crypto/zinc/blake2s/blake2s.c b/net/wireguard/crypto/zinc/blake2s/blake2s.c new file mode 100644 index 000000000000..a9b8a8d95998 --- /dev/null +++ b/net/wireguard/crypto/zinc/blake2s/blake2s.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is an implementation of the BLAKE2s hash and PRF functions. + * + * Information: https://blake2.net/ + * + */ + +#include +#include "../selftest/run.h" + +#include +#include +#include +#include +#include +#include +#include + +static const u32 blake2s_iv[8] = { + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const u8 blake2s_sigma[10][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, +}; + +static inline void blake2s_set_lastblock(struct blake2s_state *state) +{ + state->f[0] = -1; +} + +static inline void blake2s_increment_counter(struct blake2s_state *state, + const u32 inc) +{ + state->t[0] += inc; + state->t[1] += (state->t[0] < inc); +} + +static inline void blake2s_init_param(struct blake2s_state *state, + const u32 param) +{ + int i; + + memset(state, 0, sizeof(*state)); + for (i = 0; i < 8; ++i) + state->h[i] = blake2s_iv[i]; + state->h[0] ^= param; +} + +void blake2s_init(struct blake2s_state *state, const size_t outlen) +{ + WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE)); + blake2s_init_param(state, 0x01010000 | outlen); + state->outlen = outlen; +} + +void blake2s_init_key(struct blake2s_state *state, const size_t outlen, + const void *key, const size_t keylen) +{ + u8 block[BLAKE2S_BLOCK_SIZE] = { 0 }; + + WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE || + !key || !keylen || keylen > BLAKE2S_KEY_SIZE)); + blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); + state->outlen = outlen; + memcpy(block, key, keylen); + blake2s_update(state, block, BLAKE2S_BLOCK_SIZE); + memzero_explicit(block, BLAKE2S_BLOCK_SIZE); +} + +#if defined(CONFIG_ZINC_ARCH_X86_64) +#include "blake2s-x86_64-glue.c" +#else +static bool *const blake2s_nobs[] __initconst = { }; +static void __init blake2s_fpu_init(void) +{ +} +static inline bool blake2s_compress_arch(struct blake2s_state *state, + const u8 *block, size_t nblocks, + const u32 inc) +{ + return false; +} +#endif + +static inline void blake2s_compress(struct blake2s_state *state, + const u8 *block, size_t nblocks, + const u32 inc) +{ + u32 m[16]; + u32 v[16]; + int i; + + WARN_ON(IS_ENABLED(DEBUG) && + (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); + + if (blake2s_compress_arch(state, block, nblocks, inc)) + return; + + while (nblocks > 0) { + blake2s_increment_counter(state, inc); + memcpy(m, block, BLAKE2S_BLOCK_SIZE); + le32_to_cpu_array(m, ARRAY_SIZE(m)); + memcpy(v, state->h, 32); + v[ 8] = blake2s_iv[0]; + v[ 9] = blake2s_iv[1]; + v[10] = blake2s_iv[2]; + v[11] = blake2s_iv[3]; + v[12] = blake2s_iv[4] ^ state->t[0]; + v[13] = blake2s_iv[5] ^ state->t[1]; + v[14] = blake2s_iv[6] ^ state->f[0]; + v[15] = blake2s_iv[7] ^ state->f[1]; + +#define G(r, i, a, b, c, d) do { \ + a += b + m[blake2s_sigma[r][2 * i + 0]]; \ + d = ror32(d ^ a, 16); \ + c += d; \ + b = ror32(b ^ c, 12); \ + a += b + m[blake2s_sigma[r][2 * i + 1]]; \ + d = ror32(d ^ a, 8); \ + c += d; \ + b = ror32(b ^ c, 7); \ +} while (0) + +#define ROUND(r) do { \ + G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ + G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ + G(r, 2, v[2], v[ 6], v[10], v[14]); \ + G(r, 3, v[3], v[ 7], v[11], v[15]); \ + G(r, 4, v[0], v[ 5], v[10], v[15]); \ + G(r, 5, v[1], v[ 6], v[11], v[12]); \ + G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ + G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ +} while (0) + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + +#undef G +#undef ROUND + + for (i = 0; i < 8; ++i) + state->h[i] ^= v[i] ^ v[i + 8]; + + block += BLAKE2S_BLOCK_SIZE; + --nblocks; + } +} + +void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) +{ + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2S_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); + /* Hash one less (full) block than strictly possible */ + blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; +} + +void blake2s_final(struct blake2s_state *state, u8 *out) +{ + WARN_ON(IS_ENABLED(DEBUG) && !out); + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ + blake2s_compress(state, state->buf, 1, state->buflen); + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + memcpy(out, state->h, state->outlen); + memzero_explicit(state, sizeof(*state)); +} + +void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen, + const size_t inlen, const size_t keylen) +{ + struct blake2s_state state; + u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; + u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); + int i; + + if (keylen > BLAKE2S_BLOCK_SIZE) { + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, key, keylen); + blake2s_final(&state, x_key); + } else + memcpy(x_key, key, keylen); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, in, inlen); + blake2s_final(&state, i_hash); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x5c ^ 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); + blake2s_final(&state, i_hash); + + memcpy(out, i_hash, outlen); + memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); + memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); +} + +#include "../selftest/blake2s.c" + +static bool nosimd __initdata = false; + +#ifndef COMPAT_ZINC_IS_A_MODULE +int __init blake2s_mod_init(void) +#else +static int __init mod_init(void) +#endif +{ + if (!nosimd) + blake2s_fpu_init(); + if (!selftest_run("blake2s", blake2s_selftest, blake2s_nobs, + ARRAY_SIZE(blake2s_nobs))) + return -ENOTRECOVERABLE; + return 0; +} + +#ifdef COMPAT_ZINC_IS_A_MODULE +static void __exit mod_exit(void) +{ +} + +module_param(nosimd, bool, 0); +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("BLAKE2s hash function"); +MODULE_AUTHOR("Jason A. Donenfeld "); +#endif diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-arm-glue.c b/net/wireguard/crypto/zinc/chacha20/chacha20-arm-glue.c new file mode 100644 index 000000000000..41e2e79abb2b --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-arm-glue.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#if defined(CONFIG_ZINC_ARCH_ARM) +#include +#include +#endif + +asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +asmlinkage void hchacha20_arm(const u32 state[16], u32 out[8]); +asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); + +static bool chacha20_use_neon __ro_after_init; +static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon }; +static void __init chacha20_fpu_init(void) +{ +#if defined(CONFIG_ZINC_ARCH_ARM64) + chacha20_use_neon = cpu_have_named_feature(ASIMD); +#elif defined(CONFIG_ZINC_ARCH_ARM) + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* The Cortex-A7 and Cortex-A5 do not perform well with the NEON + * implementation but do incredibly with the scalar one and use + * less power. + */ + break; + default: + chacha20_use_neon = elf_hwcap & HWCAP_NEON; + } +#endif +} + +static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst, + const u8 *src, size_t len, + simd_context_t *simd_context) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE || + PAGE_SIZE % CHACHA20_BLOCK_SIZE); + + for (;;) { + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon && + len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) { + const size_t bytes = min_t(size_t, len, PAGE_SIZE); + + chacha20_neon(dst, src, bytes, ctx->key, ctx->counter); + ctx->counter[0] += (bytes + 63) / 64; + len -= bytes; + if (!len) + break; + dst += bytes; + src += bytes; + simd_relax(simd_context); + } else { + chacha20_arm(dst, src, len, ctx->key, ctx->counter); + ctx->counter[0] += (len + 63) / 64; + break; + } + } + + return true; +} + +static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], + simd_context_t *simd_context) +{ + if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM)) { + u32 x[] = { CHACHA20_CONSTANT_EXPA, + CHACHA20_CONSTANT_ND_3, + CHACHA20_CONSTANT_2_BY, + CHACHA20_CONSTANT_TE_K, + get_unaligned_le32(key + 0), + get_unaligned_le32(key + 4), + get_unaligned_le32(key + 8), + get_unaligned_le32(key + 12), + get_unaligned_le32(key + 16), + get_unaligned_le32(key + 20), + get_unaligned_le32(key + 24), + get_unaligned_le32(key + 28), + get_unaligned_le32(nonce + 0), + get_unaligned_le32(nonce + 4), + get_unaligned_le32(nonce + 8), + get_unaligned_le32(nonce + 12) + }; + hchacha20_arm(x, derived_key); + return true; + } + return false; +} diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-arm.pl b/net/wireguard/crypto/zinc/chacha20/chacha20-arm.pl new file mode 100644 index 000000000000..6785383ab7bb --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-arm.pl @@ -0,0 +1,1227 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# December 2014 +# +# ChaCha20 for ARMv4. +# +# September 2018 +# +# Improve scalar performance per Eric Biggers' suggestion to eliminate +# separate rotates. This requires b[0..3] and d[0..3] to be maintained +# pre-rotated, hence odd twists prior inner loop and when accumulating +# key material. Since amount of instructions is reduced as result, even +# NEON performance is improved somewhat, most notably by ~9% on low-end +# Cortex-A5/A7. Full unroll was shown to provide even better scalar +# performance on Cortex-A5/A7, naturally at the cost of manyfold size +# increase. We let it be. Oversized code works in benchmarks, but is not +# necessarily optimal in real life, when it's likely to be out-of-cache +# upon entry and evict significant part of cache upon completion. +# +# Performance in cycles per byte out of large buffer. +# +# IALU/gcc-4.4 1xNEON 3xNEON+1xIALU +# +# Cortex-A5 14.2(*)/+160% 21.8 12.9(**) +# Cortex-A8 10.2(*)/+190% 13.9 6.10 +# Cortex-A9 10.8(*)/+150% 14.3 6.50 +# Cortex-A15 11.0/+40% 16.0 4.90 +# Snapdragon S4 13.9(***)/+90% 13.6 4.90 +# +# (*) most "favourable" result for aligned data on little-endian +# processor, result for misaligned data is 10-15% lower; +# (**) pure 4xNEON [with "vertical" layout] was shown to provide ~8% +# better performance on Cortex-A5/A7, but not on others; +# (***) it's 17% slower than original, trade-off is considered +# acceptable, because of improvement on others, specifically +# +36% on Cortex-A5/A7 and +20% on Cortex-A9; + +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +my @x=map("r$_",(0..7,"x","x","x","x",12,"x",14,"x")); +my @t=map("r$_",(8..11)); + +sub ROUND { +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); +my $odd = $d0&1; +my ($xc,$xc_) = (@t[0..1]); +my ($xd,$xd_) = $odd ? (@t[2],@x[$d1]) : (@x[$d0],@t[2]); +my @ret; + + # Consider order in which variables are addressed by their + # index: + # + # a b c d + # + # 0 4 8 12 < even round + # 1 5 9 13 + # 2 6 10 14 + # 3 7 11 15 + # 0 5 10 15 < odd round + # 1 6 11 12 + # 2 7 8 13 + # 3 4 9 14 + # + # 'a', 'b' are permanently allocated in registers, @x[0..7], + # while 'c's and pair of 'd's are maintained in memory. If + # you observe 'c' column, you'll notice that pair of 'c's is + # invariant between rounds. This means that we have to reload + # them once per round, in the middle. This is why you'll see + # bunch of 'c' stores and loads in the middle, but none in + # the beginning or end. If you observe 'd' column, you'll + # notice that 15 and 13 are reused in next pair of rounds. + # This is why these two are chosen for offloading to memory, + # to make loads count more. + push @ret,( + "&add (@x[$a0],@x[$a0],@x[$b0],'ror#13')", + "&add (@x[$a1],@x[$a1],@x[$b1],'ror#13')", + "&eor ($xd,@x[$a0],$xd,'ror#24')", + "&eor ($xd_,@x[$a1],$xd_,'ror#24')", + + "&add ($xc,$xc,$xd,'ror#16')", + "&add ($xc_,$xc_,$xd_,'ror#16')", + "&eor (@x[$b0],$xc, @x[$b0],'ror#13')", + "&eor (@x[$b1],$xc_,@x[$b1],'ror#13')", + + "&add (@x[$a0],@x[$a0],@x[$b0],'ror#20')", + "&add (@x[$a1],@x[$a1],@x[$b1],'ror#20')", + "&eor ($xd,@x[$a0],$xd,'ror#16')", + "&eor ($xd_,@x[$a1],$xd_,'ror#16')" ); + push @ret,( + "&str ($xd,'[sp,#4*(16+$d0)]')" ) if ($odd); + push @ret,( + "&add ($xc,$xc,$xd,'ror#24')" ); + push @ret,( + "&ldr ($xd,'[sp,#4*(16+$d2)]')" ) if ($odd); + push @ret,( + "&str ($xd_,'[sp,#4*(16+$d1)]')" ) if (!$odd); + push @ret,( + "&add ($xc_,$xc_,$xd_,'ror#24')" ); + push @ret,( + "&ldr ($xd_,'[sp,#4*(16+$d3)]')" ) if (!$odd); + push @ret,( + "&str ($xc,'[sp,#4*(16+$c0)]')", + "&eor (@x[$b0],@x[$b0],$xc,'ror#12')", + "&str ($xc_,'[sp,#4*(16+$c1)]')", + "&eor (@x[$b1],@x[$b1],$xc_,'ror#12')" ); + + $xd=@x[$d2] if (!$odd); + $xd_=@x[$d3] if ($odd); + push @ret,( + "&ldr ($xc,'[sp,#4*(16+$c2)]')", + "&add (@x[$a2],@x[$a2],@x[$b2],'ror#13')", + "&ldr ($xc_,'[sp,#4*(16+$c3)]')", + "&add (@x[$a3],@x[$a3],@x[$b3],'ror#13')", + "&eor ($xd,@x[$a2],$xd,'ror#24')", + "&eor ($xd_,@x[$a3],$xd_,'ror#24')", + + "&add ($xc,$xc,$xd,'ror#16')", + "&add ($xc_,$xc_,$xd_,'ror#16')", + "&eor (@x[$b2],$xc, @x[$b2],'ror#13')", + "&eor (@x[$b3],$xc_,@x[$b3],'ror#13')", + + "&add (@x[$a2],@x[$a2],@x[$b2],'ror#20')", + "&add (@x[$a3],@x[$a3],@x[$b3],'ror#20')", + "&eor ($xd,@x[$a2],$xd,'ror#16')", + "&eor ($xd_,@x[$a3],$xd_,'ror#16')", + + "&add ($xc,$xc,$xd,'ror#24')", + "&add ($xc_,$xc_,$xd_,'ror#24')", + "&eor (@x[$b2],@x[$b2],$xc,'ror#12')", + "&eor (@x[$b3],@x[$b3],$xc_,'ror#12')" ); + + @ret; +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define ChaCha20_ctr32 chacha20_arm_cryptogams +# define ChaCha20_neon chacha20_neon +#endif + +.text +#if defined(__thumb2__) || defined(__clang__) +.syntax unified +# define ldrhsb ldrbhs +#endif +#if defined(__thumb2__) +.thumb +#else +.code 32 +#endif + +.align 5 +.Lsigma: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral +.Lone: +.long 1,0,0,0 +.Lrot8: +.long 0x02010003,0x06050407 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.LChaCha20_ctr32 +#else +.word -1 +#endif + +.globl ChaCha20_ctr32 +.type ChaCha20_ctr32,%function +.align 5 +ChaCha20_ctr32: +.LChaCha20_ctr32: + ldr r12,[sp,#0] @ pull pointer to counter and nonce + stmdb sp!,{r0-r2,r4-r11,lr} +#if __ARM_ARCH__<7 && !defined(__thumb2__) + sub r14,pc,#16 @ ChaCha20_ctr32 +#else + adr r14,.LChaCha20_ctr32 +#endif + cmp r2,#0 @ len==0? +#ifdef __thumb2__ + itt eq +#endif + addeq sp,sp,#4*3 + beq .Lno_data +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + cmp r2,#192 @ test len + bls .Lshort + ldr r4,[r14,#-24] + ldr r4,[r14,r4] +# ifdef __APPLE__ + ldr r4,[r4] +# endif + tst r4,#ARMV7_NEON + bne .LChaCha20_neon +.Lshort: +#endif + ldmia r12,{r4-r7} @ load counter and nonce + sub sp,sp,#4*(16) @ off-load area + sub r14,r14,#64 @ .Lsigma + stmdb sp!,{r4-r7} @ copy counter and nonce + ldmia r3,{r4-r11} @ load key + ldmia r14,{r0-r3} @ load sigma + stmdb sp!,{r4-r11} @ copy key + stmdb sp!,{r0-r3} @ copy sigma + str r10,[sp,#4*(16+10)] @ off-load "@x[10]" + str r11,[sp,#4*(16+11)] @ off-load "@x[11]" + b .Loop_outer_enter + +.align 4 +.Loop_outer: + ldmia sp,{r0-r9} @ load key material + str @t[3],[sp,#4*(32+2)] @ save len + str r12, [sp,#4*(32+1)] @ save inp + str r14, [sp,#4*(32+0)] @ save out +.Loop_outer_enter: + ldr @t[3], [sp,#4*(15)] + mov @x[4],@x[4],ror#19 @ twist b[0..3] + ldr @x[12],[sp,#4*(12)] @ modulo-scheduled load + mov @x[5],@x[5],ror#19 + ldr @t[2], [sp,#4*(13)] + mov @x[6],@x[6],ror#19 + ldr @x[14],[sp,#4*(14)] + mov @x[7],@x[7],ror#19 + mov @t[3],@t[3],ror#8 @ twist d[0..3] + mov @x[12],@x[12],ror#8 + mov @t[2],@t[2],ror#8 + mov @x[14],@x[14],ror#8 + str @t[3], [sp,#4*(16+15)] + mov @t[3],#10 + b .Loop + +.align 4 +.Loop: + subs @t[3],@t[3],#1 +___ + foreach (&ROUND(0, 4, 8,12)) { eval; } + foreach (&ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + bne .Loop + + ldr @t[3],[sp,#4*(32+2)] @ load len + + str @t[0], [sp,#4*(16+8)] @ modulo-scheduled store + str @t[1], [sp,#4*(16+9)] + str @x[12],[sp,#4*(16+12)] + str @t[2], [sp,#4*(16+13)] + str @x[14],[sp,#4*(16+14)] + + @ at this point we have first half of 512-bit result in + @ @x[0-7] and second half at sp+4*(16+8) + + cmp @t[3],#64 @ done yet? +#ifdef __thumb2__ + itete lo +#endif + addlo r12,sp,#4*(0) @ shortcut or ... + ldrhs r12,[sp,#4*(32+1)] @ ... load inp + addlo r14,sp,#4*(0) @ shortcut or ... + ldrhs r14,[sp,#4*(32+0)] @ ... load out + + ldr @t[0],[sp,#4*(0)] @ load key material + ldr @t[1],[sp,#4*(1)] + +#if __ARM_ARCH__>=6 || !defined(__ARMEB__) +# if __ARM_ARCH__<7 + orr @t[2],r12,r14 + tst @t[2],#3 @ are input and output aligned? + ldr @t[2],[sp,#4*(2)] + bne .Lunaligned + cmp @t[3],#64 @ restore flags +# else + ldr @t[2],[sp,#4*(2)] +# endif + ldr @t[3],[sp,#4*(3)] + + add @x[0],@x[0],@t[0] @ accumulate key material + add @x[1],@x[1],@t[1] +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[0],[r12],#16 @ load input + ldrhs @t[1],[r12,#-12] + + add @x[2],@x[2],@t[2] + add @x[3],@x[3],@t[3] +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[2],[r12,#-8] + ldrhs @t[3],[r12,#-4] +# if __ARM_ARCH__>=6 && defined(__ARMEB__) + rev @x[0],@x[0] + rev @x[1],@x[1] + rev @x[2],@x[2] + rev @x[3],@x[3] +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[0],@x[0],@t[0] @ xor with input + eorhs @x[1],@x[1],@t[1] + add @t[0],sp,#4*(4) + str @x[0],[r14],#16 @ store output +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[2],@x[2],@t[2] + eorhs @x[3],@x[3],@t[3] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + str @x[1],[r14,#-12] + str @x[2],[r14,#-8] + str @x[3],[r14,#-4] + + add @x[4],@t[0],@x[4],ror#13 @ accumulate key material + add @x[5],@t[1],@x[5],ror#13 +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[0],[r12],#16 @ load input + ldrhs @t[1],[r12,#-12] + add @x[6],@t[2],@x[6],ror#13 + add @x[7],@t[3],@x[7],ror#13 +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[2],[r12,#-8] + ldrhs @t[3],[r12,#-4] +# if __ARM_ARCH__>=6 && defined(__ARMEB__) + rev @x[4],@x[4] + rev @x[5],@x[5] + rev @x[6],@x[6] + rev @x[7],@x[7] +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[4],@x[4],@t[0] + eorhs @x[5],@x[5],@t[1] + add @t[0],sp,#4*(8) + str @x[4],[r14],#16 @ store output +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[6],@x[6],@t[2] + eorhs @x[7],@x[7],@t[3] + str @x[5],[r14,#-12] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + str @x[6],[r14,#-8] + add @x[0],sp,#4*(16+8) + str @x[7],[r14,#-4] + + ldmia @x[0],{@x[0]-@x[7]} @ load second half + + add @x[0],@x[0],@t[0] @ accumulate key material + add @x[1],@x[1],@t[1] +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[0],[r12],#16 @ load input + ldrhs @t[1],[r12,#-12] +# ifdef __thumb2__ + itt hi +# endif + strhi @t[2],[sp,#4*(16+10)] @ copy "@x[10]" while at it + strhi @t[3],[sp,#4*(16+11)] @ copy "@x[11]" while at it + add @x[2],@x[2],@t[2] + add @x[3],@x[3],@t[3] +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[2],[r12,#-8] + ldrhs @t[3],[r12,#-4] +# if __ARM_ARCH__>=6 && defined(__ARMEB__) + rev @x[0],@x[0] + rev @x[1],@x[1] + rev @x[2],@x[2] + rev @x[3],@x[3] +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[0],@x[0],@t[0] + eorhs @x[1],@x[1],@t[1] + add @t[0],sp,#4*(12) + str @x[0],[r14],#16 @ store output +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[2],@x[2],@t[2] + eorhs @x[3],@x[3],@t[3] + str @x[1],[r14,#-12] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + str @x[2],[r14,#-8] + str @x[3],[r14,#-4] + + add @x[4],@t[0],@x[4],ror#24 @ accumulate key material + add @x[5],@t[1],@x[5],ror#24 +# ifdef __thumb2__ + itt hi +# endif + addhi @t[0],@t[0],#1 @ next counter value + strhi @t[0],[sp,#4*(12)] @ save next counter value +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[0],[r12],#16 @ load input + ldrhs @t[1],[r12,#-12] + add @x[6],@t[2],@x[6],ror#24 + add @x[7],@t[3],@x[7],ror#24 +# ifdef __thumb2__ + itt hs +# endif + ldrhs @t[2],[r12,#-8] + ldrhs @t[3],[r12,#-4] +# if __ARM_ARCH__>=6 && defined(__ARMEB__) + rev @x[4],@x[4] + rev @x[5],@x[5] + rev @x[6],@x[6] + rev @x[7],@x[7] +# endif +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[4],@x[4],@t[0] + eorhs @x[5],@x[5],@t[1] +# ifdef __thumb2__ + it ne +# endif + ldrne @t[0],[sp,#4*(32+2)] @ re-load len +# ifdef __thumb2__ + itt hs +# endif + eorhs @x[6],@x[6],@t[2] + eorhs @x[7],@x[7],@t[3] + str @x[4],[r14],#16 @ store output + str @x[5],[r14,#-12] +# ifdef __thumb2__ + it hs +# endif + subhs @t[3],@t[0],#64 @ len-=64 + str @x[6],[r14,#-8] + str @x[7],[r14,#-4] + bhi .Loop_outer + + beq .Ldone +# if __ARM_ARCH__<7 + b .Ltail + +.align 4 +.Lunaligned: @ unaligned endian-neutral path + cmp @t[3],#64 @ restore flags +# endif +#endif +#if __ARM_ARCH__<7 + ldr @t[3],[sp,#4*(3)] +___ +for ($i=0;$i<16;$i+=4) { +my $j=$i&0x7; +my $twist=""; +if ($i==4) { $twist = ",ror#13"; } +elsif ($i==12) { $twist = ",ror#24"; } + +$code.=<<___ if ($i==4); + add @x[0],sp,#4*(16+8) +___ +$code.=<<___ if ($i==8); + ldmia @x[0],{@x[0]-@x[7]} @ load second half +# ifdef __thumb2__ + itt hi +# endif + strhi @t[2],[sp,#4*(16+10)] @ copy "@x[10]" + strhi @t[3],[sp,#4*(16+11)] @ copy "@x[11]" +___ +$code.=<<___; + add @x[$j+0],@t[0],@x[$j+0]$twist @ accumulate key material +___ +$code.=<<___ if ($i==12); +# ifdef __thumb2__ + itt hi +# endif + addhi @t[0],@t[0],#1 @ next counter value + strhi @t[0],[sp,#4*(12)] @ save next counter value +___ +$code.=<<___; + add @x[$j+1],@t[1],@x[$j+1]$twist + add @x[$j+2],@t[2],@x[$j+2]$twist +# ifdef __thumb2__ + itete lo +# endif + eorlo @t[0],@t[0],@t[0] @ zero or ... + ldrhsb @t[0],[r12],#16 @ ... load input + eorlo @t[1],@t[1],@t[1] + ldrhsb @t[1],[r12,#-12] + + add @x[$j+3],@t[3],@x[$j+3]$twist +# ifdef __thumb2__ + itete lo +# endif + eorlo @t[2],@t[2],@t[2] + ldrhsb @t[2],[r12,#-8] + eorlo @t[3],@t[3],@t[3] + ldrhsb @t[3],[r12,#-4] + + eor @x[$j+0],@t[0],@x[$j+0] @ xor with input (or zero) + eor @x[$j+1],@t[1],@x[$j+1] +# ifdef __thumb2__ + itt hs +# endif + ldrhsb @t[0],[r12,#-15] @ load more input + ldrhsb @t[1],[r12,#-11] + eor @x[$j+2],@t[2],@x[$j+2] + strb @x[$j+0],[r14],#16 @ store output + eor @x[$j+3],@t[3],@x[$j+3] +# ifdef __thumb2__ + itt hs +# endif + ldrhsb @t[2],[r12,#-7] + ldrhsb @t[3],[r12,#-3] + strb @x[$j+1],[r14,#-12] + eor @x[$j+0],@t[0],@x[$j+0],lsr#8 + strb @x[$j+2],[r14,#-8] + eor @x[$j+1],@t[1],@x[$j+1],lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb @t[0],[r12,#-14] @ load more input + ldrhsb @t[1],[r12,#-10] + strb @x[$j+3],[r14,#-4] + eor @x[$j+2],@t[2],@x[$j+2],lsr#8 + strb @x[$j+0],[r14,#-15] + eor @x[$j+3],@t[3],@x[$j+3],lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb @t[2],[r12,#-6] + ldrhsb @t[3],[r12,#-2] + strb @x[$j+1],[r14,#-11] + eor @x[$j+0],@t[0],@x[$j+0],lsr#8 + strb @x[$j+2],[r14,#-7] + eor @x[$j+1],@t[1],@x[$j+1],lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb @t[0],[r12,#-13] @ load more input + ldrhsb @t[1],[r12,#-9] + strb @x[$j+3],[r14,#-3] + eor @x[$j+2],@t[2],@x[$j+2],lsr#8 + strb @x[$j+0],[r14,#-14] + eor @x[$j+3],@t[3],@x[$j+3],lsr#8 +# ifdef __thumb2__ + itt hs +# endif + ldrhsb @t[2],[r12,#-5] + ldrhsb @t[3],[r12,#-1] + strb @x[$j+1],[r14,#-10] + strb @x[$j+2],[r14,#-6] + eor @x[$j+0],@t[0],@x[$j+0],lsr#8 + strb @x[$j+3],[r14,#-2] + eor @x[$j+1],@t[1],@x[$j+1],lsr#8 + strb @x[$j+0],[r14,#-13] + eor @x[$j+2],@t[2],@x[$j+2],lsr#8 + strb @x[$j+1],[r14,#-9] + eor @x[$j+3],@t[3],@x[$j+3],lsr#8 + strb @x[$j+2],[r14,#-5] + strb @x[$j+3],[r14,#-1] +___ +$code.=<<___ if ($i<12); + add @t[0],sp,#4*(4+$i) + ldmia @t[0],{@t[0]-@t[3]} @ load key material +___ +} +$code.=<<___; +# ifdef __thumb2__ + it ne +# endif + ldrne @t[0],[sp,#4*(32+2)] @ re-load len +# ifdef __thumb2__ + it hs +# endif + subhs @t[3],@t[0],#64 @ len-=64 + bhi .Loop_outer + + beq .Ldone +#endif + +.Ltail: + ldr r12,[sp,#4*(32+1)] @ load inp + add @t[1],sp,#4*(0) + ldr r14,[sp,#4*(32+0)] @ load out + +.Loop_tail: + ldrb @t[2],[@t[1]],#1 @ read buffer on stack + ldrb @t[3],[r12],#1 @ read input + subs @t[0],@t[0],#1 + eor @t[3],@t[3],@t[2] + strb @t[3],[r14],#1 @ store output + bne .Loop_tail + +.Ldone: + add sp,sp,#4*(32+3) +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .long 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size ChaCha20_ctr32,.-ChaCha20_ctr32 +___ + +{{{ +my ($a0,$b0,$c0,$d0,$a1,$b1,$c1,$d1,$a2,$b2,$c2,$d2,$t0,$t1,$t2,$t3) = + map("q$_",(0..15)); + +# This can replace vshr-by-24+vsli-by-8. It gives ~3% improvement on +# Cortex-A5/A7, but hurts Cortex-A9 by 5% and Snapdragon S4 by 14%! +sub vperm() +{ my ($dst,$src,$tbl) = @_; + $code .= " vtbl.8 $dst#lo,{$src#lo},$tbl#lo\n"; + $code .= " vtbl.8 $dst#hi,{$src#hi},$tbl#lo\n"; +} + +sub NEONROUND { +my $odd = pop; +my ($a,$b,$c,$d,$t)=@_; + + ( + "&vadd_i32 ($a,$a,$b)", + "&veor ($d,$d,$a)", + "&vrev32_16 ($d,$d)", # vrot ($d,16) + + "&vadd_i32 ($c,$c,$d)", + "&veor ($t,$b,$c)", + "&vshr_u32 ($b,$t,20)", + "&vsli_32 ($b,$t,12)", + + "&vadd_i32 ($a,$a,$b)", + "&veor ($t,$d,$a)", + "&vshr_u32 ($d,$t,24)", + "&vsli_32 ($d,$t,8)", + #"&vperm ($d,$t,$t3)", + + "&vadd_i32 ($c,$c,$d)", + "&veor ($t,$b,$c)", + "&vshr_u32 ($b,$t,25)", + "&vsli_32 ($b,$t,7)", + + "&vext_8 ($a,$a,$a,$odd?4:12)", + "&vext_8 ($d,$d,$d,8)", + "&vext_8 ($c,$c,$c,$odd?12:4)" + ); +} + +$code.=<<___; +#if (defined(__KERNEL__) && defined(CONFIG_KERNEL_MODE_NEON)) || (!defined(__KERNEL__) && __ARM_MAX_ARCH__>=7) +.arch armv7-a +.fpu neon + +# ifdef __KERNEL__ +.globl ChaCha20_neon +@ For optimal performance it's appropriate for caller to enforce +@ minimum input length, 193 bytes is suggested. +# endif +.type ChaCha20_neon,%function +.align 5 +ChaCha20_neon: + ldr r12,[sp,#0] @ pull pointer to counter and nonce + stmdb sp!,{r0-r2,r4-r11,lr} +.LChaCha20_neon: + adr r14,.Lsigma + vstmdb sp!,{d8-d15} @ ABI spec says so + stmdb sp!,{r0-r3} + + vld1.32 {$b0-$c0},[r3] @ load key + ldmia r3,{r4-r11} @ load key + + sub sp,sp,#4*(16+16) + vld1.32 {$d0},[r12] @ load counter and nonce + add r12,sp,#4*8 + ldmia r14,{r0-r3} @ load sigma + vld1.32 {$a0},[r14]! @ load sigma + vld1.32 {$t0},[r14]! @ one + @ vld1.32 {$t3#lo},[r14] @ rot8 + vst1.32 {$c0-$d0},[r12] @ copy 1/2key|counter|nonce + vst1.32 {$a0-$b0},[sp] @ copy sigma|1/2key + + str r10,[sp,#4*(16+10)] @ off-load "@x[10]" + str r11,[sp,#4*(16+11)] @ off-load "@x[11]" + vshl.i32 $t1#lo,$t0#lo,#1 @ two + vstr $t0#lo,[sp,#4*(16+0)] + vshl.i32 $t2#lo,$t0#lo,#2 @ four + vstr $t1#lo,[sp,#4*(16+2)] + vmov $a1,$a0 + vstr $t2#lo,[sp,#4*(16+4)] + vmov $a2,$a0 + @ vstr $t3#lo,[sp,#4*(16+6)] + vmov $b1,$b0 + vmov $b2,$b0 + b .Loop_neon_enter + +.align 4 +.Loop_neon_outer: + ldmia sp,{r0-r9} @ load key material + cmp @t[3],#64*2 @ if len<=64*2 + bls .Lbreak_neon @ switch to integer-only + @ vldr $t3#lo,[sp,#4*(16+6)] @ rot8 + vmov $a1,$a0 + str @t[3],[sp,#4*(32+2)] @ save len + vmov $a2,$a0 + str r12, [sp,#4*(32+1)] @ save inp + vmov $b1,$b0 + str r14, [sp,#4*(32+0)] @ save out + vmov $b2,$b0 +.Loop_neon_enter: + ldr @t[3], [sp,#4*(15)] + mov @x[4],@x[4],ror#19 @ twist b[0..3] + vadd.i32 $d1,$d0,$t0 @ counter+1 + ldr @x[12],[sp,#4*(12)] @ modulo-scheduled load + mov @x[5],@x[5],ror#19 + vmov $c1,$c0 + ldr @t[2], [sp,#4*(13)] + mov @x[6],@x[6],ror#19 + vmov $c2,$c0 + ldr @x[14],[sp,#4*(14)] + mov @x[7],@x[7],ror#19 + vadd.i32 $d2,$d1,$t0 @ counter+2 + add @x[12],@x[12],#3 @ counter+3 + mov @t[3],@t[3],ror#8 @ twist d[0..3] + mov @x[12],@x[12],ror#8 + mov @t[2],@t[2],ror#8 + mov @x[14],@x[14],ror#8 + str @t[3], [sp,#4*(16+15)] + mov @t[3],#10 + b .Loop_neon + +.align 4 +.Loop_neon: + subs @t[3],@t[3],#1 +___ + my @thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,0); + my @thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,0); + my @thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,0); + my @thread3=&ROUND(0,4,8,12); + + foreach (@thread0) { + eval; eval(shift(@thread3)); + eval(shift(@thread1)); eval(shift(@thread3)); + eval(shift(@thread2)); eval(shift(@thread3)); + } + + @thread0=&NEONROUND($a0,$b0,$c0,$d0,$t0,1); + @thread1=&NEONROUND($a1,$b1,$c1,$d1,$t1,1); + @thread2=&NEONROUND($a2,$b2,$c2,$d2,$t2,1); + @thread3=&ROUND(0,5,10,15); + + foreach (@thread0) { + eval; eval(shift(@thread3)); + eval(shift(@thread1)); eval(shift(@thread3)); + eval(shift(@thread2)); eval(shift(@thread3)); + } +$code.=<<___; + bne .Loop_neon + + add @t[3],sp,#32 + vld1.32 {$t0-$t1},[sp] @ load key material + vld1.32 {$t2-$t3},[@t[3]] + + ldr @t[3],[sp,#4*(32+2)] @ load len + + str @t[0], [sp,#4*(16+8)] @ modulo-scheduled store + str @t[1], [sp,#4*(16+9)] + str @x[12],[sp,#4*(16+12)] + str @t[2], [sp,#4*(16+13)] + str @x[14],[sp,#4*(16+14)] + + @ at this point we have first half of 512-bit result in + @ @x[0-7] and second half at sp+4*(16+8) + + ldr r12,[sp,#4*(32+1)] @ load inp + ldr r14,[sp,#4*(32+0)] @ load out + + vadd.i32 $a0,$a0,$t0 @ accumulate key material + vadd.i32 $a1,$a1,$t0 + vadd.i32 $a2,$a2,$t0 + vldr $t0#lo,[sp,#4*(16+0)] @ one + + vadd.i32 $b0,$b0,$t1 + vadd.i32 $b1,$b1,$t1 + vadd.i32 $b2,$b2,$t1 + vldr $t1#lo,[sp,#4*(16+2)] @ two + + vadd.i32 $c0,$c0,$t2 + vadd.i32 $c1,$c1,$t2 + vadd.i32 $c2,$c2,$t2 + vadd.i32 $d1#lo,$d1#lo,$t0#lo @ counter+1 + vadd.i32 $d2#lo,$d2#lo,$t1#lo @ counter+2 + + vadd.i32 $d0,$d0,$t3 + vadd.i32 $d1,$d1,$t3 + vadd.i32 $d2,$d2,$t3 + + cmp @t[3],#64*4 + blo .Ltail_neon + + vld1.8 {$t0-$t1},[r12]! @ load input + mov @t[3],sp + vld1.8 {$t2-$t3},[r12]! + veor $a0,$a0,$t0 @ xor with input + veor $b0,$b0,$t1 + vld1.8 {$t0-$t1},[r12]! + veor $c0,$c0,$t2 + veor $d0,$d0,$t3 + vld1.8 {$t2-$t3},[r12]! + + veor $a1,$a1,$t0 + vst1.8 {$a0-$b0},[r14]! @ store output + veor $b1,$b1,$t1 + vld1.8 {$t0-$t1},[r12]! + veor $c1,$c1,$t2 + vst1.8 {$c0-$d0},[r14]! + veor $d1,$d1,$t3 + vld1.8 {$t2-$t3},[r12]! + + veor $a2,$a2,$t0 + vld1.32 {$a0-$b0},[@t[3]]! @ load for next iteration + veor $t0#hi,$t0#hi,$t0#hi + vldr $t0#lo,[sp,#4*(16+4)] @ four + veor $b2,$b2,$t1 + vld1.32 {$c0-$d0},[@t[3]] + veor $c2,$c2,$t2 + vst1.8 {$a1-$b1},[r14]! + veor $d2,$d2,$t3 + vst1.8 {$c1-$d1},[r14]! + + vadd.i32 $d0#lo,$d0#lo,$t0#lo @ next counter value + vldr $t0#lo,[sp,#4*(16+0)] @ one + + ldmia sp,{@t[0]-@t[3]} @ load key material + add @x[0],@x[0],@t[0] @ accumulate key material + ldr @t[0],[r12],#16 @ load input + vst1.8 {$a2-$b2},[r14]! + add @x[1],@x[1],@t[1] + ldr @t[1],[r12,#-12] + vst1.8 {$c2-$d2},[r14]! + add @x[2],@x[2],@t[2] + ldr @t[2],[r12,#-8] + add @x[3],@x[3],@t[3] + ldr @t[3],[r12,#-4] +# ifdef __ARMEB__ + rev @x[0],@x[0] + rev @x[1],@x[1] + rev @x[2],@x[2] + rev @x[3],@x[3] +# endif + eor @x[0],@x[0],@t[0] @ xor with input + add @t[0],sp,#4*(4) + eor @x[1],@x[1],@t[1] + str @x[0],[r14],#16 @ store output + eor @x[2],@x[2],@t[2] + str @x[1],[r14,#-12] + eor @x[3],@x[3],@t[3] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + str @x[2],[r14,#-8] + str @x[3],[r14,#-4] + + add @x[4],@t[0],@x[4],ror#13 @ accumulate key material + ldr @t[0],[r12],#16 @ load input + add @x[5],@t[1],@x[5],ror#13 + ldr @t[1],[r12,#-12] + add @x[6],@t[2],@x[6],ror#13 + ldr @t[2],[r12,#-8] + add @x[7],@t[3],@x[7],ror#13 + ldr @t[3],[r12,#-4] +# ifdef __ARMEB__ + rev @x[4],@x[4] + rev @x[5],@x[5] + rev @x[6],@x[6] + rev @x[7],@x[7] +# endif + eor @x[4],@x[4],@t[0] + add @t[0],sp,#4*(8) + eor @x[5],@x[5],@t[1] + str @x[4],[r14],#16 @ store output + eor @x[6],@x[6],@t[2] + str @x[5],[r14,#-12] + eor @x[7],@x[7],@t[3] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + str @x[6],[r14,#-8] + add @x[0],sp,#4*(16+8) + str @x[7],[r14,#-4] + + ldmia @x[0],{@x[0]-@x[7]} @ load second half + + add @x[0],@x[0],@t[0] @ accumulate key material + ldr @t[0],[r12],#16 @ load input + add @x[1],@x[1],@t[1] + ldr @t[1],[r12,#-12] +# ifdef __thumb2__ + it hi +# endif + strhi @t[2],[sp,#4*(16+10)] @ copy "@x[10]" while at it + add @x[2],@x[2],@t[2] + ldr @t[2],[r12,#-8] +# ifdef __thumb2__ + it hi +# endif + strhi @t[3],[sp,#4*(16+11)] @ copy "@x[11]" while at it + add @x[3],@x[3],@t[3] + ldr @t[3],[r12,#-4] +# ifdef __ARMEB__ + rev @x[0],@x[0] + rev @x[1],@x[1] + rev @x[2],@x[2] + rev @x[3],@x[3] +# endif + eor @x[0],@x[0],@t[0] + add @t[0],sp,#4*(12) + eor @x[1],@x[1],@t[1] + str @x[0],[r14],#16 @ store output + eor @x[2],@x[2],@t[2] + str @x[1],[r14,#-12] + eor @x[3],@x[3],@t[3] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + str @x[2],[r14,#-8] + str @x[3],[r14,#-4] + + add @x[4],@t[0],@x[4],ror#24 @ accumulate key material + add @t[0],@t[0],#4 @ next counter value + add @x[5],@t[1],@x[5],ror#24 + str @t[0],[sp,#4*(12)] @ save next counter value + ldr @t[0],[r12],#16 @ load input + add @x[6],@t[2],@x[6],ror#24 + add @x[4],@x[4],#3 @ counter+3 + ldr @t[1],[r12,#-12] + add @x[7],@t[3],@x[7],ror#24 + ldr @t[2],[r12,#-8] + ldr @t[3],[r12,#-4] +# ifdef __ARMEB__ + rev @x[4],@x[4] + rev @x[5],@x[5] + rev @x[6],@x[6] + rev @x[7],@x[7] +# endif + eor @x[4],@x[4],@t[0] +# ifdef __thumb2__ + it hi +# endif + ldrhi @t[0],[sp,#4*(32+2)] @ re-load len + eor @x[5],@x[5],@t[1] + eor @x[6],@x[6],@t[2] + str @x[4],[r14],#16 @ store output + eor @x[7],@x[7],@t[3] + str @x[5],[r14,#-12] + sub @t[3],@t[0],#64*4 @ len-=64*4 + str @x[6],[r14,#-8] + str @x[7],[r14,#-4] + bhi .Loop_neon_outer + + b .Ldone_neon + +.align 4 +.Lbreak_neon: + @ harmonize NEON and integer-only stack frames: load data + @ from NEON frame, but save to integer-only one; distance + @ between the two is 4*(32+4+16-32)=4*(20). + + str @t[3], [sp,#4*(20+32+2)] @ save len + add @t[3],sp,#4*(32+4) + str r12, [sp,#4*(20+32+1)] @ save inp + str r14, [sp,#4*(20+32+0)] @ save out + + ldr @x[12],[sp,#4*(16+10)] + ldr @x[14],[sp,#4*(16+11)] + vldmia @t[3],{d8-d15} @ fulfill ABI requirement + str @x[12],[sp,#4*(20+16+10)] @ copy "@x[10]" + str @x[14],[sp,#4*(20+16+11)] @ copy "@x[11]" + + ldr @t[3], [sp,#4*(15)] + mov @x[4],@x[4],ror#19 @ twist b[0..3] + ldr @x[12],[sp,#4*(12)] @ modulo-scheduled load + mov @x[5],@x[5],ror#19 + ldr @t[2], [sp,#4*(13)] + mov @x[6],@x[6],ror#19 + ldr @x[14],[sp,#4*(14)] + mov @x[7],@x[7],ror#19 + mov @t[3],@t[3],ror#8 @ twist d[0..3] + mov @x[12],@x[12],ror#8 + mov @t[2],@t[2],ror#8 + mov @x[14],@x[14],ror#8 + str @t[3], [sp,#4*(20+16+15)] + add @t[3],sp,#4*(20) + vst1.32 {$a0-$b0},[@t[3]]! @ copy key + add sp,sp,#4*(20) @ switch frame + vst1.32 {$c0-$d0},[@t[3]] + mov @t[3],#10 + b .Loop @ go integer-only + +.align 4 +.Ltail_neon: + cmp @t[3],#64*3 + bhs .L192_or_more_neon + cmp @t[3],#64*2 + bhs .L128_or_more_neon + cmp @t[3],#64*1 + bhs .L64_or_more_neon + + add @t[0],sp,#4*(8) + vst1.8 {$a0-$b0},[sp] + add @t[2],sp,#4*(0) + vst1.8 {$c0-$d0},[@t[0]] + b .Loop_tail_neon + +.align 4 +.L64_or_more_neon: + vld1.8 {$t0-$t1},[r12]! + vld1.8 {$t2-$t3},[r12]! + veor $a0,$a0,$t0 + veor $b0,$b0,$t1 + veor $c0,$c0,$t2 + veor $d0,$d0,$t3 + vst1.8 {$a0-$b0},[r14]! + vst1.8 {$c0-$d0},[r14]! + + beq .Ldone_neon + + add @t[0],sp,#4*(8) + vst1.8 {$a1-$b1},[sp] + add @t[2],sp,#4*(0) + vst1.8 {$c1-$d1},[@t[0]] + sub @t[3],@t[3],#64*1 @ len-=64*1 + b .Loop_tail_neon + +.align 4 +.L128_or_more_neon: + vld1.8 {$t0-$t1},[r12]! + vld1.8 {$t2-$t3},[r12]! + veor $a0,$a0,$t0 + veor $b0,$b0,$t1 + vld1.8 {$t0-$t1},[r12]! + veor $c0,$c0,$t2 + veor $d0,$d0,$t3 + vld1.8 {$t2-$t3},[r12]! + + veor $a1,$a1,$t0 + veor $b1,$b1,$t1 + vst1.8 {$a0-$b0},[r14]! + veor $c1,$c1,$t2 + vst1.8 {$c0-$d0},[r14]! + veor $d1,$d1,$t3 + vst1.8 {$a1-$b1},[r14]! + vst1.8 {$c1-$d1},[r14]! + + beq .Ldone_neon + + add @t[0],sp,#4*(8) + vst1.8 {$a2-$b2},[sp] + add @t[2],sp,#4*(0) + vst1.8 {$c2-$d2},[@t[0]] + sub @t[3],@t[3],#64*2 @ len-=64*2 + b .Loop_tail_neon + +.align 4 +.L192_or_more_neon: + vld1.8 {$t0-$t1},[r12]! + vld1.8 {$t2-$t3},[r12]! + veor $a0,$a0,$t0 + veor $b0,$b0,$t1 + vld1.8 {$t0-$t1},[r12]! + veor $c0,$c0,$t2 + veor $d0,$d0,$t3 + vld1.8 {$t2-$t3},[r12]! + + veor $a1,$a1,$t0 + veor $b1,$b1,$t1 + vld1.8 {$t0-$t1},[r12]! + veor $c1,$c1,$t2 + vst1.8 {$a0-$b0},[r14]! + veor $d1,$d1,$t3 + vld1.8 {$t2-$t3},[r12]! + + veor $a2,$a2,$t0 + vst1.8 {$c0-$d0},[r14]! + veor $b2,$b2,$t1 + vst1.8 {$a1-$b1},[r14]! + veor $c2,$c2,$t2 + vst1.8 {$c1-$d1},[r14]! + veor $d2,$d2,$t3 + vst1.8 {$a2-$b2},[r14]! + vst1.8 {$c2-$d2},[r14]! + + beq .Ldone_neon + + ldmia sp,{@t[0]-@t[3]} @ load key material + add @x[0],@x[0],@t[0] @ accumulate key material + add @t[0],sp,#4*(4) + add @x[1],@x[1],@t[1] + add @x[2],@x[2],@t[2] + add @x[3],@x[3],@t[3] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + + add @x[4],@t[0],@x[4],ror#13 @ accumulate key material + add @t[0],sp,#4*(8) + add @x[5],@t[1],@x[5],ror#13 + add @x[6],@t[2],@x[6],ror#13 + add @x[7],@t[3],@x[7],ror#13 + ldmia @t[0],{@t[0]-@t[3]} @ load key material +# ifdef __ARMEB__ + rev @x[0],@x[0] + rev @x[1],@x[1] + rev @x[2],@x[2] + rev @x[3],@x[3] + rev @x[4],@x[4] + rev @x[5],@x[5] + rev @x[6],@x[6] + rev @x[7],@x[7] +# endif + stmia sp,{@x[0]-@x[7]} + add @x[0],sp,#4*(16+8) + + ldmia @x[0],{@x[0]-@x[7]} @ load second half + + add @x[0],@x[0],@t[0] @ accumulate key material + add @t[0],sp,#4*(12) + add @x[1],@x[1],@t[1] + add @x[2],@x[2],@t[2] + add @x[3],@x[3],@t[3] + ldmia @t[0],{@t[0]-@t[3]} @ load key material + + add @x[4],@t[0],@x[4],ror#24 @ accumulate key material + add @t[0],sp,#4*(8) + add @x[5],@t[1],@x[5],ror#24 + add @x[4],@x[4],#3 @ counter+3 + add @x[6],@t[2],@x[6],ror#24 + add @x[7],@t[3],@x[7],ror#24 + ldr @t[3],[sp,#4*(32+2)] @ re-load len +# ifdef __ARMEB__ + rev @x[0],@x[0] + rev @x[1],@x[1] + rev @x[2],@x[2] + rev @x[3],@x[3] + rev @x[4],@x[4] + rev @x[5],@x[5] + rev @x[6],@x[6] + rev @x[7],@x[7] +# endif + stmia @t[0],{@x[0]-@x[7]} + add @t[2],sp,#4*(0) + sub @t[3],@t[3],#64*3 @ len-=64*3 + +.Loop_tail_neon: + ldrb @t[0],[@t[2]],#1 @ read buffer on stack + ldrb @t[1],[r12],#1 @ read input + subs @t[3],@t[3],#1 + eor @t[0],@t[0],@t[1] + strb @t[0],[r14],#1 @ store output + bne .Loop_tail_neon + +.Ldone_neon: + add sp,sp,#4*(32+4) + vldmia sp,{d8-d15} + add sp,sp,#4*(16+3) + ldmia sp!,{r4-r11,pc} +.size ChaCha20_neon,.-ChaCha20_neon +# ifndef __KERNEL__ +.comm OPENSSL_armcap_P,4,4 +# endif +#endif +___ +}}} + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo; + + print $_,"\n"; +} +close STDOUT; diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-arm64.pl b/net/wireguard/crypto/zinc/chacha20/chacha20-arm64.pl new file mode 100644 index 000000000000..ac14a9924165 --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-arm64.pl @@ -0,0 +1,1163 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# June 2015 +# +# ChaCha20 for ARMv8. +# +# Performance in cycles per byte out of large buffer. +# +# IALU/gcc-4.9 3xNEON+1xIALU 6xNEON+2xIALU(*) +# +# Apple A7 5.50/+49% 3.33 1.70 +# Cortex-A53 8.40/+80% 4.72 4.72(**) +# Cortex-A57 8.06/+43% 4.90 4.43(***) +# Denver 4.50/+82% 2.63 2.67(**) +# X-Gene 9.50/+46% 8.82 8.89(**) +# Mongoose 8.00/+44% 3.64 3.25(***) +# Kryo 8.17/+50% 4.83 4.65(***) +# +# (*) since no non-Apple processor exhibits significantly better +# performance, the code path is #ifdef __APPLE__-ed; +# (**) it's expected that doubling interleave factor doesn't help +# all processors, only those with higher NEON latency and +# higher instruction issue rate; +# (***) expected improvement was actually higher; + +$flavour=shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +my ($out,$inp,$len,$key,$ctr) = map("x$_",(0..4)); + +my @x=map("x$_",(5..17,19..21)); +my @d=map("x$_",(22..28,30)); + +sub ROUND { +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); + + ( + "&add_32 (@x[$a0],@x[$a0],@x[$b0])", + "&add_32 (@x[$a1],@x[$a1],@x[$b1])", + "&add_32 (@x[$a2],@x[$a2],@x[$b2])", + "&add_32 (@x[$a3],@x[$a3],@x[$b3])", + "&eor_32 (@x[$d0],@x[$d0],@x[$a0])", + "&eor_32 (@x[$d1],@x[$d1],@x[$a1])", + "&eor_32 (@x[$d2],@x[$d2],@x[$a2])", + "&eor_32 (@x[$d3],@x[$d3],@x[$a3])", + "&ror_32 (@x[$d0],@x[$d0],16)", + "&ror_32 (@x[$d1],@x[$d1],16)", + "&ror_32 (@x[$d2],@x[$d2],16)", + "&ror_32 (@x[$d3],@x[$d3],16)", + + "&add_32 (@x[$c0],@x[$c0],@x[$d0])", + "&add_32 (@x[$c1],@x[$c1],@x[$d1])", + "&add_32 (@x[$c2],@x[$c2],@x[$d2])", + "&add_32 (@x[$c3],@x[$c3],@x[$d3])", + "&eor_32 (@x[$b0],@x[$b0],@x[$c0])", + "&eor_32 (@x[$b1],@x[$b1],@x[$c1])", + "&eor_32 (@x[$b2],@x[$b2],@x[$c2])", + "&eor_32 (@x[$b3],@x[$b3],@x[$c3])", + "&ror_32 (@x[$b0],@x[$b0],20)", + "&ror_32 (@x[$b1],@x[$b1],20)", + "&ror_32 (@x[$b2],@x[$b2],20)", + "&ror_32 (@x[$b3],@x[$b3],20)", + + "&add_32 (@x[$a0],@x[$a0],@x[$b0])", + "&add_32 (@x[$a1],@x[$a1],@x[$b1])", + "&add_32 (@x[$a2],@x[$a2],@x[$b2])", + "&add_32 (@x[$a3],@x[$a3],@x[$b3])", + "&eor_32 (@x[$d0],@x[$d0],@x[$a0])", + "&eor_32 (@x[$d1],@x[$d1],@x[$a1])", + "&eor_32 (@x[$d2],@x[$d2],@x[$a2])", + "&eor_32 (@x[$d3],@x[$d3],@x[$a3])", + "&ror_32 (@x[$d0],@x[$d0],24)", + "&ror_32 (@x[$d1],@x[$d1],24)", + "&ror_32 (@x[$d2],@x[$d2],24)", + "&ror_32 (@x[$d3],@x[$d3],24)", + + "&add_32 (@x[$c0],@x[$c0],@x[$d0])", + "&add_32 (@x[$c1],@x[$c1],@x[$d1])", + "&add_32 (@x[$c2],@x[$c2],@x[$d2])", + "&add_32 (@x[$c3],@x[$c3],@x[$d3])", + "&eor_32 (@x[$b0],@x[$b0],@x[$c0])", + "&eor_32 (@x[$b1],@x[$b1],@x[$c1])", + "&eor_32 (@x[$b2],@x[$b2],@x[$c2])", + "&eor_32 (@x[$b3],@x[$b3],@x[$c3])", + "&ror_32 (@x[$b0],@x[$b0],25)", + "&ror_32 (@x[$b1],@x[$b1],25)", + "&ror_32 (@x[$b2],@x[$b2],25)", + "&ror_32 (@x[$b3],@x[$b3],25)" + ); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +.extern OPENSSL_armcap_P +#else +# define ChaCha20_ctr32 chacha20_arm +# define ChaCha20_neon chacha20_neon +#endif + +.text + +.align 5 +.Lsigma: +.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral +.Lone: +.long 1,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap_P: +# ifdef __ILP32__ +.long OPENSSL_armcap_P-. +# else +.quad OPENSSL_armcap_P-. +# endif +#endif + +.globl ChaCha20_ctr32 +.type ChaCha20_ctr32,%function +.align 5 +ChaCha20_ctr32: + cbz $len,.Labort +#ifndef __KERNEL__ + adr @x[0],.LOPENSSL_armcap_P + cmp $len,#192 + b.lo .Lshort +# ifdef __ILP32__ + ldrsw @x[1],[@x[0]] +# else + ldr @x[1],[@x[0]] +# endif + ldr w17,[@x[1],@x[0]] + tst w17,#ARMV7_NEON + b.ne ChaCha20_neon + +.Lshort: +#endif + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adr @x[0],.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#64 + + ldp @d[0],@d[1],[@x[0]] // load sigma + ldp @d[2],@d[3],[$key] // load key + ldp @d[4],@d[5],[$key,#16] + ldp @d[6],@d[7],[$ctr] // load counter +#ifdef __AARCH64EB__ + ror @d[2],@d[2],#32 + ror @d[3],@d[3],#32 + ror @d[4],@d[4],#32 + ror @d[5],@d[5],#32 + ror @d[6],@d[6],#32 + ror @d[7],@d[7],#32 +#endif + +.Loop_outer: + mov.32 @x[0],@d[0] // unpack key block + lsr @x[1],@d[0],#32 + mov.32 @x[2],@d[1] + lsr @x[3],@d[1],#32 + mov.32 @x[4],@d[2] + lsr @x[5],@d[2],#32 + mov.32 @x[6],@d[3] + lsr @x[7],@d[3],#32 + mov.32 @x[8],@d[4] + lsr @x[9],@d[4],#32 + mov.32 @x[10],@d[5] + lsr @x[11],@d[5],#32 + mov.32 @x[12],@d[6] + lsr @x[13],@d[6],#32 + mov.32 @x[14],@d[7] + lsr @x[15],@d[7],#32 + + mov $ctr,#10 + subs $len,$len,#64 +.Loop: + sub $ctr,$ctr,#1 +___ + foreach (&ROUND(0, 4, 8,12)) { eval; } + foreach (&ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + cbnz $ctr,.Loop + + add.32 @x[0],@x[0],@d[0] // accumulate key block + add @x[1],@x[1],@d[0],lsr#32 + add.32 @x[2],@x[2],@d[1] + add @x[3],@x[3],@d[1],lsr#32 + add.32 @x[4],@x[4],@d[2] + add @x[5],@x[5],@d[2],lsr#32 + add.32 @x[6],@x[6],@d[3] + add @x[7],@x[7],@d[3],lsr#32 + add.32 @x[8],@x[8],@d[4] + add @x[9],@x[9],@d[4],lsr#32 + add.32 @x[10],@x[10],@d[5] + add @x[11],@x[11],@d[5],lsr#32 + add.32 @x[12],@x[12],@d[6] + add @x[13],@x[13],@d[6],lsr#32 + add.32 @x[14],@x[14],@d[7] + add @x[15],@x[15],@d[7],lsr#32 + + b.lo .Ltail + + add @x[0],@x[0],@x[1],lsl#32 // pack + add @x[2],@x[2],@x[3],lsl#32 + ldp @x[1],@x[3],[$inp,#0] // load input + add @x[4],@x[4],@x[5],lsl#32 + add @x[6],@x[6],@x[7],lsl#32 + ldp @x[5],@x[7],[$inp,#16] + add @x[8],@x[8],@x[9],lsl#32 + add @x[10],@x[10],@x[11],lsl#32 + ldp @x[9],@x[11],[$inp,#32] + add @x[12],@x[12],@x[13],lsl#32 + add @x[14],@x[14],@x[15],lsl#32 + ldp @x[13],@x[15],[$inp,#48] + add $inp,$inp,#64 +#ifdef __AARCH64EB__ + rev @x[0],@x[0] + rev @x[2],@x[2] + rev @x[4],@x[4] + rev @x[6],@x[6] + rev @x[8],@x[8] + rev @x[10],@x[10] + rev @x[12],@x[12] + rev @x[14],@x[14] +#endif + eor @x[0],@x[0],@x[1] + eor @x[2],@x[2],@x[3] + eor @x[4],@x[4],@x[5] + eor @x[6],@x[6],@x[7] + eor @x[8],@x[8],@x[9] + eor @x[10],@x[10],@x[11] + eor @x[12],@x[12],@x[13] + eor @x[14],@x[14],@x[15] + + stp @x[0],@x[2],[$out,#0] // store output + add @d[6],@d[6],#1 // increment counter + stp @x[4],@x[6],[$out,#16] + stp @x[8],@x[10],[$out,#32] + stp @x[12],@x[14],[$out,#48] + add $out,$out,#64 + + b.hi .Loop_outer + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 +.Labort: + ret + +.align 4 +.Ltail: + add $len,$len,#64 +.Less_than_64: + sub $out,$out,#1 + add $inp,$inp,$len + add $out,$out,$len + add $ctr,sp,$len + neg $len,$len + + add @x[0],@x[0],@x[1],lsl#32 // pack + add @x[2],@x[2],@x[3],lsl#32 + add @x[4],@x[4],@x[5],lsl#32 + add @x[6],@x[6],@x[7],lsl#32 + add @x[8],@x[8],@x[9],lsl#32 + add @x[10],@x[10],@x[11],lsl#32 + add @x[12],@x[12],@x[13],lsl#32 + add @x[14],@x[14],@x[15],lsl#32 +#ifdef __AARCH64EB__ + rev @x[0],@x[0] + rev @x[2],@x[2] + rev @x[4],@x[4] + rev @x[6],@x[6] + rev @x[8],@x[8] + rev @x[10],@x[10] + rev @x[12],@x[12] + rev @x[14],@x[14] +#endif + stp @x[0],@x[2],[sp,#0] + stp @x[4],@x[6],[sp,#16] + stp @x[8],@x[10],[sp,#32] + stp @x[12],@x[14],[sp,#48] + +.Loop_tail: + ldrb w10,[$inp,$len] + ldrb w11,[$ctr,$len] + add $len,$len,#1 + eor w10,w10,w11 + strb w10,[$out,$len] + cbnz $len,.Loop_tail + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + ret +.size ChaCha20_ctr32,.-ChaCha20_ctr32 +___ + +{{{ +my ($A0,$B0,$C0,$D0,$A1,$B1,$C1,$D1,$A2,$B2,$C2,$D2,$T0,$T1,$T2,$T3) = + map("v$_.4s",(0..7,16..23)); +my (@K)=map("v$_.4s",(24..30)); +my $ONE="v31.4s"; + +sub NEONROUND { +my $odd = pop; +my ($a,$b,$c,$d,$t)=@_; + + ( + "&add ('$a','$a','$b')", + "&eor ('$d','$d','$a')", + "&rev32_16 ('$d','$d')", # vrot ($d,16) + + "&add ('$c','$c','$d')", + "&eor ('$t','$b','$c')", + "&ushr ('$b','$t',20)", + "&sli ('$b','$t',12)", + + "&add ('$a','$a','$b')", + "&eor ('$t','$d','$a')", + "&ushr ('$d','$t',24)", + "&sli ('$d','$t',8)", + + "&add ('$c','$c','$d')", + "&eor ('$t','$b','$c')", + "&ushr ('$b','$t',25)", + "&sli ('$b','$t',7)", + + "&ext ('$a','$a','$a',$odd?4:12)", + "&ext ('$d','$d','$d',8)", + "&ext ('$c','$c','$c',$odd?12:4)" + ); +} + +$code.=<<___; +#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON) +#ifdef __KERNEL__ +.globl ChaCha20_neon +.type ChaCha20_neon,%function +#endif +.type ChaCha20_neon,%function +.align 5 +ChaCha20_neon: + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adr @x[0],.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] +#ifdef __APPLE__ + cmp $len,#512 + b.hs .L512_or_more_neon +#endif + + sub sp,sp,#64 + + ldp @d[0],@d[1],[@x[0]] // load sigma + ld1 {@K[0]},[@x[0]],#16 + ldp @d[2],@d[3],[$key] // load key + ldp @d[4],@d[5],[$key,#16] + ld1 {@K[1],@K[2]},[$key] + ldp @d[6],@d[7],[$ctr] // load counter + ld1 {@K[3]},[$ctr] + ld1 {$ONE},[@x[0]] +#ifdef __AARCH64EB__ + rev64 @K[0],@K[0] + ror @d[2],@d[2],#32 + ror @d[3],@d[3],#32 + ror @d[4],@d[4],#32 + ror @d[5],@d[5],#32 + ror @d[6],@d[6],#32 + ror @d[7],@d[7],#32 +#endif + add @K[3],@K[3],$ONE // += 1 + add @K[4],@K[3],$ONE + add @K[5],@K[4],$ONE + shl $ONE,$ONE,#2 // 1 -> 4 + +.Loop_outer_neon: + mov.32 @x[0],@d[0] // unpack key block + lsr @x[1],@d[0],#32 + mov $A0,@K[0] + mov.32 @x[2],@d[1] + lsr @x[3],@d[1],#32 + mov $A1,@K[0] + mov.32 @x[4],@d[2] + lsr @x[5],@d[2],#32 + mov $A2,@K[0] + mov.32 @x[6],@d[3] + mov $B0,@K[1] + lsr @x[7],@d[3],#32 + mov $B1,@K[1] + mov.32 @x[8],@d[4] + mov $B2,@K[1] + lsr @x[9],@d[4],#32 + mov $D0,@K[3] + mov.32 @x[10],@d[5] + mov $D1,@K[4] + lsr @x[11],@d[5],#32 + mov $D2,@K[5] + mov.32 @x[12],@d[6] + mov $C0,@K[2] + lsr @x[13],@d[6],#32 + mov $C1,@K[2] + mov.32 @x[14],@d[7] + mov $C2,@K[2] + lsr @x[15],@d[7],#32 + + mov $ctr,#10 + subs $len,$len,#256 +.Loop_neon: + sub $ctr,$ctr,#1 +___ + my @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,0); + my @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,0); + my @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,0); + my @thread3=&ROUND(0,4,8,12); + + foreach (@thread0) { + eval; eval(shift(@thread3)); + eval(shift(@thread1)); eval(shift(@thread3)); + eval(shift(@thread2)); eval(shift(@thread3)); + } + + @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,1); + @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,1); + @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,1); + @thread3=&ROUND(0,5,10,15); + + foreach (@thread0) { + eval; eval(shift(@thread3)); + eval(shift(@thread1)); eval(shift(@thread3)); + eval(shift(@thread2)); eval(shift(@thread3)); + } +$code.=<<___; + cbnz $ctr,.Loop_neon + + add.32 @x[0],@x[0],@d[0] // accumulate key block + add $A0,$A0,@K[0] + add @x[1],@x[1],@d[0],lsr#32 + add $A1,$A1,@K[0] + add.32 @x[2],@x[2],@d[1] + add $A2,$A2,@K[0] + add @x[3],@x[3],@d[1],lsr#32 + add $C0,$C0,@K[2] + add.32 @x[4],@x[4],@d[2] + add $C1,$C1,@K[2] + add @x[5],@x[5],@d[2],lsr#32 + add $C2,$C2,@K[2] + add.32 @x[6],@x[6],@d[3] + add $D0,$D0,@K[3] + add @x[7],@x[7],@d[3],lsr#32 + add.32 @x[8],@x[8],@d[4] + add $D1,$D1,@K[4] + add @x[9],@x[9],@d[4],lsr#32 + add.32 @x[10],@x[10],@d[5] + add $D2,$D2,@K[5] + add @x[11],@x[11],@d[5],lsr#32 + add.32 @x[12],@x[12],@d[6] + add $B0,$B0,@K[1] + add @x[13],@x[13],@d[6],lsr#32 + add.32 @x[14],@x[14],@d[7] + add $B1,$B1,@K[1] + add @x[15],@x[15],@d[7],lsr#32 + add $B2,$B2,@K[1] + + b.lo .Ltail_neon + + add @x[0],@x[0],@x[1],lsl#32 // pack + add @x[2],@x[2],@x[3],lsl#32 + ldp @x[1],@x[3],[$inp,#0] // load input + add @x[4],@x[4],@x[5],lsl#32 + add @x[6],@x[6],@x[7],lsl#32 + ldp @x[5],@x[7],[$inp,#16] + add @x[8],@x[8],@x[9],lsl#32 + add @x[10],@x[10],@x[11],lsl#32 + ldp @x[9],@x[11],[$inp,#32] + add @x[12],@x[12],@x[13],lsl#32 + add @x[14],@x[14],@x[15],lsl#32 + ldp @x[13],@x[15],[$inp,#48] + add $inp,$inp,#64 +#ifdef __AARCH64EB__ + rev @x[0],@x[0] + rev @x[2],@x[2] + rev @x[4],@x[4] + rev @x[6],@x[6] + rev @x[8],@x[8] + rev @x[10],@x[10] + rev @x[12],@x[12] + rev @x[14],@x[14] +#endif + ld1.8 {$T0-$T3},[$inp],#64 + eor @x[0],@x[0],@x[1] + eor @x[2],@x[2],@x[3] + eor @x[4],@x[4],@x[5] + eor @x[6],@x[6],@x[7] + eor @x[8],@x[8],@x[9] + eor $A0,$A0,$T0 + eor @x[10],@x[10],@x[11] + eor $B0,$B0,$T1 + eor @x[12],@x[12],@x[13] + eor $C0,$C0,$T2 + eor @x[14],@x[14],@x[15] + eor $D0,$D0,$T3 + ld1.8 {$T0-$T3},[$inp],#64 + + stp @x[0],@x[2],[$out,#0] // store output + add @d[6],@d[6],#4 // increment counter + stp @x[4],@x[6],[$out,#16] + add @K[3],@K[3],$ONE // += 4 + stp @x[8],@x[10],[$out,#32] + add @K[4],@K[4],$ONE + stp @x[12],@x[14],[$out,#48] + add @K[5],@K[5],$ONE + add $out,$out,#64 + + st1.8 {$A0-$D0},[$out],#64 + ld1.8 {$A0-$D0},[$inp],#64 + + eor $A1,$A1,$T0 + eor $B1,$B1,$T1 + eor $C1,$C1,$T2 + eor $D1,$D1,$T3 + st1.8 {$A1-$D1},[$out],#64 + + eor $A2,$A2,$A0 + eor $B2,$B2,$B0 + eor $C2,$C2,$C0 + eor $D2,$D2,$D0 + st1.8 {$A2-$D2},[$out],#64 + + b.hi .Loop_outer_neon + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + ret + +.Ltail_neon: + add $len,$len,#256 + cmp $len,#64 + b.lo .Less_than_64 + + add @x[0],@x[0],@x[1],lsl#32 // pack + add @x[2],@x[2],@x[3],lsl#32 + ldp @x[1],@x[3],[$inp,#0] // load input + add @x[4],@x[4],@x[5],lsl#32 + add @x[6],@x[6],@x[7],lsl#32 + ldp @x[5],@x[7],[$inp,#16] + add @x[8],@x[8],@x[9],lsl#32 + add @x[10],@x[10],@x[11],lsl#32 + ldp @x[9],@x[11],[$inp,#32] + add @x[12],@x[12],@x[13],lsl#32 + add @x[14],@x[14],@x[15],lsl#32 + ldp @x[13],@x[15],[$inp,#48] + add $inp,$inp,#64 +#ifdef __AARCH64EB__ + rev @x[0],@x[0] + rev @x[2],@x[2] + rev @x[4],@x[4] + rev @x[6],@x[6] + rev @x[8],@x[8] + rev @x[10],@x[10] + rev @x[12],@x[12] + rev @x[14],@x[14] +#endif + eor @x[0],@x[0],@x[1] + eor @x[2],@x[2],@x[3] + eor @x[4],@x[4],@x[5] + eor @x[6],@x[6],@x[7] + eor @x[8],@x[8],@x[9] + eor @x[10],@x[10],@x[11] + eor @x[12],@x[12],@x[13] + eor @x[14],@x[14],@x[15] + + stp @x[0],@x[2],[$out,#0] // store output + add @d[6],@d[6],#4 // increment counter + stp @x[4],@x[6],[$out,#16] + stp @x[8],@x[10],[$out,#32] + stp @x[12],@x[14],[$out,#48] + add $out,$out,#64 + b.eq .Ldone_neon + sub $len,$len,#64 + cmp $len,#64 + b.lo .Less_than_128 + + ld1.8 {$T0-$T3},[$inp],#64 + eor $A0,$A0,$T0 + eor $B0,$B0,$T1 + eor $C0,$C0,$T2 + eor $D0,$D0,$T3 + st1.8 {$A0-$D0},[$out],#64 + b.eq .Ldone_neon + sub $len,$len,#64 + cmp $len,#64 + b.lo .Less_than_192 + + ld1.8 {$T0-$T3},[$inp],#64 + eor $A1,$A1,$T0 + eor $B1,$B1,$T1 + eor $C1,$C1,$T2 + eor $D1,$D1,$T3 + st1.8 {$A1-$D1},[$out],#64 + b.eq .Ldone_neon + sub $len,$len,#64 + + st1.8 {$A2-$D2},[sp] + b .Last_neon + +.Less_than_128: + st1.8 {$A0-$D0},[sp] + b .Last_neon +.Less_than_192: + st1.8 {$A1-$D1},[sp] + b .Last_neon + +.align 4 +.Last_neon: + sub $out,$out,#1 + add $inp,$inp,$len + add $out,$out,$len + add $ctr,sp,$len + neg $len,$len + +.Loop_tail_neon: + ldrb w10,[$inp,$len] + ldrb w11,[$ctr,$len] + add $len,$len,#1 + eor w10,w10,w11 + strb w10,[$out,$len] + cbnz $len,.Loop_tail_neon + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + +.Ldone_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + ret +.size ChaCha20_neon,.-ChaCha20_neon +___ +{ +my ($T0,$T1,$T2,$T3,$T4,$T5)=@K; +my ($A0,$B0,$C0,$D0,$A1,$B1,$C1,$D1,$A2,$B2,$C2,$D2, + $A3,$B3,$C3,$D3,$A4,$B4,$C4,$D4,$A5,$B5,$C5,$D5) = map("v$_.4s",(0..23)); + +$code.=<<___; +#ifdef __APPLE__ +.type ChaCha20_512_neon,%function +.align 5 +ChaCha20_512_neon: + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adr @x[0],.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + +.L512_or_more_neon: + sub sp,sp,#128+64 + + ldp @d[0],@d[1],[@x[0]] // load sigma + ld1 {@K[0]},[@x[0]],#16 + ldp @d[2],@d[3],[$key] // load key + ldp @d[4],@d[5],[$key,#16] + ld1 {@K[1],@K[2]},[$key] + ldp @d[6],@d[7],[$ctr] // load counter + ld1 {@K[3]},[$ctr] + ld1 {$ONE},[@x[0]] +# ifdef __AARCH64EB__ + rev64 @K[0],@K[0] + ror @d[2],@d[2],#32 + ror @d[3],@d[3],#32 + ror @d[4],@d[4],#32 + ror @d[5],@d[5],#32 + ror @d[6],@d[6],#32 + ror @d[7],@d[7],#32 +# endif + add @K[3],@K[3],$ONE // += 1 + stp @K[0],@K[1],[sp,#0] // off-load key block, invariant part + add @K[3],@K[3],$ONE // not typo + str @K[2],[sp,#32] + add @K[4],@K[3],$ONE + add @K[5],@K[4],$ONE + add @K[6],@K[5],$ONE + shl $ONE,$ONE,#2 // 1 -> 4 + + stp d8,d9,[sp,#128+0] // meet ABI requirements + stp d10,d11,[sp,#128+16] + stp d12,d13,[sp,#128+32] + stp d14,d15,[sp,#128+48] + + sub $len,$len,#512 // not typo + +.Loop_outer_512_neon: + mov $A0,@K[0] + mov $A1,@K[0] + mov $A2,@K[0] + mov $A3,@K[0] + mov $A4,@K[0] + mov $A5,@K[0] + mov $B0,@K[1] + mov.32 @x[0],@d[0] // unpack key block + mov $B1,@K[1] + lsr @x[1],@d[0],#32 + mov $B2,@K[1] + mov.32 @x[2],@d[1] + mov $B3,@K[1] + lsr @x[3],@d[1],#32 + mov $B4,@K[1] + mov.32 @x[4],@d[2] + mov $B5,@K[1] + lsr @x[5],@d[2],#32 + mov $D0,@K[3] + mov.32 @x[6],@d[3] + mov $D1,@K[4] + lsr @x[7],@d[3],#32 + mov $D2,@K[5] + mov.32 @x[8],@d[4] + mov $D3,@K[6] + lsr @x[9],@d[4],#32 + mov $C0,@K[2] + mov.32 @x[10],@d[5] + mov $C1,@K[2] + lsr @x[11],@d[5],#32 + add $D4,$D0,$ONE // +4 + mov.32 @x[12],@d[6] + add $D5,$D1,$ONE // +4 + lsr @x[13],@d[6],#32 + mov $C2,@K[2] + mov.32 @x[14],@d[7] + mov $C3,@K[2] + lsr @x[15],@d[7],#32 + mov $C4,@K[2] + stp @K[3],@K[4],[sp,#48] // off-load key block, variable part + mov $C5,@K[2] + str @K[5],[sp,#80] + + mov $ctr,#5 + subs $len,$len,#512 +.Loop_upper_neon: + sub $ctr,$ctr,#1 +___ + my @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,0); + my @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,0); + my @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,0); + my @thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,0); + my @thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,0); + my @thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,0); + my @thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15)); + my $diff = ($#thread0+1)*6 - $#thread67 - 1; + my $i = 0; + + foreach (@thread0) { + eval; eval(shift(@thread67)); + eval(shift(@thread1)); eval(shift(@thread67)); + eval(shift(@thread2)); eval(shift(@thread67)); + eval(shift(@thread3)); eval(shift(@thread67)); + eval(shift(@thread4)); eval(shift(@thread67)); + eval(shift(@thread5)); eval(shift(@thread67)); + } + + @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,1); + @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,1); + @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,1); + @thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,1); + @thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,1); + @thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,1); + @thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15)); + + foreach (@thread0) { + eval; eval(shift(@thread67)); + eval(shift(@thread1)); eval(shift(@thread67)); + eval(shift(@thread2)); eval(shift(@thread67)); + eval(shift(@thread3)); eval(shift(@thread67)); + eval(shift(@thread4)); eval(shift(@thread67)); + eval(shift(@thread5)); eval(shift(@thread67)); + } +$code.=<<___; + cbnz $ctr,.Loop_upper_neon + + add.32 @x[0],@x[0],@d[0] // accumulate key block + add @x[1],@x[1],@d[0],lsr#32 + add.32 @x[2],@x[2],@d[1] + add @x[3],@x[3],@d[1],lsr#32 + add.32 @x[4],@x[4],@d[2] + add @x[5],@x[5],@d[2],lsr#32 + add.32 @x[6],@x[6],@d[3] + add @x[7],@x[7],@d[3],lsr#32 + add.32 @x[8],@x[8],@d[4] + add @x[9],@x[9],@d[4],lsr#32 + add.32 @x[10],@x[10],@d[5] + add @x[11],@x[11],@d[5],lsr#32 + add.32 @x[12],@x[12],@d[6] + add @x[13],@x[13],@d[6],lsr#32 + add.32 @x[14],@x[14],@d[7] + add @x[15],@x[15],@d[7],lsr#32 + + add @x[0],@x[0],@x[1],lsl#32 // pack + add @x[2],@x[2],@x[3],lsl#32 + ldp @x[1],@x[3],[$inp,#0] // load input + add @x[4],@x[4],@x[5],lsl#32 + add @x[6],@x[6],@x[7],lsl#32 + ldp @x[5],@x[7],[$inp,#16] + add @x[8],@x[8],@x[9],lsl#32 + add @x[10],@x[10],@x[11],lsl#32 + ldp @x[9],@x[11],[$inp,#32] + add @x[12],@x[12],@x[13],lsl#32 + add @x[14],@x[14],@x[15],lsl#32 + ldp @x[13],@x[15],[$inp,#48] + add $inp,$inp,#64 +# ifdef __AARCH64EB__ + rev @x[0],@x[0] + rev @x[2],@x[2] + rev @x[4],@x[4] + rev @x[6],@x[6] + rev @x[8],@x[8] + rev @x[10],@x[10] + rev @x[12],@x[12] + rev @x[14],@x[14] +# endif + eor @x[0],@x[0],@x[1] + eor @x[2],@x[2],@x[3] + eor @x[4],@x[4],@x[5] + eor @x[6],@x[6],@x[7] + eor @x[8],@x[8],@x[9] + eor @x[10],@x[10],@x[11] + eor @x[12],@x[12],@x[13] + eor @x[14],@x[14],@x[15] + + stp @x[0],@x[2],[$out,#0] // store output + add @d[6],@d[6],#1 // increment counter + mov.32 @x[0],@d[0] // unpack key block + lsr @x[1],@d[0],#32 + stp @x[4],@x[6],[$out,#16] + mov.32 @x[2],@d[1] + lsr @x[3],@d[1],#32 + stp @x[8],@x[10],[$out,#32] + mov.32 @x[4],@d[2] + lsr @x[5],@d[2],#32 + stp @x[12],@x[14],[$out,#48] + add $out,$out,#64 + mov.32 @x[6],@d[3] + lsr @x[7],@d[3],#32 + mov.32 @x[8],@d[4] + lsr @x[9],@d[4],#32 + mov.32 @x[10],@d[5] + lsr @x[11],@d[5],#32 + mov.32 @x[12],@d[6] + lsr @x[13],@d[6],#32 + mov.32 @x[14],@d[7] + lsr @x[15],@d[7],#32 + + mov $ctr,#5 +.Loop_lower_neon: + sub $ctr,$ctr,#1 +___ + @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,0); + @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,0); + @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,0); + @thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,0); + @thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,0); + @thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,0); + @thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15)); + + foreach (@thread0) { + eval; eval(shift(@thread67)); + eval(shift(@thread1)); eval(shift(@thread67)); + eval(shift(@thread2)); eval(shift(@thread67)); + eval(shift(@thread3)); eval(shift(@thread67)); + eval(shift(@thread4)); eval(shift(@thread67)); + eval(shift(@thread5)); eval(shift(@thread67)); + } + + @thread0=&NEONROUND($A0,$B0,$C0,$D0,$T0,1); + @thread1=&NEONROUND($A1,$B1,$C1,$D1,$T1,1); + @thread2=&NEONROUND($A2,$B2,$C2,$D2,$T2,1); + @thread3=&NEONROUND($A3,$B3,$C3,$D3,$T3,1); + @thread4=&NEONROUND($A4,$B4,$C4,$D4,$T4,1); + @thread5=&NEONROUND($A5,$B5,$C5,$D5,$T5,1); + @thread67=(&ROUND(0,4,8,12),&ROUND(0,5,10,15)); + + foreach (@thread0) { + eval; eval(shift(@thread67)); + eval(shift(@thread1)); eval(shift(@thread67)); + eval(shift(@thread2)); eval(shift(@thread67)); + eval(shift(@thread3)); eval(shift(@thread67)); + eval(shift(@thread4)); eval(shift(@thread67)); + eval(shift(@thread5)); eval(shift(@thread67)); + } +$code.=<<___; + cbnz $ctr,.Loop_lower_neon + + add.32 @x[0],@x[0],@d[0] // accumulate key block + ldp @K[0],@K[1],[sp,#0] + add @x[1],@x[1],@d[0],lsr#32 + ldp @K[2],@K[3],[sp,#32] + add.32 @x[2],@x[2],@d[1] + ldp @K[4],@K[5],[sp,#64] + add @x[3],@x[3],@d[1],lsr#32 + add $A0,$A0,@K[0] + add.32 @x[4],@x[4],@d[2] + add $A1,$A1,@K[0] + add @x[5],@x[5],@d[2],lsr#32 + add $A2,$A2,@K[0] + add.32 @x[6],@x[6],@d[3] + add $A3,$A3,@K[0] + add @x[7],@x[7],@d[3],lsr#32 + add $A4,$A4,@K[0] + add.32 @x[8],@x[8],@d[4] + add $A5,$A5,@K[0] + add @x[9],@x[9],@d[4],lsr#32 + add $C0,$C0,@K[2] + add.32 @x[10],@x[10],@d[5] + add $C1,$C1,@K[2] + add @x[11],@x[11],@d[5],lsr#32 + add $C2,$C2,@K[2] + add.32 @x[12],@x[12],@d[6] + add $C3,$C3,@K[2] + add @x[13],@x[13],@d[6],lsr#32 + add $C4,$C4,@K[2] + add.32 @x[14],@x[14],@d[7] + add $C5,$C5,@K[2] + add @x[15],@x[15],@d[7],lsr#32 + add $D4,$D4,$ONE // +4 + add @x[0],@x[0],@x[1],lsl#32 // pack + add $D5,$D5,$ONE // +4 + add @x[2],@x[2],@x[3],lsl#32 + add $D0,$D0,@K[3] + ldp @x[1],@x[3],[$inp,#0] // load input + add $D1,$D1,@K[4] + add @x[4],@x[4],@x[5],lsl#32 + add $D2,$D2,@K[5] + add @x[6],@x[6],@x[7],lsl#32 + add $D3,$D3,@K[6] + ldp @x[5],@x[7],[$inp,#16] + add $D4,$D4,@K[3] + add @x[8],@x[8],@x[9],lsl#32 + add $D5,$D5,@K[4] + add @x[10],@x[10],@x[11],lsl#32 + add $B0,$B0,@K[1] + ldp @x[9],@x[11],[$inp,#32] + add $B1,$B1,@K[1] + add @x[12],@x[12],@x[13],lsl#32 + add $B2,$B2,@K[1] + add @x[14],@x[14],@x[15],lsl#32 + add $B3,$B3,@K[1] + ldp @x[13],@x[15],[$inp,#48] + add $B4,$B4,@K[1] + add $inp,$inp,#64 + add $B5,$B5,@K[1] + +# ifdef __AARCH64EB__ + rev @x[0],@x[0] + rev @x[2],@x[2] + rev @x[4],@x[4] + rev @x[6],@x[6] + rev @x[8],@x[8] + rev @x[10],@x[10] + rev @x[12],@x[12] + rev @x[14],@x[14] +# endif + ld1.8 {$T0-$T3},[$inp],#64 + eor @x[0],@x[0],@x[1] + eor @x[2],@x[2],@x[3] + eor @x[4],@x[4],@x[5] + eor @x[6],@x[6],@x[7] + eor @x[8],@x[8],@x[9] + eor $A0,$A0,$T0 + eor @x[10],@x[10],@x[11] + eor $B0,$B0,$T1 + eor @x[12],@x[12],@x[13] + eor $C0,$C0,$T2 + eor @x[14],@x[14],@x[15] + eor $D0,$D0,$T3 + ld1.8 {$T0-$T3},[$inp],#64 + + stp @x[0],@x[2],[$out,#0] // store output + add @d[6],@d[6],#7 // increment counter + stp @x[4],@x[6],[$out,#16] + stp @x[8],@x[10],[$out,#32] + stp @x[12],@x[14],[$out,#48] + add $out,$out,#64 + st1.8 {$A0-$D0},[$out],#64 + + ld1.8 {$A0-$D0},[$inp],#64 + eor $A1,$A1,$T0 + eor $B1,$B1,$T1 + eor $C1,$C1,$T2 + eor $D1,$D1,$T3 + st1.8 {$A1-$D1},[$out],#64 + + ld1.8 {$A1-$D1},[$inp],#64 + eor $A2,$A2,$A0 + ldp @K[0],@K[1],[sp,#0] + eor $B2,$B2,$B0 + ldp @K[2],@K[3],[sp,#32] + eor $C2,$C2,$C0 + eor $D2,$D2,$D0 + st1.8 {$A2-$D2},[$out],#64 + + ld1.8 {$A2-$D2},[$inp],#64 + eor $A3,$A3,$A1 + eor $B3,$B3,$B1 + eor $C3,$C3,$C1 + eor $D3,$D3,$D1 + st1.8 {$A3-$D3},[$out],#64 + + ld1.8 {$A3-$D3},[$inp],#64 + eor $A4,$A4,$A2 + eor $B4,$B4,$B2 + eor $C4,$C4,$C2 + eor $D4,$D4,$D2 + st1.8 {$A4-$D4},[$out],#64 + + shl $A0,$ONE,#1 // 4 -> 8 + eor $A5,$A5,$A3 + eor $B5,$B5,$B3 + eor $C5,$C5,$C3 + eor $D5,$D5,$D3 + st1.8 {$A5-$D5},[$out],#64 + + add @K[3],@K[3],$A0 // += 8 + add @K[4],@K[4],$A0 + add @K[5],@K[5],$A0 + add @K[6],@K[6],$A0 + + b.hs .Loop_outer_512_neon + + adds $len,$len,#512 + ushr $A0,$ONE,#2 // 4 -> 1 + + ldp d8,d9,[sp,#128+0] // meet ABI requirements + ldp d10,d11,[sp,#128+16] + ldp d12,d13,[sp,#128+32] + ldp d14,d15,[sp,#128+48] + + stp @K[0],$ONE,[sp,#0] // wipe off-load area + stp @K[0],$ONE,[sp,#32] + stp @K[0],$ONE,[sp,#64] + + b.eq .Ldone_512_neon + + cmp $len,#192 + sub @K[3],@K[3],$A0 // -= 1 + sub @K[4],@K[4],$A0 + sub @K[5],@K[5],$A0 + add sp,sp,#128 + b.hs .Loop_outer_neon + + eor @K[1],@K[1],@K[1] + eor @K[2],@K[2],@K[2] + eor @K[3],@K[3],@K[3] + eor @K[4],@K[4],@K[4] + eor @K[5],@K[5],@K[5] + eor @K[6],@K[6],@K[6] + b .Loop_outer + +.Ldone_512_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#128+64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + ret +.size ChaCha20_512_neon,.-ChaCha20_512_neon +#endif +#endif +___ +} +}}} + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + (s/\b([a-z]+)\.32\b/$1/ and (s/x([0-9]+)/w$1/g or 1)) or + (m/\b(eor|ext|mov)\b/ and (s/\.4s/\.16b/g or 1)) or + (s/\b((?:ld|st)1)\.8\b/$1/ and (s/\.4s/\.16b/g or 1)) or + (m/\b(ld|st)[rp]\b/ and (s/v([0-9]+)\.4s/q$1/g or 1)) or + (s/\brev32\.16\b/rev32/ and (s/\.4s/\.8h/g or 1)); + + print $_,"\n"; +} +close STDOUT; # flush diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-mips-glue.c b/net/wireguard/crypto/zinc/chacha20/chacha20-mips-glue.c new file mode 100644 index 000000000000..96ce01e2c133 --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-mips-glue.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in, + const size_t len); +static bool *const chacha20_nobs[] __initconst = { }; +static void __init chacha20_fpu_init(void) +{ +} + +static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst, + const u8 *src, size_t len, + simd_context_t *simd_context) +{ + chacha20_mips(ctx->state, dst, src, len); + return true; +} + +static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], + simd_context_t *simd_context) +{ + return false; +} diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-mips.S b/net/wireguard/crypto/zinc/chacha20/chacha20-mips.S new file mode 100644 index 000000000000..a81e02db95e7 --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-mips.S @@ -0,0 +1,424 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2016-2018 René van Dorst . All Rights Reserved. + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#define MASK_U32 0x3c +#define CHACHA20_BLOCK_SIZE 64 +#define STACK_SIZE 32 + +#define X0 $t0 +#define X1 $t1 +#define X2 $t2 +#define X3 $t3 +#define X4 $t4 +#define X5 $t5 +#define X6 $t6 +#define X7 $t7 +#define X8 $t8 +#define X9 $t9 +#define X10 $v1 +#define X11 $s6 +#define X12 $s5 +#define X13 $s4 +#define X14 $s3 +#define X15 $s2 +/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ +#define T0 $s1 +#define T1 $s0 +#define T(n) T ## n +#define X(n) X ## n + +/* Input arguments */ +#define STATE $a0 +#define OUT $a1 +#define IN $a2 +#define BYTES $a3 + +/* Output argument */ +/* NONCE[0] is kept in a register and not in memory. + * We don't want to touch original value in memory. + * Must be incremented every loop iteration. + */ +#define NONCE_0 $v0 + +/* SAVED_X and SAVED_CA are set in the jump table. + * Use regs which are overwritten on exit else we don't leak clear data. + * They are used to handling the last bytes which are not multiple of 4. + */ +#define SAVED_X X15 +#define SAVED_CA $s7 + +#define IS_UNALIGNED $s7 + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MSB 0 +#define LSB 3 +#define ROTx rotl +#define ROTR(n) rotr n, 24 +#define CPU_TO_LE32(n) \ + wsbh n; \ + rotr n, 16; +#else +#define MSB 3 +#define LSB 0 +#define ROTx rotr +#define CPU_TO_LE32(n) +#define ROTR(n) +#endif + +#define FOR_EACH_WORD(x) \ + x( 0); \ + x( 1); \ + x( 2); \ + x( 3); \ + x( 4); \ + x( 5); \ + x( 6); \ + x( 7); \ + x( 8); \ + x( 9); \ + x(10); \ + x(11); \ + x(12); \ + x(13); \ + x(14); \ + x(15); + +#define FOR_EACH_WORD_REV(x) \ + x(15); \ + x(14); \ + x(13); \ + x(12); \ + x(11); \ + x(10); \ + x( 9); \ + x( 8); \ + x( 7); \ + x( 6); \ + x( 5); \ + x( 4); \ + x( 3); \ + x( 2); \ + x( 1); \ + x( 0); + +#define PLUS_ONE_0 1 +#define PLUS_ONE_1 2 +#define PLUS_ONE_2 3 +#define PLUS_ONE_3 4 +#define PLUS_ONE_4 5 +#define PLUS_ONE_5 6 +#define PLUS_ONE_6 7 +#define PLUS_ONE_7 8 +#define PLUS_ONE_8 9 +#define PLUS_ONE_9 10 +#define PLUS_ONE_10 11 +#define PLUS_ONE_11 12 +#define PLUS_ONE_12 13 +#define PLUS_ONE_13 14 +#define PLUS_ONE_14 15 +#define PLUS_ONE_15 16 +#define PLUS_ONE(x) PLUS_ONE_ ## x +#define _CONCAT3(a,b,c) a ## b ## c +#define CONCAT3(a,b,c) _CONCAT3(a,b,c) + +#define STORE_UNALIGNED(x) \ +CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lwl T1, (x*4)+MSB ## (IN); \ + lwr T1, (x*4)+LSB ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + swl X ## x, (x*4)+MSB ## (OUT); \ + swr X ## x, (x*4)+LSB ## (OUT); + +#define STORE_ALIGNED(x) \ +CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lw T1, (x*4) ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + sw X ## x, (x*4) ## (OUT); + +/* Jump table macro. + * Used for setup and handling the last bytes, which are not multiple of 4. + * X15 is free to store Xn + * Every jumptable entry must be equal in size. + */ +#define JMPTBL_ALIGNED(x) \ +.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define JMPTBL_UNALIGNED(x) \ +.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ + addu X(A), X(K); \ + addu X(B), X(L); \ + addu X(C), X(M); \ + addu X(D), X(N); \ + xor X(V), X(A); \ + xor X(W), X(B); \ + xor X(Y), X(C); \ + xor X(Z), X(D); \ + rotl X(V), S; \ + rotl X(W), S; \ + rotl X(Y), S; \ + rotl X(Z), S; + +.text +.set reorder +.set noat +.globl chacha20_mips +.ent chacha20_mips +chacha20_mips: + .frame $sp, STACK_SIZE, $ra + + addiu $sp, -STACK_SIZE + + /* Return bytes = 0. */ + beqz BYTES, .Lchacha20_mips_end + + lw NONCE_0, 48(STATE) + + /* Save s0-s7 */ + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + sw $s6, 24($sp) + sw $s7, 28($sp) + + /* Test IN or OUT is unaligned. + * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 + */ + or IS_UNALIGNED, IN, OUT + andi IS_UNALIGNED, 0x3 + + /* Set number of rounds */ + li $at, 20 + + b .Lchacha20_rounds_start + +.align 4 +.Loop_chacha20_rounds: + addiu IN, CHACHA20_BLOCK_SIZE + addiu OUT, CHACHA20_BLOCK_SIZE + addiu NONCE_0, 1 + +.Lchacha20_rounds_start: + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + + lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + + move X12, NONCE_0 + lw X13, 52(STATE) + lw X14, 56(STATE) + lw X15, 60(STATE) + +.Loop_chacha20_xor_rounds: + addiu $at, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $at, .Loop_chacha20_xor_rounds + + addiu BYTES, -(CHACHA20_BLOCK_SIZE) + + /* Is data src/dst unaligned? Jump */ + bnez IS_UNALIGNED, .Loop_chacha20_unaligned + + /* Set number rounds here to fill delayslot. */ + li $at, 20 + + /* BYTES < 0, it has no full block. */ + bltz BYTES, .Lchacha20_mips_no_full_block_aligned + + FOR_EACH_WORD_REV(STORE_ALIGNED) + + /* BYTES > 0? Loop again. */ + bgtz BYTES, .Loop_chacha20_rounds + + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + + /* BYTES < 0? Handle last bytes */ + bltz BYTES, .Lchacha20_mips_xor_bytes + +.Lchacha20_mips_xor_done: + /* Restore used registers */ + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + lw $s6, 24($sp) + lw $s7, 28($sp) + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + +.Lchacha20_mips_end: + addiu $sp, STACK_SIZE + jr $ra + +.Lchacha20_mips_no_full_block_aligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_ALIGNED) + + +.Loop_chacha20_unaligned: + /* Set number rounds here to fill delayslot. */ + li $at, 20 + + /* BYTES > 0, it has no full block. */ + bltz BYTES, .Lchacha20_mips_no_full_block_unaligned + + FOR_EACH_WORD_REV(STORE_UNALIGNED) + + /* BYTES > 0? Loop again. */ + bgtz BYTES, .Loop_chacha20_rounds + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + + .set noreorder + /* Fall through to byte handling */ + bgez BYTES, .Lchacha20_mips_xor_done +.Lchacha20_mips_xor_unaligned_0_b: +.Lchacha20_mips_xor_aligned_0_b: + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + .set reorder + +.Lchacha20_mips_xor_bytes: + addu IN, $at + addu OUT, $at + /* First byte */ + lbu T1, 0(IN) + addiu $at, BYTES, 1 + CPU_TO_LE32(SAVED_X) + ROTR(SAVED_X) + xor T1, SAVED_X + sb T1, 0(OUT) + beqz $at, .Lchacha20_mips_xor_done + /* Second byte */ + lbu T1, 1(IN) + addiu $at, BYTES, 2 + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 1(OUT) + beqz $at, .Lchacha20_mips_xor_done + /* Third byte */ + lbu T1, 2(IN) + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 2(OUT) + b .Lchacha20_mips_xor_done + +.Lchacha20_mips_no_full_block_unaligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_UNALIGNED) +.end chacha20_mips +.set at diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-unrolled-arm.S b/net/wireguard/crypto/zinc/chacha20/chacha20-unrolled-arm.S new file mode 100644 index 000000000000..8fb4bc2e7b5b --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-unrolled-arm.S @@ -0,0 +1,461 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Google, Inc. + */ + +#include +#include + +/* + * Design notes: + * + * 16 registers would be needed to hold the state matrix, but only 14 are + * available because 'sp' and 'pc' cannot be used. So we spill the elements + * (x8, x9) to the stack and swap them out with (x10, x11). This adds one + * 'ldrd' and one 'strd' instruction per round. + * + * All rotates are performed using the implicit rotate operand accepted by the + * 'add' and 'eor' instructions. This is faster than using explicit rotate + * instructions. To make this work, we allow the values in the second and last + * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the + * wrong rotation amount. The rotation amount is then fixed up just in time + * when the values are used. 'brot' is the number of bits the values in row 'b' + * need to be rotated right to arrive at the correct values, and 'drot' + * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such + * that they end up as (25, 24) after every round. + */ + + // ChaCha state registers + X0 .req r0 + X1 .req r1 + X2 .req r2 + X3 .req r3 + X4 .req r4 + X5 .req r5 + X6 .req r6 + X7 .req r7 + X8_X10 .req r8 // shared by x8 and x10 + X9_X11 .req r9 // shared by x9 and x11 + X12 .req r10 + X13 .req r11 + X14 .req r12 + X15 .req r14 + +.Lexpand_32byte_k: + // "expand 32-byte k" + .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + +#ifdef __thumb2__ +# define adrl adr +#endif + +.macro __rev out, in, t0, t1, t2 +.if __LINUX_ARM_ARCH__ >= 6 + rev \out, \in +.else + lsl \t0, \in, #24 + and \t1, \in, #0xff00 + and \t2, \in, #0xff0000 + orr \out, \t0, \in, lsr #24 + orr \out, \out, \t1, lsl #8 + orr \out, \out, \t2, lsr #8 +.endif +.endm + +.macro _le32_bswap x, t0, t1, t2 +#ifdef __ARMEB__ + __rev \x, \x, \t0, \t1, \t2 +#endif +.endm + +.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 + _le32_bswap \a, \t0, \t1, \t2 + _le32_bswap \b, \t0, \t1, \t2 + _le32_bswap \c, \t0, \t1, \t2 + _le32_bswap \d, \t0, \t1, \t2 +.endm + +.macro __ldrd a, b, src, offset +#if __LINUX_ARM_ARCH__ >= 6 + ldrd \a, \b, [\src, #\offset] +#else + ldr \a, [\src, #\offset] + ldr \b, [\src, #\offset + 4] +#endif +.endm + +.macro __strd a, b, dst, offset +#if __LINUX_ARM_ARCH__ >= 6 + strd \a, \b, [\dst, #\offset] +#else + str \a, [\dst, #\offset] + str \b, [\dst, #\offset + 4] +#endif +.endm + +.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2 + + // a += b; d ^= a; d = rol(d, 16); + add \a1, \a1, \b1, ror #brot + add \a2, \a2, \b2, ror #brot + eor \d1, \a1, \d1, ror #drot + eor \d2, \a2, \d2, ror #drot + // drot == 32 - 16 == 16 + + // c += d; b ^= c; b = rol(b, 12); + add \c1, \c1, \d1, ror #16 + add \c2, \c2, \d2, ror #16 + eor \b1, \c1, \b1, ror #brot + eor \b2, \c2, \b2, ror #brot + // brot == 32 - 12 == 20 + + // a += b; d ^= a; d = rol(d, 8); + add \a1, \a1, \b1, ror #20 + add \a2, \a2, \b2, ror #20 + eor \d1, \a1, \d1, ror #16 + eor \d2, \a2, \d2, ror #16 + // drot == 32 - 8 == 24 + + // c += d; b ^= c; b = rol(b, 7); + add \c1, \c1, \d1, ror #24 + add \c2, \c2, \d2, ror #24 + eor \b1, \c1, \b1, ror #20 + eor \b2, \c2, \b2, ror #20 + // brot == 32 - 7 == 25 +.endm + +.macro _doubleround + + // column round + + // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13) + _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13 + + // save (x8, x9); restore (x10, x11) + __strd X8_X10, X9_X11, sp, 0 + __ldrd X8_X10, X9_X11, sp, 8 + + // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15) + _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15 + + .set brot, 25 + .set drot, 24 + + // diagonal round + + // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12) + _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12 + + // save (x10, x11); restore (x8, x9) + __strd X8_X10, X9_X11, sp, 8 + __ldrd X8_X10, X9_X11, sp, 0 + + // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14) + _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14 +.endm + +.macro _chacha_permute nrounds + .set brot, 0 + .set drot, 0 + .rept \nrounds / 2 + _doubleround + .endr +.endm + +.macro _chacha nrounds + +.Lnext_block\@: + // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + + // Do the core ChaCha permutation to update x0-x15. + _chacha_permute \nrounds + + add sp, #8 + // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15). + push {X8_X10, X9_X11, X12, X13, X14, X15} + + // Load (OUT, IN, LEN). + ldr r14, [sp, #96] + ldr r12, [sp, #100] + ldr r11, [sp, #104] + + orr r10, r14, r12 + + // Use slow path if fewer than 64 bytes remain. + cmp r11, #64 + blt .Lxor_slowpath\@ + + // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on + // ARMv6+, since ldmia and stmia (used below) still require alignment. + tst r10, #3 + bne .Lxor_slowpath\@ + + // Fast path: XOR 64 bytes of aligned data. + + // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // x0-x3 + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + ldmia r12!, {r8-r11} + eor X0, X0, r8 + eor X1, X1, r9 + eor X2, X2, r10 + eor X3, X3, r11 + stmia r14!, {X0-X3} + + // x4-x7 + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + ldmia r12!, {X0-X3} + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + eor X4, X4, X0 + eor X5, X5, X1 + eor X6, X6, X2 + eor X7, X7, X3 + stmia r14!, {X4-X7} + + // x8-x15 + pop {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + ldmia r12!, {r8-r11} + eor r0, r0, r8 // x8 + eor r1, r1, r9 // x9 + eor r6, r6, r10 // x10 + eor r7, r7, r11 // x11 + stmia r14!, {r0,r1,r6,r7} + ldmia r12!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + ldr r9, [sp, #72] // load LEN + eor r2, r2, r0 // x12 + eor r3, r3, r1 // x13 + eor r4, r4, r6 // x14 + eor r5, r5, r7 // x15 + subs r9, #64 // decrement and check LEN + stmia r14!, {r2-r5} + + beq .Ldone\@ + +.Lprepare_for_next_block\@: + + // Stack: x0-x15 OUT IN LEN + + // Increment block counter (x12) + add r8, #1 + + // Store updated (OUT, IN, LEN) + str r14, [sp, #64] + str r12, [sp, #68] + str r9, [sp, #72] + + mov r14, sp + + // Store updated block counter (x12) + str r8, [sp, #48] + + sub sp, #16 + + // Reload state and do next block + ldmia r14!, {r0-r11} // load x0-x11 + __strd r10, r11, sp, 8 // store x10-x11 before state + ldmia r14, {r10-r12,r14} // load x12-x15 + b .Lnext_block\@ + +.Lxor_slowpath\@: + // Slow path: < 64 bytes remaining, or unaligned input or output buffer. + // We handle it by storing the 64 bytes of keystream to the stack, then + // XOR-ing the needed portion with the data. + + // Allocate keystream buffer + sub sp, #64 + mov r14, sp + + // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // Save keystream for x0-x3 + __ldrd r8, r9, sp, 96 + __ldrd r10, r11, sp, 104 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + stmia r14!, {X0-X3} + + // Save keystream for x4-x7 + __ldrd r8, r9, sp, 112 + __ldrd r10, r11, sp, 120 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + add r8, sp, #64 + stmia r14!, {X4-X7} + + // Save keystream for x8-x15 + ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 128 + __ldrd r10, r11, sp, 136 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + stmia r14!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 144 + __ldrd r10, r11, sp, 152 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + stmia r14, {r2-r5} + + // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN + // Registers: r8 is block counter, r12 is IN. + + ldr r9, [sp, #168] // LEN + ldr r14, [sp, #160] // OUT + cmp r9, #64 + mov r0, sp + movle r1, r9 + movgt r1, #64 + // r1 is number of bytes to XOR, in range [1, 64] + +.if __LINUX_ARM_ARCH__ < 6 + orr r2, r12, r14 + tst r2, #3 // IN or OUT misaligned? + bne .Lxor_next_byte\@ +.endif + + // XOR a word at a time +.rept 16 + subs r1, #4 + blt .Lxor_words_done\@ + ldr r2, [r12], #4 + ldr r3, [r0], #4 + eor r2, r2, r3 + str r2, [r14], #4 +.endr + b .Lxor_slowpath_done\@ +.Lxor_words_done\@: + ands r1, r1, #3 + beq .Lxor_slowpath_done\@ + + // XOR a byte at a time +.Lxor_next_byte\@: + ldrb r2, [r12], #1 + ldrb r3, [r0], #1 + eor r2, r2, r3 + strb r2, [r14], #1 + subs r1, #1 + bne .Lxor_next_byte\@ + +.Lxor_slowpath_done\@: + subs r9, #64 + add sp, #96 + bgt .Lprepare_for_next_block\@ + +.Ldone\@: +.endm // _chacha + +/* + * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], + * const u32 iv[4]); + */ +SYM_FUNC_START(chacha20_arm) + cmp r2, #0 // len == 0? + reteq lr + + push {r0-r2,r4-r11,lr} + + // Push state x0-x15 onto stack. + // Also store an extra copy of x10-x11 just before the state. + + ldr r4, [sp, #48] // iv + mov r0, sp + sub sp, #80 + + // iv: x12-x15 + ldm r4, {X12,X13,X14,X15} + stmdb r0!, {X12,X13,X14,X15} + + // key: x4-x11 + __ldrd X8_X10, X9_X11, r3, 24 + __strd X8_X10, X9_X11, sp, 8 + stmdb r0!, {X8_X10, X9_X11} + ldm r3, {X4-X9_X11} + stmdb r0!, {X4-X9_X11} + + // constants: x0-x3 + adrl X3, .Lexpand_32byte_k + ldm X3, {X0-X3} + __strd X0, X1, sp, 16 + __strd X2, X3, sp, 24 + + _chacha 20 + + add sp, #76 + pop {r4-r11, pc} +SYM_FUNC_END(chacha20_arm) + +/* + * void hchacha20_arm(const u32 state[16], u32 out[8]); + */ +SYM_FUNC_START(hchacha20_arm) + push {r1,r4-r11,lr} + + mov r14, r0 + ldmia r14!, {r0-r11} // load x0-x11 + push {r10-r11} // store x10-x11 to stack + ldm r14, {r10-r12,r14} // load x12-x15 + sub sp, #8 + + _chacha_permute 20 + + // Skip over (unused0-unused1, x10-x11) + add sp, #16 + + // Fix up rotations of x12-x15 + ror X12, X12, #drot + ror X13, X13, #drot + pop {r4} // load 'out' + ror X14, X14, #drot + ror X15, X15, #drot + + // Store (x0-x3,x12-x15) to 'out' + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} + + pop {r4-r11,pc} +SYM_FUNC_END(hchacha20_arm) diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64-glue.c b/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64-glue.c new file mode 100644 index 000000000000..5ac5f686a641 --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64-glue.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include + +asmlinkage void hchacha20_ssse3(u32 *derived_key, const u8 *nonce, + const u8 *key); +asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); + +static bool chacha20_use_ssse3 __ro_after_init; +static bool chacha20_use_avx2 __ro_after_init; +static bool chacha20_use_avx512 __ro_after_init; +static bool chacha20_use_avx512vl __ro_after_init; +static bool *const chacha20_nobs[] __initconst = { + &chacha20_use_ssse3, &chacha20_use_avx2, &chacha20_use_avx512, + &chacha20_use_avx512vl }; + +static void __init chacha20_fpu_init(void) +{ + chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); + chacha20_use_avx2 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); +#ifndef COMPAT_CANNOT_USE_AVX512 + chacha20_use_avx512 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm. */ + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X; + chacha20_use_avx512vl = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL); +#endif +} + +static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst, + const u8 *src, size_t len, + simd_context_t *simd_context) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE || + PAGE_SIZE % CHACHA20_BLOCK_SIZE); + + if (!IS_ENABLED(CONFIG_AS_SSSE3) || !chacha20_use_ssse3 || + len <= CHACHA20_BLOCK_SIZE || !simd_use(simd_context)) + return false; + + for (;;) { + const size_t bytes = min_t(size_t, len, PAGE_SIZE); + + if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512 && + len >= CHACHA20_BLOCK_SIZE * 8) + chacha20_avx512(dst, src, bytes, ctx->key, ctx->counter); + else if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512vl && + len >= CHACHA20_BLOCK_SIZE * 4) + chacha20_avx512vl(dst, src, bytes, ctx->key, ctx->counter); + else if (IS_ENABLED(CONFIG_AS_AVX2) && chacha20_use_avx2 && + len >= CHACHA20_BLOCK_SIZE * 4) + chacha20_avx2(dst, src, bytes, ctx->key, ctx->counter); + else + chacha20_ssse3(dst, src, bytes, ctx->key, ctx->counter); + ctx->counter[0] += (bytes + 63) / 64; + len -= bytes; + if (!len) + break; + dst += bytes; + src += bytes; + simd_relax(simd_context); + } + + return true; +} + +static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], + simd_context_t *simd_context) +{ + if (IS_ENABLED(CONFIG_AS_SSSE3) && chacha20_use_ssse3 && + simd_use(simd_context)) { + hchacha20_ssse3(derived_key, nonce, key); + return true; + } + return false; +} diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64.pl b/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64.pl new file mode 100644 index 000000000000..29906a66b8b7 --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20-x86_64.pl @@ -0,0 +1,4106 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Copyright (C) 2017-2019 Samuel Neves . All Rights Reserved. +# Copyright (C) 2017-2019 Jason A. Donenfeld . All Rights Reserved. +# Copyright (C) 2006-2017 CRYPTOGAMS by . All Rights Reserved. +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# November 2014 +# +# ChaCha20 for x86_64. +# +# December 2016 +# +# Add AVX512F code path. +# +# December 2017 +# +# Add AVX512VL code path. +# +# Performance in cycles per byte out of large buffer. +# +# IALU/gcc 4.8(i) 1x/2xSSSE3(ii) 4xSSSE3 NxAVX(v) +# +# P4 9.48/+99% - - +# Core2 7.83/+55% 7.90/5.76 4.35 +# Westmere 7.19/+50% 5.60/4.50 3.00 +# Sandy Bridge 8.31/+42% 5.45/4.00 2.72 +# Ivy Bridge 6.71/+46% 5.40/? 2.41 +# Haswell 5.92/+43% 5.20/3.45 2.42 1.23 +# Skylake[-X] 5.87/+39% 4.70/3.22 2.31 1.19[0.80(vi)] +# Silvermont 12.0/+33% 7.75/6.90 7.03(iii) +# Knights L 11.7/- ? 9.60(iii) 0.80 +# Goldmont 10.6/+17% 5.10/3.52 3.28 +# Sledgehammer 7.28/+52% - - +# Bulldozer 9.66/+28% 9.85/5.35(iv) 3.06(iv) +# Ryzen 5.96/+50% 5.19/3.00 2.40 2.09 +# VIA Nano 10.5/+46% 6.72/6.88 6.05 +# +# (i) compared to older gcc 3.x one can observe >2x improvement on +# most platforms; +# (ii) 2xSSSE3 is code path optimized specifically for 128 bytes used +# by chacha20_poly1305_tls_cipher, results are EVP-free; +# (iii) this is not optimal result for Atom because of MSROM +# limitations, SSE2 can do better, but gain is considered too +# low to justify the [maintenance] effort; +# (iv) Bulldozer actually executes 4xXOP code path that delivers 2.20 +# and 4.85 for 128-byte inputs; +# (v) 8xAVX2, 8xAVX512VL or 16xAVX512F, whichever best applicable; +# (vi) even though Skylake-X can execute AVX512F code and deliver 0.57 +# cpb in single thread, the corresponding capability is suppressed; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); +$kernel=0; $kernel=1 if (!$flavour && !$output); + +if (!$kernel) { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or + die "can't locate x86_64-xlate.pl"; + + open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; + *STDOUT=*OUT; + + if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25); + } + + if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { + $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12); + $avx += 1 if ($1==2.11 && $2>=8); + } + + if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + } + + if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); + } +} else { + $avx = 4; # The kernel uses ifdefs for this. +} + +# input parameter block +($out,$inp,$len,$key,$counter)=("%rdi","%rsi","%rdx","%rcx","%r8"); + +$code.=<<___ if $kernel; +#include +___ + +sub declare_variable() { + my ($name, $size, $type, $payload) = @_; + if($kernel) { + $code.=".section .rodata.cst$size.L$name, \"aM\", \@progbits, $size\n"; + $code.=".align $size\n"; + $code.=".L$name:\n"; + $code.=".$type $payload\n"; + } else { + $code.=".L$name:\n"; + $code.=".$type $payload\n"; + } +} + +sub declare_function() { + my ($name, $align, $nargs) = @_; + if($kernel) { + $code .= ".align $align\n"; + $code .= "SYM_FUNC_START($name)\n"; + $code .= ".L$name:\n"; + } else { + $code .= ".globl $name\n"; + $code .= ".type $name,\@function,$nargs\n"; + $code .= ".align $align\n"; + $code .= "$name:\n"; + } +} + +sub end_function() { + my ($name) = @_; + if($kernel) { + $code .= "SYM_FUNC_END($name)\n"; + } else { + $code .= ".size $name,.-$name\n"; + } +} + +if(!$kernel) { + $code .= ".text\n"; +} +&declare_variable('zero', 16, 'long', '0,0,0,0'); +&declare_variable('one', 16, 'long', '1,0,0,0'); +&declare_variable('inc', 16, 'long', '0,1,2,3'); +&declare_variable('four', 16, 'long', '4,4,4,4'); +&declare_variable('incy', 32, 'long', '0,2,4,6,1,3,5,7'); +&declare_variable('eight', 32, 'long', '8,8,8,8,8,8,8,8'); +&declare_variable('rot16', 16, 'byte', '0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd'); +&declare_variable('rot24', 16, 'byte', '0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe'); +&declare_variable('twoy', 32, 'long', '2,0,0,0, 2,0,0,0'); +&declare_variable('zeroz', 64, 'long', '0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0'); +&declare_variable('fourz', 64, 'long', '4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0'); +&declare_variable('incz', 64, 'long', '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15'); +&declare_variable('sixteen', 64, 'long', '16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16'); +&declare_variable('sigma', 16, 'ascii', '"expand 32-byte k"'); + +$code.=<<___ if !$kernel; +.asciz "ChaCha20 for x86_64, CRYPTOGAMS by " +___ +$code.=".text\n"; + +sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; + my $arg = pop; + $arg = "\$$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; +} + +@x=("%eax","%ebx","%ecx","%edx",map("%r${_}d",(8..11)), + "%nox","%nox","%nox","%nox",map("%r${_}d",(12..15))); +@t=("%esi","%edi"); + +sub ROUND { # critical path is 24 cycles per round +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); +my ($xc,$xc_)=map("\"$_\"",@t); +my @x=map("\"$_\"",@x); + + # Consider order in which variables are addressed by their + # index: + # + # a b c d + # + # 0 4 8 12 < even round + # 1 5 9 13 + # 2 6 10 14 + # 3 7 11 15 + # 0 5 10 15 < odd round + # 1 6 11 12 + # 2 7 8 13 + # 3 4 9 14 + # + # 'a', 'b' and 'd's are permanently allocated in registers, + # @x[0..7,12..15], while 'c's are maintained in memory. If + # you observe 'c' column, you'll notice that pair of 'c's is + # invariant between rounds. This means that we have to reload + # them once per round, in the middle. This is why you'll see + # bunch of 'c' stores and loads in the middle, but none in + # the beginning or end. + + # Normally instructions would be interleaved to favour in-order + # execution. Generally out-of-order cores manage it gracefully, + # but not this time for some reason. As in-order execution + # cores are dying breed, old Atom is the only one around, + # instructions are left uninterleaved. Besides, Atom is better + # off executing 1xSSSE3 code anyway... + + ( + "&add (@x[$a0],@x[$b0])", # Q1 + "&xor (@x[$d0],@x[$a0])", + "&rol (@x[$d0],16)", + "&add (@x[$a1],@x[$b1])", # Q2 + "&xor (@x[$d1],@x[$a1])", + "&rol (@x[$d1],16)", + + "&add ($xc,@x[$d0])", + "&xor (@x[$b0],$xc)", + "&rol (@x[$b0],12)", + "&add ($xc_,@x[$d1])", + "&xor (@x[$b1],$xc_)", + "&rol (@x[$b1],12)", + + "&add (@x[$a0],@x[$b0])", + "&xor (@x[$d0],@x[$a0])", + "&rol (@x[$d0],8)", + "&add (@x[$a1],@x[$b1])", + "&xor (@x[$d1],@x[$a1])", + "&rol (@x[$d1],8)", + + "&add ($xc,@x[$d0])", + "&xor (@x[$b0],$xc)", + "&rol (@x[$b0],7)", + "&add ($xc_,@x[$d1])", + "&xor (@x[$b1],$xc_)", + "&rol (@x[$b1],7)", + + "&mov (\"4*$c0(%rsp)\",$xc)", # reload pair of 'c's + "&mov (\"4*$c1(%rsp)\",$xc_)", + "&mov ($xc,\"4*$c2(%rsp)\")", + "&mov ($xc_,\"4*$c3(%rsp)\")", + + "&add (@x[$a2],@x[$b2])", # Q3 + "&xor (@x[$d2],@x[$a2])", + "&rol (@x[$d2],16)", + "&add (@x[$a3],@x[$b3])", # Q4 + "&xor (@x[$d3],@x[$a3])", + "&rol (@x[$d3],16)", + + "&add ($xc,@x[$d2])", + "&xor (@x[$b2],$xc)", + "&rol (@x[$b2],12)", + "&add ($xc_,@x[$d3])", + "&xor (@x[$b3],$xc_)", + "&rol (@x[$b3],12)", + + "&add (@x[$a2],@x[$b2])", + "&xor (@x[$d2],@x[$a2])", + "&rol (@x[$d2],8)", + "&add (@x[$a3],@x[$b3])", + "&xor (@x[$d3],@x[$a3])", + "&rol (@x[$d3],8)", + + "&add ($xc,@x[$d2])", + "&xor (@x[$b2],$xc)", + "&rol (@x[$b2],7)", + "&add ($xc_,@x[$d3])", + "&xor (@x[$b3],$xc_)", + "&rol (@x[$b3],7)" + ); +} + +######################################################################## +# Generic code path that handles all lengths on pre-SSSE3 processors. +if(!$kernel) { +&declare_function("chacha20_ctr32", 64, 5); +$code.=<<___; +.cfi_startproc + cmp \$0,$len + je .Lno_data + mov OPENSSL_ia32cap_P+4(%rip),%r9 +___ +$code.=<<___ if ($avx>2); + bt \$48,%r9 # check for AVX512F + jc .Lchacha20_avx512 + test %r9,%r9 # check for AVX512VL + js .Lchacha20_avx512vl +___ +$code.=<<___; + test \$`1<<(41-32)`,%r9d + jnz .Lchacha20_ssse3 +___ +$code.=<<___; + push %rbx +.cfi_push %rbx + push %rbp +.cfi_push %rbp + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 + sub \$64+24,%rsp +.cfi_adjust_cfa_offset 64+24 +.Lctr32_body: + + #movdqa .Lsigma(%rip),%xmm0 + movdqu ($key),%xmm1 + movdqu 16($key),%xmm2 + movdqu ($counter),%xmm3 + movdqa .Lone(%rip),%xmm4 + + #movdqa %xmm0,4*0(%rsp) # key[0] + movdqa %xmm1,4*4(%rsp) # key[1] + movdqa %xmm2,4*8(%rsp) # key[2] + movdqa %xmm3,4*12(%rsp) # key[3] + mov $len,%rbp # reassign $len + jmp .Loop_outer + +.align 32 +.Loop_outer: + mov \$0x61707865,@x[0] # 'expa' + mov \$0x3320646e,@x[1] # 'nd 3' + mov \$0x79622d32,@x[2] # '2-by' + mov \$0x6b206574,@x[3] # 'te k' + mov 4*4(%rsp),@x[4] + mov 4*5(%rsp),@x[5] + mov 4*6(%rsp),@x[6] + mov 4*7(%rsp),@x[7] + movd %xmm3,@x[12] + mov 4*13(%rsp),@x[13] + mov 4*14(%rsp),@x[14] + mov 4*15(%rsp),@x[15] + + mov %rbp,64+0(%rsp) # save len + mov \$10,%ebp + mov $inp,64+8(%rsp) # save inp + movq %xmm2,%rsi # "@x[8]" + mov $out,64+16(%rsp) # save out + mov %rsi,%rdi + shr \$32,%rdi # "@x[9]" + jmp .Loop + +.align 32 +.Loop: +___ + foreach (&ROUND (0, 4, 8,12)) { eval; } + foreach (&ROUND (0, 5,10,15)) { eval; } + &dec ("%ebp"); + &jnz (".Loop"); + +$code.=<<___; + mov @t[1],4*9(%rsp) # modulo-scheduled + mov @t[0],4*8(%rsp) + mov 64(%rsp),%rbp # load len + movdqa %xmm2,%xmm1 + mov 64+8(%rsp),$inp # load inp + paddd %xmm4,%xmm3 # increment counter + mov 64+16(%rsp),$out # load out + + add \$0x61707865,@x[0] # 'expa' + add \$0x3320646e,@x[1] # 'nd 3' + add \$0x79622d32,@x[2] # '2-by' + add \$0x6b206574,@x[3] # 'te k' + add 4*4(%rsp),@x[4] + add 4*5(%rsp),@x[5] + add 4*6(%rsp),@x[6] + add 4*7(%rsp),@x[7] + add 4*12(%rsp),@x[12] + add 4*13(%rsp),@x[13] + add 4*14(%rsp),@x[14] + add 4*15(%rsp),@x[15] + paddd 4*8(%rsp),%xmm1 + + cmp \$64,%rbp + jb .Ltail + + xor 4*0($inp),@x[0] # xor with input + xor 4*1($inp),@x[1] + xor 4*2($inp),@x[2] + xor 4*3($inp),@x[3] + xor 4*4($inp),@x[4] + xor 4*5($inp),@x[5] + xor 4*6($inp),@x[6] + xor 4*7($inp),@x[7] + movdqu 4*8($inp),%xmm0 + xor 4*12($inp),@x[12] + xor 4*13($inp),@x[13] + xor 4*14($inp),@x[14] + xor 4*15($inp),@x[15] + lea 4*16($inp),$inp # inp+=64 + pxor %xmm1,%xmm0 + + movdqa %xmm2,4*8(%rsp) + movd %xmm3,4*12(%rsp) + + mov @x[0],4*0($out) # write output + mov @x[1],4*1($out) + mov @x[2],4*2($out) + mov @x[3],4*3($out) + mov @x[4],4*4($out) + mov @x[5],4*5($out) + mov @x[6],4*6($out) + mov @x[7],4*7($out) + movdqu %xmm0,4*8($out) + mov @x[12],4*12($out) + mov @x[13],4*13($out) + mov @x[14],4*14($out) + mov @x[15],4*15($out) + lea 4*16($out),$out # out+=64 + + sub \$64,%rbp + jnz .Loop_outer + + jmp .Ldone + +.align 16 +.Ltail: + mov @x[0],4*0(%rsp) + mov @x[1],4*1(%rsp) + xor %rbx,%rbx + mov @x[2],4*2(%rsp) + mov @x[3],4*3(%rsp) + mov @x[4],4*4(%rsp) + mov @x[5],4*5(%rsp) + mov @x[6],4*6(%rsp) + mov @x[7],4*7(%rsp) + movdqa %xmm1,4*8(%rsp) + mov @x[12],4*12(%rsp) + mov @x[13],4*13(%rsp) + mov @x[14],4*14(%rsp) + mov @x[15],4*15(%rsp) + +.Loop_tail: + movzb ($inp,%rbx),%eax + movzb (%rsp,%rbx),%edx + lea 1(%rbx),%rbx + xor %edx,%eax + mov %al,-1($out,%rbx) + dec %rbp + jnz .Loop_tail + +.Ldone: + add \$64+24,%rsp +.cfi_adjust_cfa_offset -64-24 + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbp +.cfi_restore %rbp + pop %rbx +.cfi_restore %rbx +.Lno_data: + ret +.cfi_endproc +___ +&end_function("chacha20_ctr32"); +} + +######################################################################## +# SSSE3 code path that handles shorter lengths +{ +my ($a,$b,$c,$d,$t,$t1,$rot16,$rot24)=map("%xmm$_",(0..7)); + +sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round + &paddd ($a,$b); + &pxor ($d,$a); + &pshufb ($d,$rot16); + + &paddd ($c,$d); + &pxor ($b,$c); + &movdqa ($t,$b); + &psrld ($b,20); + &pslld ($t,12); + &por ($b,$t); + + &paddd ($a,$b); + &pxor ($d,$a); + &pshufb ($d,$rot24); + + &paddd ($c,$d); + &pxor ($b,$c); + &movdqa ($t,$b); + &psrld ($b,25); + &pslld ($t,7); + &por ($b,$t); +} + +my $xframe = $win64 ? 32+8 : 8; + +if($kernel) { + $code .= "#ifdef CONFIG_AS_SSSE3\n"; +} + +if($kernel) { +&declare_function("hchacha20_ssse3", 32, 5); +$code.=<<___; + movdqa .Lsigma(%rip),$a + movdqu ($len),$b + movdqu 16($len),$c + movdqu ($inp),$d + # This code is only used when targeting kernel. + # If targeting win64, xmm{6,7} preserving needs to be added. + movdqa .Lrot16(%rip),$rot16 + movdqa .Lrot24(%rip),$rot24 + mov \$10,$counter # reuse $counter + jmp 1f +.align 32 +1: +___ + &SSSE3ROUND(); + &pshufd ($a,$a,0b10010011); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b00111001); + &nop (); + + &SSSE3ROUND(); + &pshufd ($a,$a,0b00111001); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b10010011); + + &dec ($counter); + &jnz ("1b"); + +$code.=<<___; + movdqu $a, ($out) + movdqu $d, 16($out) + ret +___ +&end_function("hchacha20_ssse3"); +} + +&declare_function("chacha20_ssse3", 32, 5); +$code.=<<___; +.cfi_startproc + lea 8(%rsp),%r10 # frame pointer +.cfi_def_cfa_register %r10 +___ +$code.=<<___ if ($avx && !$kernel); + test \$`1<<(43-32)`,%r10d + jnz .Lchacha20_4xop # XOP is fastest even if we use 1/4 +___ +$code.=<<___; + cmp \$128,$len # we might throw away some data, + je .Lchacha20_128 + ja .Lchacha20_4x # but overall it won't be slower + +.Ldo_ssse3_after_all: + sub \$64+$xframe,%rsp + and \$-16,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0x30(%r10) + movaps %xmm7,-0x20(%r10) +.Lssse3_body: +___ +$code.=<<___; + movdqa .Lsigma(%rip),$a + movdqu ($key),$b + movdqu 16($key),$c + movdqu ($counter),$d + movdqa .Lrot16(%rip),$rot16 + movdqa .Lrot24(%rip),$rot24 + + movdqa $a,0x00(%rsp) + movdqa $b,0x10(%rsp) + movdqa $c,0x20(%rsp) + movdqa $d,0x30(%rsp) + mov \$10,$counter # reuse $counter + jmp .Loop_ssse3 + +.align 32 +.Loop_outer_ssse3: + movdqa .Lone(%rip),$d + movdqa 0x00(%rsp),$a + movdqa 0x10(%rsp),$b + movdqa 0x20(%rsp),$c + paddd 0x30(%rsp),$d + mov \$10,$counter + movdqa $d,0x30(%rsp) + jmp .Loop_ssse3 + +.align 32 +.Loop_ssse3: +___ + &SSSE3ROUND(); + &pshufd ($a,$a,0b10010011); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b00111001); + &nop (); + + &SSSE3ROUND(); + &pshufd ($a,$a,0b00111001); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b10010011); + + &dec ($counter); + &jnz (".Loop_ssse3"); + +$code.=<<___; + paddd 0x00(%rsp),$a + paddd 0x10(%rsp),$b + paddd 0x20(%rsp),$c + paddd 0x30(%rsp),$d + + cmp \$64,$len + jb .Ltail_ssse3 + + movdqu 0x00($inp),$t + movdqu 0x10($inp),$t1 + pxor $t,$a # xor with input + movdqu 0x20($inp),$t + pxor $t1,$b + movdqu 0x30($inp),$t1 + lea 0x40($inp),$inp # inp+=64 + pxor $t,$c + pxor $t1,$d + + movdqu $a,0x00($out) # write output + movdqu $b,0x10($out) + movdqu $c,0x20($out) + movdqu $d,0x30($out) + lea 0x40($out),$out # out+=64 + + sub \$64,$len + jnz .Loop_outer_ssse3 + + jmp .Ldone_ssse3 + +.align 16 +.Ltail_ssse3: + movdqa $a,0x00(%rsp) + movdqa $b,0x10(%rsp) + movdqa $c,0x20(%rsp) + movdqa $d,0x30(%rsp) + xor $counter,$counter + +.Loop_tail_ssse3: + movzb ($inp,$counter),%eax + movzb (%rsp,$counter),%ecx + lea 1($counter),$counter + xor %ecx,%eax + mov %al,-1($out,$counter) + dec $len + jnz .Loop_tail_ssse3 + +.Ldone_ssse3: +___ +$code.=<<___ if ($win64); + movaps -0x30(%r10),%xmm6 + movaps -0x20(%r10),%xmm7 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.Lssse3_epilogue: + ret +.cfi_endproc +___ +} +&end_function("chacha20_ssse3"); + +######################################################################## +# SSSE3 code path that handles 128-byte inputs +{ +my ($a,$b,$c,$d,$t,$t1,$rot16,$rot24)=map("%xmm$_",(8,9,2..7)); +my ($a1,$b1,$c1,$d1)=map("%xmm$_",(10,11,0,1)); + +sub SSSE3ROUND_2x { + &paddd ($a,$b); + &pxor ($d,$a); + &paddd ($a1,$b1); + &pxor ($d1,$a1); + &pshufb ($d,$rot16); + &pshufb($d1,$rot16); + + &paddd ($c,$d); + &paddd ($c1,$d1); + &pxor ($b,$c); + &pxor ($b1,$c1); + &movdqa ($t,$b); + &psrld ($b,20); + &movdqa($t1,$b1); + &pslld ($t,12); + &psrld ($b1,20); + &por ($b,$t); + &pslld ($t1,12); + &por ($b1,$t1); + + &paddd ($a,$b); + &pxor ($d,$a); + &paddd ($a1,$b1); + &pxor ($d1,$a1); + &pshufb ($d,$rot24); + &pshufb($d1,$rot24); + + &paddd ($c,$d); + &paddd ($c1,$d1); + &pxor ($b,$c); + &pxor ($b1,$c1); + &movdqa ($t,$b); + &psrld ($b,25); + &movdqa($t1,$b1); + &pslld ($t,7); + &psrld ($b1,25); + &por ($b,$t); + &pslld ($t1,7); + &por ($b1,$t1); +} + +my $xframe = $win64 ? 0x68 : 8; + +$code.=<<___; +.type chacha20_128,\@function,5 +.align 32 +chacha20_128: +.cfi_startproc +.Lchacha20_128: + lea 8(%rsp),%r10 # frame pointer +.cfi_def_cfa_register %r10 + sub \$64+$xframe,%rsp + and \$-16,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0x70(%r10) + movaps %xmm7,-0x60(%r10) + movaps %xmm8,-0x50(%r10) + movaps %xmm9,-0x40(%r10) + movaps %xmm10,-0x30(%r10) + movaps %xmm11,-0x20(%r10) +.L128_body: +___ +$code.=<<___; + movdqa .Lsigma(%rip),$a + movdqu ($key),$b + movdqu 16($key),$c + movdqu ($counter),$d + movdqa .Lone(%rip),$d1 + movdqa .Lrot16(%rip),$rot16 + movdqa .Lrot24(%rip),$rot24 + + movdqa $a,$a1 + movdqa $a,0x00(%rsp) + movdqa $b,$b1 + movdqa $b,0x10(%rsp) + movdqa $c,$c1 + movdqa $c,0x20(%rsp) + paddd $d,$d1 + movdqa $d,0x30(%rsp) + mov \$10,$counter # reuse $counter + jmp .Loop_128 + +.align 32 +.Loop_128: +___ + &SSSE3ROUND_2x(); + &pshufd ($a,$a,0b10010011); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b00111001); + &pshufd ($a1,$a1,0b10010011); + &pshufd ($d1,$d1,0b01001110); + &pshufd ($c1,$c1,0b00111001); + + &SSSE3ROUND_2x(); + &pshufd ($a,$a,0b00111001); + &pshufd ($d,$d,0b01001110); + &pshufd ($c,$c,0b10010011); + &pshufd ($a1,$a1,0b00111001); + &pshufd ($d1,$d1,0b01001110); + &pshufd ($c1,$c1,0b10010011); + + &dec ($counter); + &jnz (".Loop_128"); + +$code.=<<___; + paddd 0x00(%rsp),$a + paddd 0x10(%rsp),$b + paddd 0x20(%rsp),$c + paddd 0x30(%rsp),$d + paddd .Lone(%rip),$d1 + paddd 0x00(%rsp),$a1 + paddd 0x10(%rsp),$b1 + paddd 0x20(%rsp),$c1 + paddd 0x30(%rsp),$d1 + + movdqu 0x00($inp),$t + movdqu 0x10($inp),$t1 + pxor $t,$a # xor with input + movdqu 0x20($inp),$t + pxor $t1,$b + movdqu 0x30($inp),$t1 + pxor $t,$c + movdqu 0x40($inp),$t + pxor $t1,$d + movdqu 0x50($inp),$t1 + pxor $t,$a1 + movdqu 0x60($inp),$t + pxor $t1,$b1 + movdqu 0x70($inp),$t1 + pxor $t,$c1 + pxor $t1,$d1 + + movdqu $a,0x00($out) # write output + movdqu $b,0x10($out) + movdqu $c,0x20($out) + movdqu $d,0x30($out) + movdqu $a1,0x40($out) + movdqu $b1,0x50($out) + movdqu $c1,0x60($out) + movdqu $d1,0x70($out) +___ +$code.=<<___ if ($win64); + movaps -0x70(%r10),%xmm6 + movaps -0x60(%r10),%xmm7 + movaps -0x50(%r10),%xmm8 + movaps -0x40(%r10),%xmm9 + movaps -0x30(%r10),%xmm10 + movaps -0x20(%r10),%xmm11 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.L128_epilogue: + ret +.cfi_endproc +.size chacha20_128,.-chacha20_128 +___ +} + +######################################################################## +# SSSE3 code path that handles longer messages. +{ +# assign variables to favor Atom front-end +my ($xd0,$xd1,$xd2,$xd3, $xt0,$xt1,$xt2,$xt3, + $xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3)=map("%xmm$_",(0..15)); +my @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + "%nox","%nox","%nox","%nox", $xd0,$xd1,$xd2,$xd3); + +sub SSSE3_lane_ROUND { +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); +my ($xc,$xc_,$t0,$t1)=map("\"$_\"",$xt0,$xt1,$xt2,$xt3); +my @x=map("\"$_\"",@xx); + + # Consider order in which variables are addressed by their + # index: + # + # a b c d + # + # 0 4 8 12 < even round + # 1 5 9 13 + # 2 6 10 14 + # 3 7 11 15 + # 0 5 10 15 < odd round + # 1 6 11 12 + # 2 7 8 13 + # 3 4 9 14 + # + # 'a', 'b' and 'd's are permanently allocated in registers, + # @x[0..7,12..15], while 'c's are maintained in memory. If + # you observe 'c' column, you'll notice that pair of 'c's is + # invariant between rounds. This means that we have to reload + # them once per round, in the middle. This is why you'll see + # bunch of 'c' stores and loads in the middle, but none in + # the beginning or end. + + ( + "&paddd (@x[$a0],@x[$b0])", # Q1 + "&paddd (@x[$a1],@x[$b1])", # Q2 + "&pxor (@x[$d0],@x[$a0])", + "&pxor (@x[$d1],@x[$a1])", + "&pshufb (@x[$d0],$t1)", + "&pshufb (@x[$d1],$t1)", + + "&paddd ($xc,@x[$d0])", + "&paddd ($xc_,@x[$d1])", + "&pxor (@x[$b0],$xc)", + "&pxor (@x[$b1],$xc_)", + "&movdqa ($t0,@x[$b0])", + "&pslld (@x[$b0],12)", + "&psrld ($t0,20)", + "&movdqa ($t1,@x[$b1])", + "&pslld (@x[$b1],12)", + "&por (@x[$b0],$t0)", + "&psrld ($t1,20)", + "&movdqa ($t0,'(%r11)')", # .Lrot24(%rip) + "&por (@x[$b1],$t1)", + + "&paddd (@x[$a0],@x[$b0])", + "&paddd (@x[$a1],@x[$b1])", + "&pxor (@x[$d0],@x[$a0])", + "&pxor (@x[$d1],@x[$a1])", + "&pshufb (@x[$d0],$t0)", + "&pshufb (@x[$d1],$t0)", + + "&paddd ($xc,@x[$d0])", + "&paddd ($xc_,@x[$d1])", + "&pxor (@x[$b0],$xc)", + "&pxor (@x[$b1],$xc_)", + "&movdqa ($t1,@x[$b0])", + "&pslld (@x[$b0],7)", + "&psrld ($t1,25)", + "&movdqa ($t0,@x[$b1])", + "&pslld (@x[$b1],7)", + "&por (@x[$b0],$t1)", + "&psrld ($t0,25)", + "&movdqa ($t1,'(%r9)')", # .Lrot16(%rip) + "&por (@x[$b1],$t0)", + + "&movdqa (\"`16*($c0-8)`(%rsp)\",$xc)", # reload pair of 'c's + "&movdqa (\"`16*($c1-8)`(%rsp)\",$xc_)", + "&movdqa ($xc,\"`16*($c2-8)`(%rsp)\")", + "&movdqa ($xc_,\"`16*($c3-8)`(%rsp)\")", + + "&paddd (@x[$a2],@x[$b2])", # Q3 + "&paddd (@x[$a3],@x[$b3])", # Q4 + "&pxor (@x[$d2],@x[$a2])", + "&pxor (@x[$d3],@x[$a3])", + "&pshufb (@x[$d2],$t1)", + "&pshufb (@x[$d3],$t1)", + + "&paddd ($xc,@x[$d2])", + "&paddd ($xc_,@x[$d3])", + "&pxor (@x[$b2],$xc)", + "&pxor (@x[$b3],$xc_)", + "&movdqa ($t0,@x[$b2])", + "&pslld (@x[$b2],12)", + "&psrld ($t0,20)", + "&movdqa ($t1,@x[$b3])", + "&pslld (@x[$b3],12)", + "&por (@x[$b2],$t0)", + "&psrld ($t1,20)", + "&movdqa ($t0,'(%r11)')", # .Lrot24(%rip) + "&por (@x[$b3],$t1)", + + "&paddd (@x[$a2],@x[$b2])", + "&paddd (@x[$a3],@x[$b3])", + "&pxor (@x[$d2],@x[$a2])", + "&pxor (@x[$d3],@x[$a3])", + "&pshufb (@x[$d2],$t0)", + "&pshufb (@x[$d3],$t0)", + + "&paddd ($xc,@x[$d2])", + "&paddd ($xc_,@x[$d3])", + "&pxor (@x[$b2],$xc)", + "&pxor (@x[$b3],$xc_)", + "&movdqa ($t1,@x[$b2])", + "&pslld (@x[$b2],7)", + "&psrld ($t1,25)", + "&movdqa ($t0,@x[$b3])", + "&pslld (@x[$b3],7)", + "&por (@x[$b2],$t1)", + "&psrld ($t0,25)", + "&movdqa ($t1,'(%r9)')", # .Lrot16(%rip) + "&por (@x[$b3],$t0)" + ); +} + +my $xframe = $win64 ? 0xa8 : 8; + +$code.=<<___; +.type chacha20_4x,\@function,5 +.align 32 +chacha20_4x: +.cfi_startproc +.Lchacha20_4x: + lea 8(%rsp),%r10 # frame pointer +.cfi_def_cfa_register %r10 +___ +$code.=<<___ if (!$kernel); + mov %r9,%r11 +___ +$code.=<<___ if ($avx>1 && !$kernel); + shr \$32,%r9 # OPENSSL_ia32cap_P+8 + test \$`1<<5`,%r9 # test AVX2 + jnz .Lchacha20_8x +___ +$code.=<<___; + cmp \$192,$len + ja .Lproceed4x +___ +$code.=<<___ if (!$kernel); + and \$`1<<26|1<<22`,%r11 # isolate XSAVE+MOVBE + cmp \$`1<<22`,%r11 # check for MOVBE without XSAVE + je .Ldo_ssse3_after_all # to detect Atom +___ +$code.=<<___; +.Lproceed4x: + sub \$0x140+$xframe,%rsp + and \$-16,%rsp +___ + ################ stack layout + # +0x00 SIMD equivalent of @x[8-12] + # ... + # +0x40 constant copy of key[0-2] smashed by lanes + # ... + # +0x100 SIMD counters (with nonce smashed by lanes) + # ... + # +0x140 +$code.=<<___ if ($win64); + movaps %xmm6,-0xb0(%r10) + movaps %xmm7,-0xa0(%r10) + movaps %xmm8,-0x90(%r10) + movaps %xmm9,-0x80(%r10) + movaps %xmm10,-0x70(%r10) + movaps %xmm11,-0x60(%r10) + movaps %xmm12,-0x50(%r10) + movaps %xmm13,-0x40(%r10) + movaps %xmm14,-0x30(%r10) + movaps %xmm15,-0x20(%r10) +.L4x_body: +___ +$code.=<<___; + movdqa .Lsigma(%rip),$xa3 # key[0] + movdqu ($key),$xb3 # key[1] + movdqu 16($key),$xt3 # key[2] + movdqu ($counter),$xd3 # key[3] + lea 0x100(%rsp),%rcx # size optimization + lea .Lrot16(%rip),%r9 + lea .Lrot24(%rip),%r11 + + pshufd \$0x00,$xa3,$xa0 # smash key by lanes... + pshufd \$0x55,$xa3,$xa1 + movdqa $xa0,0x40(%rsp) # ... and offload + pshufd \$0xaa,$xa3,$xa2 + movdqa $xa1,0x50(%rsp) + pshufd \$0xff,$xa3,$xa3 + movdqa $xa2,0x60(%rsp) + movdqa $xa3,0x70(%rsp) + + pshufd \$0x00,$xb3,$xb0 + pshufd \$0x55,$xb3,$xb1 + movdqa $xb0,0x80-0x100(%rcx) + pshufd \$0xaa,$xb3,$xb2 + movdqa $xb1,0x90-0x100(%rcx) + pshufd \$0xff,$xb3,$xb3 + movdqa $xb2,0xa0-0x100(%rcx) + movdqa $xb3,0xb0-0x100(%rcx) + + pshufd \$0x00,$xt3,$xt0 # "$xc0" + pshufd \$0x55,$xt3,$xt1 # "$xc1" + movdqa $xt0,0xc0-0x100(%rcx) + pshufd \$0xaa,$xt3,$xt2 # "$xc2" + movdqa $xt1,0xd0-0x100(%rcx) + pshufd \$0xff,$xt3,$xt3 # "$xc3" + movdqa $xt2,0xe0-0x100(%rcx) + movdqa $xt3,0xf0-0x100(%rcx) + + pshufd \$0x00,$xd3,$xd0 + pshufd \$0x55,$xd3,$xd1 + paddd .Linc(%rip),$xd0 # don't save counters yet + pshufd \$0xaa,$xd3,$xd2 + movdqa $xd1,0x110-0x100(%rcx) + pshufd \$0xff,$xd3,$xd3 + movdqa $xd2,0x120-0x100(%rcx) + movdqa $xd3,0x130-0x100(%rcx) + + jmp .Loop_enter4x + +.align 32 +.Loop_outer4x: + movdqa 0x40(%rsp),$xa0 # re-load smashed key + movdqa 0x50(%rsp),$xa1 + movdqa 0x60(%rsp),$xa2 + movdqa 0x70(%rsp),$xa3 + movdqa 0x80-0x100(%rcx),$xb0 + movdqa 0x90-0x100(%rcx),$xb1 + movdqa 0xa0-0x100(%rcx),$xb2 + movdqa 0xb0-0x100(%rcx),$xb3 + movdqa 0xc0-0x100(%rcx),$xt0 # "$xc0" + movdqa 0xd0-0x100(%rcx),$xt1 # "$xc1" + movdqa 0xe0-0x100(%rcx),$xt2 # "$xc2" + movdqa 0xf0-0x100(%rcx),$xt3 # "$xc3" + movdqa 0x100-0x100(%rcx),$xd0 + movdqa 0x110-0x100(%rcx),$xd1 + movdqa 0x120-0x100(%rcx),$xd2 + movdqa 0x130-0x100(%rcx),$xd3 + paddd .Lfour(%rip),$xd0 # next SIMD counters + +.Loop_enter4x: + movdqa $xt2,0x20(%rsp) # SIMD equivalent of "@x[10]" + movdqa $xt3,0x30(%rsp) # SIMD equivalent of "@x[11]" + movdqa (%r9),$xt3 # .Lrot16(%rip) + mov \$10,%eax + movdqa $xd0,0x100-0x100(%rcx) # save SIMD counters + jmp .Loop4x + +.align 32 +.Loop4x: +___ + foreach (&SSSE3_lane_ROUND(0, 4, 8,12)) { eval; } + foreach (&SSSE3_lane_ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + dec %eax + jnz .Loop4x + + paddd 0x40(%rsp),$xa0 # accumulate key material + paddd 0x50(%rsp),$xa1 + paddd 0x60(%rsp),$xa2 + paddd 0x70(%rsp),$xa3 + + movdqa $xa0,$xt2 # "de-interlace" data + punpckldq $xa1,$xa0 + movdqa $xa2,$xt3 + punpckldq $xa3,$xa2 + punpckhdq $xa1,$xt2 + punpckhdq $xa3,$xt3 + movdqa $xa0,$xa1 + punpcklqdq $xa2,$xa0 # "a0" + movdqa $xt2,$xa3 + punpcklqdq $xt3,$xt2 # "a2" + punpckhqdq $xa2,$xa1 # "a1" + punpckhqdq $xt3,$xa3 # "a3" +___ + ($xa2,$xt2)=($xt2,$xa2); +$code.=<<___; + paddd 0x80-0x100(%rcx),$xb0 + paddd 0x90-0x100(%rcx),$xb1 + paddd 0xa0-0x100(%rcx),$xb2 + paddd 0xb0-0x100(%rcx),$xb3 + + movdqa $xa0,0x00(%rsp) # offload $xaN + movdqa $xa1,0x10(%rsp) + movdqa 0x20(%rsp),$xa0 # "xc2" + movdqa 0x30(%rsp),$xa1 # "xc3" + + movdqa $xb0,$xt2 + punpckldq $xb1,$xb0 + movdqa $xb2,$xt3 + punpckldq $xb3,$xb2 + punpckhdq $xb1,$xt2 + punpckhdq $xb3,$xt3 + movdqa $xb0,$xb1 + punpcklqdq $xb2,$xb0 # "b0" + movdqa $xt2,$xb3 + punpcklqdq $xt3,$xt2 # "b2" + punpckhqdq $xb2,$xb1 # "b1" + punpckhqdq $xt3,$xb3 # "b3" +___ + ($xb2,$xt2)=($xt2,$xb2); + my ($xc0,$xc1,$xc2,$xc3)=($xt0,$xt1,$xa0,$xa1); +$code.=<<___; + paddd 0xc0-0x100(%rcx),$xc0 + paddd 0xd0-0x100(%rcx),$xc1 + paddd 0xe0-0x100(%rcx),$xc2 + paddd 0xf0-0x100(%rcx),$xc3 + + movdqa $xa2,0x20(%rsp) # keep offloading $xaN + movdqa $xa3,0x30(%rsp) + + movdqa $xc0,$xt2 + punpckldq $xc1,$xc0 + movdqa $xc2,$xt3 + punpckldq $xc3,$xc2 + punpckhdq $xc1,$xt2 + punpckhdq $xc3,$xt3 + movdqa $xc0,$xc1 + punpcklqdq $xc2,$xc0 # "c0" + movdqa $xt2,$xc3 + punpcklqdq $xt3,$xt2 # "c2" + punpckhqdq $xc2,$xc1 # "c1" + punpckhqdq $xt3,$xc3 # "c3" +___ + ($xc2,$xt2)=($xt2,$xc2); + ($xt0,$xt1)=($xa2,$xa3); # use $xaN as temporary +$code.=<<___; + paddd 0x100-0x100(%rcx),$xd0 + paddd 0x110-0x100(%rcx),$xd1 + paddd 0x120-0x100(%rcx),$xd2 + paddd 0x130-0x100(%rcx),$xd3 + + movdqa $xd0,$xt2 + punpckldq $xd1,$xd0 + movdqa $xd2,$xt3 + punpckldq $xd3,$xd2 + punpckhdq $xd1,$xt2 + punpckhdq $xd3,$xt3 + movdqa $xd0,$xd1 + punpcklqdq $xd2,$xd0 # "d0" + movdqa $xt2,$xd3 + punpcklqdq $xt3,$xt2 # "d2" + punpckhqdq $xd2,$xd1 # "d1" + punpckhqdq $xt3,$xd3 # "d3" +___ + ($xd2,$xt2)=($xt2,$xd2); +$code.=<<___; + cmp \$64*4,$len + jb .Ltail4x + + movdqu 0x00($inp),$xt0 # xor with input + movdqu 0x10($inp),$xt1 + movdqu 0x20($inp),$xt2 + movdqu 0x30($inp),$xt3 + pxor 0x00(%rsp),$xt0 # $xaN is offloaded, remember? + pxor $xb0,$xt1 + pxor $xc0,$xt2 + pxor $xd0,$xt3 + + movdqu $xt0,0x00($out) + movdqu 0x40($inp),$xt0 + movdqu $xt1,0x10($out) + movdqu 0x50($inp),$xt1 + movdqu $xt2,0x20($out) + movdqu 0x60($inp),$xt2 + movdqu $xt3,0x30($out) + movdqu 0x70($inp),$xt3 + lea 0x80($inp),$inp # size optimization + pxor 0x10(%rsp),$xt0 + pxor $xb1,$xt1 + pxor $xc1,$xt2 + pxor $xd1,$xt3 + + movdqu $xt0,0x40($out) + movdqu 0x00($inp),$xt0 + movdqu $xt1,0x50($out) + movdqu 0x10($inp),$xt1 + movdqu $xt2,0x60($out) + movdqu 0x20($inp),$xt2 + movdqu $xt3,0x70($out) + lea 0x80($out),$out # size optimization + movdqu 0x30($inp),$xt3 + pxor 0x20(%rsp),$xt0 + pxor $xb2,$xt1 + pxor $xc2,$xt2 + pxor $xd2,$xt3 + + movdqu $xt0,0x00($out) + movdqu 0x40($inp),$xt0 + movdqu $xt1,0x10($out) + movdqu 0x50($inp),$xt1 + movdqu $xt2,0x20($out) + movdqu 0x60($inp),$xt2 + movdqu $xt3,0x30($out) + movdqu 0x70($inp),$xt3 + lea 0x80($inp),$inp # inp+=64*4 + pxor 0x30(%rsp),$xt0 + pxor $xb3,$xt1 + pxor $xc3,$xt2 + pxor $xd3,$xt3 + movdqu $xt0,0x40($out) + movdqu $xt1,0x50($out) + movdqu $xt2,0x60($out) + movdqu $xt3,0x70($out) + lea 0x80($out),$out # out+=64*4 + + sub \$64*4,$len + jnz .Loop_outer4x + + jmp .Ldone4x + +.Ltail4x: + cmp \$192,$len + jae .L192_or_more4x + cmp \$128,$len + jae .L128_or_more4x + cmp \$64,$len + jae .L64_or_more4x + + #movdqa 0x00(%rsp),$xt0 # $xaN is offloaded, remember? + xor %r9,%r9 + #movdqa $xt0,0x00(%rsp) + movdqa $xb0,0x10(%rsp) + movdqa $xc0,0x20(%rsp) + movdqa $xd0,0x30(%rsp) + jmp .Loop_tail4x + +.align 32 +.L64_or_more4x: + movdqu 0x00($inp),$xt0 # xor with input + movdqu 0x10($inp),$xt1 + movdqu 0x20($inp),$xt2 + movdqu 0x30($inp),$xt3 + pxor 0x00(%rsp),$xt0 # $xaxN is offloaded, remember? + pxor $xb0,$xt1 + pxor $xc0,$xt2 + pxor $xd0,$xt3 + movdqu $xt0,0x00($out) + movdqu $xt1,0x10($out) + movdqu $xt2,0x20($out) + movdqu $xt3,0x30($out) + je .Ldone4x + + movdqa 0x10(%rsp),$xt0 # $xaN is offloaded, remember? + lea 0x40($inp),$inp # inp+=64*1 + xor %r9,%r9 + movdqa $xt0,0x00(%rsp) + movdqa $xb1,0x10(%rsp) + lea 0x40($out),$out # out+=64*1 + movdqa $xc1,0x20(%rsp) + sub \$64,$len # len-=64*1 + movdqa $xd1,0x30(%rsp) + jmp .Loop_tail4x + +.align 32 +.L128_or_more4x: + movdqu 0x00($inp),$xt0 # xor with input + movdqu 0x10($inp),$xt1 + movdqu 0x20($inp),$xt2 + movdqu 0x30($inp),$xt3 + pxor 0x00(%rsp),$xt0 # $xaN is offloaded, remember? + pxor $xb0,$xt1 + pxor $xc0,$xt2 + pxor $xd0,$xt3 + + movdqu $xt0,0x00($out) + movdqu 0x40($inp),$xt0 + movdqu $xt1,0x10($out) + movdqu 0x50($inp),$xt1 + movdqu $xt2,0x20($out) + movdqu 0x60($inp),$xt2 + movdqu $xt3,0x30($out) + movdqu 0x70($inp),$xt3 + pxor 0x10(%rsp),$xt0 + pxor $xb1,$xt1 + pxor $xc1,$xt2 + pxor $xd1,$xt3 + movdqu $xt0,0x40($out) + movdqu $xt1,0x50($out) + movdqu $xt2,0x60($out) + movdqu $xt3,0x70($out) + je .Ldone4x + + movdqa 0x20(%rsp),$xt0 # $xaN is offloaded, remember? + lea 0x80($inp),$inp # inp+=64*2 + xor %r9,%r9 + movdqa $xt0,0x00(%rsp) + movdqa $xb2,0x10(%rsp) + lea 0x80($out),$out # out+=64*2 + movdqa $xc2,0x20(%rsp) + sub \$128,$len # len-=64*2 + movdqa $xd2,0x30(%rsp) + jmp .Loop_tail4x + +.align 32 +.L192_or_more4x: + movdqu 0x00($inp),$xt0 # xor with input + movdqu 0x10($inp),$xt1 + movdqu 0x20($inp),$xt2 + movdqu 0x30($inp),$xt3 + pxor 0x00(%rsp),$xt0 # $xaN is offloaded, remember? + pxor $xb0,$xt1 + pxor $xc0,$xt2 + pxor $xd0,$xt3 + + movdqu $xt0,0x00($out) + movdqu 0x40($inp),$xt0 + movdqu $xt1,0x10($out) + movdqu 0x50($inp),$xt1 + movdqu $xt2,0x20($out) + movdqu 0x60($inp),$xt2 + movdqu $xt3,0x30($out) + movdqu 0x70($inp),$xt3 + lea 0x80($inp),$inp # size optimization + pxor 0x10(%rsp),$xt0 + pxor $xb1,$xt1 + pxor $xc1,$xt2 + pxor $xd1,$xt3 + + movdqu $xt0,0x40($out) + movdqu 0x00($inp),$xt0 + movdqu $xt1,0x50($out) + movdqu 0x10($inp),$xt1 + movdqu $xt2,0x60($out) + movdqu 0x20($inp),$xt2 + movdqu $xt3,0x70($out) + lea 0x80($out),$out # size optimization + movdqu 0x30($inp),$xt3 + pxor 0x20(%rsp),$xt0 + pxor $xb2,$xt1 + pxor $xc2,$xt2 + pxor $xd2,$xt3 + movdqu $xt0,0x00($out) + movdqu $xt1,0x10($out) + movdqu $xt2,0x20($out) + movdqu $xt3,0x30($out) + je .Ldone4x + + movdqa 0x30(%rsp),$xt0 # $xaN is offloaded, remember? + lea 0x40($inp),$inp # inp+=64*3 + xor %r9,%r9 + movdqa $xt0,0x00(%rsp) + movdqa $xb3,0x10(%rsp) + lea 0x40($out),$out # out+=64*3 + movdqa $xc3,0x20(%rsp) + sub \$192,$len # len-=64*3 + movdqa $xd3,0x30(%rsp) + +.Loop_tail4x: + movzb ($inp,%r9),%eax + movzb (%rsp,%r9),%ecx + lea 1(%r9),%r9 + xor %ecx,%eax + mov %al,-1($out,%r9) + dec $len + jnz .Loop_tail4x + +.Ldone4x: +___ +$code.=<<___ if ($win64); + movaps -0xb0(%r10),%xmm6 + movaps -0xa0(%r10),%xmm7 + movaps -0x90(%r10),%xmm8 + movaps -0x80(%r10),%xmm9 + movaps -0x70(%r10),%xmm10 + movaps -0x60(%r10),%xmm11 + movaps -0x50(%r10),%xmm12 + movaps -0x40(%r10),%xmm13 + movaps -0x30(%r10),%xmm14 + movaps -0x20(%r10),%xmm15 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.L4x_epilogue: + ret +.cfi_endproc +.size chacha20_4x,.-chacha20_4x +___ +} +if($kernel) { + $code .= "#endif\n"; +} + +######################################################################## +# XOP code path that handles all lengths. +if ($avx && !$kernel) { +# There is some "anomaly" observed depending on instructions' size or +# alignment. If you look closely at below code you'll notice that +# sometimes argument order varies. The order affects instruction +# encoding by making it larger, and such fiddling gives 5% performance +# improvement. This is on FX-4100... + +my ($xb0,$xb1,$xb2,$xb3, $xd0,$xd1,$xd2,$xd3, + $xa0,$xa1,$xa2,$xa3, $xt0,$xt1,$xt2,$xt3)=map("%xmm$_",(0..15)); +my @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + $xt0,$xt1,$xt2,$xt3, $xd0,$xd1,$xd2,$xd3); + +sub XOP_lane_ROUND { +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); +my @x=map("\"$_\"",@xx); + + ( + "&vpaddd (@x[$a0],@x[$a0],@x[$b0])", # Q1 + "&vpaddd (@x[$a1],@x[$a1],@x[$b1])", # Q2 + "&vpaddd (@x[$a2],@x[$a2],@x[$b2])", # Q3 + "&vpaddd (@x[$a3],@x[$a3],@x[$b3])", # Q4 + "&vpxor (@x[$d0],@x[$a0],@x[$d0])", + "&vpxor (@x[$d1],@x[$a1],@x[$d1])", + "&vpxor (@x[$d2],@x[$a2],@x[$d2])", + "&vpxor (@x[$d3],@x[$a3],@x[$d3])", + "&vprotd (@x[$d0],@x[$d0],16)", + "&vprotd (@x[$d1],@x[$d1],16)", + "&vprotd (@x[$d2],@x[$d2],16)", + "&vprotd (@x[$d3],@x[$d3],16)", + + "&vpaddd (@x[$c0],@x[$c0],@x[$d0])", + "&vpaddd (@x[$c1],@x[$c1],@x[$d1])", + "&vpaddd (@x[$c2],@x[$c2],@x[$d2])", + "&vpaddd (@x[$c3],@x[$c3],@x[$d3])", + "&vpxor (@x[$b0],@x[$c0],@x[$b0])", + "&vpxor (@x[$b1],@x[$c1],@x[$b1])", + "&vpxor (@x[$b2],@x[$b2],@x[$c2])", # flip + "&vpxor (@x[$b3],@x[$b3],@x[$c3])", # flip + "&vprotd (@x[$b0],@x[$b0],12)", + "&vprotd (@x[$b1],@x[$b1],12)", + "&vprotd (@x[$b2],@x[$b2],12)", + "&vprotd (@x[$b3],@x[$b3],12)", + + "&vpaddd (@x[$a0],@x[$b0],@x[$a0])", # flip + "&vpaddd (@x[$a1],@x[$b1],@x[$a1])", # flip + "&vpaddd (@x[$a2],@x[$a2],@x[$b2])", + "&vpaddd (@x[$a3],@x[$a3],@x[$b3])", + "&vpxor (@x[$d0],@x[$a0],@x[$d0])", + "&vpxor (@x[$d1],@x[$a1],@x[$d1])", + "&vpxor (@x[$d2],@x[$a2],@x[$d2])", + "&vpxor (@x[$d3],@x[$a3],@x[$d3])", + "&vprotd (@x[$d0],@x[$d0],8)", + "&vprotd (@x[$d1],@x[$d1],8)", + "&vprotd (@x[$d2],@x[$d2],8)", + "&vprotd (@x[$d3],@x[$d3],8)", + + "&vpaddd (@x[$c0],@x[$c0],@x[$d0])", + "&vpaddd (@x[$c1],@x[$c1],@x[$d1])", + "&vpaddd (@x[$c2],@x[$c2],@x[$d2])", + "&vpaddd (@x[$c3],@x[$c3],@x[$d3])", + "&vpxor (@x[$b0],@x[$c0],@x[$b0])", + "&vpxor (@x[$b1],@x[$c1],@x[$b1])", + "&vpxor (@x[$b2],@x[$b2],@x[$c2])", # flip + "&vpxor (@x[$b3],@x[$b3],@x[$c3])", # flip + "&vprotd (@x[$b0],@x[$b0],7)", + "&vprotd (@x[$b1],@x[$b1],7)", + "&vprotd (@x[$b2],@x[$b2],7)", + "&vprotd (@x[$b3],@x[$b3],7)" + ); +} + +my $xframe = $win64 ? 0xa8 : 8; + +&declare_function("chacha20_xop", 32, 5); +$code.=<<___; +.cfi_startproc +.Lchacha20_4xop: + lea 8(%rsp),%r10 # frame pointer +.cfi_def_cfa_register %r10 + sub \$0x140+$xframe,%rsp + and \$-16,%rsp +___ + ################ stack layout + # +0x00 SIMD equivalent of @x[8-12] + # ... + # +0x40 constant copy of key[0-2] smashed by lanes + # ... + # +0x100 SIMD counters (with nonce smashed by lanes) + # ... + # +0x140 +$code.=<<___ if ($win64); + movaps %xmm6,-0xb0(%r10) + movaps %xmm7,-0xa0(%r10) + movaps %xmm8,-0x90(%r10) + movaps %xmm9,-0x80(%r10) + movaps %xmm10,-0x70(%r10) + movaps %xmm11,-0x60(%r10) + movaps %xmm12,-0x50(%r10) + movaps %xmm13,-0x40(%r10) + movaps %xmm14,-0x30(%r10) + movaps %xmm15,-0x20(%r10) +.L4xop_body: +___ +$code.=<<___; + vzeroupper + + vmovdqa .Lsigma(%rip),$xa3 # key[0] + vmovdqu ($key),$xb3 # key[1] + vmovdqu 16($key),$xt3 # key[2] + vmovdqu ($counter),$xd3 # key[3] + lea 0x100(%rsp),%rcx # size optimization + + vpshufd \$0x00,$xa3,$xa0 # smash key by lanes... + vpshufd \$0x55,$xa3,$xa1 + vmovdqa $xa0,0x40(%rsp) # ... and offload + vpshufd \$0xaa,$xa3,$xa2 + vmovdqa $xa1,0x50(%rsp) + vpshufd \$0xff,$xa3,$xa3 + vmovdqa $xa2,0x60(%rsp) + vmovdqa $xa3,0x70(%rsp) + + vpshufd \$0x00,$xb3,$xb0 + vpshufd \$0x55,$xb3,$xb1 + vmovdqa $xb0,0x80-0x100(%rcx) + vpshufd \$0xaa,$xb3,$xb2 + vmovdqa $xb1,0x90-0x100(%rcx) + vpshufd \$0xff,$xb3,$xb3 + vmovdqa $xb2,0xa0-0x100(%rcx) + vmovdqa $xb3,0xb0-0x100(%rcx) + + vpshufd \$0x00,$xt3,$xt0 # "$xc0" + vpshufd \$0x55,$xt3,$xt1 # "$xc1" + vmovdqa $xt0,0xc0-0x100(%rcx) + vpshufd \$0xaa,$xt3,$xt2 # "$xc2" + vmovdqa $xt1,0xd0-0x100(%rcx) + vpshufd \$0xff,$xt3,$xt3 # "$xc3" + vmovdqa $xt2,0xe0-0x100(%rcx) + vmovdqa $xt3,0xf0-0x100(%rcx) + + vpshufd \$0x00,$xd3,$xd0 + vpshufd \$0x55,$xd3,$xd1 + vpaddd .Linc(%rip),$xd0,$xd0 # don't save counters yet + vpshufd \$0xaa,$xd3,$xd2 + vmovdqa $xd1,0x110-0x100(%rcx) + vpshufd \$0xff,$xd3,$xd3 + vmovdqa $xd2,0x120-0x100(%rcx) + vmovdqa $xd3,0x130-0x100(%rcx) + + jmp .Loop_enter4xop + +.align 32 +.Loop_outer4xop: + vmovdqa 0x40(%rsp),$xa0 # re-load smashed key + vmovdqa 0x50(%rsp),$xa1 + vmovdqa 0x60(%rsp),$xa2 + vmovdqa 0x70(%rsp),$xa3 + vmovdqa 0x80-0x100(%rcx),$xb0 + vmovdqa 0x90-0x100(%rcx),$xb1 + vmovdqa 0xa0-0x100(%rcx),$xb2 + vmovdqa 0xb0-0x100(%rcx),$xb3 + vmovdqa 0xc0-0x100(%rcx),$xt0 # "$xc0" + vmovdqa 0xd0-0x100(%rcx),$xt1 # "$xc1" + vmovdqa 0xe0-0x100(%rcx),$xt2 # "$xc2" + vmovdqa 0xf0-0x100(%rcx),$xt3 # "$xc3" + vmovdqa 0x100-0x100(%rcx),$xd0 + vmovdqa 0x110-0x100(%rcx),$xd1 + vmovdqa 0x120-0x100(%rcx),$xd2 + vmovdqa 0x130-0x100(%rcx),$xd3 + vpaddd .Lfour(%rip),$xd0,$xd0 # next SIMD counters + +.Loop_enter4xop: + mov \$10,%eax + vmovdqa $xd0,0x100-0x100(%rcx) # save SIMD counters + jmp .Loop4xop + +.align 32 +.Loop4xop: +___ + foreach (&XOP_lane_ROUND(0, 4, 8,12)) { eval; } + foreach (&XOP_lane_ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + dec %eax + jnz .Loop4xop + + vpaddd 0x40(%rsp),$xa0,$xa0 # accumulate key material + vpaddd 0x50(%rsp),$xa1,$xa1 + vpaddd 0x60(%rsp),$xa2,$xa2 + vpaddd 0x70(%rsp),$xa3,$xa3 + + vmovdqa $xt2,0x20(%rsp) # offload $xc2,3 + vmovdqa $xt3,0x30(%rsp) + + vpunpckldq $xa1,$xa0,$xt2 # "de-interlace" data + vpunpckldq $xa3,$xa2,$xt3 + vpunpckhdq $xa1,$xa0,$xa0 + vpunpckhdq $xa3,$xa2,$xa2 + vpunpcklqdq $xt3,$xt2,$xa1 # "a0" + vpunpckhqdq $xt3,$xt2,$xt2 # "a1" + vpunpcklqdq $xa2,$xa0,$xa3 # "a2" + vpunpckhqdq $xa2,$xa0,$xa0 # "a3" +___ + ($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2); +$code.=<<___; + vpaddd 0x80-0x100(%rcx),$xb0,$xb0 + vpaddd 0x90-0x100(%rcx),$xb1,$xb1 + vpaddd 0xa0-0x100(%rcx),$xb2,$xb2 + vpaddd 0xb0-0x100(%rcx),$xb3,$xb3 + + vmovdqa $xa0,0x00(%rsp) # offload $xa0,1 + vmovdqa $xa1,0x10(%rsp) + vmovdqa 0x20(%rsp),$xa0 # "xc2" + vmovdqa 0x30(%rsp),$xa1 # "xc3" + + vpunpckldq $xb1,$xb0,$xt2 + vpunpckldq $xb3,$xb2,$xt3 + vpunpckhdq $xb1,$xb0,$xb0 + vpunpckhdq $xb3,$xb2,$xb2 + vpunpcklqdq $xt3,$xt2,$xb1 # "b0" + vpunpckhqdq $xt3,$xt2,$xt2 # "b1" + vpunpcklqdq $xb2,$xb0,$xb3 # "b2" + vpunpckhqdq $xb2,$xb0,$xb0 # "b3" +___ + ($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2); + my ($xc0,$xc1,$xc2,$xc3)=($xt0,$xt1,$xa0,$xa1); +$code.=<<___; + vpaddd 0xc0-0x100(%rcx),$xc0,$xc0 + vpaddd 0xd0-0x100(%rcx),$xc1,$xc1 + vpaddd 0xe0-0x100(%rcx),$xc2,$xc2 + vpaddd 0xf0-0x100(%rcx),$xc3,$xc3 + + vpunpckldq $xc1,$xc0,$xt2 + vpunpckldq $xc3,$xc2,$xt3 + vpunpckhdq $xc1,$xc0,$xc0 + vpunpckhdq $xc3,$xc2,$xc2 + vpunpcklqdq $xt3,$xt2,$xc1 # "c0" + vpunpckhqdq $xt3,$xt2,$xt2 # "c1" + vpunpcklqdq $xc2,$xc0,$xc3 # "c2" + vpunpckhqdq $xc2,$xc0,$xc0 # "c3" +___ + ($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2); +$code.=<<___; + vpaddd 0x100-0x100(%rcx),$xd0,$xd0 + vpaddd 0x110-0x100(%rcx),$xd1,$xd1 + vpaddd 0x120-0x100(%rcx),$xd2,$xd2 + vpaddd 0x130-0x100(%rcx),$xd3,$xd3 + + vpunpckldq $xd1,$xd0,$xt2 + vpunpckldq $xd3,$xd2,$xt3 + vpunpckhdq $xd1,$xd0,$xd0 + vpunpckhdq $xd3,$xd2,$xd2 + vpunpcklqdq $xt3,$xt2,$xd1 # "d0" + vpunpckhqdq $xt3,$xt2,$xt2 # "d1" + vpunpcklqdq $xd2,$xd0,$xd3 # "d2" + vpunpckhqdq $xd2,$xd0,$xd0 # "d3" +___ + ($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2); + ($xa0,$xa1)=($xt2,$xt3); +$code.=<<___; + vmovdqa 0x00(%rsp),$xa0 # restore $xa0,1 + vmovdqa 0x10(%rsp),$xa1 + + cmp \$64*4,$len + jb .Ltail4xop + + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x10($inp),$xb0,$xb0 + vpxor 0x20($inp),$xc0,$xc0 + vpxor 0x30($inp),$xd0,$xd0 + vpxor 0x40($inp),$xa1,$xa1 + vpxor 0x50($inp),$xb1,$xb1 + vpxor 0x60($inp),$xc1,$xc1 + vpxor 0x70($inp),$xd1,$xd1 + lea 0x80($inp),$inp # size optimization + vpxor 0x00($inp),$xa2,$xa2 + vpxor 0x10($inp),$xb2,$xb2 + vpxor 0x20($inp),$xc2,$xc2 + vpxor 0x30($inp),$xd2,$xd2 + vpxor 0x40($inp),$xa3,$xa3 + vpxor 0x50($inp),$xb3,$xb3 + vpxor 0x60($inp),$xc3,$xc3 + vpxor 0x70($inp),$xd3,$xd3 + lea 0x80($inp),$inp # inp+=64*4 + + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x10($out) + vmovdqu $xc0,0x20($out) + vmovdqu $xd0,0x30($out) + vmovdqu $xa1,0x40($out) + vmovdqu $xb1,0x50($out) + vmovdqu $xc1,0x60($out) + vmovdqu $xd1,0x70($out) + lea 0x80($out),$out # size optimization + vmovdqu $xa2,0x00($out) + vmovdqu $xb2,0x10($out) + vmovdqu $xc2,0x20($out) + vmovdqu $xd2,0x30($out) + vmovdqu $xa3,0x40($out) + vmovdqu $xb3,0x50($out) + vmovdqu $xc3,0x60($out) + vmovdqu $xd3,0x70($out) + lea 0x80($out),$out # out+=64*4 + + sub \$64*4,$len + jnz .Loop_outer4xop + + jmp .Ldone4xop + +.align 32 +.Ltail4xop: + cmp \$192,$len + jae .L192_or_more4xop + cmp \$128,$len + jae .L128_or_more4xop + cmp \$64,$len + jae .L64_or_more4xop + + xor %r9,%r9 + vmovdqa $xa0,0x00(%rsp) + vmovdqa $xb0,0x10(%rsp) + vmovdqa $xc0,0x20(%rsp) + vmovdqa $xd0,0x30(%rsp) + jmp .Loop_tail4xop + +.align 32 +.L64_or_more4xop: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x10($inp),$xb0,$xb0 + vpxor 0x20($inp),$xc0,$xc0 + vpxor 0x30($inp),$xd0,$xd0 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x10($out) + vmovdqu $xc0,0x20($out) + vmovdqu $xd0,0x30($out) + je .Ldone4xop + + lea 0x40($inp),$inp # inp+=64*1 + vmovdqa $xa1,0x00(%rsp) + xor %r9,%r9 + vmovdqa $xb1,0x10(%rsp) + lea 0x40($out),$out # out+=64*1 + vmovdqa $xc1,0x20(%rsp) + sub \$64,$len # len-=64*1 + vmovdqa $xd1,0x30(%rsp) + jmp .Loop_tail4xop + +.align 32 +.L128_or_more4xop: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x10($inp),$xb0,$xb0 + vpxor 0x20($inp),$xc0,$xc0 + vpxor 0x30($inp),$xd0,$xd0 + vpxor 0x40($inp),$xa1,$xa1 + vpxor 0x50($inp),$xb1,$xb1 + vpxor 0x60($inp),$xc1,$xc1 + vpxor 0x70($inp),$xd1,$xd1 + + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x10($out) + vmovdqu $xc0,0x20($out) + vmovdqu $xd0,0x30($out) + vmovdqu $xa1,0x40($out) + vmovdqu $xb1,0x50($out) + vmovdqu $xc1,0x60($out) + vmovdqu $xd1,0x70($out) + je .Ldone4xop + + lea 0x80($inp),$inp # inp+=64*2 + vmovdqa $xa2,0x00(%rsp) + xor %r9,%r9 + vmovdqa $xb2,0x10(%rsp) + lea 0x80($out),$out # out+=64*2 + vmovdqa $xc2,0x20(%rsp) + sub \$128,$len # len-=64*2 + vmovdqa $xd2,0x30(%rsp) + jmp .Loop_tail4xop + +.align 32 +.L192_or_more4xop: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x10($inp),$xb0,$xb0 + vpxor 0x20($inp),$xc0,$xc0 + vpxor 0x30($inp),$xd0,$xd0 + vpxor 0x40($inp),$xa1,$xa1 + vpxor 0x50($inp),$xb1,$xb1 + vpxor 0x60($inp),$xc1,$xc1 + vpxor 0x70($inp),$xd1,$xd1 + lea 0x80($inp),$inp # size optimization + vpxor 0x00($inp),$xa2,$xa2 + vpxor 0x10($inp),$xb2,$xb2 + vpxor 0x20($inp),$xc2,$xc2 + vpxor 0x30($inp),$xd2,$xd2 + + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x10($out) + vmovdqu $xc0,0x20($out) + vmovdqu $xd0,0x30($out) + vmovdqu $xa1,0x40($out) + vmovdqu $xb1,0x50($out) + vmovdqu $xc1,0x60($out) + vmovdqu $xd1,0x70($out) + lea 0x80($out),$out # size optimization + vmovdqu $xa2,0x00($out) + vmovdqu $xb2,0x10($out) + vmovdqu $xc2,0x20($out) + vmovdqu $xd2,0x30($out) + je .Ldone4xop + + lea 0x40($inp),$inp # inp+=64*3 + vmovdqa $xa3,0x00(%rsp) + xor %r9,%r9 + vmovdqa $xb3,0x10(%rsp) + lea 0x40($out),$out # out+=64*3 + vmovdqa $xc3,0x20(%rsp) + sub \$192,$len # len-=64*3 + vmovdqa $xd3,0x30(%rsp) + +.Loop_tail4xop: + movzb ($inp,%r9),%eax + movzb (%rsp,%r9),%ecx + lea 1(%r9),%r9 + xor %ecx,%eax + mov %al,-1($out,%r9) + dec $len + jnz .Loop_tail4xop + +.Ldone4xop: + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xb0(%r10),%xmm6 + movaps -0xa0(%r10),%xmm7 + movaps -0x90(%r10),%xmm8 + movaps -0x80(%r10),%xmm9 + movaps -0x70(%r10),%xmm10 + movaps -0x60(%r10),%xmm11 + movaps -0x50(%r10),%xmm12 + movaps -0x40(%r10),%xmm13 + movaps -0x30(%r10),%xmm14 + movaps -0x20(%r10),%xmm15 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.L4xop_epilogue: + ret +.cfi_endproc +___ +&end_function("chacha20_xop"); +} + +######################################################################## +# AVX2 code path +if ($avx>1) { + +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX2\n"; +} + +my ($xb0,$xb1,$xb2,$xb3, $xd0,$xd1,$xd2,$xd3, + $xa0,$xa1,$xa2,$xa3, $xt0,$xt1,$xt2,$xt3)=map("%ymm$_",(0..15)); +my @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + "%nox","%nox","%nox","%nox", $xd0,$xd1,$xd2,$xd3); + +sub AVX2_lane_ROUND { +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); +my ($xc,$xc_,$t0,$t1)=map("\"$_\"",$xt0,$xt1,$xt2,$xt3); +my @x=map("\"$_\"",@xx); + + # Consider order in which variables are addressed by their + # index: + # + # a b c d + # + # 0 4 8 12 < even round + # 1 5 9 13 + # 2 6 10 14 + # 3 7 11 15 + # 0 5 10 15 < odd round + # 1 6 11 12 + # 2 7 8 13 + # 3 4 9 14 + # + # 'a', 'b' and 'd's are permanently allocated in registers, + # @x[0..7,12..15], while 'c's are maintained in memory. If + # you observe 'c' column, you'll notice that pair of 'c's is + # invariant between rounds. This means that we have to reload + # them once per round, in the middle. This is why you'll see + # bunch of 'c' stores and loads in the middle, but none in + # the beginning or end. + + ( + "&vpaddd (@x[$a0],@x[$a0],@x[$b0])", # Q1 + "&vpxor (@x[$d0],@x[$a0],@x[$d0])", + "&vpshufb (@x[$d0],@x[$d0],$t1)", + "&vpaddd (@x[$a1],@x[$a1],@x[$b1])", # Q2 + "&vpxor (@x[$d1],@x[$a1],@x[$d1])", + "&vpshufb (@x[$d1],@x[$d1],$t1)", + + "&vpaddd ($xc,$xc,@x[$d0])", + "&vpxor (@x[$b0],$xc,@x[$b0])", + "&vpslld ($t0,@x[$b0],12)", + "&vpsrld (@x[$b0],@x[$b0],20)", + "&vpor (@x[$b0],$t0,@x[$b0])", + "&vbroadcasti128($t0,'(%r11)')", # .Lrot24(%rip) + "&vpaddd ($xc_,$xc_,@x[$d1])", + "&vpxor (@x[$b1],$xc_,@x[$b1])", + "&vpslld ($t1,@x[$b1],12)", + "&vpsrld (@x[$b1],@x[$b1],20)", + "&vpor (@x[$b1],$t1,@x[$b1])", + + "&vpaddd (@x[$a0],@x[$a0],@x[$b0])", + "&vpxor (@x[$d0],@x[$a0],@x[$d0])", + "&vpshufb (@x[$d0],@x[$d0],$t0)", + "&vpaddd (@x[$a1],@x[$a1],@x[$b1])", + "&vpxor (@x[$d1],@x[$a1],@x[$d1])", + "&vpshufb (@x[$d1],@x[$d1],$t0)", + + "&vpaddd ($xc,$xc,@x[$d0])", + "&vpxor (@x[$b0],$xc,@x[$b0])", + "&vpslld ($t1,@x[$b0],7)", + "&vpsrld (@x[$b0],@x[$b0],25)", + "&vpor (@x[$b0],$t1,@x[$b0])", + "&vbroadcasti128($t1,'(%r9)')", # .Lrot16(%rip) + "&vpaddd ($xc_,$xc_,@x[$d1])", + "&vpxor (@x[$b1],$xc_,@x[$b1])", + "&vpslld ($t0,@x[$b1],7)", + "&vpsrld (@x[$b1],@x[$b1],25)", + "&vpor (@x[$b1],$t0,@x[$b1])", + + "&vmovdqa (\"`32*($c0-8)`(%rsp)\",$xc)", # reload pair of 'c's + "&vmovdqa (\"`32*($c1-8)`(%rsp)\",$xc_)", + "&vmovdqa ($xc,\"`32*($c2-8)`(%rsp)\")", + "&vmovdqa ($xc_,\"`32*($c3-8)`(%rsp)\")", + + "&vpaddd (@x[$a2],@x[$a2],@x[$b2])", # Q3 + "&vpxor (@x[$d2],@x[$a2],@x[$d2])", + "&vpshufb (@x[$d2],@x[$d2],$t1)", + "&vpaddd (@x[$a3],@x[$a3],@x[$b3])", # Q4 + "&vpxor (@x[$d3],@x[$a3],@x[$d3])", + "&vpshufb (@x[$d3],@x[$d3],$t1)", + + "&vpaddd ($xc,$xc,@x[$d2])", + "&vpxor (@x[$b2],$xc,@x[$b2])", + "&vpslld ($t0,@x[$b2],12)", + "&vpsrld (@x[$b2],@x[$b2],20)", + "&vpor (@x[$b2],$t0,@x[$b2])", + "&vbroadcasti128($t0,'(%r11)')", # .Lrot24(%rip) + "&vpaddd ($xc_,$xc_,@x[$d3])", + "&vpxor (@x[$b3],$xc_,@x[$b3])", + "&vpslld ($t1,@x[$b3],12)", + "&vpsrld (@x[$b3],@x[$b3],20)", + "&vpor (@x[$b3],$t1,@x[$b3])", + + "&vpaddd (@x[$a2],@x[$a2],@x[$b2])", + "&vpxor (@x[$d2],@x[$a2],@x[$d2])", + "&vpshufb (@x[$d2],@x[$d2],$t0)", + "&vpaddd (@x[$a3],@x[$a3],@x[$b3])", + "&vpxor (@x[$d3],@x[$a3],@x[$d3])", + "&vpshufb (@x[$d3],@x[$d3],$t0)", + + "&vpaddd ($xc,$xc,@x[$d2])", + "&vpxor (@x[$b2],$xc,@x[$b2])", + "&vpslld ($t1,@x[$b2],7)", + "&vpsrld (@x[$b2],@x[$b2],25)", + "&vpor (@x[$b2],$t1,@x[$b2])", + "&vbroadcasti128($t1,'(%r9)')", # .Lrot16(%rip) + "&vpaddd ($xc_,$xc_,@x[$d3])", + "&vpxor (@x[$b3],$xc_,@x[$b3])", + "&vpslld ($t0,@x[$b3],7)", + "&vpsrld (@x[$b3],@x[$b3],25)", + "&vpor (@x[$b3],$t0,@x[$b3])" + ); +} + +my $xframe = $win64 ? 0xa8 : 8; + +&declare_function("chacha20_avx2", 32, 5); +$code.=<<___; +.cfi_startproc +.Lchacha20_8x: + lea 8(%rsp),%r10 # frame register +.cfi_def_cfa_register %r10 + sub \$0x280+$xframe,%rsp + and \$-32,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0xb0(%r10) + movaps %xmm7,-0xa0(%r10) + movaps %xmm8,-0x90(%r10) + movaps %xmm9,-0x80(%r10) + movaps %xmm10,-0x70(%r10) + movaps %xmm11,-0x60(%r10) + movaps %xmm12,-0x50(%r10) + movaps %xmm13,-0x40(%r10) + movaps %xmm14,-0x30(%r10) + movaps %xmm15,-0x20(%r10) +.L8x_body: +___ +$code.=<<___; + vzeroupper + + ################ stack layout + # +0x00 SIMD equivalent of @x[8-12] + # ... + # +0x80 constant copy of key[0-2] smashed by lanes + # ... + # +0x200 SIMD counters (with nonce smashed by lanes) + # ... + # +0x280 + + vbroadcasti128 .Lsigma(%rip),$xa3 # key[0] + vbroadcasti128 ($key),$xb3 # key[1] + vbroadcasti128 16($key),$xt3 # key[2] + vbroadcasti128 ($counter),$xd3 # key[3] + lea 0x100(%rsp),%rcx # size optimization + lea 0x200(%rsp),%rax # size optimization + lea .Lrot16(%rip),%r9 + lea .Lrot24(%rip),%r11 + + vpshufd \$0x00,$xa3,$xa0 # smash key by lanes... + vpshufd \$0x55,$xa3,$xa1 + vmovdqa $xa0,0x80-0x100(%rcx) # ... and offload + vpshufd \$0xaa,$xa3,$xa2 + vmovdqa $xa1,0xa0-0x100(%rcx) + vpshufd \$0xff,$xa3,$xa3 + vmovdqa $xa2,0xc0-0x100(%rcx) + vmovdqa $xa3,0xe0-0x100(%rcx) + + vpshufd \$0x00,$xb3,$xb0 + vpshufd \$0x55,$xb3,$xb1 + vmovdqa $xb0,0x100-0x100(%rcx) + vpshufd \$0xaa,$xb3,$xb2 + vmovdqa $xb1,0x120-0x100(%rcx) + vpshufd \$0xff,$xb3,$xb3 + vmovdqa $xb2,0x140-0x100(%rcx) + vmovdqa $xb3,0x160-0x100(%rcx) + + vpshufd \$0x00,$xt3,$xt0 # "xc0" + vpshufd \$0x55,$xt3,$xt1 # "xc1" + vmovdqa $xt0,0x180-0x200(%rax) + vpshufd \$0xaa,$xt3,$xt2 # "xc2" + vmovdqa $xt1,0x1a0-0x200(%rax) + vpshufd \$0xff,$xt3,$xt3 # "xc3" + vmovdqa $xt2,0x1c0-0x200(%rax) + vmovdqa $xt3,0x1e0-0x200(%rax) + + vpshufd \$0x00,$xd3,$xd0 + vpshufd \$0x55,$xd3,$xd1 + vpaddd .Lincy(%rip),$xd0,$xd0 # don't save counters yet + vpshufd \$0xaa,$xd3,$xd2 + vmovdqa $xd1,0x220-0x200(%rax) + vpshufd \$0xff,$xd3,$xd3 + vmovdqa $xd2,0x240-0x200(%rax) + vmovdqa $xd3,0x260-0x200(%rax) + + jmp .Loop_enter8x + +.align 32 +.Loop_outer8x: + vmovdqa 0x80-0x100(%rcx),$xa0 # re-load smashed key + vmovdqa 0xa0-0x100(%rcx),$xa1 + vmovdqa 0xc0-0x100(%rcx),$xa2 + vmovdqa 0xe0-0x100(%rcx),$xa3 + vmovdqa 0x100-0x100(%rcx),$xb0 + vmovdqa 0x120-0x100(%rcx),$xb1 + vmovdqa 0x140-0x100(%rcx),$xb2 + vmovdqa 0x160-0x100(%rcx),$xb3 + vmovdqa 0x180-0x200(%rax),$xt0 # "xc0" + vmovdqa 0x1a0-0x200(%rax),$xt1 # "xc1" + vmovdqa 0x1c0-0x200(%rax),$xt2 # "xc2" + vmovdqa 0x1e0-0x200(%rax),$xt3 # "xc3" + vmovdqa 0x200-0x200(%rax),$xd0 + vmovdqa 0x220-0x200(%rax),$xd1 + vmovdqa 0x240-0x200(%rax),$xd2 + vmovdqa 0x260-0x200(%rax),$xd3 + vpaddd .Leight(%rip),$xd0,$xd0 # next SIMD counters + +.Loop_enter8x: + vmovdqa $xt2,0x40(%rsp) # SIMD equivalent of "@x[10]" + vmovdqa $xt3,0x60(%rsp) # SIMD equivalent of "@x[11]" + vbroadcasti128 (%r9),$xt3 + vmovdqa $xd0,0x200-0x200(%rax) # save SIMD counters + mov \$10,%eax + jmp .Loop8x + +.align 32 +.Loop8x: +___ + foreach (&AVX2_lane_ROUND(0, 4, 8,12)) { eval; } + foreach (&AVX2_lane_ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + dec %eax + jnz .Loop8x + + lea 0x200(%rsp),%rax # size optimization + vpaddd 0x80-0x100(%rcx),$xa0,$xa0 # accumulate key + vpaddd 0xa0-0x100(%rcx),$xa1,$xa1 + vpaddd 0xc0-0x100(%rcx),$xa2,$xa2 + vpaddd 0xe0-0x100(%rcx),$xa3,$xa3 + + vpunpckldq $xa1,$xa0,$xt2 # "de-interlace" data + vpunpckldq $xa3,$xa2,$xt3 + vpunpckhdq $xa1,$xa0,$xa0 + vpunpckhdq $xa3,$xa2,$xa2 + vpunpcklqdq $xt3,$xt2,$xa1 # "a0" + vpunpckhqdq $xt3,$xt2,$xt2 # "a1" + vpunpcklqdq $xa2,$xa0,$xa3 # "a2" + vpunpckhqdq $xa2,$xa0,$xa0 # "a3" +___ + ($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2); +$code.=<<___; + vpaddd 0x100-0x100(%rcx),$xb0,$xb0 + vpaddd 0x120-0x100(%rcx),$xb1,$xb1 + vpaddd 0x140-0x100(%rcx),$xb2,$xb2 + vpaddd 0x160-0x100(%rcx),$xb3,$xb3 + + vpunpckldq $xb1,$xb0,$xt2 + vpunpckldq $xb3,$xb2,$xt3 + vpunpckhdq $xb1,$xb0,$xb0 + vpunpckhdq $xb3,$xb2,$xb2 + vpunpcklqdq $xt3,$xt2,$xb1 # "b0" + vpunpckhqdq $xt3,$xt2,$xt2 # "b1" + vpunpcklqdq $xb2,$xb0,$xb3 # "b2" + vpunpckhqdq $xb2,$xb0,$xb0 # "b3" +___ + ($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2); +$code.=<<___; + vperm2i128 \$0x20,$xb0,$xa0,$xt3 # "de-interlace" further + vperm2i128 \$0x31,$xb0,$xa0,$xb0 + vperm2i128 \$0x20,$xb1,$xa1,$xa0 + vperm2i128 \$0x31,$xb1,$xa1,$xb1 + vperm2i128 \$0x20,$xb2,$xa2,$xa1 + vperm2i128 \$0x31,$xb2,$xa2,$xb2 + vperm2i128 \$0x20,$xb3,$xa3,$xa2 + vperm2i128 \$0x31,$xb3,$xa3,$xb3 +___ + ($xa0,$xa1,$xa2,$xa3,$xt3)=($xt3,$xa0,$xa1,$xa2,$xa3); + my ($xc0,$xc1,$xc2,$xc3)=($xt0,$xt1,$xa0,$xa1); +$code.=<<___; + vmovdqa $xa0,0x00(%rsp) # offload $xaN + vmovdqa $xa1,0x20(%rsp) + vmovdqa 0x40(%rsp),$xc2 # $xa0 + vmovdqa 0x60(%rsp),$xc3 # $xa1 + + vpaddd 0x180-0x200(%rax),$xc0,$xc0 + vpaddd 0x1a0-0x200(%rax),$xc1,$xc1 + vpaddd 0x1c0-0x200(%rax),$xc2,$xc2 + vpaddd 0x1e0-0x200(%rax),$xc3,$xc3 + + vpunpckldq $xc1,$xc0,$xt2 + vpunpckldq $xc3,$xc2,$xt3 + vpunpckhdq $xc1,$xc0,$xc0 + vpunpckhdq $xc3,$xc2,$xc2 + vpunpcklqdq $xt3,$xt2,$xc1 # "c0" + vpunpckhqdq $xt3,$xt2,$xt2 # "c1" + vpunpcklqdq $xc2,$xc0,$xc3 # "c2" + vpunpckhqdq $xc2,$xc0,$xc0 # "c3" +___ + ($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2); +$code.=<<___; + vpaddd 0x200-0x200(%rax),$xd0,$xd0 + vpaddd 0x220-0x200(%rax),$xd1,$xd1 + vpaddd 0x240-0x200(%rax),$xd2,$xd2 + vpaddd 0x260-0x200(%rax),$xd3,$xd3 + + vpunpckldq $xd1,$xd0,$xt2 + vpunpckldq $xd3,$xd2,$xt3 + vpunpckhdq $xd1,$xd0,$xd0 + vpunpckhdq $xd3,$xd2,$xd2 + vpunpcklqdq $xt3,$xt2,$xd1 # "d0" + vpunpckhqdq $xt3,$xt2,$xt2 # "d1" + vpunpcklqdq $xd2,$xd0,$xd3 # "d2" + vpunpckhqdq $xd2,$xd0,$xd0 # "d3" +___ + ($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2); +$code.=<<___; + vperm2i128 \$0x20,$xd0,$xc0,$xt3 # "de-interlace" further + vperm2i128 \$0x31,$xd0,$xc0,$xd0 + vperm2i128 \$0x20,$xd1,$xc1,$xc0 + vperm2i128 \$0x31,$xd1,$xc1,$xd1 + vperm2i128 \$0x20,$xd2,$xc2,$xc1 + vperm2i128 \$0x31,$xd2,$xc2,$xd2 + vperm2i128 \$0x20,$xd3,$xc3,$xc2 + vperm2i128 \$0x31,$xd3,$xc3,$xd3 +___ + ($xc0,$xc1,$xc2,$xc3,$xt3)=($xt3,$xc0,$xc1,$xc2,$xc3); + ($xb0,$xb1,$xb2,$xb3,$xc0,$xc1,$xc2,$xc3)= + ($xc0,$xc1,$xc2,$xc3,$xb0,$xb1,$xb2,$xb3); + ($xa0,$xa1)=($xt2,$xt3); +$code.=<<___; + vmovdqa 0x00(%rsp),$xa0 # $xaN was offloaded, remember? + vmovdqa 0x20(%rsp),$xa1 + + cmp \$64*8,$len + jb .Ltail8x + + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + lea 0x80($inp),$inp # size optimization + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + lea 0x80($out),$out # size optimization + + vpxor 0x00($inp),$xa1,$xa1 + vpxor 0x20($inp),$xb1,$xb1 + vpxor 0x40($inp),$xc1,$xc1 + vpxor 0x60($inp),$xd1,$xd1 + lea 0x80($inp),$inp # size optimization + vmovdqu $xa1,0x00($out) + vmovdqu $xb1,0x20($out) + vmovdqu $xc1,0x40($out) + vmovdqu $xd1,0x60($out) + lea 0x80($out),$out # size optimization + + vpxor 0x00($inp),$xa2,$xa2 + vpxor 0x20($inp),$xb2,$xb2 + vpxor 0x40($inp),$xc2,$xc2 + vpxor 0x60($inp),$xd2,$xd2 + lea 0x80($inp),$inp # size optimization + vmovdqu $xa2,0x00($out) + vmovdqu $xb2,0x20($out) + vmovdqu $xc2,0x40($out) + vmovdqu $xd2,0x60($out) + lea 0x80($out),$out # size optimization + + vpxor 0x00($inp),$xa3,$xa3 + vpxor 0x20($inp),$xb3,$xb3 + vpxor 0x40($inp),$xc3,$xc3 + vpxor 0x60($inp),$xd3,$xd3 + lea 0x80($inp),$inp # size optimization + vmovdqu $xa3,0x00($out) + vmovdqu $xb3,0x20($out) + vmovdqu $xc3,0x40($out) + vmovdqu $xd3,0x60($out) + lea 0x80($out),$out # size optimization + + sub \$64*8,$len + jnz .Loop_outer8x + + jmp .Ldone8x + +.Ltail8x: + cmp \$448,$len + jae .L448_or_more8x + cmp \$384,$len + jae .L384_or_more8x + cmp \$320,$len + jae .L320_or_more8x + cmp \$256,$len + jae .L256_or_more8x + cmp \$192,$len + jae .L192_or_more8x + cmp \$128,$len + jae .L128_or_more8x + cmp \$64,$len + jae .L64_or_more8x + + xor %r9,%r9 + vmovdqa $xa0,0x00(%rsp) + vmovdqa $xb0,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L64_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + je .Ldone8x + + lea 0x40($inp),$inp # inp+=64*1 + xor %r9,%r9 + vmovdqa $xc0,0x00(%rsp) + lea 0x40($out),$out # out+=64*1 + sub \$64,$len # len-=64*1 + vmovdqa $xd0,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L128_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + je .Ldone8x + + lea 0x80($inp),$inp # inp+=64*2 + xor %r9,%r9 + vmovdqa $xa1,0x00(%rsp) + lea 0x80($out),$out # out+=64*2 + sub \$128,$len # len-=64*2 + vmovdqa $xb1,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L192_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + vpxor 0x80($inp),$xa1,$xa1 + vpxor 0xa0($inp),$xb1,$xb1 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + vmovdqu $xa1,0x80($out) + vmovdqu $xb1,0xa0($out) + je .Ldone8x + + lea 0xc0($inp),$inp # inp+=64*3 + xor %r9,%r9 + vmovdqa $xc1,0x00(%rsp) + lea 0xc0($out),$out # out+=64*3 + sub \$192,$len # len-=64*3 + vmovdqa $xd1,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L256_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + vpxor 0x80($inp),$xa1,$xa1 + vpxor 0xa0($inp),$xb1,$xb1 + vpxor 0xc0($inp),$xc1,$xc1 + vpxor 0xe0($inp),$xd1,$xd1 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + vmovdqu $xa1,0x80($out) + vmovdqu $xb1,0xa0($out) + vmovdqu $xc1,0xc0($out) + vmovdqu $xd1,0xe0($out) + je .Ldone8x + + lea 0x100($inp),$inp # inp+=64*4 + xor %r9,%r9 + vmovdqa $xa2,0x00(%rsp) + lea 0x100($out),$out # out+=64*4 + sub \$256,$len # len-=64*4 + vmovdqa $xb2,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L320_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + vpxor 0x80($inp),$xa1,$xa1 + vpxor 0xa0($inp),$xb1,$xb1 + vpxor 0xc0($inp),$xc1,$xc1 + vpxor 0xe0($inp),$xd1,$xd1 + vpxor 0x100($inp),$xa2,$xa2 + vpxor 0x120($inp),$xb2,$xb2 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + vmovdqu $xa1,0x80($out) + vmovdqu $xb1,0xa0($out) + vmovdqu $xc1,0xc0($out) + vmovdqu $xd1,0xe0($out) + vmovdqu $xa2,0x100($out) + vmovdqu $xb2,0x120($out) + je .Ldone8x + + lea 0x140($inp),$inp # inp+=64*5 + xor %r9,%r9 + vmovdqa $xc2,0x00(%rsp) + lea 0x140($out),$out # out+=64*5 + sub \$320,$len # len-=64*5 + vmovdqa $xd2,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L384_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + vpxor 0x80($inp),$xa1,$xa1 + vpxor 0xa0($inp),$xb1,$xb1 + vpxor 0xc0($inp),$xc1,$xc1 + vpxor 0xe0($inp),$xd1,$xd1 + vpxor 0x100($inp),$xa2,$xa2 + vpxor 0x120($inp),$xb2,$xb2 + vpxor 0x140($inp),$xc2,$xc2 + vpxor 0x160($inp),$xd2,$xd2 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + vmovdqu $xa1,0x80($out) + vmovdqu $xb1,0xa0($out) + vmovdqu $xc1,0xc0($out) + vmovdqu $xd1,0xe0($out) + vmovdqu $xa2,0x100($out) + vmovdqu $xb2,0x120($out) + vmovdqu $xc2,0x140($out) + vmovdqu $xd2,0x160($out) + je .Ldone8x + + lea 0x180($inp),$inp # inp+=64*6 + xor %r9,%r9 + vmovdqa $xa3,0x00(%rsp) + lea 0x180($out),$out # out+=64*6 + sub \$384,$len # len-=64*6 + vmovdqa $xb3,0x20(%rsp) + jmp .Loop_tail8x + +.align 32 +.L448_or_more8x: + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + vpxor 0x80($inp),$xa1,$xa1 + vpxor 0xa0($inp),$xb1,$xb1 + vpxor 0xc0($inp),$xc1,$xc1 + vpxor 0xe0($inp),$xd1,$xd1 + vpxor 0x100($inp),$xa2,$xa2 + vpxor 0x120($inp),$xb2,$xb2 + vpxor 0x140($inp),$xc2,$xc2 + vpxor 0x160($inp),$xd2,$xd2 + vpxor 0x180($inp),$xa3,$xa3 + vpxor 0x1a0($inp),$xb3,$xb3 + vmovdqu $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + vmovdqu $xa1,0x80($out) + vmovdqu $xb1,0xa0($out) + vmovdqu $xc1,0xc0($out) + vmovdqu $xd1,0xe0($out) + vmovdqu $xa2,0x100($out) + vmovdqu $xb2,0x120($out) + vmovdqu $xc2,0x140($out) + vmovdqu $xd2,0x160($out) + vmovdqu $xa3,0x180($out) + vmovdqu $xb3,0x1a0($out) + je .Ldone8x + + lea 0x1c0($inp),$inp # inp+=64*7 + xor %r9,%r9 + vmovdqa $xc3,0x00(%rsp) + lea 0x1c0($out),$out # out+=64*7 + sub \$448,$len # len-=64*7 + vmovdqa $xd3,0x20(%rsp) + +.Loop_tail8x: + movzb ($inp,%r9),%eax + movzb (%rsp,%r9),%ecx + lea 1(%r9),%r9 + xor %ecx,%eax + mov %al,-1($out,%r9) + dec $len + jnz .Loop_tail8x + +.Ldone8x: + vzeroall +___ +$code.=<<___ if ($win64); + movaps -0xb0(%r10),%xmm6 + movaps -0xa0(%r10),%xmm7 + movaps -0x90(%r10),%xmm8 + movaps -0x80(%r10),%xmm9 + movaps -0x70(%r10),%xmm10 + movaps -0x60(%r10),%xmm11 + movaps -0x50(%r10),%xmm12 + movaps -0x40(%r10),%xmm13 + movaps -0x30(%r10),%xmm14 + movaps -0x20(%r10),%xmm15 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.L8x_epilogue: + ret +.cfi_endproc +___ +&end_function("chacha20_avx2"); +if($kernel) { + $code .= "#endif\n"; +} +} + +######################################################################## +# AVX512 code paths +if ($avx>2) { +# This one handles shorter inputs... +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX512\n"; +} + +my ($a,$b,$c,$d, $a_,$b_,$c_,$d_,$fourz) = map("%zmm$_",(0..3,16..20)); +my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7)); + +sub vpxord() # size optimization +{ my $opcode = "vpxor"; # adhere to vpxor when possible + + foreach (@_) { + if (/%([zy])mm([0-9]+)/ && ($1 eq "z" || $2>=16)) { + $opcode = "vpxord"; + last; + } + } + + $code .= "\t$opcode\t".join(',',reverse @_)."\n"; +} + +sub AVX512ROUND { # critical path is 14 "SIMD ticks" per round + &vpaddd ($a,$a,$b); + &vpxord ($d,$d,$a); + &vprold ($d,$d,16); + + &vpaddd ($c,$c,$d); + &vpxord ($b,$b,$c); + &vprold ($b,$b,12); + + &vpaddd ($a,$a,$b); + &vpxord ($d,$d,$a); + &vprold ($d,$d,8); + + &vpaddd ($c,$c,$d); + &vpxord ($b,$b,$c); + &vprold ($b,$b,7); +} + +my $xframe = $win64 ? 32+8 : 8; + +&declare_function("chacha20_avx512", 32, 5); +$code.=<<___; +.cfi_startproc +.Lchacha20_avx512: + lea 8(%rsp),%r10 # frame pointer +.cfi_def_cfa_register %r10 + cmp \$512,$len + ja .Lchacha20_16x + + sub \$64+$xframe,%rsp + and \$-64,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0x30(%r10) + movaps %xmm7,-0x20(%r10) +.Lavx512_body: +___ +$code.=<<___; + vbroadcasti32x4 .Lsigma(%rip),$a + vbroadcasti32x4 ($key),$b + vbroadcasti32x4 16($key),$c + vbroadcasti32x4 ($counter),$d + + vmovdqa32 $a,$a_ + vmovdqa32 $b,$b_ + vmovdqa32 $c,$c_ + vpaddd .Lzeroz(%rip),$d,$d + vmovdqa32 .Lfourz(%rip),$fourz + mov \$10,$counter # reuse $counter + vmovdqa32 $d,$d_ + jmp .Loop_avx512 + +.align 16 +.Loop_outer_avx512: + vmovdqa32 $a_,$a + vmovdqa32 $b_,$b + vmovdqa32 $c_,$c + vpaddd $fourz,$d_,$d + mov \$10,$counter + vmovdqa32 $d,$d_ + jmp .Loop_avx512 + +.align 32 +.Loop_avx512: +___ + &AVX512ROUND(); + &vpshufd ($c,$c,0b01001110); + &vpshufd ($b,$b,0b00111001); + &vpshufd ($d,$d,0b10010011); + + &AVX512ROUND(); + &vpshufd ($c,$c,0b01001110); + &vpshufd ($b,$b,0b10010011); + &vpshufd ($d,$d,0b00111001); + + &dec ($counter); + &jnz (".Loop_avx512"); + +$code.=<<___; + vpaddd $a_,$a,$a + vpaddd $b_,$b,$b + vpaddd $c_,$c,$c + vpaddd $d_,$d,$d + + sub \$64,$len + jb .Ltail64_avx512 + + vpxor 0x00($inp),%x#$a,$t0 # xor with input + vpxor 0x10($inp),%x#$b,$t1 + vpxor 0x20($inp),%x#$c,$t2 + vpxor 0x30($inp),%x#$d,$t3 + lea 0x40($inp),$inp # inp+=64 + + vmovdqu $t0,0x00($out) # write output + vmovdqu $t1,0x10($out) + vmovdqu $t2,0x20($out) + vmovdqu $t3,0x30($out) + lea 0x40($out),$out # out+=64 + + jz .Ldone_avx512 + + vextracti32x4 \$1,$a,$t0 + vextracti32x4 \$1,$b,$t1 + vextracti32x4 \$1,$c,$t2 + vextracti32x4 \$1,$d,$t3 + + sub \$64,$len + jb .Ltail_avx512 + + vpxor 0x00($inp),$t0,$t0 # xor with input + vpxor 0x10($inp),$t1,$t1 + vpxor 0x20($inp),$t2,$t2 + vpxor 0x30($inp),$t3,$t3 + lea 0x40($inp),$inp # inp+=64 + + vmovdqu $t0,0x00($out) # write output + vmovdqu $t1,0x10($out) + vmovdqu $t2,0x20($out) + vmovdqu $t3,0x30($out) + lea 0x40($out),$out # out+=64 + + jz .Ldone_avx512 + + vextracti32x4 \$2,$a,$t0 + vextracti32x4 \$2,$b,$t1 + vextracti32x4 \$2,$c,$t2 + vextracti32x4 \$2,$d,$t3 + + sub \$64,$len + jb .Ltail_avx512 + + vpxor 0x00($inp),$t0,$t0 # xor with input + vpxor 0x10($inp),$t1,$t1 + vpxor 0x20($inp),$t2,$t2 + vpxor 0x30($inp),$t3,$t3 + lea 0x40($inp),$inp # inp+=64 + + vmovdqu $t0,0x00($out) # write output + vmovdqu $t1,0x10($out) + vmovdqu $t2,0x20($out) + vmovdqu $t3,0x30($out) + lea 0x40($out),$out # out+=64 + + jz .Ldone_avx512 + + vextracti32x4 \$3,$a,$t0 + vextracti32x4 \$3,$b,$t1 + vextracti32x4 \$3,$c,$t2 + vextracti32x4 \$3,$d,$t3 + + sub \$64,$len + jb .Ltail_avx512 + + vpxor 0x00($inp),$t0,$t0 # xor with input + vpxor 0x10($inp),$t1,$t1 + vpxor 0x20($inp),$t2,$t2 + vpxor 0x30($inp),$t3,$t3 + lea 0x40($inp),$inp # inp+=64 + + vmovdqu $t0,0x00($out) # write output + vmovdqu $t1,0x10($out) + vmovdqu $t2,0x20($out) + vmovdqu $t3,0x30($out) + lea 0x40($out),$out # out+=64 + + jnz .Loop_outer_avx512 + + jmp .Ldone_avx512 + +.align 16 +.Ltail64_avx512: + vmovdqa %x#$a,0x00(%rsp) + vmovdqa %x#$b,0x10(%rsp) + vmovdqa %x#$c,0x20(%rsp) + vmovdqa %x#$d,0x30(%rsp) + add \$64,$len + jmp .Loop_tail_avx512 + +.align 16 +.Ltail_avx512: + vmovdqa $t0,0x00(%rsp) + vmovdqa $t1,0x10(%rsp) + vmovdqa $t2,0x20(%rsp) + vmovdqa $t3,0x30(%rsp) + add \$64,$len + +.Loop_tail_avx512: + movzb ($inp,$counter),%eax + movzb (%rsp,$counter),%ecx + lea 1($counter),$counter + xor %ecx,%eax + mov %al,-1($out,$counter) + dec $len + jnz .Loop_tail_avx512 + + vmovdqu32 $a_,0x00(%rsp) + +.Ldone_avx512: + vzeroall +___ +$code.=<<___ if ($win64); + movaps -0x30(%r10),%xmm6 + movaps -0x20(%r10),%xmm7 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.Lavx512_epilogue: + ret +.cfi_endproc +___ +&end_function("chacha20_avx512"); + +map(s/%z/%y/, $a,$b,$c,$d, $a_,$b_,$c_,$d_,$fourz); + +&declare_function("chacha20_avx512vl", 32, 5); +$code.=<<___; +.cfi_startproc +.Lchacha20_avx512vl: + lea 8(%rsp),%r10 # frame pointer +.cfi_def_cfa_register %r10 + cmp \$128,$len + ja .Lchacha20_8xvl + + sub \$64+$xframe,%rsp + and \$-32,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0x30(%r10) + movaps %xmm7,-0x20(%r10) +.Lavx512vl_body: +___ +$code.=<<___; + vbroadcasti128 .Lsigma(%rip),$a + vbroadcasti128 ($key),$b + vbroadcasti128 16($key),$c + vbroadcasti128 ($counter),$d + + vmovdqa32 $a,$a_ + vmovdqa32 $b,$b_ + vmovdqa32 $c,$c_ + vpaddd .Lzeroz(%rip),$d,$d + vmovdqa32 .Ltwoy(%rip),$fourz + mov \$10,$counter # reuse $counter + vmovdqa32 $d,$d_ + jmp .Loop_avx512vl + +.align 16 +.Loop_outer_avx512vl: + vmovdqa32 $c_,$c + vpaddd $fourz,$d_,$d + mov \$10,$counter + vmovdqa32 $d,$d_ + jmp .Loop_avx512vl + +.align 32 +.Loop_avx512vl: +___ + &AVX512ROUND(); + &vpshufd ($c,$c,0b01001110); + &vpshufd ($b,$b,0b00111001); + &vpshufd ($d,$d,0b10010011); + + &AVX512ROUND(); + &vpshufd ($c,$c,0b01001110); + &vpshufd ($b,$b,0b10010011); + &vpshufd ($d,$d,0b00111001); + + &dec ($counter); + &jnz (".Loop_avx512vl"); + +$code.=<<___; + vpaddd $a_,$a,$a + vpaddd $b_,$b,$b + vpaddd $c_,$c,$c + vpaddd $d_,$d,$d + + sub \$64,$len + jb .Ltail64_avx512vl + + vpxor 0x00($inp),%x#$a,$t0 # xor with input + vpxor 0x10($inp),%x#$b,$t1 + vpxor 0x20($inp),%x#$c,$t2 + vpxor 0x30($inp),%x#$d,$t3 + lea 0x40($inp),$inp # inp+=64 + + vmovdqu $t0,0x00($out) # write output + vmovdqu $t1,0x10($out) + vmovdqu $t2,0x20($out) + vmovdqu $t3,0x30($out) + lea 0x40($out),$out # out+=64 + + jz .Ldone_avx512vl + + vextracti128 \$1,$a,$t0 + vextracti128 \$1,$b,$t1 + vextracti128 \$1,$c,$t2 + vextracti128 \$1,$d,$t3 + + sub \$64,$len + jb .Ltail_avx512vl + + vpxor 0x00($inp),$t0,$t0 # xor with input + vpxor 0x10($inp),$t1,$t1 + vpxor 0x20($inp),$t2,$t2 + vpxor 0x30($inp),$t3,$t3 + lea 0x40($inp),$inp # inp+=64 + + vmovdqu $t0,0x00($out) # write output + vmovdqu $t1,0x10($out) + vmovdqu $t2,0x20($out) + vmovdqu $t3,0x30($out) + lea 0x40($out),$out # out+=64 + + vmovdqa32 $a_,$a + vmovdqa32 $b_,$b + jnz .Loop_outer_avx512vl + + jmp .Ldone_avx512vl + +.align 16 +.Ltail64_avx512vl: + vmovdqa %x#$a,0x00(%rsp) + vmovdqa %x#$b,0x10(%rsp) + vmovdqa %x#$c,0x20(%rsp) + vmovdqa %x#$d,0x30(%rsp) + add \$64,$len + jmp .Loop_tail_avx512vl + +.align 16 +.Ltail_avx512vl: + vmovdqa $t0,0x00(%rsp) + vmovdqa $t1,0x10(%rsp) + vmovdqa $t2,0x20(%rsp) + vmovdqa $t3,0x30(%rsp) + add \$64,$len + +.Loop_tail_avx512vl: + movzb ($inp,$counter),%eax + movzb (%rsp,$counter),%ecx + lea 1($counter),$counter + xor %ecx,%eax + mov %al,-1($out,$counter) + dec $len + jnz .Loop_tail_avx512vl + + vmovdqu32 $a_,0x00(%rsp) + vmovdqu32 $a_,0x20(%rsp) + +.Ldone_avx512vl: + vzeroall +___ +$code.=<<___ if ($win64); + movaps -0x30(%r10),%xmm6 + movaps -0x20(%r10),%xmm7 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.Lavx512vl_epilogue: + ret +.cfi_endproc +___ +&end_function("chacha20_avx512vl"); + +# This one handles longer inputs... + +my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3)=map("%zmm$_",(0..15)); +my @xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3); +my @key=map("%zmm$_",(16..31)); +my ($xt0,$xt1,$xt2,$xt3)=@key[0..3]; + +sub AVX512_lane_ROUND { +my ($a0,$b0,$c0,$d0)=@_; +my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); +my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); +my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); +my @x=map("\"$_\"",@xx); + + ( + "&vpaddd (@x[$a0],@x[$a0],@x[$b0])", # Q1 + "&vpaddd (@x[$a1],@x[$a1],@x[$b1])", # Q2 + "&vpaddd (@x[$a2],@x[$a2],@x[$b2])", # Q3 + "&vpaddd (@x[$a3],@x[$a3],@x[$b3])", # Q4 + "&vpxord (@x[$d0],@x[$d0],@x[$a0])", + "&vpxord (@x[$d1],@x[$d1],@x[$a1])", + "&vpxord (@x[$d2],@x[$d2],@x[$a2])", + "&vpxord (@x[$d3],@x[$d3],@x[$a3])", + "&vprold (@x[$d0],@x[$d0],16)", + "&vprold (@x[$d1],@x[$d1],16)", + "&vprold (@x[$d2],@x[$d2],16)", + "&vprold (@x[$d3],@x[$d3],16)", + + "&vpaddd (@x[$c0],@x[$c0],@x[$d0])", + "&vpaddd (@x[$c1],@x[$c1],@x[$d1])", + "&vpaddd (@x[$c2],@x[$c2],@x[$d2])", + "&vpaddd (@x[$c3],@x[$c3],@x[$d3])", + "&vpxord (@x[$b0],@x[$b0],@x[$c0])", + "&vpxord (@x[$b1],@x[$b1],@x[$c1])", + "&vpxord (@x[$b2],@x[$b2],@x[$c2])", + "&vpxord (@x[$b3],@x[$b3],@x[$c3])", + "&vprold (@x[$b0],@x[$b0],12)", + "&vprold (@x[$b1],@x[$b1],12)", + "&vprold (@x[$b2],@x[$b2],12)", + "&vprold (@x[$b3],@x[$b3],12)", + + "&vpaddd (@x[$a0],@x[$a0],@x[$b0])", + "&vpaddd (@x[$a1],@x[$a1],@x[$b1])", + "&vpaddd (@x[$a2],@x[$a2],@x[$b2])", + "&vpaddd (@x[$a3],@x[$a3],@x[$b3])", + "&vpxord (@x[$d0],@x[$d0],@x[$a0])", + "&vpxord (@x[$d1],@x[$d1],@x[$a1])", + "&vpxord (@x[$d2],@x[$d2],@x[$a2])", + "&vpxord (@x[$d3],@x[$d3],@x[$a3])", + "&vprold (@x[$d0],@x[$d0],8)", + "&vprold (@x[$d1],@x[$d1],8)", + "&vprold (@x[$d2],@x[$d2],8)", + "&vprold (@x[$d3],@x[$d3],8)", + + "&vpaddd (@x[$c0],@x[$c0],@x[$d0])", + "&vpaddd (@x[$c1],@x[$c1],@x[$d1])", + "&vpaddd (@x[$c2],@x[$c2],@x[$d2])", + "&vpaddd (@x[$c3],@x[$c3],@x[$d3])", + "&vpxord (@x[$b0],@x[$b0],@x[$c0])", + "&vpxord (@x[$b1],@x[$b1],@x[$c1])", + "&vpxord (@x[$b2],@x[$b2],@x[$c2])", + "&vpxord (@x[$b3],@x[$b3],@x[$c3])", + "&vprold (@x[$b0],@x[$b0],7)", + "&vprold (@x[$b1],@x[$b1],7)", + "&vprold (@x[$b2],@x[$b2],7)", + "&vprold (@x[$b3],@x[$b3],7)" + ); +} + +my $xframe = $win64 ? 0xa8 : 8; + +$code.=<<___; +.type chacha20_16x,\@function,5 +.align 32 +chacha20_16x: +.cfi_startproc +.Lchacha20_16x: + lea 8(%rsp),%r10 # frame register +.cfi_def_cfa_register %r10 + sub \$64+$xframe,%rsp + and \$-64,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0xb0(%r10) + movaps %xmm7,-0xa0(%r10) + movaps %xmm8,-0x90(%r10) + movaps %xmm9,-0x80(%r10) + movaps %xmm10,-0x70(%r10) + movaps %xmm11,-0x60(%r10) + movaps %xmm12,-0x50(%r10) + movaps %xmm13,-0x40(%r10) + movaps %xmm14,-0x30(%r10) + movaps %xmm15,-0x20(%r10) +.L16x_body: +___ +$code.=<<___; + vzeroupper + + lea .Lsigma(%rip),%r9 + vbroadcasti32x4 (%r9),$xa3 # key[0] + vbroadcasti32x4 ($key),$xb3 # key[1] + vbroadcasti32x4 16($key),$xc3 # key[2] + vbroadcasti32x4 ($counter),$xd3 # key[3] + + vpshufd \$0x00,$xa3,$xa0 # smash key by lanes... + vpshufd \$0x55,$xa3,$xa1 + vpshufd \$0xaa,$xa3,$xa2 + vpshufd \$0xff,$xa3,$xa3 + vmovdqa64 $xa0,@key[0] + vmovdqa64 $xa1,@key[1] + vmovdqa64 $xa2,@key[2] + vmovdqa64 $xa3,@key[3] + + vpshufd \$0x00,$xb3,$xb0 + vpshufd \$0x55,$xb3,$xb1 + vpshufd \$0xaa,$xb3,$xb2 + vpshufd \$0xff,$xb3,$xb3 + vmovdqa64 $xb0,@key[4] + vmovdqa64 $xb1,@key[5] + vmovdqa64 $xb2,@key[6] + vmovdqa64 $xb3,@key[7] + + vpshufd \$0x00,$xc3,$xc0 + vpshufd \$0x55,$xc3,$xc1 + vpshufd \$0xaa,$xc3,$xc2 + vpshufd \$0xff,$xc3,$xc3 + vmovdqa64 $xc0,@key[8] + vmovdqa64 $xc1,@key[9] + vmovdqa64 $xc2,@key[10] + vmovdqa64 $xc3,@key[11] + + vpshufd \$0x00,$xd3,$xd0 + vpshufd \$0x55,$xd3,$xd1 + vpshufd \$0xaa,$xd3,$xd2 + vpshufd \$0xff,$xd3,$xd3 + vpaddd .Lincz(%rip),$xd0,$xd0 # don't save counters yet + vmovdqa64 $xd0,@key[12] + vmovdqa64 $xd1,@key[13] + vmovdqa64 $xd2,@key[14] + vmovdqa64 $xd3,@key[15] + + mov \$10,%eax + jmp .Loop16x + +.align 32 +.Loop_outer16x: + vpbroadcastd 0(%r9),$xa0 # reload key + vpbroadcastd 4(%r9),$xa1 + vpbroadcastd 8(%r9),$xa2 + vpbroadcastd 12(%r9),$xa3 + vpaddd .Lsixteen(%rip),@key[12],@key[12] # next SIMD counters + vmovdqa64 @key[4],$xb0 + vmovdqa64 @key[5],$xb1 + vmovdqa64 @key[6],$xb2 + vmovdqa64 @key[7],$xb3 + vmovdqa64 @key[8],$xc0 + vmovdqa64 @key[9],$xc1 + vmovdqa64 @key[10],$xc2 + vmovdqa64 @key[11],$xc3 + vmovdqa64 @key[12],$xd0 + vmovdqa64 @key[13],$xd1 + vmovdqa64 @key[14],$xd2 + vmovdqa64 @key[15],$xd3 + + vmovdqa64 $xa0,@key[0] + vmovdqa64 $xa1,@key[1] + vmovdqa64 $xa2,@key[2] + vmovdqa64 $xa3,@key[3] + + mov \$10,%eax + jmp .Loop16x + +.align 32 +.Loop16x: +___ + foreach (&AVX512_lane_ROUND(0, 4, 8,12)) { eval; } + foreach (&AVX512_lane_ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + dec %eax + jnz .Loop16x + + vpaddd @key[0],$xa0,$xa0 # accumulate key + vpaddd @key[1],$xa1,$xa1 + vpaddd @key[2],$xa2,$xa2 + vpaddd @key[3],$xa3,$xa3 + + vpunpckldq $xa1,$xa0,$xt2 # "de-interlace" data + vpunpckldq $xa3,$xa2,$xt3 + vpunpckhdq $xa1,$xa0,$xa0 + vpunpckhdq $xa3,$xa2,$xa2 + vpunpcklqdq $xt3,$xt2,$xa1 # "a0" + vpunpckhqdq $xt3,$xt2,$xt2 # "a1" + vpunpcklqdq $xa2,$xa0,$xa3 # "a2" + vpunpckhqdq $xa2,$xa0,$xa0 # "a3" +___ + ($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2); +$code.=<<___; + vpaddd @key[4],$xb0,$xb0 + vpaddd @key[5],$xb1,$xb1 + vpaddd @key[6],$xb2,$xb2 + vpaddd @key[7],$xb3,$xb3 + + vpunpckldq $xb1,$xb0,$xt2 + vpunpckldq $xb3,$xb2,$xt3 + vpunpckhdq $xb1,$xb0,$xb0 + vpunpckhdq $xb3,$xb2,$xb2 + vpunpcklqdq $xt3,$xt2,$xb1 # "b0" + vpunpckhqdq $xt3,$xt2,$xt2 # "b1" + vpunpcklqdq $xb2,$xb0,$xb3 # "b2" + vpunpckhqdq $xb2,$xb0,$xb0 # "b3" +___ + ($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2); +$code.=<<___; + vshufi32x4 \$0x44,$xb0,$xa0,$xt3 # "de-interlace" further + vshufi32x4 \$0xee,$xb0,$xa0,$xb0 + vshufi32x4 \$0x44,$xb1,$xa1,$xa0 + vshufi32x4 \$0xee,$xb1,$xa1,$xb1 + vshufi32x4 \$0x44,$xb2,$xa2,$xa1 + vshufi32x4 \$0xee,$xb2,$xa2,$xb2 + vshufi32x4 \$0x44,$xb3,$xa3,$xa2 + vshufi32x4 \$0xee,$xb3,$xa3,$xb3 +___ + ($xa0,$xa1,$xa2,$xa3,$xt3)=($xt3,$xa0,$xa1,$xa2,$xa3); +$code.=<<___; + vpaddd @key[8],$xc0,$xc0 + vpaddd @key[9],$xc1,$xc1 + vpaddd @key[10],$xc2,$xc2 + vpaddd @key[11],$xc3,$xc3 + + vpunpckldq $xc1,$xc0,$xt2 + vpunpckldq $xc3,$xc2,$xt3 + vpunpckhdq $xc1,$xc0,$xc0 + vpunpckhdq $xc3,$xc2,$xc2 + vpunpcklqdq $xt3,$xt2,$xc1 # "c0" + vpunpckhqdq $xt3,$xt2,$xt2 # "c1" + vpunpcklqdq $xc2,$xc0,$xc3 # "c2" + vpunpckhqdq $xc2,$xc0,$xc0 # "c3" +___ + ($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2); +$code.=<<___; + vpaddd @key[12],$xd0,$xd0 + vpaddd @key[13],$xd1,$xd1 + vpaddd @key[14],$xd2,$xd2 + vpaddd @key[15],$xd3,$xd3 + + vpunpckldq $xd1,$xd0,$xt2 + vpunpckldq $xd3,$xd2,$xt3 + vpunpckhdq $xd1,$xd0,$xd0 + vpunpckhdq $xd3,$xd2,$xd2 + vpunpcklqdq $xt3,$xt2,$xd1 # "d0" + vpunpckhqdq $xt3,$xt2,$xt2 # "d1" + vpunpcklqdq $xd2,$xd0,$xd3 # "d2" + vpunpckhqdq $xd2,$xd0,$xd0 # "d3" +___ + ($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2); +$code.=<<___; + vshufi32x4 \$0x44,$xd0,$xc0,$xt3 # "de-interlace" further + vshufi32x4 \$0xee,$xd0,$xc0,$xd0 + vshufi32x4 \$0x44,$xd1,$xc1,$xc0 + vshufi32x4 \$0xee,$xd1,$xc1,$xd1 + vshufi32x4 \$0x44,$xd2,$xc2,$xc1 + vshufi32x4 \$0xee,$xd2,$xc2,$xd2 + vshufi32x4 \$0x44,$xd3,$xc3,$xc2 + vshufi32x4 \$0xee,$xd3,$xc3,$xd3 +___ + ($xc0,$xc1,$xc2,$xc3,$xt3)=($xt3,$xc0,$xc1,$xc2,$xc3); +$code.=<<___; + vshufi32x4 \$0x88,$xc0,$xa0,$xt0 # "de-interlace" further + vshufi32x4 \$0xdd,$xc0,$xa0,$xa0 + vshufi32x4 \$0x88,$xd0,$xb0,$xc0 + vshufi32x4 \$0xdd,$xd0,$xb0,$xd0 + vshufi32x4 \$0x88,$xc1,$xa1,$xt1 + vshufi32x4 \$0xdd,$xc1,$xa1,$xa1 + vshufi32x4 \$0x88,$xd1,$xb1,$xc1 + vshufi32x4 \$0xdd,$xd1,$xb1,$xd1 + vshufi32x4 \$0x88,$xc2,$xa2,$xt2 + vshufi32x4 \$0xdd,$xc2,$xa2,$xa2 + vshufi32x4 \$0x88,$xd2,$xb2,$xc2 + vshufi32x4 \$0xdd,$xd2,$xb2,$xd2 + vshufi32x4 \$0x88,$xc3,$xa3,$xt3 + vshufi32x4 \$0xdd,$xc3,$xa3,$xa3 + vshufi32x4 \$0x88,$xd3,$xb3,$xc3 + vshufi32x4 \$0xdd,$xd3,$xb3,$xd3 +___ + ($xa0,$xa1,$xa2,$xa3,$xb0,$xb1,$xb2,$xb3)= + ($xt0,$xt1,$xt2,$xt3,$xa0,$xa1,$xa2,$xa3); + + ($xa0,$xb0,$xc0,$xd0, $xa1,$xb1,$xc1,$xd1, + $xa2,$xb2,$xc2,$xd2, $xa3,$xb3,$xc3,$xd3) = + ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3); +$code.=<<___; + cmp \$64*16,$len + jb .Ltail16x + + vpxord 0x00($inp),$xa0,$xa0 # xor with input + vpxord 0x40($inp),$xb0,$xb0 + vpxord 0x80($inp),$xc0,$xc0 + vpxord 0xc0($inp),$xd0,$xd0 + vmovdqu32 $xa0,0x00($out) + vmovdqu32 $xb0,0x40($out) + vmovdqu32 $xc0,0x80($out) + vmovdqu32 $xd0,0xc0($out) + + vpxord 0x100($inp),$xa1,$xa1 + vpxord 0x140($inp),$xb1,$xb1 + vpxord 0x180($inp),$xc1,$xc1 + vpxord 0x1c0($inp),$xd1,$xd1 + vmovdqu32 $xa1,0x100($out) + vmovdqu32 $xb1,0x140($out) + vmovdqu32 $xc1,0x180($out) + vmovdqu32 $xd1,0x1c0($out) + + vpxord 0x200($inp),$xa2,$xa2 + vpxord 0x240($inp),$xb2,$xb2 + vpxord 0x280($inp),$xc2,$xc2 + vpxord 0x2c0($inp),$xd2,$xd2 + vmovdqu32 $xa2,0x200($out) + vmovdqu32 $xb2,0x240($out) + vmovdqu32 $xc2,0x280($out) + vmovdqu32 $xd2,0x2c0($out) + + vpxord 0x300($inp),$xa3,$xa3 + vpxord 0x340($inp),$xb3,$xb3 + vpxord 0x380($inp),$xc3,$xc3 + vpxord 0x3c0($inp),$xd3,$xd3 + lea 0x400($inp),$inp + vmovdqu32 $xa3,0x300($out) + vmovdqu32 $xb3,0x340($out) + vmovdqu32 $xc3,0x380($out) + vmovdqu32 $xd3,0x3c0($out) + lea 0x400($out),$out + + sub \$64*16,$len + jnz .Loop_outer16x + + jmp .Ldone16x + +.align 32 +.Ltail16x: + xor %r9,%r9 + sub $inp,$out + cmp \$64*1,$len + jb .Less_than_64_16x + vpxord ($inp),$xa0,$xa0 # xor with input + vmovdqu32 $xa0,($out,$inp) + je .Ldone16x + vmovdqa32 $xb0,$xa0 + lea 64($inp),$inp + + cmp \$64*2,$len + jb .Less_than_64_16x + vpxord ($inp),$xb0,$xb0 + vmovdqu32 $xb0,($out,$inp) + je .Ldone16x + vmovdqa32 $xc0,$xa0 + lea 64($inp),$inp + + cmp \$64*3,$len + jb .Less_than_64_16x + vpxord ($inp),$xc0,$xc0 + vmovdqu32 $xc0,($out,$inp) + je .Ldone16x + vmovdqa32 $xd0,$xa0 + lea 64($inp),$inp + + cmp \$64*4,$len + jb .Less_than_64_16x + vpxord ($inp),$xd0,$xd0 + vmovdqu32 $xd0,($out,$inp) + je .Ldone16x + vmovdqa32 $xa1,$xa0 + lea 64($inp),$inp + + cmp \$64*5,$len + jb .Less_than_64_16x + vpxord ($inp),$xa1,$xa1 + vmovdqu32 $xa1,($out,$inp) + je .Ldone16x + vmovdqa32 $xb1,$xa0 + lea 64($inp),$inp + + cmp \$64*6,$len + jb .Less_than_64_16x + vpxord ($inp),$xb1,$xb1 + vmovdqu32 $xb1,($out,$inp) + je .Ldone16x + vmovdqa32 $xc1,$xa0 + lea 64($inp),$inp + + cmp \$64*7,$len + jb .Less_than_64_16x + vpxord ($inp),$xc1,$xc1 + vmovdqu32 $xc1,($out,$inp) + je .Ldone16x + vmovdqa32 $xd1,$xa0 + lea 64($inp),$inp + + cmp \$64*8,$len + jb .Less_than_64_16x + vpxord ($inp),$xd1,$xd1 + vmovdqu32 $xd1,($out,$inp) + je .Ldone16x + vmovdqa32 $xa2,$xa0 + lea 64($inp),$inp + + cmp \$64*9,$len + jb .Less_than_64_16x + vpxord ($inp),$xa2,$xa2 + vmovdqu32 $xa2,($out,$inp) + je .Ldone16x + vmovdqa32 $xb2,$xa0 + lea 64($inp),$inp + + cmp \$64*10,$len + jb .Less_than_64_16x + vpxord ($inp),$xb2,$xb2 + vmovdqu32 $xb2,($out,$inp) + je .Ldone16x + vmovdqa32 $xc2,$xa0 + lea 64($inp),$inp + + cmp \$64*11,$len + jb .Less_than_64_16x + vpxord ($inp),$xc2,$xc2 + vmovdqu32 $xc2,($out,$inp) + je .Ldone16x + vmovdqa32 $xd2,$xa0 + lea 64($inp),$inp + + cmp \$64*12,$len + jb .Less_than_64_16x + vpxord ($inp),$xd2,$xd2 + vmovdqu32 $xd2,($out,$inp) + je .Ldone16x + vmovdqa32 $xa3,$xa0 + lea 64($inp),$inp + + cmp \$64*13,$len + jb .Less_than_64_16x + vpxord ($inp),$xa3,$xa3 + vmovdqu32 $xa3,($out,$inp) + je .Ldone16x + vmovdqa32 $xb3,$xa0 + lea 64($inp),$inp + + cmp \$64*14,$len + jb .Less_than_64_16x + vpxord ($inp),$xb3,$xb3 + vmovdqu32 $xb3,($out,$inp) + je .Ldone16x + vmovdqa32 $xc3,$xa0 + lea 64($inp),$inp + + cmp \$64*15,$len + jb .Less_than_64_16x + vpxord ($inp),$xc3,$xc3 + vmovdqu32 $xc3,($out,$inp) + je .Ldone16x + vmovdqa32 $xd3,$xa0 + lea 64($inp),$inp + +.Less_than_64_16x: + vmovdqa32 $xa0,0x00(%rsp) + lea ($out,$inp),$out + and \$63,$len + +.Loop_tail16x: + movzb ($inp,%r9),%eax + movzb (%rsp,%r9),%ecx + lea 1(%r9),%r9 + xor %ecx,%eax + mov %al,-1($out,%r9) + dec $len + jnz .Loop_tail16x + + vpxord $xa0,$xa0,$xa0 + vmovdqa32 $xa0,0(%rsp) + +.Ldone16x: + vzeroall +___ +$code.=<<___ if ($win64); + movaps -0xb0(%r10),%xmm6 + movaps -0xa0(%r10),%xmm7 + movaps -0x90(%r10),%xmm8 + movaps -0x80(%r10),%xmm9 + movaps -0x70(%r10),%xmm10 + movaps -0x60(%r10),%xmm11 + movaps -0x50(%r10),%xmm12 + movaps -0x40(%r10),%xmm13 + movaps -0x30(%r10),%xmm14 + movaps -0x20(%r10),%xmm15 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.L16x_epilogue: + ret +.cfi_endproc +.size chacha20_16x,.-chacha20_16x +___ + +# switch to %ymm domain +($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3)=map("%ymm$_",(0..15)); +@xx=($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, + $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3); +@key=map("%ymm$_",(16..31)); +($xt0,$xt1,$xt2,$xt3)=@key[0..3]; + +$code.=<<___; +.type chacha20_8xvl,\@function,5 +.align 32 +chacha20_8xvl: +.cfi_startproc +.Lchacha20_8xvl: + lea 8(%rsp),%r10 # frame register +.cfi_def_cfa_register %r10 + sub \$64+$xframe,%rsp + and \$-64,%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,-0xb0(%r10) + movaps %xmm7,-0xa0(%r10) + movaps %xmm8,-0x90(%r10) + movaps %xmm9,-0x80(%r10) + movaps %xmm10,-0x70(%r10) + movaps %xmm11,-0x60(%r10) + movaps %xmm12,-0x50(%r10) + movaps %xmm13,-0x40(%r10) + movaps %xmm14,-0x30(%r10) + movaps %xmm15,-0x20(%r10) +.L8xvl_body: +___ +$code.=<<___; + vzeroupper + + lea .Lsigma(%rip),%r9 + vbroadcasti128 (%r9),$xa3 # key[0] + vbroadcasti128 ($key),$xb3 # key[1] + vbroadcasti128 16($key),$xc3 # key[2] + vbroadcasti128 ($counter),$xd3 # key[3] + + vpshufd \$0x00,$xa3,$xa0 # smash key by lanes... + vpshufd \$0x55,$xa3,$xa1 + vpshufd \$0xaa,$xa3,$xa2 + vpshufd \$0xff,$xa3,$xa3 + vmovdqa64 $xa0,@key[0] + vmovdqa64 $xa1,@key[1] + vmovdqa64 $xa2,@key[2] + vmovdqa64 $xa3,@key[3] + + vpshufd \$0x00,$xb3,$xb0 + vpshufd \$0x55,$xb3,$xb1 + vpshufd \$0xaa,$xb3,$xb2 + vpshufd \$0xff,$xb3,$xb3 + vmovdqa64 $xb0,@key[4] + vmovdqa64 $xb1,@key[5] + vmovdqa64 $xb2,@key[6] + vmovdqa64 $xb3,@key[7] + + vpshufd \$0x00,$xc3,$xc0 + vpshufd \$0x55,$xc3,$xc1 + vpshufd \$0xaa,$xc3,$xc2 + vpshufd \$0xff,$xc3,$xc3 + vmovdqa64 $xc0,@key[8] + vmovdqa64 $xc1,@key[9] + vmovdqa64 $xc2,@key[10] + vmovdqa64 $xc3,@key[11] + + vpshufd \$0x00,$xd3,$xd0 + vpshufd \$0x55,$xd3,$xd1 + vpshufd \$0xaa,$xd3,$xd2 + vpshufd \$0xff,$xd3,$xd3 + vpaddd .Lincy(%rip),$xd0,$xd0 # don't save counters yet + vmovdqa64 $xd0,@key[12] + vmovdqa64 $xd1,@key[13] + vmovdqa64 $xd2,@key[14] + vmovdqa64 $xd3,@key[15] + + mov \$10,%eax + jmp .Loop8xvl + +.align 32 +.Loop_outer8xvl: + #vpbroadcastd 0(%r9),$xa0 # reload key + #vpbroadcastd 4(%r9),$xa1 + vpbroadcastd 8(%r9),$xa2 + vpbroadcastd 12(%r9),$xa3 + vpaddd .Leight(%rip),@key[12],@key[12] # next SIMD counters + vmovdqa64 @key[4],$xb0 + vmovdqa64 @key[5],$xb1 + vmovdqa64 @key[6],$xb2 + vmovdqa64 @key[7],$xb3 + vmovdqa64 @key[8],$xc0 + vmovdqa64 @key[9],$xc1 + vmovdqa64 @key[10],$xc2 + vmovdqa64 @key[11],$xc3 + vmovdqa64 @key[12],$xd0 + vmovdqa64 @key[13],$xd1 + vmovdqa64 @key[14],$xd2 + vmovdqa64 @key[15],$xd3 + + vmovdqa64 $xa0,@key[0] + vmovdqa64 $xa1,@key[1] + vmovdqa64 $xa2,@key[2] + vmovdqa64 $xa3,@key[3] + + mov \$10,%eax + jmp .Loop8xvl + +.align 32 +.Loop8xvl: +___ + foreach (&AVX512_lane_ROUND(0, 4, 8,12)) { eval; } + foreach (&AVX512_lane_ROUND(0, 5,10,15)) { eval; } +$code.=<<___; + dec %eax + jnz .Loop8xvl + + vpaddd @key[0],$xa0,$xa0 # accumulate key + vpaddd @key[1],$xa1,$xa1 + vpaddd @key[2],$xa2,$xa2 + vpaddd @key[3],$xa3,$xa3 + + vpunpckldq $xa1,$xa0,$xt2 # "de-interlace" data + vpunpckldq $xa3,$xa2,$xt3 + vpunpckhdq $xa1,$xa0,$xa0 + vpunpckhdq $xa3,$xa2,$xa2 + vpunpcklqdq $xt3,$xt2,$xa1 # "a0" + vpunpckhqdq $xt3,$xt2,$xt2 # "a1" + vpunpcklqdq $xa2,$xa0,$xa3 # "a2" + vpunpckhqdq $xa2,$xa0,$xa0 # "a3" +___ + ($xa0,$xa1,$xa2,$xa3,$xt2)=($xa1,$xt2,$xa3,$xa0,$xa2); +$code.=<<___; + vpaddd @key[4],$xb0,$xb0 + vpaddd @key[5],$xb1,$xb1 + vpaddd @key[6],$xb2,$xb2 + vpaddd @key[7],$xb3,$xb3 + + vpunpckldq $xb1,$xb0,$xt2 + vpunpckldq $xb3,$xb2,$xt3 + vpunpckhdq $xb1,$xb0,$xb0 + vpunpckhdq $xb3,$xb2,$xb2 + vpunpcklqdq $xt3,$xt2,$xb1 # "b0" + vpunpckhqdq $xt3,$xt2,$xt2 # "b1" + vpunpcklqdq $xb2,$xb0,$xb3 # "b2" + vpunpckhqdq $xb2,$xb0,$xb0 # "b3" +___ + ($xb0,$xb1,$xb2,$xb3,$xt2)=($xb1,$xt2,$xb3,$xb0,$xb2); +$code.=<<___; + vshufi32x4 \$0,$xb0,$xa0,$xt3 # "de-interlace" further + vshufi32x4 \$3,$xb0,$xa0,$xb0 + vshufi32x4 \$0,$xb1,$xa1,$xa0 + vshufi32x4 \$3,$xb1,$xa1,$xb1 + vshufi32x4 \$0,$xb2,$xa2,$xa1 + vshufi32x4 \$3,$xb2,$xa2,$xb2 + vshufi32x4 \$0,$xb3,$xa3,$xa2 + vshufi32x4 \$3,$xb3,$xa3,$xb3 +___ + ($xa0,$xa1,$xa2,$xa3,$xt3)=($xt3,$xa0,$xa1,$xa2,$xa3); +$code.=<<___; + vpaddd @key[8],$xc0,$xc0 + vpaddd @key[9],$xc1,$xc1 + vpaddd @key[10],$xc2,$xc2 + vpaddd @key[11],$xc3,$xc3 + + vpunpckldq $xc1,$xc0,$xt2 + vpunpckldq $xc3,$xc2,$xt3 + vpunpckhdq $xc1,$xc0,$xc0 + vpunpckhdq $xc3,$xc2,$xc2 + vpunpcklqdq $xt3,$xt2,$xc1 # "c0" + vpunpckhqdq $xt3,$xt2,$xt2 # "c1" + vpunpcklqdq $xc2,$xc0,$xc3 # "c2" + vpunpckhqdq $xc2,$xc0,$xc0 # "c3" +___ + ($xc0,$xc1,$xc2,$xc3,$xt2)=($xc1,$xt2,$xc3,$xc0,$xc2); +$code.=<<___; + vpaddd @key[12],$xd0,$xd0 + vpaddd @key[13],$xd1,$xd1 + vpaddd @key[14],$xd2,$xd2 + vpaddd @key[15],$xd3,$xd3 + + vpunpckldq $xd1,$xd0,$xt2 + vpunpckldq $xd3,$xd2,$xt3 + vpunpckhdq $xd1,$xd0,$xd0 + vpunpckhdq $xd3,$xd2,$xd2 + vpunpcklqdq $xt3,$xt2,$xd1 # "d0" + vpunpckhqdq $xt3,$xt2,$xt2 # "d1" + vpunpcklqdq $xd2,$xd0,$xd3 # "d2" + vpunpckhqdq $xd2,$xd0,$xd0 # "d3" +___ + ($xd0,$xd1,$xd2,$xd3,$xt2)=($xd1,$xt2,$xd3,$xd0,$xd2); +$code.=<<___; + vperm2i128 \$0x20,$xd0,$xc0,$xt3 # "de-interlace" further + vperm2i128 \$0x31,$xd0,$xc0,$xd0 + vperm2i128 \$0x20,$xd1,$xc1,$xc0 + vperm2i128 \$0x31,$xd1,$xc1,$xd1 + vperm2i128 \$0x20,$xd2,$xc2,$xc1 + vperm2i128 \$0x31,$xd2,$xc2,$xd2 + vperm2i128 \$0x20,$xd3,$xc3,$xc2 + vperm2i128 \$0x31,$xd3,$xc3,$xd3 +___ + ($xc0,$xc1,$xc2,$xc3,$xt3)=($xt3,$xc0,$xc1,$xc2,$xc3); + ($xb0,$xb1,$xb2,$xb3,$xc0,$xc1,$xc2,$xc3)= + ($xc0,$xc1,$xc2,$xc3,$xb0,$xb1,$xb2,$xb3); +$code.=<<___; + cmp \$64*8,$len + jb .Ltail8xvl + + mov \$0x80,%eax # size optimization + vpxord 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vpxor 0x40($inp),$xc0,$xc0 + vpxor 0x60($inp),$xd0,$xd0 + lea ($inp,%rax),$inp # size optimization + vmovdqu32 $xa0,0x00($out) + vmovdqu $xb0,0x20($out) + vmovdqu $xc0,0x40($out) + vmovdqu $xd0,0x60($out) + lea ($out,%rax),$out # size optimization + + vpxor 0x00($inp),$xa1,$xa1 + vpxor 0x20($inp),$xb1,$xb1 + vpxor 0x40($inp),$xc1,$xc1 + vpxor 0x60($inp),$xd1,$xd1 + lea ($inp,%rax),$inp # size optimization + vmovdqu $xa1,0x00($out) + vmovdqu $xb1,0x20($out) + vmovdqu $xc1,0x40($out) + vmovdqu $xd1,0x60($out) + lea ($out,%rax),$out # size optimization + + vpxord 0x00($inp),$xa2,$xa2 + vpxor 0x20($inp),$xb2,$xb2 + vpxor 0x40($inp),$xc2,$xc2 + vpxor 0x60($inp),$xd2,$xd2 + lea ($inp,%rax),$inp # size optimization + vmovdqu32 $xa2,0x00($out) + vmovdqu $xb2,0x20($out) + vmovdqu $xc2,0x40($out) + vmovdqu $xd2,0x60($out) + lea ($out,%rax),$out # size optimization + + vpxor 0x00($inp),$xa3,$xa3 + vpxor 0x20($inp),$xb3,$xb3 + vpxor 0x40($inp),$xc3,$xc3 + vpxor 0x60($inp),$xd3,$xd3 + lea ($inp,%rax),$inp # size optimization + vmovdqu $xa3,0x00($out) + vmovdqu $xb3,0x20($out) + vmovdqu $xc3,0x40($out) + vmovdqu $xd3,0x60($out) + lea ($out,%rax),$out # size optimization + + vpbroadcastd 0(%r9),%ymm0 # reload key + vpbroadcastd 4(%r9),%ymm1 + + sub \$64*8,$len + jnz .Loop_outer8xvl + + jmp .Ldone8xvl + +.align 32 +.Ltail8xvl: + vmovdqa64 $xa0,%ymm8 # size optimization +___ +$xa0 = "%ymm8"; +$code.=<<___; + xor %r9,%r9 + sub $inp,$out + cmp \$64*1,$len + jb .Less_than_64_8xvl + vpxor 0x00($inp),$xa0,$xa0 # xor with input + vpxor 0x20($inp),$xb0,$xb0 + vmovdqu $xa0,0x00($out,$inp) + vmovdqu $xb0,0x20($out,$inp) + je .Ldone8xvl + vmovdqa $xc0,$xa0 + vmovdqa $xd0,$xb0 + lea 64($inp),$inp + + cmp \$64*2,$len + jb .Less_than_64_8xvl + vpxor 0x00($inp),$xc0,$xc0 + vpxor 0x20($inp),$xd0,$xd0 + vmovdqu $xc0,0x00($out,$inp) + vmovdqu $xd0,0x20($out,$inp) + je .Ldone8xvl + vmovdqa $xa1,$xa0 + vmovdqa $xb1,$xb0 + lea 64($inp),$inp + + cmp \$64*3,$len + jb .Less_than_64_8xvl + vpxor 0x00($inp),$xa1,$xa1 + vpxor 0x20($inp),$xb1,$xb1 + vmovdqu $xa1,0x00($out,$inp) + vmovdqu $xb1,0x20($out,$inp) + je .Ldone8xvl + vmovdqa $xc1,$xa0 + vmovdqa $xd1,$xb0 + lea 64($inp),$inp + + cmp \$64*4,$len + jb .Less_than_64_8xvl + vpxor 0x00($inp),$xc1,$xc1 + vpxor 0x20($inp),$xd1,$xd1 + vmovdqu $xc1,0x00($out,$inp) + vmovdqu $xd1,0x20($out,$inp) + je .Ldone8xvl + vmovdqa32 $xa2,$xa0 + vmovdqa $xb2,$xb0 + lea 64($inp),$inp + + cmp \$64*5,$len + jb .Less_than_64_8xvl + vpxord 0x00($inp),$xa2,$xa2 + vpxor 0x20($inp),$xb2,$xb2 + vmovdqu32 $xa2,0x00($out,$inp) + vmovdqu $xb2,0x20($out,$inp) + je .Ldone8xvl + vmovdqa $xc2,$xa0 + vmovdqa $xd2,$xb0 + lea 64($inp),$inp + + cmp \$64*6,$len + jb .Less_than_64_8xvl + vpxor 0x00($inp),$xc2,$xc2 + vpxor 0x20($inp),$xd2,$xd2 + vmovdqu $xc2,0x00($out,$inp) + vmovdqu $xd2,0x20($out,$inp) + je .Ldone8xvl + vmovdqa $xa3,$xa0 + vmovdqa $xb3,$xb0 + lea 64($inp),$inp + + cmp \$64*7,$len + jb .Less_than_64_8xvl + vpxor 0x00($inp),$xa3,$xa3 + vpxor 0x20($inp),$xb3,$xb3 + vmovdqu $xa3,0x00($out,$inp) + vmovdqu $xb3,0x20($out,$inp) + je .Ldone8xvl + vmovdqa $xc3,$xa0 + vmovdqa $xd3,$xb0 + lea 64($inp),$inp + +.Less_than_64_8xvl: + vmovdqa $xa0,0x00(%rsp) + vmovdqa $xb0,0x20(%rsp) + lea ($out,$inp),$out + and \$63,$len + +.Loop_tail8xvl: + movzb ($inp,%r9),%eax + movzb (%rsp,%r9),%ecx + lea 1(%r9),%r9 + xor %ecx,%eax + mov %al,-1($out,%r9) + dec $len + jnz .Loop_tail8xvl + + vpxor $xa0,$xa0,$xa0 + vmovdqa $xa0,0x00(%rsp) + vmovdqa $xa0,0x20(%rsp) + +.Ldone8xvl: + vzeroall +___ +$code.=<<___ if ($win64); + movaps -0xb0(%r10),%xmm6 + movaps -0xa0(%r10),%xmm7 + movaps -0x90(%r10),%xmm8 + movaps -0x80(%r10),%xmm9 + movaps -0x70(%r10),%xmm10 + movaps -0x60(%r10),%xmm11 + movaps -0x50(%r10),%xmm12 + movaps -0x40(%r10),%xmm13 + movaps -0x30(%r10),%xmm14 + movaps -0x20(%r10),%xmm15 +___ +$code.=<<___; + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +.L8xvl_epilogue: + ret +.cfi_endproc +.size chacha20_8xvl,.-chacha20_8xvl +___ +if($kernel) { + $code .= "#endif\n"; +} +} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + lea .Lctr32_body(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + lea .Lno_data(%rip),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lcommon_seh_tail + + lea 64+24+48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R14 + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.type simd_handler,\@abi-omnipotent +.align 16 +simd_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipR9 + + mov 4(%r11),%r10d # HandlerData[1] + mov 8(%r11),%ecx # HandlerData[2] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + neg %rcx + lea -8(%rax,%rcx),%rsi + lea 512($context),%rdi # &context.Xmm6 + neg %ecx + shr \$3,%ecx + .long 0xa548f3fc # cld; rep movsq + + jmp .Lcommon_seh_tail +.size simd_handler,.-simd_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_chacha20_ctr32 + .rva .LSEH_end_chacha20_ctr32 + .rva .LSEH_info_chacha20_ctr32 + + .rva .LSEH_begin_chacha20_ssse3 + .rva .LSEH_end_chacha20_ssse3 + .rva .LSEH_info_chacha20_ssse3 + + .rva .LSEH_begin_chacha20_128 + .rva .LSEH_end_chacha20_128 + .rva .LSEH_info_chacha20_128 + + .rva .LSEH_begin_chacha20_4x + .rva .LSEH_end_chacha20_4x + .rva .LSEH_info_chacha20_4x +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_chacha20_xop + .rva .LSEH_end_chacha20_xop + .rva .LSEH_info_chacha20_xop +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_chacha20_avx2 + .rva .LSEH_end_chacha20_avx2 + .rva .LSEH_info_chacha20_avx2 +___ +$code.=<<___ if ($avx>2); + .rva .LSEH_begin_chacha20_avx512 + .rva .LSEH_end_chacha20_avx512 + .rva .LSEH_info_chacha20_avx512 + + .rva .LSEH_begin_chacha20_avx512vl + .rva .LSEH_end_chacha20_avx512vl + .rva .LSEH_info_chacha20_avx512vl + + .rva .LSEH_begin_chacha20_16x + .rva .LSEH_end_chacha20_16x + .rva .LSEH_info_chacha20_16x + + .rva .LSEH_begin_chacha20_8xvl + .rva .LSEH_end_chacha20_8xvl + .rva .LSEH_info_chacha20_8xvl +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_chacha20_ctr32: + .byte 9,0,0,0 + .rva se_handler + +.LSEH_info_chacha20_ssse3: + .byte 9,0,0,0 + .rva simd_handler + .rva .Lssse3_body,.Lssse3_epilogue + .long 0x20,0 + +.LSEH_info_chacha20_128: + .byte 9,0,0,0 + .rva simd_handler + .rva .L128_body,.L128_epilogue + .long 0x60,0 + +.LSEH_info_chacha20_4x: + .byte 9,0,0,0 + .rva simd_handler + .rva .L4x_body,.L4x_epilogue + .long 0xa0,0 +___ +$code.=<<___ if ($avx); +.LSEH_info_chacha20_xop: + .byte 9,0,0,0 + .rva simd_handler + .rva .L4xop_body,.L4xop_epilogue # HandlerData[] + .long 0xa0,0 +___ +$code.=<<___ if ($avx>1); +.LSEH_info_chacha20_avx2: + .byte 9,0,0,0 + .rva simd_handler + .rva .L8x_body,.L8x_epilogue # HandlerData[] + .long 0xa0,0 +___ +$code.=<<___ if ($avx>2); +.LSEH_info_chacha20_avx512: + .byte 9,0,0,0 + .rva simd_handler + .rva .Lavx512_body,.Lavx512_epilogue # HandlerData[] + .long 0x20,0 + +.LSEH_info_chacha20_avx512vl: + .byte 9,0,0,0 + .rva simd_handler + .rva .Lavx512vl_body,.Lavx512vl_epilogue # HandlerData[] + .long 0x20,0 + +.LSEH_info_chacha20_16x: + .byte 9,0,0,0 + .rva simd_handler + .rva .L16x_body,.L16x_epilogue # HandlerData[] + .long 0xa0,0 + +.LSEH_info_chacha20_8xvl: + .byte 9,0,0,0 + .rva simd_handler + .rva .L8xvl_body,.L8xvl_epilogue # HandlerData[] + .long 0xa0,0 +___ +} + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/%x#%[yz]/%x/g; # "down-shift" + + if ($kernel) { + s/(^\.type.*),[0-9]+$/\1/; + next if /^\.cfi.*/; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/net/wireguard/crypto/zinc/chacha20/chacha20.c b/net/wireguard/crypto/zinc/chacha20/chacha20.c new file mode 100644 index 000000000000..f4ca8b09d01e --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20/chacha20.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Implementation of the ChaCha20 stream cipher. + * + * Information: https://cr.yp.to/chacha.html + */ + +#include +#include "../selftest/run.h" + +#include +#include +#include +#include +#include // For crypto_xor_cpy. + +#if defined(CONFIG_ZINC_ARCH_X86_64) +#include "chacha20-x86_64-glue.c" +#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64) +#include "chacha20-arm-glue.c" +#elif defined(CONFIG_ZINC_ARCH_MIPS) +#include "chacha20-mips-glue.c" +#else +static bool *const chacha20_nobs[] __initconst = { }; +static void __init chacha20_fpu_init(void) +{ +} +static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst, + const u8 *src, size_t len, + simd_context_t *simd_context) +{ + return false; +} +static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], + simd_context_t *simd_context) +{ + return false; +} +#endif + +#define QUARTER_ROUND(x, a, b, c, d) ( \ + x[a] += x[b], \ + x[d] = rol32((x[d] ^ x[a]), 16), \ + x[c] += x[d], \ + x[b] = rol32((x[b] ^ x[c]), 12), \ + x[a] += x[b], \ + x[d] = rol32((x[d] ^ x[a]), 8), \ + x[c] += x[d], \ + x[b] = rol32((x[b] ^ x[c]), 7) \ +) + +#define C(i, j) (i * 4 + j) + +#define DOUBLE_ROUND(x) ( \ + /* Column Round */ \ + QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \ + QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \ + QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \ + QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \ + /* Diagonal Round */ \ + QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \ + QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \ + QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \ + QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \ +) + +#define TWENTY_ROUNDS(x) ( \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x) \ +) + +static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream) +{ + u32 x[CHACHA20_BLOCK_WORDS]; + int i; + + for (i = 0; i < ARRAY_SIZE(x); ++i) + x[i] = ctx->state[i]; + + TWENTY_ROUNDS(x); + + for (i = 0; i < ARRAY_SIZE(x); ++i) + stream[i] = cpu_to_le32(x[i] + ctx->state[i]); + + ctx->counter[0] += 1; +} + +static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in, + u32 len) +{ + __le32 buf[CHACHA20_BLOCK_WORDS]; + + while (len >= CHACHA20_BLOCK_SIZE) { + chacha20_block_generic(ctx, buf); + crypto_xor_cpy(out, in, (u8 *)buf, CHACHA20_BLOCK_SIZE); + len -= CHACHA20_BLOCK_SIZE; + out += CHACHA20_BLOCK_SIZE; + in += CHACHA20_BLOCK_SIZE; + } + if (len) { + chacha20_block_generic(ctx, buf); + crypto_xor_cpy(out, in, (u8 *)buf, len); + } +} + +void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len, + simd_context_t *simd_context) +{ + if (!chacha20_arch(ctx, dst, src, len, simd_context)) + chacha20_generic(ctx, dst, src, len); +} + +static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE]) +{ + u32 x[] = { CHACHA20_CONSTANT_EXPA, + CHACHA20_CONSTANT_ND_3, + CHACHA20_CONSTANT_2_BY, + CHACHA20_CONSTANT_TE_K, + get_unaligned_le32(key + 0), + get_unaligned_le32(key + 4), + get_unaligned_le32(key + 8), + get_unaligned_le32(key + 12), + get_unaligned_le32(key + 16), + get_unaligned_le32(key + 20), + get_unaligned_le32(key + 24), + get_unaligned_le32(key + 28), + get_unaligned_le32(nonce + 0), + get_unaligned_le32(nonce + 4), + get_unaligned_le32(nonce + 8), + get_unaligned_le32(nonce + 12) + }; + + TWENTY_ROUNDS(x); + + memcpy(derived_key + 0, x + 0, sizeof(u32) * 4); + memcpy(derived_key + 4, x + 12, sizeof(u32) * 4); +} + +/* Derived key should be 32-bit aligned */ +void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context) +{ + if (!hchacha20_arch(derived_key, nonce, key, simd_context)) + hchacha20_generic(derived_key, nonce, key); +} + +#include "../selftest/chacha20.c" + +static bool nosimd __initdata = false; + +#ifndef COMPAT_ZINC_IS_A_MODULE +int __init chacha20_mod_init(void) +#else +static int __init mod_init(void) +#endif +{ + if (!nosimd) + chacha20_fpu_init(); + if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs, + ARRAY_SIZE(chacha20_nobs))) + return -ENOTRECOVERABLE; + return 0; +} + +#ifdef COMPAT_ZINC_IS_A_MODULE +static void __exit mod_exit(void) +{ +} + +module_param(nosimd, bool, 0); +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ChaCha20 stream cipher"); +MODULE_AUTHOR("Jason A. Donenfeld "); +#endif diff --git a/net/wireguard/crypto/zinc/chacha20poly1305.c b/net/wireguard/crypto/zinc/chacha20poly1305.c new file mode 100644 index 000000000000..cee29db01bc0 --- /dev/null +++ b/net/wireguard/crypto/zinc/chacha20poly1305.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is an implementation of the ChaCha20Poly1305 AEAD construction. + * + * Information: https://tools.ietf.org/html/rfc8439 + */ + +#include +#include +#include +#include "selftest/run.h" + +#include +#include +#include +#include +#include // For blkcipher_walk. + +static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 }; + +static inline void +__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + union { + u8 block0[POLY1305_KEY_SIZE]; + __le64 lens[2]; + } b = { { 0 } }; + + chacha20_init(&chacha20_state, key, nonce); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); + poly1305_init(&poly1305_state, b.block0); + + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); + + chacha20(&chacha20_state, dst, src, src_len, simd_context); + + poly1305_update(&poly1305_state, dst, src_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, + simd_context); + + b.lens[0] = cpu_to_le64(ad_len); + b.lens[1] = cpu_to_le64(src_len); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); + + poly1305_final(&poly1305_state, dst + src_len, simd_context); + + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + memzero_explicit(&b, sizeof(b)); +} + +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE]) +{ + simd_context_t simd_context; + + simd_get(&simd_context); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, + &simd_context); + simd_put(&simd_context); +} + +bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, + const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + struct sg_mapping_iter miter; + size_t partial = 0; + ssize_t sl; + union { + u8 chacha20_stream[CHACHA20_BLOCK_SIZE]; + u8 block0[POLY1305_KEY_SIZE]; + u8 mac[POLY1305_MAC_SIZE]; + __le64 lens[2]; + } b __aligned(16) = { { 0 } }; + + if (WARN_ON(src_len > INT_MAX)) + return false; + + chacha20_init(&chacha20_state, key, nonce); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); + poly1305_init(&poly1305_state, b.block0); + + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); + + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC); + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { + u8 *addr = miter.addr; + size_t length = min_t(size_t, sl, miter.length); + + if (unlikely(partial)) { + size_t l = min(length, CHACHA20_BLOCK_SIZE - partial); + + crypto_xor(addr, b.chacha20_stream + partial, l); + partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1); + + addr += l; + length -= l; + } + + if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) { + size_t l = length; + + if (unlikely(length < sl)) + l &= ~(CHACHA20_BLOCK_SIZE - 1); + chacha20(&chacha20_state, addr, addr, l, simd_context); + addr += l; + length -= l; + } + + if (unlikely(length > 0)) { + chacha20(&chacha20_state, b.chacha20_stream, pad0, + CHACHA20_BLOCK_SIZE, simd_context); + crypto_xor(addr, b.chacha20_stream, length); + partial = length; + } + + poly1305_update(&poly1305_state, miter.addr, + min_t(size_t, sl, miter.length), simd_context); + + simd_relax(simd_context); + } + + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, + simd_context); + + b.lens[0] = cpu_to_le64(ad_len); + b.lens[1] = cpu_to_le64(src_len); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); + + if (likely(sl <= -POLY1305_MAC_SIZE)) + poly1305_final(&poly1305_state, miter.addr + miter.length + sl, + simd_context); + + sg_miter_stop(&miter); + + if (unlikely(sl > -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.mac, simd_context); + scatterwalk_map_and_copy(b.mac, src, src_len, sizeof(b.mac), 1); + } + + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + memzero_explicit(&b, sizeof(b)); + return true; +} + +static inline bool +__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + int ret; + size_t dst_len; + union { + u8 block0[POLY1305_KEY_SIZE]; + u8 mac[POLY1305_MAC_SIZE]; + __le64 lens[2]; + } b = { { 0 } }; + + if (unlikely(src_len < POLY1305_MAC_SIZE)) + return false; + + chacha20_init(&chacha20_state, key, nonce); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); + poly1305_init(&poly1305_state, b.block0); + + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); + + dst_len = src_len - POLY1305_MAC_SIZE; + poly1305_update(&poly1305_state, src, dst_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, + simd_context); + + b.lens[0] = cpu_to_le64(ad_len); + b.lens[1] = cpu_to_le64(dst_len); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); + + poly1305_final(&poly1305_state, b.mac, simd_context); + + ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE); + if (likely(!ret)) + chacha20(&chacha20_state, dst, src, dst_len, simd_context); + + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + memzero_explicit(&b, sizeof(b)); + + return !ret; +} + +bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE]) +{ + simd_context_t simd_context, ret; + + simd_get(&simd_context); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, + key, &simd_context); + simd_put(&simd_context); + return ret; +} + +bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, + size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + struct sg_mapping_iter miter; + size_t partial = 0; + ssize_t sl; + union { + u8 chacha20_stream[CHACHA20_BLOCK_SIZE]; + u8 block0[POLY1305_KEY_SIZE]; + struct { + u8 read_mac[POLY1305_MAC_SIZE]; + u8 computed_mac[POLY1305_MAC_SIZE]; + }; + __le64 lens[2]; + } b __aligned(16) = { { 0 } }; + bool ret = false; + + if (unlikely(src_len < POLY1305_MAC_SIZE || WARN_ON(src_len > INT_MAX))) + return ret; + src_len -= POLY1305_MAC_SIZE; + + chacha20_init(&chacha20_state, key, nonce); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); + poly1305_init(&poly1305_state, b.block0); + + poly1305_update(&poly1305_state, ad, ad_len, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); + + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC); + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { + u8 *addr = miter.addr; + size_t length = min_t(size_t, sl, miter.length); + + poly1305_update(&poly1305_state, addr, length, simd_context); + + if (unlikely(partial)) { + size_t l = min(length, CHACHA20_BLOCK_SIZE - partial); + + crypto_xor(addr, b.chacha20_stream + partial, l); + partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1); + + addr += l; + length -= l; + } + + if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) { + size_t l = length; + + if (unlikely(length < sl)) + l &= ~(CHACHA20_BLOCK_SIZE - 1); + chacha20(&chacha20_state, addr, addr, l, simd_context); + addr += l; + length -= l; + } + + if (unlikely(length > 0)) { + chacha20(&chacha20_state, b.chacha20_stream, pad0, + CHACHA20_BLOCK_SIZE, simd_context); + crypto_xor(addr, b.chacha20_stream, length); + partial = length; + } + + simd_relax(simd_context); + } + + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, + simd_context); + + b.lens[0] = cpu_to_le64(ad_len); + b.lens[1] = cpu_to_le64(src_len); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); + + if (likely(sl <= -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.computed_mac, simd_context); + ret = !crypto_memneq(b.computed_mac, + miter.addr + miter.length + sl, + POLY1305_MAC_SIZE); + } + + sg_miter_stop(&miter); + + if (unlikely(sl > -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.computed_mac, simd_context); + scatterwalk_map_and_copy(b.read_mac, src, src_len, + sizeof(b.read_mac), 0); + ret = !crypto_memneq(b.read_mac, b.computed_mac, + POLY1305_MAC_SIZE); + + } + + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + memzero_explicit(&b, sizeof(b)); + return ret; +} + +void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], + const u8 key[CHACHA20POLY1305_KEY_SIZE]) +{ + simd_context_t simd_context; + u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16); + + simd_get(&simd_context); + hchacha20(derived_key, nonce, key, &simd_context); + cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key)); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, + get_unaligned_le64(nonce + 16), + (u8 *)derived_key, &simd_context); + memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE); + simd_put(&simd_context); +} + +bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], + const u8 key[CHACHA20POLY1305_KEY_SIZE]) +{ + bool ret; + simd_context_t simd_context; + u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16); + + simd_get(&simd_context); + hchacha20(derived_key, nonce, key, &simd_context); + cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key)); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, + get_unaligned_le64(nonce + 16), + (u8 *)derived_key, &simd_context); + memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE); + simd_put(&simd_context); + return ret; +} + +#include "selftest/chacha20poly1305.c" + +#ifndef COMPAT_ZINC_IS_A_MODULE +int __init chacha20poly1305_mod_init(void) +#else +static int __init mod_init(void) +#endif +{ + if (!selftest_run("chacha20poly1305", chacha20poly1305_selftest, + NULL, 0)) + return -ENOTRECOVERABLE; + return 0; +} + +#ifdef COMPAT_ZINC_IS_A_MODULE +static void __exit mod_exit(void) +{ +} + +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction"); +MODULE_AUTHOR("Jason A. Donenfeld "); +#endif diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-arm-glue.c b/net/wireguard/crypto/zinc/curve25519/curve25519-arm-glue.c new file mode 100644 index 000000000000..e0c5a5d297c0 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-arm-glue.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include + +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); + +static bool curve25519_use_neon __ro_after_init; +static bool *const curve25519_nobs[] __initconst = { &curve25519_use_neon }; +static void __init curve25519_fpu_init(void) +{ + curve25519_use_neon = elf_hwcap & HWCAP_NEON; +} + +static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + simd_context_t simd_context; + bool used_arch = false; + + simd_get(&simd_context); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + !IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && curve25519_use_neon && + simd_use(&simd_context)) { + curve25519_neon(mypublic, secret, basepoint); + used_arch = true; + } + simd_put(&simd_context); + return used_arch; +} + +static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + return false; +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-arm.S b/net/wireguard/crypto/zinc/curve25519/curve25519-arm.S new file mode 100644 index 000000000000..8eca8a11ef28 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-arm.S @@ -0,0 +1,2064 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#if defined(CONFIG_KERNEL_MODE_NEON) && !defined(__ARMEB__) +#include + +.text +.fpu neon +.arch armv7-a +.align 4 + +SYM_FUNC_START(curve25519_neon) + push {r4-r11, lr} + mov ip, sp + sub r3, sp, #704 + and r3, r3, #0xfffffff0 + mov sp, r3 + movw r4, #0 + movw r5, #254 + vmov.i32 q0, #1 + vshr.u64 q1, q0, #7 + vshr.u64 q0, q0, #8 + vmov.i32 d4, #19 + vmov.i32 d5, #38 + add r6, sp, #480 + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128] + add r6, r3, #0 + vmov.i32 q2, #0 + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 d4, [r6, : 64] + add r6, r3, #0 + movw r7, #960 + sub r7, r7, #2 + neg r7, r7 + sub r7, r7, r7, LSL #7 + str r7, [r6] + add r6, sp, #672 + vld1.8 {d4-d5}, [r1]! + vld1.8 {d6-d7}, [r1] + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d6-d7}, [r6, : 128] + sub r1, r6, #16 + ldrb r6, [r1] + and r6, r6, #248 + strb r6, [r1] + ldrb r6, [r1, #31] + and r6, r6, #127 + orr r6, r6, #64 + strb r6, [r1, #31] + vmov.i64 q2, #0xffffffff + vshr.u64 q3, q2, #7 + vshr.u64 q2, q2, #6 + vld1.8 {d8}, [r2] + vld1.8 {d10}, [r2] + add r2, r2, #6 + vld1.8 {d12}, [r2] + vld1.8 {d14}, [r2] + add r2, r2, #6 + vld1.8 {d16}, [r2] + add r2, r2, #4 + vld1.8 {d18}, [r2] + vld1.8 {d20}, [r2] + add r2, r2, #6 + vld1.8 {d22}, [r2] + add r2, r2, #2 + vld1.8 {d24}, [r2] + vld1.8 {d26}, [r2] + vshr.u64 q5, q5, #26 + vshr.u64 q6, q6, #3 + vshr.u64 q7, q7, #29 + vshr.u64 q8, q8, #6 + vshr.u64 q10, q10, #25 + vshr.u64 q11, q11, #3 + vshr.u64 q12, q12, #12 + vshr.u64 q13, q13, #38 + vand q4, q4, q2 + vand q6, q6, q2 + vand q8, q8, q2 + vand q10, q10, q2 + vand q2, q12, q2 + vand q5, q5, q3 + vand q7, q7, q3 + vand q9, q9, q3 + vand q11, q11, q3 + vand q3, q13, q3 + add r2, r3, #48 + vadd.i64 q12, q4, q1 + vadd.i64 q13, q10, q1 + vshr.s64 q12, q12, #26 + vshr.s64 q13, q13, #26 + vadd.i64 q5, q5, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q14, q5, q0 + vadd.i64 q11, q11, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q11, q0 + vsub.i64 q4, q4, q12 + vshr.s64 q12, q14, #25 + vsub.i64 q10, q10, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q14, q6, q1 + vadd.i64 q2, q2, q13 + vsub.i64 q5, q5, q12 + vshr.s64 q12, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q1 + vadd.i64 q7, q7, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q15, q7, q0 + vsub.i64 q11, q11, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q12 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q3, q0 + vadd.i64 q8, q8, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q15, q8, q1 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q7, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q9, q9, q12 + vtrn.32 d12, d14 + vshl.i64 q12, q12, #26 + vtrn.32 d13, d15 + vadd.i64 q0, q9, q0 + vadd.i64 q4, q4, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q6, q13, #4 + vsub.i64 q7, q8, q12 + vshr.s64 q0, q0, #25 + vadd.i64 q4, q4, q6 + vadd.i64 q6, q10, q0 + vshl.i64 q0, q0, #25 + vadd.i64 q8, q6, q1 + vadd.i64 q4, q4, q13 + vshl.i64 q10, q13, #25 + vadd.i64 q1, q4, q1 + vsub.i64 q0, q9, q0 + vshr.s64 q8, q8, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d14, d0 + vshr.s64 q1, q1, #26 + vtrn.32 d15, d1 + vadd.i64 q0, q11, q8 + vst1.8 d14, [r2, : 64] + vshl.i64 q7, q8, #26 + vadd.i64 q5, q5, q1 + vtrn.32 d4, d6 + vshl.i64 q1, q1, #26 + vtrn.32 d5, d7 + vsub.i64 q3, q6, q7 + add r2, r2, #16 + vsub.i64 q1, q4, q1 + vst1.8 d4, [r2, : 64] + vtrn.32 d6, d0 + vtrn.32 d7, d1 + sub r2, r2, #8 + vtrn.32 d2, d10 + vtrn.32 d3, d11 + vst1.8 d6, [r2, : 64] + sub r2, r2, #24 + vst1.8 d2, [r2, : 64] + add r2, r3, #96 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #144 + vmov.i32 q0, #0 + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #240 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #48 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d4, [r6, : 64] +.Lmainloop: + mov r2, r5, LSR #3 + and r6, r5, #7 + ldrb r2, [r1, r2] + mov r2, r2, LSR r6 + and r2, r2, #1 + str r5, [sp, #456] + eor r4, r4, r2 + str r2, [sp, #460] + neg r2, r4 + add r4, r3, #96 + add r5, r3, #192 + add r6, r3, #144 + vld1.8 {d8-d9}, [r4, : 128]! + add r7, r3, #240 + vld1.8 {d10-d11}, [r5, : 128]! + veor q6, q4, q5 + vld1.8 {d14-d15}, [r6, : 128]! + vdup.i32 q8, r2 + vld1.8 {d18-d19}, [r7, : 128]! + veor q10, q7, q9 + vld1.8 {d22-d23}, [r4, : 128]! + vand q6, q6, q8 + vld1.8 {d24-d25}, [r5, : 128]! + vand q10, q10, q8 + vld1.8 {d26-d27}, [r6, : 128]! + veor q4, q4, q6 + vld1.8 {d28-d29}, [r7, : 128]! + veor q5, q5, q6 + vld1.8 {d0}, [r4, : 64] + veor q6, q7, q10 + vld1.8 {d2}, [r5, : 64] + veor q7, q9, q10 + vld1.8 {d4}, [r6, : 64] + veor q9, q11, q12 + vld1.8 {d6}, [r7, : 64] + veor q10, q0, q1 + sub r2, r4, #32 + vand q9, q9, q8 + sub r4, r5, #32 + vand q10, q10, q8 + sub r5, r6, #32 + veor q11, q11, q9 + sub r6, r7, #32 + veor q0, q0, q10 + veor q9, q12, q9 + veor q1, q1, q10 + veor q10, q13, q14 + veor q12, q2, q3 + vand q10, q10, q8 + vand q8, q12, q8 + veor q12, q13, q10 + veor q2, q2, q8 + veor q10, q14, q10 + veor q3, q3, q8 + vadd.i32 q8, q4, q6 + vsub.i32 q4, q4, q6 + vst1.8 {d16-d17}, [r2, : 128]! + vadd.i32 q6, q11, q12 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q4, q11, q12 + vst1.8 {d12-d13}, [r2, : 128]! + vadd.i32 q6, q0, q2 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q0, q0, q2 + vst1.8 d12, [r2, : 64] + vadd.i32 q2, q5, q7 + vst1.8 d0, [r5, : 64] + vsub.i32 q0, q5, q7 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q9, q10 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q9, q10 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q1, q3 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q1, q3 + vst1.8 d4, [r4, : 64] + vst1.8 d0, [r6, : 64] + add r2, sp, #512 + add r4, r3, #96 + add r5, r3, #144 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #288 + vshl.i64 q12, q12, #25 + add r4, r3, #336 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #240 + add r4, r3, #96 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #144 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #192 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #144 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, r3, #288 + add r4, r3, #336 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vsub.i32 q1, q1, q2 + add r5, r3, #240 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vsub.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #144 + add r4, r3, #96 + add r5, r3, #144 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q2, q0, q1 + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d6-d7}, [r4, : 128]! + vsub.i32 q4, q1, q3 + vadd.i32 q1, q1, q3 + vld1.8 {d6}, [r2, : 64] + vld1.8 {d10}, [r4, : 64] + vsub.i32 q6, q3, q5 + vadd.i32 q3, q3, q5 + vst1.8 {d4-d5}, [r5, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d8-d9}, [r5, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d12, [r5, : 64] + vst1.8 d6, [r6, : 64] + add r2, r3, #0 + add r4, r3, #240 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #336 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #288 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #288 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, sp, #512 + add r4, r3, #144 + add r5, r3, #192 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #144 + vshl.i64 q12, q12, #25 + add r4, r3, #192 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #336 + add r4, r3, #288 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q1, q1, q2 + add r5, r3, #288 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vadd.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #48 + add r4, r3, #144 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #288 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #240 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #240 + vshl.i64 q7, q7, #25 + add r4, r3, #144 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + ldr r2, [sp, #456] + ldr r4, [sp, #460] + subs r5, r2, #1 + bge .Lmainloop + add r1, r3, #144 + add r2, r3, #336 + vld1.8 {d0-d1}, [r1, : 128]! + vld1.8 {d2-d3}, [r1, : 128]! + vld1.8 {d4}, [r1, : 64] + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 d4, [r2, : 64] + movw r1, #0 +.Linvertloop: + add r2, r3, #144 + movw r4, #0 + movw r5, #2 + cmp r1, #1 + moveq r5, #1 + addeq r2, r3, #336 + addeq r4, r3, #48 + cmp r1, #2 + moveq r5, #1 + addeq r2, r3, #48 + cmp r1, #3 + moveq r5, #5 + addeq r4, r3, #336 + cmp r1, #4 + moveq r5, #10 + cmp r1, #5 + moveq r5, #20 + cmp r1, #6 + moveq r5, #10 + addeq r2, r3, #336 + addeq r4, r3, #336 + cmp r1, #7 + moveq r5, #50 + cmp r1, #8 + moveq r5, #100 + cmp r1, #9 + moveq r5, #50 + addeq r2, r3, #336 + cmp r1, #10 + moveq r5, #5 + addeq r2, r3, #48 + cmp r1, #11 + moveq r5, #0 + addeq r2, r3, #96 + add r6, r3, #144 + add r7, r3, #288 + vld1.8 {d0-d1}, [r6, : 128]! + vld1.8 {d2-d3}, [r6, : 128]! + vld1.8 {d4}, [r6, : 64] + vst1.8 {d0-d1}, [r7, : 128]! + vst1.8 {d2-d3}, [r7, : 128]! + vst1.8 d4, [r7, : 64] + cmp r5, #0 + beq .Lskipsquaringloop +.Lsquaringloop: + add r6, r3, #288 + add r7, r3, #288 + add r8, r3, #288 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r7, : 128]! + vld1.8 {d6-d7}, [r7, : 128]! + vld1.8 {d9}, [r7, : 64] + vld1.8 {d10-d11}, [r6, : 128]! + add r7, sp, #384 + vld1.8 {d12-d13}, [r6, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r6, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r7, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r7, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r7, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r7, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r7, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r6, r7, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r6, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r6, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r6, r6, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r6, : 64] + sub r6, r6, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r7, sp, #480 + vld1.8 {d14-d15}, [r7, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r6, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r6, : 64]! + add r6, sp, #496 + vld1.8 {d20-d21}, [r6, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r6, r8, #8 + vst1.8 d0, [r6, : 64] + vsub.i64 d19, d19, d9 + add r6, r6, #16 + vst1.8 d16, [r6, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r6, r6, #8 + vst1.8 d6, [r6, : 64] + add r6, r6, #16 + vst1.8 d18, [r6, : 64] + vzip.i32 q2, q5 + sub r6, r6, #32 + vst1.8 d4, [r6, : 64] + subs r5, r5, #1 + bhi .Lsquaringloop +.Lskipsquaringloop: + mov r2, r2 + add r5, r3, #288 + add r6, r3, #144 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r5, : 128]! + vld1.8 {d6-d7}, [r5, : 128]! + vld1.8 {d9}, [r5, : 64] + vld1.8 {d10-d11}, [r2, : 128]! + add r5, sp, #384 + vld1.8 {d12-d13}, [r2, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r2, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r5, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r5, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r5, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r5, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r5, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r2, r5, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r2, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r2, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r2, r2, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r2, : 64] + sub r2, r2, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r5, sp, #480 + vld1.8 {d14-d15}, [r5, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r2, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r2, : 64]! + add r2, sp, #496 + vld1.8 {d20-d21}, [r2, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r2, r6, #8 + vst1.8 d0, [r2, : 64] + vsub.i64 d19, d19, d9 + add r2, r2, #16 + vst1.8 d16, [r2, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r2, r2, #8 + vst1.8 d6, [r2, : 64] + add r2, r2, #16 + vst1.8 d18, [r2, : 64] + vzip.i32 q2, q5 + sub r2, r2, #32 + vst1.8 d4, [r2, : 64] + cmp r4, #0 + beq .Lskippostcopy + add r2, r3, #144 + mov r4, r4 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskippostcopy: + cmp r1, #1 + bne .Lskipfinalcopy + add r2, r3, #288 + add r4, r3, #144 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskipfinalcopy: + add r1, r1, #1 + cmp r1, #12 + blo .Linvertloop + add r1, r3, #144 + ldr r2, [r1], #4 + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + ldr r1, [r1] + add r11, r1, r1, LSL #4 + add r11, r11, r1, LSL #1 + add r11, r11, #16777216 + mov r11, r11, ASR #25 + add r11, r11, r2 + mov r11, r11, ASR #26 + add r11, r11, r3 + mov r11, r11, ASR #25 + add r11, r11, r4 + mov r11, r11, ASR #26 + add r11, r11, r5 + mov r11, r11, ASR #25 + add r11, r11, r6 + mov r11, r11, ASR #26 + add r11, r11, r7 + mov r11, r11, ASR #25 + add r11, r11, r8 + mov r11, r11, ASR #26 + add r11, r11, r9 + mov r11, r11, ASR #25 + add r11, r11, r10 + mov r11, r11, ASR #26 + add r11, r11, r1 + mov r11, r11, ASR #25 + add r2, r2, r11 + add r2, r2, r11, LSL #1 + add r2, r2, r11, LSL #4 + mov r11, r2, ASR #26 + add r3, r3, r11 + sub r2, r2, r11, LSL #26 + mov r11, r3, ASR #25 + add r4, r4, r11 + sub r3, r3, r11, LSL #25 + mov r11, r4, ASR #26 + add r5, r5, r11 + sub r4, r4, r11, LSL #26 + mov r11, r5, ASR #25 + add r6, r6, r11 + sub r5, r5, r11, LSL #25 + mov r11, r6, ASR #26 + add r7, r7, r11 + sub r6, r6, r11, LSL #26 + mov r11, r7, ASR #25 + add r8, r8, r11 + sub r7, r7, r11, LSL #25 + mov r11, r8, ASR #26 + add r9, r9, r11 + sub r8, r8, r11, LSL #26 + mov r11, r9, ASR #25 + add r10, r10, r11 + sub r9, r9, r11, LSL #25 + mov r11, r10, ASR #26 + add r1, r1, r11 + sub r10, r10, r11, LSL #26 + mov r11, r1, ASR #25 + sub r1, r1, r11, LSL #25 + add r2, r2, r3, LSL #26 + mov r3, r3, LSR #6 + add r3, r3, r4, LSL #19 + mov r4, r4, LSR #13 + add r4, r4, r5, LSL #13 + mov r5, r5, LSR #19 + add r5, r5, r6, LSL #6 + add r6, r7, r8, LSL #25 + mov r7, r8, LSR #7 + add r7, r7, r9, LSL #19 + mov r8, r9, LSR #13 + add r8, r8, r10, LSL #12 + mov r9, r10, LSR #20 + add r1, r9, r1, LSL #6 + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r1, [r0, #28] + movw r0, #0 + mov sp, ip + pop {r4-r11, pc} +SYM_FUNC_END(curve25519_neon) +#endif diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-fiat32.c b/net/wireguard/crypto/zinc/curve25519/curve25519-fiat32.c new file mode 100644 index 000000000000..42cfb6c00f98 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-fiat32.c @@ -0,0 +1,860 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2016 The fiat-crypto Authors. + * Copyright (C) 2018-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is a machine-generated formally verified implementation of Curve25519 + * ECDH from: . Though originally + * machine generated, it has been tweaked to be suitable for use in the kernel. + * It is optimized for 32-bit machines and machines that cannot work efficiently + * with 128-bit integer types. + */ + +/* fe means field element. Here the field is \Z/(2^255-19). An element t, + * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 + * t[3]+2^102 t[4]+...+2^230 t[9]. + * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc. + * Multiplication and carrying produce fe from fe_loose. + */ +typedef struct fe { u32 v[10]; } fe; + +/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc + * Addition and subtraction produce fe_loose from (fe, fe). + */ +typedef struct fe_loose { u32 v[10]; } fe_loose; + +static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s) +{ + /* Ignores top bit of s. */ + u32 a0 = get_unaligned_le32(s); + u32 a1 = get_unaligned_le32(s+4); + u32 a2 = get_unaligned_le32(s+8); + u32 a3 = get_unaligned_le32(s+12); + u32 a4 = get_unaligned_le32(s+16); + u32 a5 = get_unaligned_le32(s+20); + u32 a6 = get_unaligned_le32(s+24); + u32 a7 = get_unaligned_le32(s+28); + h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */ + h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */ + h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */ + h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */ + h[4] = (a3>> 6); /* (32- 6) = 26 */ + h[5] = a4&((1<<25)-1); /* 25 */ + h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */ + h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */ + h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */ + h[9] = (a7>> 6)&((1<<25)-1); /* 25 */ +} + +static __always_inline void fe_frombytes(fe *h, const u8 *s) +{ + fe_frombytes_impl(h->v, s); +} + +static __always_inline u8 /*bool*/ +addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low) +{ + /* This function extracts 25 bits of result and 1 bit of carry + * (26 total), so a 32-bit intermediate is sufficient. + */ + u32 x = a + b + c; + *low = x & ((1 << 25) - 1); + return (x >> 25) & 1; +} + +static __always_inline u8 /*bool*/ +addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low) +{ + /* This function extracts 26 bits of result and 1 bit of carry + * (27 total), so a 32-bit intermediate is sufficient. + */ + u32 x = a + b + c; + *low = x & ((1 << 26) - 1); + return (x >> 26) & 1; +} + +static __always_inline u8 /*bool*/ +subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low) +{ + /* This function extracts 25 bits of result and 1 bit of borrow + * (26 total), so a 32-bit intermediate is sufficient. + */ + u32 x = a - b - c; + *low = x & ((1 << 25) - 1); + return x >> 31; +} + +static __always_inline u8 /*bool*/ +subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low) +{ + /* This function extracts 26 bits of result and 1 bit of borrow + *(27 total), so a 32-bit intermediate is sufficient. + */ + u32 x = a - b - c; + *low = x & ((1 << 26) - 1); + return x >> 31; +} + +static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz) +{ + t = -!!t; /* all set if nonzero, 0 if 0 */ + return (t&nz) | ((~t)&z); +} + +static __always_inline void fe_freeze(u32 out[10], const u32 in1[10]) +{ + { const u32 x17 = in1[9]; + { const u32 x18 = in1[8]; + { const u32 x16 = in1[7]; + { const u32 x14 = in1[6]; + { const u32 x12 = in1[5]; + { const u32 x10 = in1[4]; + { const u32 x8 = in1[3]; + { const u32 x6 = in1[2]; + { const u32 x4 = in1[1]; + { const u32 x2 = in1[0]; + { u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); + { u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23); + { u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26); + { u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29); + { u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32); + { u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35); + { u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38); + { u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41); + { u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44); + { u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47); + { u32 x49 = cmovznz32(x48, 0x0, 0xffffffff); + { u32 x50 = (x49 & 0x3ffffed); + { u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52); + { u32 x54 = (x49 & 0x1ffffff); + { u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56); + { u32 x58 = (x49 & 0x3ffffff); + { u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60); + { u32 x62 = (x49 & 0x1ffffff); + { u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64); + { u32 x66 = (x49 & 0x3ffffff); + { u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68); + { u32 x70 = (x49 & 0x1ffffff); + { u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72); + { u32 x74 = (x49 & 0x3ffffff); + { u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76); + { u32 x78 = (x49 & 0x1ffffff); + { u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80); + { u32 x82 = (x49 & 0x3ffffff); + { u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84); + { u32 x86 = (x49 & 0x1ffffff); + { u32 x88; addcarryx_u25(x85, x47, x86, &x88); + out[0] = x52; + out[1] = x56; + out[2] = x60; + out[3] = x64; + out[4] = x68; + out[5] = x72; + out[6] = x76; + out[7] = x80; + out[8] = x84; + out[9] = x88; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} + +static __always_inline void fe_tobytes(u8 s[32], const fe *f) +{ + u32 h[10]; + fe_freeze(h, f->v); + s[0] = h[0] >> 0; + s[1] = h[0] >> 8; + s[2] = h[0] >> 16; + s[3] = (h[0] >> 24) | (h[1] << 2); + s[4] = h[1] >> 6; + s[5] = h[1] >> 14; + s[6] = (h[1] >> 22) | (h[2] << 3); + s[7] = h[2] >> 5; + s[8] = h[2] >> 13; + s[9] = (h[2] >> 21) | (h[3] << 5); + s[10] = h[3] >> 3; + s[11] = h[3] >> 11; + s[12] = (h[3] >> 19) | (h[4] << 6); + s[13] = h[4] >> 2; + s[14] = h[4] >> 10; + s[15] = h[4] >> 18; + s[16] = h[5] >> 0; + s[17] = h[5] >> 8; + s[18] = h[5] >> 16; + s[19] = (h[5] >> 24) | (h[6] << 1); + s[20] = h[6] >> 7; + s[21] = h[6] >> 15; + s[22] = (h[6] >> 23) | (h[7] << 3); + s[23] = h[7] >> 5; + s[24] = h[7] >> 13; + s[25] = (h[7] >> 21) | (h[8] << 4); + s[26] = h[8] >> 4; + s[27] = h[8] >> 12; + s[28] = (h[8] >> 20) | (h[9] << 6); + s[29] = h[9] >> 2; + s[30] = h[9] >> 10; + s[31] = h[9] >> 18; +} + +/* h = f */ +static __always_inline void fe_copy(fe *h, const fe *f) +{ + memmove(h, f, sizeof(u32) * 10); +} + +static __always_inline void fe_copy_lt(fe_loose *h, const fe *f) +{ + memmove(h, f, sizeof(u32) * 10); +} + +/* h = 0 */ +static __always_inline void fe_0(fe *h) +{ + memset(h, 0, sizeof(u32) * 10); +} + +/* h = 1 */ +static __always_inline void fe_1(fe *h) +{ + memset(h, 0, sizeof(u32) * 10); + h->v[0] = 1; +} + +static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) +{ + { const u32 x20 = in1[9]; + { const u32 x21 = in1[8]; + { const u32 x19 = in1[7]; + { const u32 x17 = in1[6]; + { const u32 x15 = in1[5]; + { const u32 x13 = in1[4]; + { const u32 x11 = in1[3]; + { const u32 x9 = in1[2]; + { const u32 x7 = in1[1]; + { const u32 x5 = in1[0]; + { const u32 x38 = in2[9]; + { const u32 x39 = in2[8]; + { const u32 x37 = in2[7]; + { const u32 x35 = in2[6]; + { const u32 x33 = in2[5]; + { const u32 x31 = in2[4]; + { const u32 x29 = in2[3]; + { const u32 x27 = in2[2]; + { const u32 x25 = in2[1]; + { const u32 x23 = in2[0]; + out[0] = (x5 + x23); + out[1] = (x7 + x25); + out[2] = (x9 + x27); + out[3] = (x11 + x29); + out[4] = (x13 + x31); + out[5] = (x15 + x33); + out[6] = (x17 + x35); + out[7] = (x19 + x37); + out[8] = (x21 + x39); + out[9] = (x20 + x38); + }}}}}}}}}}}}}}}}}}}} +} + +/* h = f + g + * Can overlap h with f or g. + */ +static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) +{ + fe_add_impl(h->v, f->v, g->v); +} + +static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) +{ + { const u32 x20 = in1[9]; + { const u32 x21 = in1[8]; + { const u32 x19 = in1[7]; + { const u32 x17 = in1[6]; + { const u32 x15 = in1[5]; + { const u32 x13 = in1[4]; + { const u32 x11 = in1[3]; + { const u32 x9 = in1[2]; + { const u32 x7 = in1[1]; + { const u32 x5 = in1[0]; + { const u32 x38 = in2[9]; + { const u32 x39 = in2[8]; + { const u32 x37 = in2[7]; + { const u32 x35 = in2[6]; + { const u32 x33 = in2[5]; + { const u32 x31 = in2[4]; + { const u32 x29 = in2[3]; + { const u32 x27 = in2[2]; + { const u32 x25 = in2[1]; + { const u32 x23 = in2[0]; + out[0] = ((0x7ffffda + x5) - x23); + out[1] = ((0x3fffffe + x7) - x25); + out[2] = ((0x7fffffe + x9) - x27); + out[3] = ((0x3fffffe + x11) - x29); + out[4] = ((0x7fffffe + x13) - x31); + out[5] = ((0x3fffffe + x15) - x33); + out[6] = ((0x7fffffe + x17) - x35); + out[7] = ((0x3fffffe + x19) - x37); + out[8] = ((0x7fffffe + x21) - x39); + out[9] = ((0x3fffffe + x20) - x38); + }}}}}}}}}}}}}}}}}}}} +} + +/* h = f - g + * Can overlap h with f or g. + */ +static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) +{ + fe_sub_impl(h->v, f->v, g->v); +} + +static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) +{ + { const u32 x20 = in1[9]; + { const u32 x21 = in1[8]; + { const u32 x19 = in1[7]; + { const u32 x17 = in1[6]; + { const u32 x15 = in1[5]; + { const u32 x13 = in1[4]; + { const u32 x11 = in1[3]; + { const u32 x9 = in1[2]; + { const u32 x7 = in1[1]; + { const u32 x5 = in1[0]; + { const u32 x38 = in2[9]; + { const u32 x39 = in2[8]; + { const u32 x37 = in2[7]; + { const u32 x35 = in2[6]; + { const u32 x33 = in2[5]; + { const u32 x31 = in2[4]; + { const u32 x29 = in2[3]; + { const u32 x27 = in2[2]; + { const u32 x25 = in2[1]; + { const u32 x23 = in2[0]; + { u64 x40 = ((u64)x23 * x5); + { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5)); + { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5)); + { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5)); + { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5)); + { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5)); + { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5)); + { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5)); + { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5)); + { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5)); + { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9)); + { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9)); + { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13)); + { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13)); + { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17)); + { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17)); + { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19)))); + { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21)); + { u64 x58 = ((u64)(0x2 * x38) * x20); + { u64 x59 = (x48 + (x58 << 0x4)); + { u64 x60 = (x59 + (x58 << 0x1)); + { u64 x61 = (x60 + x58); + { u64 x62 = (x47 + (x57 << 0x4)); + { u64 x63 = (x62 + (x57 << 0x1)); + { u64 x64 = (x63 + x57); + { u64 x65 = (x46 + (x56 << 0x4)); + { u64 x66 = (x65 + (x56 << 0x1)); + { u64 x67 = (x66 + x56); + { u64 x68 = (x45 + (x55 << 0x4)); + { u64 x69 = (x68 + (x55 << 0x1)); + { u64 x70 = (x69 + x55); + { u64 x71 = (x44 + (x54 << 0x4)); + { u64 x72 = (x71 + (x54 << 0x1)); + { u64 x73 = (x72 + x54); + { u64 x74 = (x43 + (x53 << 0x4)); + { u64 x75 = (x74 + (x53 << 0x1)); + { u64 x76 = (x75 + x53); + { u64 x77 = (x42 + (x52 << 0x4)); + { u64 x78 = (x77 + (x52 << 0x1)); + { u64 x79 = (x78 + x52); + { u64 x80 = (x41 + (x51 << 0x4)); + { u64 x81 = (x80 + (x51 << 0x1)); + { u64 x82 = (x81 + x51); + { u64 x83 = (x40 + (x50 << 0x4)); + { u64 x84 = (x83 + (x50 << 0x1)); + { u64 x85 = (x84 + x50); + { u64 x86 = (x85 >> 0x1a); + { u32 x87 = ((u32)x85 & 0x3ffffff); + { u64 x88 = (x86 + x82); + { u64 x89 = (x88 >> 0x19); + { u32 x90 = ((u32)x88 & 0x1ffffff); + { u64 x91 = (x89 + x79); + { u64 x92 = (x91 >> 0x1a); + { u32 x93 = ((u32)x91 & 0x3ffffff); + { u64 x94 = (x92 + x76); + { u64 x95 = (x94 >> 0x19); + { u32 x96 = ((u32)x94 & 0x1ffffff); + { u64 x97 = (x95 + x73); + { u64 x98 = (x97 >> 0x1a); + { u32 x99 = ((u32)x97 & 0x3ffffff); + { u64 x100 = (x98 + x70); + { u64 x101 = (x100 >> 0x19); + { u32 x102 = ((u32)x100 & 0x1ffffff); + { u64 x103 = (x101 + x67); + { u64 x104 = (x103 >> 0x1a); + { u32 x105 = ((u32)x103 & 0x3ffffff); + { u64 x106 = (x104 + x64); + { u64 x107 = (x106 >> 0x19); + { u32 x108 = ((u32)x106 & 0x1ffffff); + { u64 x109 = (x107 + x61); + { u64 x110 = (x109 >> 0x1a); + { u32 x111 = ((u32)x109 & 0x3ffffff); + { u64 x112 = (x110 + x49); + { u64 x113 = (x112 >> 0x19); + { u32 x114 = ((u32)x112 & 0x1ffffff); + { u64 x115 = (x87 + (0x13 * x113)); + { u32 x116 = (u32) (x115 >> 0x1a); + { u32 x117 = ((u32)x115 & 0x3ffffff); + { u32 x118 = (x116 + x90); + { u32 x119 = (x118 >> 0x19); + { u32 x120 = (x118 & 0x1ffffff); + out[0] = x117; + out[1] = x120; + out[2] = (x119 + x93); + out[3] = x96; + out[4] = x99; + out[5] = x102; + out[6] = x105; + out[7] = x108; + out[8] = x111; + out[9] = x114; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} + +static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g) +{ + fe_mul_impl(h->v, f->v, g->v); +} + +static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) +{ + fe_mul_impl(h->v, f->v, g->v); +} + +static __always_inline void +fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) +{ + fe_mul_impl(h->v, f->v, g->v); +} + +static void fe_sqr_impl(u32 out[10], const u32 in1[10]) +{ + { const u32 x17 = in1[9]; + { const u32 x18 = in1[8]; + { const u32 x16 = in1[7]; + { const u32 x14 = in1[6]; + { const u32 x12 = in1[5]; + { const u32 x10 = in1[4]; + { const u32 x8 = in1[3]; + { const u32 x6 = in1[2]; + { const u32 x4 = in1[1]; + { const u32 x2 = in1[0]; + { u64 x19 = ((u64)x2 * x2); + { u64 x20 = ((u64)(0x2 * x2) * x4); + { u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6))); + { u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8))); + { u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10)); + { u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12))); + { u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12))); + { u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16))); + { u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12)))))); + { u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17))); + { u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17))))); + { u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17))); + { u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17)))))); + { u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17))); + { u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17))); + { u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17))); + { u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17)); + { u64 x36 = ((u64)(0x2 * x18) * x17); + { u64 x37 = ((u64)(0x2 * x17) * x17); + { u64 x38 = (x27 + (x37 << 0x4)); + { u64 x39 = (x38 + (x37 << 0x1)); + { u64 x40 = (x39 + x37); + { u64 x41 = (x26 + (x36 << 0x4)); + { u64 x42 = (x41 + (x36 << 0x1)); + { u64 x43 = (x42 + x36); + { u64 x44 = (x25 + (x35 << 0x4)); + { u64 x45 = (x44 + (x35 << 0x1)); + { u64 x46 = (x45 + x35); + { u64 x47 = (x24 + (x34 << 0x4)); + { u64 x48 = (x47 + (x34 << 0x1)); + { u64 x49 = (x48 + x34); + { u64 x50 = (x23 + (x33 << 0x4)); + { u64 x51 = (x50 + (x33 << 0x1)); + { u64 x52 = (x51 + x33); + { u64 x53 = (x22 + (x32 << 0x4)); + { u64 x54 = (x53 + (x32 << 0x1)); + { u64 x55 = (x54 + x32); + { u64 x56 = (x21 + (x31 << 0x4)); + { u64 x57 = (x56 + (x31 << 0x1)); + { u64 x58 = (x57 + x31); + { u64 x59 = (x20 + (x30 << 0x4)); + { u64 x60 = (x59 + (x30 << 0x1)); + { u64 x61 = (x60 + x30); + { u64 x62 = (x19 + (x29 << 0x4)); + { u64 x63 = (x62 + (x29 << 0x1)); + { u64 x64 = (x63 + x29); + { u64 x65 = (x64 >> 0x1a); + { u32 x66 = ((u32)x64 & 0x3ffffff); + { u64 x67 = (x65 + x61); + { u64 x68 = (x67 >> 0x19); + { u32 x69 = ((u32)x67 & 0x1ffffff); + { u64 x70 = (x68 + x58); + { u64 x71 = (x70 >> 0x1a); + { u32 x72 = ((u32)x70 & 0x3ffffff); + { u64 x73 = (x71 + x55); + { u64 x74 = (x73 >> 0x19); + { u32 x75 = ((u32)x73 & 0x1ffffff); + { u64 x76 = (x74 + x52); + { u64 x77 = (x76 >> 0x1a); + { u32 x78 = ((u32)x76 & 0x3ffffff); + { u64 x79 = (x77 + x49); + { u64 x80 = (x79 >> 0x19); + { u32 x81 = ((u32)x79 & 0x1ffffff); + { u64 x82 = (x80 + x46); + { u64 x83 = (x82 >> 0x1a); + { u32 x84 = ((u32)x82 & 0x3ffffff); + { u64 x85 = (x83 + x43); + { u64 x86 = (x85 >> 0x19); + { u32 x87 = ((u32)x85 & 0x1ffffff); + { u64 x88 = (x86 + x40); + { u64 x89 = (x88 >> 0x1a); + { u32 x90 = ((u32)x88 & 0x3ffffff); + { u64 x91 = (x89 + x28); + { u64 x92 = (x91 >> 0x19); + { u32 x93 = ((u32)x91 & 0x1ffffff); + { u64 x94 = (x66 + (0x13 * x92)); + { u32 x95 = (u32) (x94 >> 0x1a); + { u32 x96 = ((u32)x94 & 0x3ffffff); + { u32 x97 = (x95 + x69); + { u32 x98 = (x97 >> 0x19); + { u32 x99 = (x97 & 0x1ffffff); + out[0] = x96; + out[1] = x99; + out[2] = (x98 + x72); + out[3] = x75; + out[4] = x78; + out[5] = x81; + out[6] = x84; + out[7] = x87; + out[8] = x90; + out[9] = x93; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} + +static __always_inline void fe_sq_tl(fe *h, const fe_loose *f) +{ + fe_sqr_impl(h->v, f->v); +} + +static __always_inline void fe_sq_tt(fe *h, const fe *f) +{ + fe_sqr_impl(h->v, f->v); +} + +static __always_inline void fe_loose_invert(fe *out, const fe_loose *z) +{ + fe t0; + fe t1; + fe t2; + fe t3; + int i; + + fe_sq_tl(&t0, z); + fe_sq_tt(&t1, &t0); + for (i = 1; i < 2; ++i) + fe_sq_tt(&t1, &t1); + fe_mul_tlt(&t1, z, &t1); + fe_mul_ttt(&t0, &t0, &t1); + fe_sq_tt(&t2, &t0); + fe_mul_ttt(&t1, &t1, &t2); + fe_sq_tt(&t2, &t1); + for (i = 1; i < 5; ++i) + fe_sq_tt(&t2, &t2); + fe_mul_ttt(&t1, &t2, &t1); + fe_sq_tt(&t2, &t1); + for (i = 1; i < 10; ++i) + fe_sq_tt(&t2, &t2); + fe_mul_ttt(&t2, &t2, &t1); + fe_sq_tt(&t3, &t2); + for (i = 1; i < 20; ++i) + fe_sq_tt(&t3, &t3); + fe_mul_ttt(&t2, &t3, &t2); + fe_sq_tt(&t2, &t2); + for (i = 1; i < 10; ++i) + fe_sq_tt(&t2, &t2); + fe_mul_ttt(&t1, &t2, &t1); + fe_sq_tt(&t2, &t1); + for (i = 1; i < 50; ++i) + fe_sq_tt(&t2, &t2); + fe_mul_ttt(&t2, &t2, &t1); + fe_sq_tt(&t3, &t2); + for (i = 1; i < 100; ++i) + fe_sq_tt(&t3, &t3); + fe_mul_ttt(&t2, &t3, &t2); + fe_sq_tt(&t2, &t2); + for (i = 1; i < 50; ++i) + fe_sq_tt(&t2, &t2); + fe_mul_ttt(&t1, &t2, &t1); + fe_sq_tt(&t1, &t1); + for (i = 1; i < 5; ++i) + fe_sq_tt(&t1, &t1); + fe_mul_ttt(out, &t1, &t0); +} + +static __always_inline void fe_invert(fe *out, const fe *z) +{ + fe_loose l; + fe_copy_lt(&l, z); + fe_loose_invert(out, &l); +} + +/* Replace (f,g) with (g,f) if b == 1; + * replace (f,g) with (f,g) if b == 0. + * + * Preconditions: b in {0,1} + */ +static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b) +{ + unsigned i; + b = 0 - b; + for (i = 0; i < 10; i++) { + u32 x = f->v[i] ^ g->v[i]; + x &= b; + f->v[i] ^= x; + g->v[i] ^= x; + } +} + +/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/ +static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10]) +{ + { const u32 x20 = in1[9]; + { const u32 x21 = in1[8]; + { const u32 x19 = in1[7]; + { const u32 x17 = in1[6]; + { const u32 x15 = in1[5]; + { const u32 x13 = in1[4]; + { const u32 x11 = in1[3]; + { const u32 x9 = in1[2]; + { const u32 x7 = in1[1]; + { const u32 x5 = in1[0]; + { const u32 x38 = 0; + { const u32 x39 = 0; + { const u32 x37 = 0; + { const u32 x35 = 0; + { const u32 x33 = 0; + { const u32 x31 = 0; + { const u32 x29 = 0; + { const u32 x27 = 0; + { const u32 x25 = 0; + { const u32 x23 = 121666; + { u64 x40 = ((u64)x23 * x5); + { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5)); + { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5)); + { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5)); + { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5)); + { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5)); + { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5)); + { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5)); + { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5)); + { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5)); + { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9)); + { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9)); + { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13)); + { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13)); + { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17)); + { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17)); + { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19)))); + { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21)); + { u64 x58 = ((u64)(0x2 * x38) * x20); + { u64 x59 = (x48 + (x58 << 0x4)); + { u64 x60 = (x59 + (x58 << 0x1)); + { u64 x61 = (x60 + x58); + { u64 x62 = (x47 + (x57 << 0x4)); + { u64 x63 = (x62 + (x57 << 0x1)); + { u64 x64 = (x63 + x57); + { u64 x65 = (x46 + (x56 << 0x4)); + { u64 x66 = (x65 + (x56 << 0x1)); + { u64 x67 = (x66 + x56); + { u64 x68 = (x45 + (x55 << 0x4)); + { u64 x69 = (x68 + (x55 << 0x1)); + { u64 x70 = (x69 + x55); + { u64 x71 = (x44 + (x54 << 0x4)); + { u64 x72 = (x71 + (x54 << 0x1)); + { u64 x73 = (x72 + x54); + { u64 x74 = (x43 + (x53 << 0x4)); + { u64 x75 = (x74 + (x53 << 0x1)); + { u64 x76 = (x75 + x53); + { u64 x77 = (x42 + (x52 << 0x4)); + { u64 x78 = (x77 + (x52 << 0x1)); + { u64 x79 = (x78 + x52); + { u64 x80 = (x41 + (x51 << 0x4)); + { u64 x81 = (x80 + (x51 << 0x1)); + { u64 x82 = (x81 + x51); + { u64 x83 = (x40 + (x50 << 0x4)); + { u64 x84 = (x83 + (x50 << 0x1)); + { u64 x85 = (x84 + x50); + { u64 x86 = (x85 >> 0x1a); + { u32 x87 = ((u32)x85 & 0x3ffffff); + { u64 x88 = (x86 + x82); + { u64 x89 = (x88 >> 0x19); + { u32 x90 = ((u32)x88 & 0x1ffffff); + { u64 x91 = (x89 + x79); + { u64 x92 = (x91 >> 0x1a); + { u32 x93 = ((u32)x91 & 0x3ffffff); + { u64 x94 = (x92 + x76); + { u64 x95 = (x94 >> 0x19); + { u32 x96 = ((u32)x94 & 0x1ffffff); + { u64 x97 = (x95 + x73); + { u64 x98 = (x97 >> 0x1a); + { u32 x99 = ((u32)x97 & 0x3ffffff); + { u64 x100 = (x98 + x70); + { u64 x101 = (x100 >> 0x19); + { u32 x102 = ((u32)x100 & 0x1ffffff); + { u64 x103 = (x101 + x67); + { u64 x104 = (x103 >> 0x1a); + { u32 x105 = ((u32)x103 & 0x3ffffff); + { u64 x106 = (x104 + x64); + { u64 x107 = (x106 >> 0x19); + { u32 x108 = ((u32)x106 & 0x1ffffff); + { u64 x109 = (x107 + x61); + { u64 x110 = (x109 >> 0x1a); + { u32 x111 = ((u32)x109 & 0x3ffffff); + { u64 x112 = (x110 + x49); + { u64 x113 = (x112 >> 0x19); + { u32 x114 = ((u32)x112 & 0x1ffffff); + { u64 x115 = (x87 + (0x13 * x113)); + { u32 x116 = (u32) (x115 >> 0x1a); + { u32 x117 = ((u32)x115 & 0x3ffffff); + { u32 x118 = (x116 + x90); + { u32 x119 = (x118 >> 0x19); + { u32 x120 = (x118 & 0x1ffffff); + out[0] = x117; + out[1] = x120; + out[2] = (x119 + x93); + out[3] = x96; + out[4] = x99; + out[5] = x102; + out[6] = x105; + out[7] = x108; + out[8] = x111; + out[9] = x114; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} +} + +static __always_inline void fe_mul121666(fe *h, const fe_loose *f) +{ + fe_mul_121666_impl(h->v, f->v); +} + +static void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], + const u8 scalar[CURVE25519_KEY_SIZE], + const u8 point[CURVE25519_KEY_SIZE]) +{ + fe x1, x2, z2, x3, z3; + fe_loose x2l, z2l, x3l; + unsigned swap = 0; + int pos; + u8 e[32]; + + memcpy(e, scalar, 32); + curve25519_clamp_secret(e); + + /* The following implementation was transcribed to Coq and proven to + * correspond to unary scalar multiplication in affine coordinates given + * that x1 != 0 is the x coordinate of some point on the curve. It was + * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives + * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was + * quantified over the underlying field, so it applies to Curve25519 + * itself and the quadratic twist of Curve25519. It was not proven in + * Coq that prime-field arithmetic correctly simulates extension-field + * arithmetic on prime-field values. The decoding of the byte array + * representation of e was not considered. + * + * Specification of Montgomery curves in affine coordinates: + * + * + * Proof that these form a group that is isomorphic to a Weierstrass + * curve: + * + * + * Coq transcription and correctness proof of the loop + * (where scalarbits=255): + * + * + * preconditions: 0 <= e < 2^255 (not necessarily e < order), + * fe_invert(0) = 0 + */ + fe_frombytes(&x1, point); + fe_1(&x2); + fe_0(&z2); + fe_copy(&x3, &x1); + fe_1(&z3); + + for (pos = 254; pos >= 0; --pos) { + fe tmp0, tmp1; + fe_loose tmp0l, tmp1l; + /* loop invariant as of right before the test, for the case + * where x1 != 0: + * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 + * is nonzero + * let r := e >> (pos+1) in the following equalities of + * projective points: + * to_xz (r*P) === if swap then (x3, z3) else (x2, z2) + * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3) + * x1 is the nonzero x coordinate of the nonzero + * point (r*P-(r+1)*P) + */ + unsigned b = 1 & (e[pos / 8] >> (pos & 7)); + swap ^= b; + fe_cswap(&x2, &x3, swap); + fe_cswap(&z2, &z3, swap); + swap = b; + /* Coq transcription of ladderstep formula (called from + * transcribed loop): + * + * + * x1 != 0 + * x1 = 0 + */ + fe_sub(&tmp0l, &x3, &z3); + fe_sub(&tmp1l, &x2, &z2); + fe_add(&x2l, &x2, &z2); + fe_add(&z2l, &x3, &z3); + fe_mul_tll(&z3, &tmp0l, &x2l); + fe_mul_tll(&z2, &z2l, &tmp1l); + fe_sq_tl(&tmp0, &tmp1l); + fe_sq_tl(&tmp1, &x2l); + fe_add(&x3l, &z3, &z2); + fe_sub(&z2l, &z3, &z2); + fe_mul_ttt(&x2, &tmp1, &tmp0); + fe_sub(&tmp1l, &tmp1, &tmp0); + fe_sq_tl(&z2, &z2l); + fe_mul121666(&z3, &tmp1l); + fe_sq_tl(&x3, &x3l); + fe_add(&tmp0l, &tmp0, &z3); + fe_mul_ttt(&z3, &x1, &z2); + fe_mul_tll(&z2, &tmp1l, &tmp0l); + } + /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) + * else (x2, z2) + */ + fe_cswap(&x2, &x3, swap); + fe_cswap(&z2, &z3, swap); + + fe_invert(&z2, &z2); + fe_mul_ttt(&x2, &x2, &z2); + fe_tobytes(out, &x2); + + memzero_explicit(&x1, sizeof(x1)); + memzero_explicit(&x2, sizeof(x2)); + memzero_explicit(&z2, sizeof(z2)); + memzero_explicit(&x3, sizeof(x3)); + memzero_explicit(&z3, sizeof(z3)); + memzero_explicit(&x2l, sizeof(x2l)); + memzero_explicit(&z2l, sizeof(z2l)); + memzero_explicit(&x3l, sizeof(x3l)); + memzero_explicit(&e, sizeof(e)); +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c b/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c new file mode 100644 index 000000000000..d6dcd0ce1892 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2016-2017 INRIA and Microsoft Corporation. + * Copyright (C) 2018-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is a machine-generated formally verified implementation of Curve25519 + * ECDH from: . Though originally machine + * generated, it has been tweaked to be suitable for use in the kernel. It is + * optimized for 64-bit machines that can efficiently work with 128-bit + * integer types. + */ + +typedef __uint128_t u128; + +static __always_inline u64 u64_eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + u64 c = xnx - (u64)1U; + return c; +} + +static __always_inline u64 u64_gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + u64 c = x_xor_q_ - (u64)1U; + return c; +} + +static __always_inline void modulo_carry_top(u64 *b) +{ + u64 b4 = b[4]; + u64 b0 = b[0]; + u64 b4_ = b4 & 0x7ffffffffffffLLU; + u64 b0_ = b0 + 19 * (b4 >> 51); + b[4] = b4_; + b[0] = b0_; +} + +static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input) +{ + { + u128 xi = input[0]; + output[0] = ((u64)(xi)); + } + { + u128 xi = input[1]; + output[1] = ((u64)(xi)); + } + { + u128 xi = input[2]; + output[2] = ((u64)(xi)); + } + { + u128 xi = input[3]; + output[3] = ((u64)(xi)); + } + { + u128 xi = input[4]; + output[4] = ((u64)(xi)); + } +} + +static __always_inline void +fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s) +{ + output[0] += (u128)input[0] * s; + output[1] += (u128)input[1] * s; + output[2] += (u128)input[2] * s; + output[3] += (u128)input[3] * s; + output[4] += (u128)input[4] * s; +} + +static __always_inline void fproduct_carry_wide_(u128 *tmp) +{ + { + u32 ctr = 0; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } + { + u32 ctr = 1; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } + + { + u32 ctr = 2; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } + { + u32 ctr = 3; + u128 tctr = tmp[ctr]; + u128 tctrp1 = tmp[ctr + 1]; + u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; + u128 c = ((tctr) >> (51)); + tmp[ctr] = ((u128)(r0)); + tmp[ctr + 1] = ((tctrp1) + (c)); + } +} + +static __always_inline void fmul_shift_reduce(u64 *output) +{ + u64 tmp = output[4]; + u64 b0; + { + u32 ctr = 5 - 0 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + { + u32 ctr = 5 - 1 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + { + u32 ctr = 5 - 2 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + { + u32 ctr = 5 - 3 - 1; + u64 z = output[ctr - 1]; + output[ctr] = z; + } + output[0] = tmp; + b0 = output[0]; + output[0] = 19 * b0; +} + +static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input, + u64 *input21) +{ + u32 i; + u64 input2i; + { + u64 input2i = input21[0]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + { + u64 input2i = input21[1]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + { + u64 input2i = input21[2]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + { + u64 input2i = input21[3]; + fproduct_sum_scalar_multiplication_(output, input, input2i); + fmul_shift_reduce(input); + } + i = 4; + input2i = input21[i]; + fproduct_sum_scalar_multiplication_(output, input, input2i); +} + +static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21) +{ + u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; + { + u128 b4; + u128 b0; + u128 b4_; + u128 b0_; + u64 i0; + u64 i1; + u64 i0_; + u64 i1_; + u128 t[5] = { 0 }; + fmul_mul_shift_reduce_(t, tmp, input21); + fproduct_carry_wide_(t); + b4 = t[4]; + b0 = t[0]; + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); + t[4] = b4_; + t[0] = b0_; + fproduct_copy_from_wide_(output, t); + i0 = output[0]; + i1 = output[1]; + i0_ = i0 & 0x7ffffffffffffLLU; + i1_ = i1 + (i0 >> 51); + output[0] = i0_; + output[1] = i1_; + } +} + +static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output) +{ + u64 r0 = output[0]; + u64 r1 = output[1]; + u64 r2 = output[2]; + u64 r3 = output[3]; + u64 r4 = output[4]; + u64 d0 = r0 * 2; + u64 d1 = r1 * 2; + u64 d2 = r2 * 2 * 19; + u64 d419 = r4 * 19; + u64 d4 = d419 * 2; + u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) + + (((u128)(d2) * (r3)))); + u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) + + (((u128)(r3 * 19) * (r3)))); + u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) + + (((u128)(d4) * (r3)))); + u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) + + (((u128)(r4) * (d419)))); + u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) + + (((u128)(r2) * (r2)))); + tmp[0] = s0; + tmp[1] = s1; + tmp[2] = s2; + tmp[3] = s3; + tmp[4] = s4; +} + +static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output) +{ + u128 b4; + u128 b0; + u128 b4_; + u128 b0_; + u64 i0; + u64 i1; + u64 i0_; + u64 i1_; + fsquare_fsquare__(tmp, output); + fproduct_carry_wide_(tmp); + b4 = tmp[4]; + b0 = tmp[0]; + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); + tmp[4] = b4_; + tmp[0] = b0_; + fproduct_copy_from_wide_(output, tmp); + i0 = output[0]; + i1 = output[1]; + i0_ = i0 & 0x7ffffffffffffLLU; + i1_ = i1 + (i0 >> 51); + output[0] = i0_; + output[1] = i1_; +} + +static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp, + u32 count1) +{ + u32 i; + fsquare_fsquare_(tmp, output); + for (i = 1; i < count1; ++i) + fsquare_fsquare_(tmp, output); +} + +static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input, + u32 count1) +{ + u128 t[5]; + memcpy(output, input, 5 * sizeof(*input)); + fsquare_fsquare_times_(output, t, count1); +} + +static __always_inline void fsquare_fsquare_times_inplace(u64 *output, + u32 count1) +{ + u128 t[5]; + fsquare_fsquare_times_(output, t, count1); +} + +static __always_inline void crecip_crecip(u64 *out, u64 *z) +{ + u64 buf[20] = { 0 }; + u64 *a0 = buf; + u64 *t00 = buf + 5; + u64 *b0 = buf + 10; + u64 *t01; + u64 *b1; + u64 *c0; + u64 *a; + u64 *t0; + u64 *b; + u64 *c; + fsquare_fsquare_times(a0, z, 1); + fsquare_fsquare_times(t00, a0, 2); + fmul_fmul(b0, t00, z); + fmul_fmul(a0, b0, a0); + fsquare_fsquare_times(t00, a0, 1); + fmul_fmul(b0, t00, b0); + fsquare_fsquare_times(t00, b0, 5); + t01 = buf + 5; + b1 = buf + 10; + c0 = buf + 15; + fmul_fmul(b1, t01, b1); + fsquare_fsquare_times(t01, b1, 10); + fmul_fmul(c0, t01, b1); + fsquare_fsquare_times(t01, c0, 20); + fmul_fmul(t01, t01, c0); + fsquare_fsquare_times_inplace(t01, 10); + fmul_fmul(b1, t01, b1); + fsquare_fsquare_times(t01, b1, 50); + a = buf; + t0 = buf + 5; + b = buf + 10; + c = buf + 15; + fmul_fmul(c, t0, b); + fsquare_fsquare_times(t0, c, 100); + fmul_fmul(t0, t0, c); + fsquare_fsquare_times_inplace(t0, 50); + fmul_fmul(t0, t0, b); + fsquare_fsquare_times_inplace(t0, 5); + fmul_fmul(out, t0, a); +} + +static __always_inline void fsum(u64 *a, u64 *b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + a[4] += b[4]; +} + +static __always_inline void fdifference(u64 *a, u64 *b) +{ + u64 tmp[5] = { 0 }; + u64 b0; + u64 b1; + u64 b2; + u64 b3; + u64 b4; + memcpy(tmp, b, 5 * sizeof(*b)); + b0 = tmp[0]; + b1 = tmp[1]; + b2 = tmp[2]; + b3 = tmp[3]; + b4 = tmp[4]; + tmp[0] = b0 + 0x3fffffffffff68LLU; + tmp[1] = b1 + 0x3ffffffffffff8LLU; + tmp[2] = b2 + 0x3ffffffffffff8LLU; + tmp[3] = b3 + 0x3ffffffffffff8LLU; + tmp[4] = b4 + 0x3ffffffffffff8LLU; + { + u64 xi = a[0]; + u64 yi = tmp[0]; + a[0] = yi - xi; + } + { + u64 xi = a[1]; + u64 yi = tmp[1]; + a[1] = yi - xi; + } + { + u64 xi = a[2]; + u64 yi = tmp[2]; + a[2] = yi - xi; + } + { + u64 xi = a[3]; + u64 yi = tmp[3]; + a[3] = yi - xi; + } + { + u64 xi = a[4]; + u64 yi = tmp[4]; + a[4] = yi - xi; + } +} + +static __always_inline void fscalar(u64 *output, u64 *b, u64 s) +{ + u128 tmp[5]; + u128 b4; + u128 b0; + u128 b4_; + u128 b0_; + { + u64 xi = b[0]; + tmp[0] = ((u128)(xi) * (s)); + } + { + u64 xi = b[1]; + tmp[1] = ((u128)(xi) * (s)); + } + { + u64 xi = b[2]; + tmp[2] = ((u128)(xi) * (s)); + } + { + u64 xi = b[3]; + tmp[3] = ((u128)(xi) * (s)); + } + { + u64 xi = b[4]; + tmp[4] = ((u128)(xi) * (s)); + } + fproduct_carry_wide_(tmp); + b4 = tmp[4]; + b0 = tmp[0]; + b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); + b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); + tmp[4] = b4_; + tmp[0] = b0_; + fproduct_copy_from_wide_(output, tmp); +} + +static __always_inline void crecip(u64 *output, u64 *input) +{ + crecip_crecip(output, input); +} + +static __always_inline void point_swap_conditional_step(u64 *a, u64 *b, + u64 swap1, u32 ctr) +{ + u32 i = ctr - 1; + u64 ai = a[i]; + u64 bi = b[i]; + u64 x = swap1 & (ai ^ bi); + u64 ai1 = ai ^ x; + u64 bi1 = bi ^ x; + a[i] = ai1; + b[i] = bi1; +} + +static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1) +{ + point_swap_conditional_step(a, b, swap1, 5); + point_swap_conditional_step(a, b, swap1, 4); + point_swap_conditional_step(a, b, swap1, 3); + point_swap_conditional_step(a, b, swap1, 2); + point_swap_conditional_step(a, b, swap1, 1); +} + +static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap) +{ + u64 swap1 = 0 - iswap; + point_swap_conditional5(a, b, swap1); + point_swap_conditional5(a + 5, b + 5, swap1); +} + +static __always_inline void point_copy(u64 *output, u64 *input) +{ + memcpy(output, input, 5 * sizeof(*input)); + memcpy(output + 5, input + 5, 5 * sizeof(*input)); +} + +static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p, + u64 *pq, u64 *qmqp) +{ + u64 *qx = qmqp; + u64 *x2 = pp; + u64 *z2 = pp + 5; + u64 *x3 = ppq; + u64 *z3 = ppq + 5; + u64 *x = p; + u64 *z = p + 5; + u64 *xprime = pq; + u64 *zprime = pq + 5; + u64 buf[40] = { 0 }; + u64 *origx = buf; + u64 *origxprime0 = buf + 5; + u64 *xxprime0; + u64 *zzprime0; + u64 *origxprime; + xxprime0 = buf + 25; + zzprime0 = buf + 30; + memcpy(origx, x, 5 * sizeof(*x)); + fsum(x, z); + fdifference(z, origx); + memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); + fsum(xprime, zprime); + fdifference(zprime, origxprime0); + fmul_fmul(xxprime0, xprime, z); + fmul_fmul(zzprime0, x, zprime); + origxprime = buf + 5; + { + u64 *xx0; + u64 *zz0; + u64 *xxprime; + u64 *zzprime; + u64 *zzzprime; + xx0 = buf + 15; + zz0 = buf + 20; + xxprime = buf + 25; + zzprime = buf + 30; + zzzprime = buf + 35; + memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); + fsum(xxprime, zzprime); + fdifference(zzprime, origxprime); + fsquare_fsquare_times(x3, xxprime, 1); + fsquare_fsquare_times(zzzprime, zzprime, 1); + fmul_fmul(z3, zzzprime, qx); + fsquare_fsquare_times(xx0, x, 1); + fsquare_fsquare_times(zz0, z, 1); + { + u64 *zzz; + u64 *xx; + u64 *zz; + u64 scalar; + zzz = buf + 10; + xx = buf + 15; + zz = buf + 20; + fmul_fmul(x2, xx, zz); + fdifference(zz, xx); + scalar = 121665; + fscalar(zzz, zz, scalar); + fsum(zzz, xx); + fmul_fmul(z2, zzz, zz); + } + } +} + +static __always_inline void +ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, + u64 *q, u8 byt) +{ + u64 bit0 = (u64)(byt >> 7); + u64 bit; + point_swap_conditional(nq, nqpq, bit0); + addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); + bit = (u64)(byt >> 7); + point_swap_conditional(nq2, nqpq2, bit); +} + +static __always_inline void +ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2, + u64 *nqpq2, u64 *q, u8 byt) +{ + u8 byt1; + ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); + byt1 = byt << 1; + ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); +} + +static __always_inline void +ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, + u64 *q, u8 byt, u32 i) +{ + while (i--) { + ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2, + nqpq2, q, byt); + byt <<= 2; + } +} + +static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq, + u64 *nqpq, u64 *nq2, + u64 *nqpq2, u64 *q, + u32 i) +{ + while (i--) { + u8 byte = n1[i]; + ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, + byte, 4); + } +} + +static void ladder_cmult(u64 *result, u8 *n1, u64 *q) +{ + u64 point_buf[40] = { 0 }; + u64 *nq = point_buf; + u64 *nqpq = point_buf + 10; + u64 *nq2 = point_buf + 20; + u64 *nqpq2 = point_buf + 30; + point_copy(nqpq, q); + nq[0] = 1; + ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); + point_copy(result, nq); +} + +static __always_inline void format_fexpand(u64 *output, const u8 *input) +{ + const u8 *x00 = input + 6; + const u8 *x01 = input + 12; + const u8 *x02 = input + 19; + const u8 *x0 = input + 24; + u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4; + i0 = get_unaligned_le64(input); + i1 = get_unaligned_le64(x00); + i2 = get_unaligned_le64(x01); + i3 = get_unaligned_le64(x02); + i4 = get_unaligned_le64(x0); + output0 = i0 & 0x7ffffffffffffLLU; + output1 = i1 >> 3 & 0x7ffffffffffffLLU; + output2 = i2 >> 6 & 0x7ffffffffffffLLU; + output3 = i3 >> 1 & 0x7ffffffffffffLLU; + output4 = i4 >> 12 & 0x7ffffffffffffLLU; + output[0] = output0; + output[1] = output1; + output[2] = output2; + output[3] = output3; + output[4] = output4; +} + +static __always_inline void format_fcontract_first_carry_pass(u64 *input) +{ + u64 t0 = input[0]; + u64 t1 = input[1]; + u64 t2 = input[2]; + u64 t3 = input[3]; + u64 t4 = input[4]; + u64 t1_ = t1 + (t0 >> 51); + u64 t0_ = t0 & 0x7ffffffffffffLLU; + u64 t2_ = t2 + (t1_ >> 51); + u64 t1__ = t1_ & 0x7ffffffffffffLLU; + u64 t3_ = t3 + (t2_ >> 51); + u64 t2__ = t2_ & 0x7ffffffffffffLLU; + u64 t4_ = t4 + (t3_ >> 51); + u64 t3__ = t3_ & 0x7ffffffffffffLLU; + input[0] = t0_; + input[1] = t1__; + input[2] = t2__; + input[3] = t3__; + input[4] = t4_; +} + +static __always_inline void format_fcontract_first_carry_full(u64 *input) +{ + format_fcontract_first_carry_pass(input); + modulo_carry_top(input); +} + +static __always_inline void format_fcontract_second_carry_pass(u64 *input) +{ + u64 t0 = input[0]; + u64 t1 = input[1]; + u64 t2 = input[2]; + u64 t3 = input[3]; + u64 t4 = input[4]; + u64 t1_ = t1 + (t0 >> 51); + u64 t0_ = t0 & 0x7ffffffffffffLLU; + u64 t2_ = t2 + (t1_ >> 51); + u64 t1__ = t1_ & 0x7ffffffffffffLLU; + u64 t3_ = t3 + (t2_ >> 51); + u64 t2__ = t2_ & 0x7ffffffffffffLLU; + u64 t4_ = t4 + (t3_ >> 51); + u64 t3__ = t3_ & 0x7ffffffffffffLLU; + input[0] = t0_; + input[1] = t1__; + input[2] = t2__; + input[3] = t3__; + input[4] = t4_; +} + +static __always_inline void format_fcontract_second_carry_full(u64 *input) +{ + u64 i0; + u64 i1; + u64 i0_; + u64 i1_; + format_fcontract_second_carry_pass(input); + modulo_carry_top(input); + i0 = input[0]; + i1 = input[1]; + i0_ = i0 & 0x7ffffffffffffLLU; + i1_ = i1 + (i0 >> 51); + input[0] = i0_; + input[1] = i1_; +} + +static __always_inline void format_fcontract_trim(u64 *input) +{ + u64 a0 = input[0]; + u64 a1 = input[1]; + u64 a2 = input[2]; + u64 a3 = input[3]; + u64 a4 = input[4]; + u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); + u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); + u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU); + u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU); + u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU); + u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; + u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); + u64 a1_ = a1 - (0x7ffffffffffffLLU & mask); + u64 a2_ = a2 - (0x7ffffffffffffLLU & mask); + u64 a3_ = a3 - (0x7ffffffffffffLLU & mask); + u64 a4_ = a4 - (0x7ffffffffffffLLU & mask); + input[0] = a0_; + input[1] = a1_; + input[2] = a2_; + input[3] = a3_; + input[4] = a4_; +} + +static __always_inline void format_fcontract_store(u8 *output, u64 *input) +{ + u64 t0 = input[0]; + u64 t1 = input[1]; + u64 t2 = input[2]; + u64 t3 = input[3]; + u64 t4 = input[4]; + u64 o0 = t1 << 51 | t0; + u64 o1 = t2 << 38 | t1 >> 13; + u64 o2 = t3 << 25 | t2 >> 26; + u64 o3 = t4 << 12 | t3 >> 39; + u8 *b0 = output; + u8 *b1 = output + 8; + u8 *b2 = output + 16; + u8 *b3 = output + 24; + put_unaligned_le64(o0, b0); + put_unaligned_le64(o1, b1); + put_unaligned_le64(o2, b2); + put_unaligned_le64(o3, b3); +} + +static __always_inline void format_fcontract(u8 *output, u64 *input) +{ + format_fcontract_first_carry_full(input); + format_fcontract_second_carry_full(input); + format_fcontract_trim(input); + format_fcontract_store(output, input); +} + +static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point) +{ + u64 *x = point; + u64 *z = point + 5; + u64 buf[10] __aligned(32) = { 0 }; + u64 *zmone = buf; + u64 *sc = buf + 5; + crecip(zmone, z); + fmul_fmul(sc, x, zmone); + format_fcontract(scalar, sc); +} + +static void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + u64 buf0[10] __aligned(32) = { 0 }; + u64 *x0 = buf0; + u64 *z = buf0 + 5; + u64 *q; + format_fexpand(x0, basepoint); + z[0] = 1; + q = buf0; + { + u8 e[32] __aligned(32) = { 0 }; + u8 *scalar; + memcpy(e, secret, 32); + curve25519_clamp_secret(e); + scalar = e; + { + u64 buf[15] = { 0 }; + u64 *nq = buf; + u64 *x = nq; + x[0] = 1; + ladder_cmult(nq, scalar, q); + format_scalar_of_point(mypublic, nq); + memzero_explicit(buf, sizeof(buf)); + } + memzero_explicit(e, sizeof(e)); + } + memzero_explicit(buf0, sizeof(buf0)); +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c new file mode 100644 index 000000000000..e08cc2ba74f3 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64-glue.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include + +#include "curve25519-x86_64.c" + +static bool curve25519_use_bmi2_adx __ro_after_init; +static bool *const curve25519_nobs[] __initconst = { + &curve25519_use_bmi2_adx }; + +static void __init curve25519_fpu_init(void) +{ + curve25519_use_bmi2_adx = IS_ENABLED(CONFIG_AS_BMI2) && + IS_ENABLED(CONFIG_AS_ADX) && + boot_cpu_has(X86_FEATURE_BMI2) && + boot_cpu_has(X86_FEATURE_ADX); +} + +static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) && + curve25519_use_bmi2_adx) { + curve25519_ever64(mypublic, secret, basepoint); + return true; + } + return false; +} + +static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) && + curve25519_use_bmi2_adx) { + curve25519_ever64_base(pub, secret); + return true; + } + return false; +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c new file mode 100644 index 000000000000..8b6872a2f0d0 --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c @@ -0,0 +1,1580 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved. + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + */ + +static __always_inline u64 eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; +} + +static __always_inline u64 gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; +} + +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) +{ + u64 carry_r; + + asm volatile( + /* Clear registers to propagate the carry bit */ + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %k1, %k1;" + + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" + + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r"(f2), "=&r"(carry_r) + : "r"(out), "r"(f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc"); + + return carry_r; +} + +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + /* Step 2: Add carry*38 to the original sum */ + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes the field subtraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw subtraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Subtract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes a field multiplication: out <- f1 * f2 + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results: */ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes the field multiplication of four-element f1 with value in f2 + * Requires f2 to be smaller than 2^17 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) +{ + register u64 f2_r asm("rdx") = f2; + + asm volatile( + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", + "memory", "cc"); +} + +/* Computes p1 <- bit ? p2 : p1 in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) +{ + asm volatile( + /* Transfer bit into CF flag */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r,&r"(f), "+&r,&r"(tmp) + : "r,m"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Step 1: Compute all partial products */ + " movq 32(%0), %%rdx;" /* f[0] */ + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%0), %%rdx;" /* f[3] */ + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%1);" + : "+&r,&r"(f), "+&r,&r"(tmp) + : "r,m"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) +{ + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + (u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 *x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); +} + +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) +{ + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); +} + +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) +{ + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); +} + +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) +{ + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); +} + +static void finv(u64 *o, const u64 *i, u64 *tmp) +{ + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); +} + +static void store_felem(u64 *b, u64 *f) +{ + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 carry0; + u64 f31; + u64 top_bit; + u64 carry; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + carry0 = add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + carry = add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void encode_point(u8 *o, const u64 *i) +{ + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); +} + +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) +{ + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; + } + } + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); +} + +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ + +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, + 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, + 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, + 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, + 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, + 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL +}; + +static void curve25519_ever64_base(u8 *out, const u8 *priv) +{ + u64 swap = 1; + int i, j, k; + u64 tmp[16 + 32 + 4]; + u64 *x1 = &tmp[0]; + u64 *z1 = &tmp[4]; + u64 *x2 = &tmp[8]; + u64 *z2 = &tmp[12]; + u64 *xz1 = &tmp[0]; + u64 *xz2 = &tmp[8]; + u64 *a = &tmp[0 + 16]; + u64 *b = &tmp[4 + 16]; + u64 *c = &tmp[8 + 16]; + u64 *ab = &tmp[0 + 16]; + u64 *abcd = &tmp[0 + 16]; + u64 *ef = &tmp[16 + 16]; + u64 *efgh = &tmp[16 + 16]; + u64 *key = &tmp[0 + 16 + 32]; + + memcpy(key, priv, 32); + ((u8 *)key)[0] &= 248; + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; + + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; + memcpy(x2, p_minus_s, sizeof(p_minus_s)); + + j = 3; + for (i = 0; i < 4; ++i) { + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { + u64 bit = (key[i] >> j) & 1; + k = (64 * i + j - 3); + swap = swap ^ bit; + cswap2(swap, xz1, xz2); + swap = bit; + fsub(b, x1, z1); + fadd(a, x1, z1); + fmul(c, &table_ladder[4 * k], b, ef); + fsub(b, a, c); + fadd(a, a, c); + fsqr2(ab, ab, efgh); + fmul2(xz1, xz2, ab, efgh); + ++j; + } + j = 0; + } + + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + encode_point(out, xz1); + + memzero_explicit(tmp, sizeof(tmp)); +} diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519.c b/net/wireguard/crypto/zinc/curve25519/curve25519.c new file mode 100644 index 000000000000..dffaa09c18db --- /dev/null +++ b/net/wireguard/crypto/zinc/curve25519/curve25519.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is an implementation of the Curve25519 ECDH algorithm, using either + * a 32-bit implementation or a 64-bit implementation with 128-bit integers, + * depending on what is supported by the target compiler. + * + * Information: https://cr.yp.to/ecdh.html + */ + +#include +#include "../selftest/run.h" + +#include +#include +#include +#include +#include +#include // For crypto_memneq. + +#if defined(CONFIG_ZINC_ARCH_X86_64) +#include "curve25519-x86_64-glue.c" +#elif defined(CONFIG_ZINC_ARCH_ARM) +#include "curve25519-arm-glue.c" +#else +static bool *const curve25519_nobs[] __initconst = { }; +static void __init curve25519_fpu_init(void) +{ +} +static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + return false; +} +static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + return false; +} +#endif + +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) +#include "curve25519-hacl64.c" +#else +#include "curve25519-fiat32.c" +#endif + +static const u8 null_point[CURVE25519_KEY_SIZE] = { 0 }; + +bool curve25519(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (!curve25519_arch(mypublic, secret, basepoint)) + curve25519_generic(mypublic, secret, basepoint); + return crypto_memneq(mypublic, null_point, CURVE25519_KEY_SIZE); +} + +bool curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + static const u8 basepoint[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; + + if (unlikely(!crypto_memneq(secret, null_point, CURVE25519_KEY_SIZE))) + return false; + + if (curve25519_base_arch(pub, secret)) + return crypto_memneq(pub, null_point, CURVE25519_KEY_SIZE); + return curve25519(pub, secret, basepoint); +} + +void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]) +{ + get_random_bytes_wait(secret, CURVE25519_KEY_SIZE); + curve25519_clamp_secret(secret); +} + +#include "../selftest/curve25519.c" + +static bool nosimd __initdata = false; + +#ifndef COMPAT_ZINC_IS_A_MODULE +int __init curve25519_mod_init(void) +#else +static int __init mod_init(void) +#endif +{ + if (!nosimd) + curve25519_fpu_init(); + if (!selftest_run("curve25519", curve25519_selftest, curve25519_nobs, + ARRAY_SIZE(curve25519_nobs))) + return -ENOTRECOVERABLE; + return 0; +} + +#ifdef COMPAT_ZINC_IS_A_MODULE +static void __exit mod_exit(void) +{ +} + +module_param(nosimd, bool, 0); +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Curve25519 scalar multiplication"); +MODULE_AUTHOR("Jason A. Donenfeld "); +#endif diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-arm-glue.c b/net/wireguard/crypto/zinc/poly1305/poly1305-arm-glue.c new file mode 100644 index 000000000000..291fe4ba98b0 --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-arm-glue.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include + +asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]); +asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]); +asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]); + +static bool poly1305_use_neon __ro_after_init; +static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon }; + +static void __init poly1305_fpu_init(void) +{ +#if defined(CONFIG_ZINC_ARCH_ARM64) + poly1305_use_neon = cpu_have_named_feature(ASIMD); +#elif defined(CONFIG_ZINC_ARCH_ARM) + poly1305_use_neon = elf_hwcap & HWCAP_NEON; +#endif +} + +#if defined(CONFIG_ZINC_ARCH_ARM64) +struct poly1305_arch_internal { + union { + u32 h[5]; + struct { + u64 h0, h1, h2; + }; + }; + u64 is_base2_26; + u64 r[2]; +}; +#elif defined(CONFIG_ZINC_ARCH_ARM) +struct poly1305_arch_internal { + union { + u32 h[5]; + struct { + u64 h0, h1; + u32 h2; + } __packed; + }; + u32 r[4]; + u32 is_base2_26; +}; +#endif + +/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit + * and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON + * and then having to go back to scalar -- because the user is silly and has + * called the update function from two separate contexts -- then we need to + * convert back to the original base before proceeding. The below function is + * written for 64-bit integers, and so we have to swap words at the end on + * big-endian 32-bit. It is possible to reason that the initial reduction below + * is sufficient given the implementation invariants. However, for an avoidance + * of doubt and because this is not performance critical, we do the full + * reduction anyway. + */ +static void convert_to_base2_64(void *ctx) +{ + struct poly1305_arch_internal *state = ctx; + u32 cy; + + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26) + return; + + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; + state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; + state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); + state->h2 = state->h[4] >> 24; + if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) { + state->h0 = rol64(state->h0, 32); + state->h1 = rol64(state->h1, 32); + } +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) + cy = (state->h2 >> 2) + (state->h2 & ~3ULL); + state->h2 &= 3; + state->h0 += cy; + state->h1 += (cy = ULT(state->h0, cy)); + state->h2 += ULT(state->h1, cy); +#undef ULT + state->is_base2_26 = 0; +} + +static inline bool poly1305_init_arch(void *ctx, + const u8 key[POLY1305_KEY_SIZE]) +{ + poly1305_init_arm(ctx, key); + return true; +} + +static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp, + size_t len, const u32 padbit, + simd_context_t *simd_context) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || + PAGE_SIZE % POLY1305_BLOCK_SIZE); + + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon || + !simd_use(simd_context)) { + convert_to_base2_64(ctx); + poly1305_blocks_arm(ctx, inp, len, padbit); + return true; + } + + for (;;) { + const size_t bytes = min_t(size_t, len, PAGE_SIZE); + + poly1305_blocks_neon(ctx, inp, bytes, padbit); + len -= bytes; + if (!len) + break; + inp += bytes; + simd_relax(simd_context); + } + return true; +} + +static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4], + simd_context_t *simd_context) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon || + !simd_use(simd_context)) { + convert_to_base2_64(ctx); + poly1305_emit_arm(ctx, mac, nonce); + } else + poly1305_emit_neon(ctx, mac, nonce); + return true; +} diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-arm.pl b/net/wireguard/crypto/zinc/poly1305/poly1305-arm.pl new file mode 100644 index 000000000000..468f41b76fbd --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-arm.pl @@ -0,0 +1,1276 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# IALU(*)/gcc-4.4 NEON +# +# ARM11xx(ARMv6) 7.78/+100% - +# Cortex-A5 6.35/+130% 3.00 +# Cortex-A8 6.25/+115% 2.36 +# Cortex-A9 5.10/+95% 2.55 +# Cortex-A15 3.85/+85% 1.25(**) +# Snapdragon S4 5.70/+100% 1.48(**) +# +# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data; +# (**) these are trade-off results, they can be improved by ~8% but at +# the cost of 15/12% regression on Cortex-A5/A7, it's even possible +# to improve Cortex-A9 result, but then A5/A7 loose more than 20%; + +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +#endif + +.text +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp $inp,#0 + str r3,[$ctx,#0] @ zero hash value + str r3,[$ctx,#4] + str r3,[$ctx,#8] + str r3,[$ctx,#12] + str r3,[$ctx,#16] + str r3,[$ctx,#36] @ is_base2_26 + add $ctx,$ctx,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +#endif + ldrb r4,[$inp,#0] + mov r10,#0x0fffffff + ldrb r5,[$inp,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[$inp,#2] + ldrb r7,[$inp,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[$inp,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[$inp,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[$inp,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# ifdef __APPLE__ + ldr r12,[r12] +# endif +#endif + ldrb r8,[$inp,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[$inp,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[$inp,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[$inp,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __APPLE__ + adr r9,poly1305_blocks_neon + adr r11,poly1305_blocks +# ifdef __thumb2__ + it ne +# endif + movne r11,r9 + adr r12,poly1305_emit + adr r10,poly1305_emit_neon +# ifdef __thumb2__ + it ne +# endif + movne r12,r10 +# else +# ifdef __thumb2__ + itete eq +# endif + addeq r12,r11,#(poly1305_emit-.Lpoly1305_init) + addne r12,r11,#(poly1305_emit_neon-.Lpoly1305_init) + addeq r11,r11,#(poly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(poly1305_blocks_neon-.Lpoly1305_init) +# endif +# ifdef __thumb2__ + orr r12,r12,#1 @ thumb-ify address + orr r11,r11,#1 +# endif +#endif + ldrb r9,[$inp,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[$inp,#12] + orr r6,r6,r8,lsl#16 + ldrb r8,[$inp,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[$inp,#14] + and r6,r6,r3 + + ldrb r10,[$inp,#15] + orr r7,r7,r8,lsl#8 + str r4,[$ctx,#0] + orr r7,r7,r9,lsl#16 + str r5,[$ctx,#4] + orr r7,r7,r10,lsl#24 + str r6,[$ctx,#8] + and r7,r7,r3 + str r7,[$ctx,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12)); +my ($s1,$s2,$s3)=($r1,$r2,$r3); + +$code.=<<___; +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands $len,$len,#-16 + beq .Lno_data + + cmp $padbit,#0 + add $len,$len,$inp @ end pointer + sub sp,sp,#32 + + ldmia $ctx,{$h0-$r3} @ load context + + str $ctx,[sp,#12] @ offload stuff + mov lr,$inp + str $len,[sp,#16] + str $r1,[sp,#20] + str $r2,[sp,#24] + str $r3,[sp,#28] + b .Loop + +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi $h4,$h4,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds $h0,$h0,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs $h1,$h1,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs $h2,$h2,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add $s1,$r1,$r1,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi $h4,$h4,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds $h0,$h0,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs $h1,$h1,r1 + add $s1,$r1,$r1,lsr#2 + adcs $h2,$h2,r2 +#endif + add $s2,$r2,$r2,lsr#2 + adcs $h3,$h3,r3 + add $s3,$r3,$r3,lsr#2 + + umull r2,r3,$h1,$r0 + adc $h4,$h4,#0 + umull r0,r1,$h0,$r0 + umlal r2,r3,$h4,$s1 + umlal r0,r1,$h3,$s1 + ldr $r1,[sp,#20] @ reload $r1 + umlal r2,r3,$h2,$s3 + umlal r0,r1,$h1,$s3 + umlal r2,r3,$h3,$s2 + umlal r0,r1,$h2,$s2 + umlal r2,r3,$h0,$r1 + str r0,[sp,#0] @ future $h0 + mul r0,$s2,$h4 + ldr $r2,[sp,#24] @ reload $r2 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future $h2 + str r2,[sp,#4] @ future $h1 + + mul r2,$s3,$h4 + eor r3,r3,r3 + umlal r0,r1,$h3,$s3 + ldr $r3,[sp,#28] @ reload $r3 + umlal r2,r3,$h3,$r0 + umlal r0,r1,$h2,$r0 + umlal r2,r3,$h2,$r1 + umlal r0,r1,$h1,$r1 + umlal r2,r3,$h1,$r2 + umlal r0,r1,$h0,$r2 + umlal r2,r3,$h0,$r3 + ldr $h0,[sp,#0] + mul $h4,$r0,$h4 + ldr $h1,[sp,#4] + + adds $h2,lr,r0 @ d2+=d1>>32 + ldr lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds $h3,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add $h4,$h4,r3 @ h4+=d3>>32 + + and r1,$h4,#-4 + and $h4,$h4,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds $h0,$h0,r1 + adcs $h1,$h1,#0 + adcs $h2,$h2,#0 + adcs $h3,$h3,#0 + adc $h4,$h4,#0 + + cmp r0,lr @ done yet? + bhi .Loop + + ldr $ctx,[sp,#12] + add sp,sp,#32 + stmia $ctx,{$h0-$h4} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce)=map("r$_",(0..2)); +my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11)); +my $g4=$h4; + +$code.=<<___; +.type poly1305_emit,%function +.align 5 +poly1305_emit: + stmdb sp!,{r4-r11} +.Lpoly1305_emit_enter: + + ldmia $ctx,{$h0-$h4} + adds $g0,$h0,#5 @ compare to modulus + adcs $g1,$h1,#0 + adcs $g2,$h2,#0 + adcs $g3,$h3,#0 + adc $g4,$h4,#0 + tst $g4,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne $h0,$g0 + ldr $g0,[$nonce,#0] +#ifdef __thumb2__ + it ne +#endif + movne $h1,$g1 + ldr $g1,[$nonce,#4] +#ifdef __thumb2__ + it ne +#endif + movne $h2,$g2 + ldr $g2,[$nonce,#8] +#ifdef __thumb2__ + it ne +#endif + movne $h3,$g3 + ldr $g3,[$nonce,#12] + + adds $h0,$h0,$g0 + adcs $h1,$h1,$g1 + adcs $h2,$h2,$g2 + adc $h3,$h3,$g3 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev $h0,$h0 + rev $h1,$h1 + rev $h2,$h2 + rev $h3,$h3 +# endif + str $h0,[$mac,#0] + str $h1,[$mac,#4] + str $h2,[$mac,#8] + str $h3,[$mac,#12] +#else + strb $h0,[$mac,#0] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#4] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#8] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#12] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#1] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#5] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#9] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#13] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#2] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#6] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#10] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#14] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#3] + strb $h1,[$mac,#7] + strb $h2,[$mac,#11] + strb $h3,[$mac,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +___ +{ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9)); +my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14)); +my ($T0,$T1,$MASK) = map("q$_",(15,4,0)); + +my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7)); + +$code.=<<___; +#if (defined(__KERNEL__) && defined(CONFIG_KERNEL_MODE_NEON)) || (!defined(__KERNEL__) && __ARM_MAX_ARCH__>=7) +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r4,[$ctx,#20] @ load key base 2^32 + ldr r5,[$ctx,#24] + ldr r6,[$ctx,#28] + ldr r7,[$ctx,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 $R0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 $R1,r3 + add r3,r4,r4,lsl#2 + vdup.32 $S1,r2 + vdup.32 $R2,r4 + add r4,r5,r5,lsl#2 + vdup.32 $S2,r3 + vdup.32 $R3,r5 + add r5,r6,r6,lsl#2 + vdup.32 $S3,r4 + vdup.32 $R4,r6 + vdup.32 $S4,r5 + + mov $zeros,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 $D0,$R0,${R0}[1] + vmull.u32 $D1,$R1,${R0}[1] + vmull.u32 $D2,$R2,${R0}[1] + vmull.u32 $D3,$R3,${R0}[1] + vmull.u32 $D4,$R4,${R0}[1] + + vmlal.u32 $D0,$R4,${S1}[1] + vmlal.u32 $D1,$R0,${R1}[1] + vmlal.u32 $D2,$R1,${R1}[1] + vmlal.u32 $D3,$R2,${R1}[1] + vmlal.u32 $D4,$R3,${R1}[1] + + vmlal.u32 $D0,$R3,${S2}[1] + vmlal.u32 $D1,$R4,${S2}[1] + vmlal.u32 $D3,$R1,${R2}[1] + vmlal.u32 $D2,$R0,${R2}[1] + vmlal.u32 $D4,$R2,${R2}[1] + + vmlal.u32 $D0,$R2,${S3}[1] + vmlal.u32 $D3,$R0,${R3}[1] + vmlal.u32 $D1,$R3,${S3}[1] + vmlal.u32 $D2,$R4,${S3}[1] + vmlal.u32 $D4,$R1,${R3}[1] + + vmlal.u32 $D3,$R4,${S4}[1] + vmlal.u32 $D0,$R1,${S4}[1] + vmlal.u32 $D1,$R2,${S4}[1] + vmlal.u32 $D2,$R3,${S4}[1] + vmlal.u32 $D4,$R0,${R4}[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. + @ This means that result of reduction have to be compressed upon + @ loop wrap-around. This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vbic.i32 $D4#lo,#0xfc000000 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vbic.i32 $D2#lo,#0xfc000000 + + vshr.u32 $T0#lo,$D0#lo,#26 + vbic.i32 $D0#lo,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + + subs $zeros,$zeros,#1 + beq .Lsquare_break_neon + + add $tbl0,$ctx,#(48+0*9*4) + add $tbl1,$ctx,#(48+1*9*4) + + vtrn.32 $R0,$D0#lo @ r^2:r^1 + vtrn.32 $R2,$D2#lo + vtrn.32 $R3,$D3#lo + vtrn.32 $R1,$D1#lo + vtrn.32 $R4,$D4#lo + + vshl.u32 $S2,$R2,#2 @ *5 + vshl.u32 $S3,$R3,#2 + vshl.u32 $S1,$R1,#2 + vshl.u32 $S4,$R4,#2 + vadd.i32 $S2,$S2,$R2 + vadd.i32 $S1,$S1,$R1 + vadd.i32 $S3,$S3,$R3 + vadd.i32 $S4,$S4,$R4 + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0,:32] + vst1.32 {${S4}[1]},[$tbl1,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add $tbl0,$ctx,#(48+2*4*9) + add $tbl1,$ctx,#(48+3*4*9) + + vmov $R0,$D0#lo @ r^4:r^3 + vshl.u32 $S1,$D1#lo,#2 @ *5 + vmov $R1,$D1#lo + vshl.u32 $S2,$D2#lo,#2 + vmov $R2,$D2#lo + vshl.u32 $S3,$D3#lo,#2 + vmov $R3,$D3#lo + vshl.u32 $S4,$D4#lo,#2 + vmov $R4,$D4#lo + vadd.i32 $S1,$S1,$D1#lo + vadd.i32 $S2,$S2,$D2#lo + vadd.i32 $S3,$S3,$D3#lo + vadd.i32 $S4,$S4,$D4#lo + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0] + vst1.32 {${S4}[1]},[$tbl1] + + ret @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +#ifdef __KERNEL__ +.globl poly1305_blocks_neon +#endif +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: + ldr ip,[$ctx,#36] @ is_base2_26 + ands $len,$len,#-16 + beq .Lno_data_neon + + cmp $len,#64 + bhs .Lenter_neon + tst ip,ip @ is_base2_26? + beq .Lpoly1305_blocks + +.Lenter_neon: + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? + bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[$ctx,#0] @ load hash value base 2^32 + ldr r5,[$ctx,#4] + ldr r6,[$ctx,#8] + ldr r7,[$ctx,#12] + ldr ip,[$ctx,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor $D0#lo,$D0#lo,$D0#lo + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor $D1#lo,$D1#lo,$D1#lo + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor $D2#lo,$D2#lo,$D2#lo + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor $D3#lo,$D3#lo,$D3#lo + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor $D4#lo,$D4#lo,$D4#lo + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[$ctx,#36] @ is_base2_26 + + vmov.32 $D0#lo[0],r2 + vmov.32 $D1#lo[0],r3 + vmov.32 $D2#lo[0],r4 + vmov.32 $D3#lo[0],r5 + vmov.32 $D4#lo[0],r6 + adr $zeros,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lbase2_32_neon + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor $D0#lo,$D0#lo,$D0#lo + veor $D1#lo,$D1#lo,$D1#lo + veor $D2#lo,$D2#lo,$D2#lo + veor $D3#lo,$D3#lo,$D3#lo + veor $D4#lo,$D4#lo,$D4#lo + vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! + adr $zeros,.Lzeros + vld1.32 {$D4#lo[0]},[$ctx] + sub $ctx,$ctx,#16 @ rewind + +.Lbase2_32_neon: + add $in2,$inp,#32 + mov $padbit,$padbit,lsl#24 + tst $len,#31 + beq .Leven + + vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]! + vmov.32 $H4#lo[0],$padbit + sub $len,$len,#16 + add $in2,$inp,#32 + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3#lo,$H3#lo,#18 + + vsri.u32 $H3#lo,$H2#lo,#14 + vshl.u32 $H2#lo,$H2#lo,#12 + vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi + + vbic.i32 $H3#lo,#0xfc000000 + vsri.u32 $H2#lo,$H1#lo,#20 + vshl.u32 $H1#lo,$H1#lo,#6 + + vbic.i32 $H2#lo,#0xfc000000 + vsri.u32 $H1#lo,$H0#lo,#26 + vadd.i32 $H3#hi,$H3#lo,$D3#lo + + vbic.i32 $H0#lo,#0xfc000000 + vbic.i32 $H1#lo,#0xfc000000 + vadd.i32 $H2#hi,$H2#lo,$D2#lo + + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + + mov $tbl1,$zeros + add $tbl0,$ctx,#48 + + cmp $len,$len + b .Long_tail + +.align 4 +.Leven: + subs $len,$len,#64 + it lo + movlo $in2,$zeros + + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + itt hi + addhi $tbl1,$ctx,#(48+1*9*4) + addhi $tbl0,$ctx,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3,$H3,#18 + + vsri.u32 $H3,$H2,#14 + vshl.u32 $H2,$H2,#12 + + vbic.i32 $H3,#0xfc000000 + vsri.u32 $H2,$H1,#20 + vshl.u32 $H1,$H1,#6 + + vbic.i32 $H2,#0xfc000000 + vsri.u32 $H1,$H0,#26 + + vbic.i32 $H0,#0xfc000000 + vbic.i32 $H1,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ \___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ \___________________/ \____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1] + vmull.u32 $D2,$H2#hi,${R0}[1] + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,${R0}[1] + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,${R0}[1] + vmlal.u32 $D2,$H1#hi,${R1}[1] + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,${R0}[1] + + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,${R0}[1] + subs $len,$len,#64 + vmlal.u32 $D0,$H4#hi,${S1}[1] + it lo + movlo $in2,$zeros + vmlal.u32 $D3,$H2#hi,${R1}[1] + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D1,$H0#hi,${R1}[1] + vmlal.u32 $D4,$H3#hi,${R1}[1] + + vmlal.u32 $D0,$H3#hi,${S2}[1] + vmlal.u32 $D3,$H1#hi,${R2}[1] + vmlal.u32 $D4,$H2#hi,${R2}[1] + vmlal.u32 $D1,$H4#hi,${S2}[1] + vmlal.u32 $D2,$H0#hi,${R2}[1] + + vmlal.u32 $D3,$H0#hi,${R3}[1] + vmlal.u32 $D0,$H2#hi,${S3}[1] + vmlal.u32 $D4,$H1#hi,${R3}[1] + vmlal.u32 $D1,$H3#hi,${S3}[1] + vmlal.u32 $D2,$H4#hi,${S3}[1] + + vmlal.u32 $D3,$H4#hi,${S4}[1] + vmlal.u32 $D0,$H1#hi,${S4}[1] + vmlal.u32 $D4,$H0#hi,${R4}[1] + vmlal.u32 $D1,$H2#hi,${S4}[1] + vmlal.u32 $D2,$H3#hi,${S4}[1] + + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 $D3,$H3#lo,${R0}[0] + vmlal.u32 $D0,$H0#lo,${R0}[0] + vmlal.u32 $D4,$H4#lo,${R0}[0] + vmlal.u32 $D1,$H1#lo,${R0}[0] + vmlal.u32 $D2,$H2#lo,${R0}[0] + vld1.32 ${S4}[0],[$tbl0,:32] + + vmlal.u32 $D3,$H2#lo,${R1}[0] + vmlal.u32 $D0,$H4#lo,${S1}[0] + vmlal.u32 $D4,$H3#lo,${R1}[0] + vmlal.u32 $D1,$H0#lo,${R1}[0] + vmlal.u32 $D2,$H1#lo,${R1}[0] + + vmlal.u32 $D3,$H1#lo,${R2}[0] + vmlal.u32 $D0,$H3#lo,${S2}[0] + vmlal.u32 $D4,$H2#lo,${R2}[0] + vmlal.u32 $D1,$H4#lo,${S2}[0] + vmlal.u32 $D2,$H0#lo,${R2}[0] + + vmlal.u32 $D3,$H0#lo,${R3}[0] + vmlal.u32 $D0,$H2#lo,${S3}[0] + vmlal.u32 $D4,$H1#lo,${R3}[0] + vmlal.u32 $D1,$H3#lo,${S3}[0] + vmlal.u32 $D3,$H4#lo,${S4}[0] + + vmlal.u32 $D2,$H4#lo,${S3}[0] + vmlal.u32 $D0,$H1#lo,${S4}[0] + vmlal.u32 $D4,$H0#lo,${R4}[0] + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vmlal.u32 $D1,$H2#lo,${S4}[0] + vmlal.u32 $D2,$H3#lo,${S4}[0] + + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 + vrev32.8 $H3,$H3 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4. + + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vshl.u32 $H3,$H3,#18 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vsri.u32 $H3,$H2,#14 + vbic.i32 $D4#lo,#0xfc000000 + vshl.u32 $H2,$H2,#12 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vbic.i32 $H3,#0xfc000000 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec] + vsri.u32 $H2,$H1,#20 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vshl.u32 $H1,$H1,#6 + vbic.i32 $D2#lo,#0xfc000000 + vbic.i32 $H2,#0xfc000000 + + vshrn.u64 $T0#lo,$D0,#26 @ re-narrow + vmovn.i64 $D0#lo,$D0 + vsri.u32 $H1,$H0,#26 + vbic.i32 $H0,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vbic.i32 $D0#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + vbic.i32 $H1,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add $tbl1,$ctx,#(48+0*9*4) + add $tbl0,$ctx,#(48+1*9*4) + adds $len,$len,#32 + it ne + movne $len,#0 + bne .Long_tail + + vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H3#hi,$H3#lo,$D3#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + vadd.i32 $H4#hi,$H4#lo,$D4#lo + +.Long_tail: + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant + vmull.u32 $D2,$H2#hi,$R0 + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,$R0 + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,$R0 + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,$R0 + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,$R0 + + vmlal.u32 $D0,$H4#hi,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vmlal.u32 $D3,$H2#hi,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#hi,$R1 + vmlal.u32 $D4,$H3#hi,$R1 + vmlal.u32 $D2,$H1#hi,$R1 + + vmlal.u32 $D3,$H1#hi,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#hi,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#hi,$R2 + vmlal.u32 $D1,$H4#hi,$S2 + vmlal.u32 $D2,$H0#hi,$R2 + + vmlal.u32 $D3,$H0#hi,$R3 + it ne + addne $tbl1,$ctx,#(48+2*9*4) + vmlal.u32 $D0,$H2#hi,$S3 + it ne + addne $tbl0,$ctx,#(48+3*9*4) + vmlal.u32 $D4,$H1#hi,$R3 + vmlal.u32 $D1,$H3#hi,$S3 + vmlal.u32 $D2,$H4#hi,$S3 + + vmlal.u32 $D3,$H4#hi,$S4 + vorn $MASK,$MASK,$MASK @ all-ones, can be redundant + vmlal.u32 $D0,$H1#hi,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#hi,$R4 + vmlal.u32 $D1,$H2#hi,$S4 + vmlal.u32 $D2,$H3#hi,$S4 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + + vmlal.u32 $D2,$H2#lo,$R0 + vmlal.u32 $D0,$H0#lo,$R0 + vmlal.u32 $D3,$H3#lo,$R0 + vmlal.u32 $D1,$H1#lo,$R0 + vmlal.u32 $D4,$H4#lo,$R0 + + vmlal.u32 $D0,$H4#lo,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vmlal.u32 $D3,$H2#lo,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#lo,$R1 + vmlal.u32 $D4,$H3#lo,$R1 + vmlal.u32 $D2,$H1#lo,$R1 + + vmlal.u32 $D3,$H1#lo,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#lo,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#lo,$R2 + vmlal.u32 $D1,$H4#lo,$S2 + vmlal.u32 $D2,$H0#lo,$R2 + + vmlal.u32 $D3,$H0#lo,$R3 + vmlal.u32 $D0,$H2#lo,$S3 + vmlal.u32 $D4,$H1#lo,$R3 + vmlal.u32 $D1,$H3#lo,$S3 + vmlal.u32 $D2,$H4#lo,$S3 + + vmlal.u32 $D3,$H4#lo,$S4 + vorn $MASK,$MASK,$MASK @ all-ones + vmlal.u32 $D0,$H1#lo,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#lo,$R4 + vmlal.u32 $D1,$H2#lo,$S4 + vmlal.u32 $D2,$H3#lo,$S4 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 $D3#lo,$D3#lo,$D3#hi + vadd.i64 $D0#lo,$D0#lo,$D0#hi + vadd.i64 $D4#lo,$D4#lo,$D4#hi + vadd.i64 $D1#lo,$D1#lo,$D1#hi + vadd.i64 $D2#lo,$D2#lo,$D2#hi + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 $T0,$D3,#26 + vand.i64 $D3,$D3,$MASK + vshr.u64 $T1,$D0,#26 + vand.i64 $D0,$D0,$MASK + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + + vshr.u64 $T0,$D4,#26 + vand.i64 $D4,$D4,$MASK + vshr.u64 $T1,$D1,#26 + vand.i64 $D1,$D1,$MASK + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + + vadd.i64 $D0,$D0,$T0 + vshl.u64 $T0,$T0,#2 + vshr.u64 $T1,$D2,#26 + vand.i64 $D2,$D2,$MASK + vadd.i64 $D0,$D0,$T0 @ h4 -> h0 + vadd.i64 $D3,$D3,$T1 @ h2 -> h3 + + vshr.u64 $T0,$D0,#26 + vand.i64 $D0,$D0,$MASK + vshr.u64 $T1,$D3,#26 + vand.i64 $D3,$D3,$MASK + vadd.i64 $D1,$D1,$T0 @ h0 -> h1 + vadd.i64 $D4,$D4,$T1 @ h3 -> h4 + + cmp $len,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! + vst1.32 {$D4#lo[0]},[$ctx] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} +.Lno_data_neon: + ret @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +#ifdef __KERNEL__ +.globl poly1305_emit_neon +#endif +.type poly1305_emit_neon,%function +.align 5 +poly1305_emit_neon: + ldr ip,[$ctx,#36] @ is_base2_26 + + stmdb sp!,{r4-r11} + + tst ip,ip + beq .Lpoly1305_emit_enter + + ldmia $ctx,{$h0-$h4} + eor $g0,$g0,$g0 + + adds $h0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $h1,$h1,lsr#6 + adcs $h1,$h1,$h2,lsl#20 + mov $h2,$h2,lsr#12 + adcs $h2,$h2,$h3,lsl#14 + mov $h3,$h3,lsr#18 + adcs $h3,$h3,$h4,lsl#8 + adc $h4,$g0,$h4,lsr#24 @ can be partially reduced ... + + and $g0,$h4,#-4 @ ... so reduce + and $h4,$h3,#3 + add $g0,$g0,$g0,lsr#2 @ *= 5 + adds $h0,$h0,$g0 + adcs $h1,$h1,#0 + adcs $h2,$h2,#0 + adcs $h3,$h3,#0 + adc $h4,$h4,#0 + + adds $g0,$h0,#5 @ compare to modulus + adcs $g1,$h1,#0 + adcs $g2,$h2,#0 + adcs $g3,$h3,#0 + adc $g4,$h4,#0 + tst $g4,#4 @ did it carry/borrow? + + it ne + movne $h0,$g0 + ldr $g0,[$nonce,#0] + it ne + movne $h1,$g1 + ldr $g1,[$nonce,#4] + it ne + movne $h2,$g2 + ldr $g2,[$nonce,#8] + it ne + movne $h3,$g3 + ldr $g3,[$nonce,#12] + + adds $h0,$h0,$g0 @ accumulate nonce + adcs $h1,$h1,$g1 + adcs $h2,$h2,$g2 + adc $h3,$h3,$g3 + +# ifdef __ARMEB__ + rev $h0,$h0 + rev $h1,$h1 + rev $h2,$h2 + rev $h3,$h3 +# endif + str $h0,[$mac,#0] @ store the result + str $h1,[$mac,#4] + str $h2,[$mac,#8] + str $h3,[$mac,#12] + + ldmia sp!,{r4-r11} + ret @ bx lr +.size poly1305_emit_neon,.-poly1305_emit_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +# ifndef __KERNEL__ +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +#endif +___ +} } +$code.=<<___; +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} +close STDOUT; # enforce flush diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-arm64.pl b/net/wireguard/crypto/zinc/poly1305/poly1305-arm64.pl new file mode 100644 index 000000000000..d513b45a149b --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-arm64.pl @@ -0,0 +1,974 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements Poly1305 hash for ARMv8. +# +# June 2015 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc-4.9 NEON +# +# Apple A7 1.86/+5% 0.72 +# Cortex-A53 2.69/+58% 1.47 +# Cortex-A57 2.70/+7% 1.14 +# Denver 1.64/+50% 1.18(*) +# X-Gene 2.13/+68% 2.27 +# Mongoose 1.77/+75% 1.12 +# Kryo 2.70/+55% 1.13 +# +# (*) estimate based on resources availability is less than 1.0, +# i.e. measured result is worse than expected, presumably binary +# translator is not almighty; + +$flavour=shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3)); +my ($mac,$nonce)=($inp,$len); + +my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +.extern OPENSSL_armcap_P +#else +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +#endif + +.text + +// forward "declarations" are required for Apple +.globl poly1305_blocks +.globl poly1305_emit +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: + cmp $inp,xzr + stp xzr,xzr,[$ctx] // zero hash value + stp xzr,xzr,[$ctx,#16] // [along with is_base2_26] + + csel x0,xzr,x0,eq + b.eq .Lno_key + +#ifndef __KERNEL__ +# ifdef __ILP32__ + ldrsw $t1,.LOPENSSL_armcap_P +# else + ldr $t1,.LOPENSSL_armcap_P +# endif + adr $t0,.LOPENSSL_armcap_P + ldr w17,[$t0,$t1] +#endif + + ldp $r0,$r1,[$inp] // load key + mov $s1,#0xfffffffc0fffffff + movk $s1,#0x0fff,lsl#48 +#ifdef __AARCH64EB__ + rev $r0,$r0 // flip bytes + rev $r1,$r1 +#endif + and $r0,$r0,$s1 // &=0ffffffc0fffffff + and $s1,$s1,#-4 + and $r1,$r1,$s1 // &=0ffffffc0ffffffc + stp $r0,$r1,[$ctx,#32] // save key value + +#ifndef __KERNEL__ + tst w17,#ARMV7_NEON + + adr $d0,poly1305_blocks + adr $r0,poly1305_blocks_neon + adr $d1,poly1305_emit + adr $r1,poly1305_emit_neon + + csel $d0,$d0,$r0,eq + csel $d1,$d1,$r1,eq + +# ifdef __ILP32__ + stp w12,w13,[$len] +# else + stp $d0,$d1,[$len] +# endif + + mov x0,#1 +#else + mov x0,#0 +#endif +.Lno_key: + ret +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: + ands $len,$len,#-16 + b.eq .Lno_data + + ldp $h0,$h1,[$ctx] // load hash value + ldp $r0,$r1,[$ctx,#32] // load key value + ldr $h2,[$ctx,#16] + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + b .Loop + +.align 5 +.Loop: + ldp $t0,$t1,[$inp],#16 // load input + sub $len,$len,#16 +#ifdef __AARCH64EB__ + rev $t0,$t0 + rev $t1,$t1 +#endif + adds $h0,$h0,$t0 // accumulate input + adcs $h1,$h1,$t1 + + mul $d0,$h0,$r0 // h0*r0 + adc $h2,$h2,$padbit + umulh $d1,$h0,$r0 + + mul $t0,$h1,$s1 // h1*5*r1 + umulh $t1,$h1,$s1 + + adds $d0,$d0,$t0 + mul $t0,$h0,$r1 // h0*r1 + adc $d1,$d1,$t1 + umulh $d2,$h0,$r1 + + adds $d1,$d1,$t0 + mul $t0,$h1,$r0 // h1*r0 + adc $d2,$d2,xzr + umulh $t1,$h1,$r0 + + adds $d1,$d1,$t0 + mul $t0,$h2,$s1 // h2*5*r1 + adc $d2,$d2,$t1 + mul $t1,$h2,$r0 // h2*r0 + + adds $d1,$d1,$t0 + adc $d2,$d2,$t1 + + and $t0,$d2,#-4 // final reduction + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$d0,$t0 + adcs $h1,$d1,xzr + adc $h2,$h2,xzr + + cbnz $len,.Loop + + stp $h0,$h1,[$ctx] // store hash value + str $h2,[$ctx,#16] + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,%function +.align 5 +poly1305_emit: + ldp $h0,$h1,[$ctx] // load hash base 2^64 + ldr $h2,[$ctx,#16] + ldp $t0,$t1,[$nonce] // load nonce + + adds $d0,$h0,#5 // compare to modulus + adcs $d1,$h1,xzr + adc $d2,$h2,xzr + + tst $d2,#-4 // see if it's carried/borrowed + + csel $h0,$h0,$d0,eq + csel $h1,$h1,$d1,eq + +#ifdef __AARCH64EB__ + ror $t0,$t0,#32 // flip nonce words + ror $t1,$t1,#32 +#endif + adds $h0,$h0,$t0 // accumulate nonce + adc $h1,$h1,$t1 +#ifdef __AARCH64EB__ + rev $h0,$h0 // flip output bytes + rev $h1,$h1 +#endif + stp $h0,$h1,[$mac] // write result + + ret +.size poly1305_emit,.-poly1305_emit +___ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8)); +my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13)); +my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18)); +my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23)); +my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28)); +my ($T0,$T1,$MASK) = map("v$_",(29..31)); + +my ($in2,$zeros)=("x16","x17"); +my $is_base2_26 = $zeros; # borrow + +$code.=<<___; +.type __poly1305_mult,%function +.align 5 +__poly1305_mult: + mul $d0,$h0,$r0 // h0*r0 + umulh $d1,$h0,$r0 + + mul $t0,$h1,$s1 // h1*5*r1 + umulh $t1,$h1,$s1 + + adds $d0,$d0,$t0 + mul $t0,$h0,$r1 // h0*r1 + adc $d1,$d1,$t1 + umulh $d2,$h0,$r1 + + adds $d1,$d1,$t0 + mul $t0,$h1,$r0 // h1*r0 + adc $d2,$d2,xzr + umulh $t1,$h1,$r0 + + adds $d1,$d1,$t0 + mul $t0,$h2,$s1 // h2*5*r1 + adc $d2,$d2,$t1 + mul $t1,$h2,$r0 // h2*r0 + + adds $d1,$d1,$t0 + adc $d2,$d2,$t1 + + and $t0,$d2,#-4 // final reduction + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$d0,$t0 + adcs $h1,$d1,xzr + adc $h2,$h2,xzr + + ret +.size __poly1305_mult,.-__poly1305_mult + +.type __poly1305_splat,%function +.align 5 +__poly1305_splat: + and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x13,$h0,#26,#26 + extr x14,$h1,$h0,#52 + and x14,x14,#0x03ffffff + ubfx x15,$h1,#14,#26 + extr x16,$h2,$h1,#40 + + str w12,[$ctx,#16*0] // r0 + add w12,w13,w13,lsl#2 // r1*5 + str w13,[$ctx,#16*1] // r1 + add w13,w14,w14,lsl#2 // r2*5 + str w12,[$ctx,#16*2] // s1 + str w14,[$ctx,#16*3] // r2 + add w14,w15,w15,lsl#2 // r3*5 + str w13,[$ctx,#16*4] // s2 + str w15,[$ctx,#16*5] // r3 + add w15,w16,w16,lsl#2 // r4*5 + str w14,[$ctx,#16*6] // s3 + str w16,[$ctx,#16*7] // r4 + str w15,[$ctx,#16*8] // s4 + + ret +.size __poly1305_splat,.-__poly1305_splat + +#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON) +#ifdef __KERNEL__ +.globl poly1305_blocks_neon +.globl poly1305_emit_neon +#endif + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: + ldr $is_base2_26,[$ctx,#24] + cmp $len,#128 + b.hs .Lblocks_neon + cbz $is_base2_26,poly1305_blocks + +.Lblocks_neon: + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + + ands $len,$len,#-16 + b.eq .Lno_data_neon + + cbz $is_base2_26,.Lbase2_64_neon + + ldp w10,w11,[$ctx] // load hash value base 2^26 + ldp w12,w13,[$ctx,#8] + ldr w14,[$ctx,#16] + + tst $len,#31 + b.eq .Leven_neon + + ldp $r0,$r1,[$ctx,#32] // load key value + + add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr $h1,x12,#12 + adds $h0,$h0,x12,lsl#52 + add $h1,$h1,x13,lsl#14 + adc $h1,$h1,xzr + lsr $h2,x14,#24 + adds $h1,$h1,x14,lsl#40 + adc $d2,$h2,xzr // can be partially reduced... + + ldp $d0,$d1,[$inp],#16 // load input + sub $len,$len,#16 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + + and $t0,$d2,#-4 // ... so reduce + and $h2,$d2,#3 + add $t0,$t0,$d2,lsr#2 + adds $h0,$h0,$t0 + adcs $h1,$h1,xzr + adc $h2,$h2,xzr + +#ifdef __AARCH64EB__ + rev $d0,$d0 + rev $d1,$d1 +#endif + adds $h0,$h0,$d0 // accumulate input + adcs $h1,$h1,$d1 + adc $h2,$h2,$padbit + + bl __poly1305_mult + ldr x30,[sp,#8] + + cbz $padbit,.Lstore_base2_64_neon + + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,$h0,#26,#26 + extr x12,$h1,$h0,#52 + and x12,x12,#0x03ffffff + ubfx x13,$h1,#14,#26 + extr x14,$h2,$h1,#40 + + cbnz $len,.Leven_neon + + stp w10,w11,[$ctx] // store hash value base 2^26 + stp w12,w13,[$ctx,#8] + str w14,[$ctx,#16] + b .Lno_data_neon + +.align 4 +.Lstore_base2_64_neon: + stp $h0,$h1,[$ctx] // store hash value base 2^64 + stp $h2,xzr,[$ctx,#16] // note that is_base2_26 is zeroed + b .Lno_data_neon + +.align 4 +.Lbase2_64_neon: + ldp $r0,$r1,[$ctx,#32] // load key value + + ldp $h0,$h1,[$ctx] // load hash value base 2^64 + ldr $h2,[$ctx,#16] + + tst $len,#31 + b.eq .Linit_neon + + ldp $d0,$d1,[$inp],#16 // load input + sub $len,$len,#16 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) +#ifdef __AARCH64EB__ + rev $d0,$d0 + rev $d1,$d1 +#endif + adds $h0,$h0,$d0 // accumulate input + adcs $h1,$h1,$d1 + adc $h2,$h2,$padbit + + bl __poly1305_mult + +.Linit_neon: + and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,$h0,#26,#26 + extr x12,$h1,$h0,#52 + and x12,x12,#0x03ffffff + ubfx x13,$h1,#14,#26 + extr x14,$h2,$h1,#40 + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + fmov ${H0},x10 + fmov ${H1},x11 + fmov ${H2},x12 + fmov ${H3},x13 + fmov ${H4},x14 + + ////////////////////////////////// initialize r^n table + mov $h0,$r0 // r^1 + add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) + mov $h1,$r1 + mov $h2,xzr + add $ctx,$ctx,#48+12 + bl __poly1305_splat + + bl __poly1305_mult // r^2 + sub $ctx,$ctx,#4 + bl __poly1305_splat + + bl __poly1305_mult // r^3 + sub $ctx,$ctx,#4 + bl __poly1305_splat + + bl __poly1305_mult // r^4 + sub $ctx,$ctx,#4 + bl __poly1305_splat + ldr x30,[sp,#8] + + add $in2,$inp,#32 + adr $zeros,.Lzeros + subs $len,$len,#64 + csel $in2,$zeros,$in2,lo + + mov x4,#1 + str x4,[$ctx,#-24] // set is_base2_26 + sub $ctx,$ctx,#48 // restore original $ctx + b .Ldo_neon + +.align 4 +.Leven_neon: + add $in2,$inp,#32 + adr $zeros,.Lzeros + subs $len,$len,#64 + csel $in2,$zeros,$in2,lo + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + fmov ${H0},x10 + fmov ${H1},x11 + fmov ${H2},x12 + fmov ${H3},x13 + fmov ${H4},x14 + +.Ldo_neon: + ldp x8,x12,[$in2],#16 // inp[2:3] (or zero) + ldp x9,x13,[$in2],#48 + + lsl $padbit,$padbit,#24 + add x15,$ctx,#48 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov $IN23_0,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,$padbit,x12,lsr#40 + add x13,$padbit,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov $IN23_1,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + fmov $IN23_2,x8 + fmov $IN23_3,x10 + fmov $IN23_4,x12 + + ldp x8,x12,[$inp],#16 // inp[0:1] + ldp x9,x13,[$inp],#48 + + ld1 {$R0,$R1,$S1,$R2},[x15],#64 + ld1 {$S2,$R3,$S3,$R4},[x15],#64 + ld1 {$S4},[x15] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov $IN01_0,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,$padbit,x12,lsr#40 + add x13,$padbit,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov $IN01_1,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + movi $MASK.2d,#-1 + fmov $IN01_2,x8 + fmov $IN01_3,x10 + fmov $IN01_4,x12 + ushr $MASK.2d,$MASK.2d,#38 + + b.ls .Lskip_loop + +.align 4 +.Loop_neon: + //////////////////////////////////////////////////////////////// + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + // \___________________/ + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + // \___________________/ \____________________/ + // + // Note that we start with inp[2:3]*r^2. This is because it + // doesn't depend on reduction in previous iteration. + //////////////////////////////////////////////////////////////// + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 + + subs $len,$len,#64 + umull $ACC4,$IN23_0,${R4}[2] + csel $in2,$zeros,$in2,lo + umull $ACC3,$IN23_0,${R3}[2] + umull $ACC2,$IN23_0,${R2}[2] + ldp x8,x12,[$in2],#16 // inp[2:3] (or zero) + umull $ACC1,$IN23_0,${R1}[2] + ldp x9,x13,[$in2],#48 + umull $ACC0,$IN23_0,${R0}[2] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + umlal $ACC4,$IN23_1,${R3}[2] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal $ACC3,$IN23_1,${R2}[2] + and x5,x9,#0x03ffffff + umlal $ACC2,$IN23_1,${R1}[2] + ubfx x6,x8,#26,#26 + umlal $ACC1,$IN23_1,${R0}[2] + ubfx x7,x9,#26,#26 + umlal $ACC0,$IN23_1,${S4}[2] + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + + umlal $ACC4,$IN23_2,${R2}[2] + extr x8,x12,x8,#52 + umlal $ACC3,$IN23_2,${R1}[2] + extr x9,x13,x9,#52 + umlal $ACC2,$IN23_2,${R0}[2] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal $ACC1,$IN23_2,${S4}[2] + fmov $IN23_0,x4 + umlal $ACC0,$IN23_2,${S3}[2] + and x8,x8,#0x03ffffff + + umlal $ACC4,$IN23_3,${R1}[2] + and x9,x9,#0x03ffffff + umlal $ACC3,$IN23_3,${R0}[2] + ubfx x10,x12,#14,#26 + umlal $ACC2,$IN23_3,${S4}[2] + ubfx x11,x13,#14,#26 + umlal $ACC1,$IN23_3,${S3}[2] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal $ACC0,$IN23_3,${S2}[2] + fmov $IN23_1,x6 + + add $IN01_2,$IN01_2,$H2 + add x12,$padbit,x12,lsr#40 + umlal $ACC4,$IN23_4,${R0}[2] + add x13,$padbit,x13,lsr#40 + umlal $ACC3,$IN23_4,${S4}[2] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal $ACC2,$IN23_4,${S3}[2] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal $ACC1,$IN23_4,${S2}[2] + fmov $IN23_2,x8 + umlal $ACC0,$IN23_4,${S1}[2] + fmov $IN23_3,x10 + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4 and accumulate + + add $IN01_0,$IN01_0,$H0 + fmov $IN23_4,x12 + umlal $ACC3,$IN01_2,${R1}[0] + ldp x8,x12,[$inp],#16 // inp[0:1] + umlal $ACC0,$IN01_2,${S3}[0] + ldp x9,x13,[$inp],#48 + umlal $ACC4,$IN01_2,${R2}[0] + umlal $ACC1,$IN01_2,${S4}[0] + umlal $ACC2,$IN01_2,${R0}[0] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + add $IN01_1,$IN01_1,$H1 + umlal $ACC3,$IN01_0,${R3}[0] + umlal $ACC4,$IN01_0,${R4}[0] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal $ACC2,$IN01_0,${R2}[0] + and x5,x9,#0x03ffffff + umlal $ACC0,$IN01_0,${R0}[0] + ubfx x6,x8,#26,#26 + umlal $ACC1,$IN01_0,${R1}[0] + ubfx x7,x9,#26,#26 + + add $IN01_3,$IN01_3,$H3 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + umlal $ACC3,$IN01_1,${R2}[0] + extr x8,x12,x8,#52 + umlal $ACC4,$IN01_1,${R3}[0] + extr x9,x13,x9,#52 + umlal $ACC0,$IN01_1,${S4}[0] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal $ACC2,$IN01_1,${R1}[0] + fmov $IN01_0,x4 + umlal $ACC1,$IN01_1,${R0}[0] + and x8,x8,#0x03ffffff + + add $IN01_4,$IN01_4,$H4 + and x9,x9,#0x03ffffff + umlal $ACC3,$IN01_3,${R0}[0] + ubfx x10,x12,#14,#26 + umlal $ACC0,$IN01_3,${S2}[0] + ubfx x11,x13,#14,#26 + umlal $ACC4,$IN01_3,${R1}[0] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal $ACC1,$IN01_3,${S3}[0] + fmov $IN01_1,x6 + umlal $ACC2,$IN01_3,${S4}[0] + add x12,$padbit,x12,lsr#40 + + umlal $ACC3,$IN01_4,${S4}[0] + add x13,$padbit,x13,lsr#40 + umlal $ACC0,$IN01_4,${S1}[0] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal $ACC4,$IN01_4,${R0}[0] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal $ACC1,$IN01_4,${S2}[0] + fmov $IN01_2,x8 + umlal $ACC2,$IN01_4,${S3}[0] + fmov $IN01_3,x10 + fmov $IN01_4,x12 + + ///////////////////////////////////////////////////////////////// + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + // and P. Schwabe + // + // [see discussion in poly1305-armv4 module] + + ushr $T0.2d,$ACC3,#26 + xtn $H3,$ACC3 + ushr $T1.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + add $ACC4,$ACC4,$T0.2d // h3 -> h4 + bic $H3,#0xfc,lsl#24 // &=0x03ffffff + add $ACC1,$ACC1,$T1.2d // h0 -> h1 + + ushr $T0.2d,$ACC4,#26 + xtn $H4,$ACC4 + ushr $T1.2d,$ACC1,#26 + xtn $H1,$ACC1 + bic $H4,#0xfc,lsl#24 + add $ACC2,$ACC2,$T1.2d // h1 -> h2 + + add $ACC0,$ACC0,$T0.2d + shl $T0.2d,$T0.2d,#2 + shrn $T1.2s,$ACC2,#26 + xtn $H2,$ACC2 + add $ACC0,$ACC0,$T0.2d // h4 -> h0 + bic $H1,#0xfc,lsl#24 + add $H3,$H3,$T1.2s // h2 -> h3 + bic $H2,#0xfc,lsl#24 + + shrn $T0.2s,$ACC0,#26 + xtn $H0,$ACC0 + ushr $T1.2s,$H3,#26 + bic $H3,#0xfc,lsl#24 + bic $H0,#0xfc,lsl#24 + add $H1,$H1,$T0.2s // h0 -> h1 + add $H4,$H4,$T1.2s // h3 -> h4 + + b.hi .Loop_neon + +.Lskip_loop: + dup $IN23_2,${IN23_2}[0] + add $IN01_2,$IN01_2,$H2 + + //////////////////////////////////////////////////////////////// + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + adds $len,$len,#32 + b.ne .Long_tail + + dup $IN23_2,${IN01_2}[0] + add $IN23_0,$IN01_0,$H0 + add $IN23_3,$IN01_3,$H3 + add $IN23_1,$IN01_1,$H1 + add $IN23_4,$IN01_4,$H4 + +.Long_tail: + dup $IN23_0,${IN23_0}[0] + umull2 $ACC0,$IN23_2,${S3} + umull2 $ACC3,$IN23_2,${R1} + umull2 $ACC4,$IN23_2,${R2} + umull2 $ACC2,$IN23_2,${R0} + umull2 $ACC1,$IN23_2,${S4} + + dup $IN23_1,${IN23_1}[0] + umlal2 $ACC0,$IN23_0,${R0} + umlal2 $ACC2,$IN23_0,${R2} + umlal2 $ACC3,$IN23_0,${R3} + umlal2 $ACC4,$IN23_0,${R4} + umlal2 $ACC1,$IN23_0,${R1} + + dup $IN23_3,${IN23_3}[0] + umlal2 $ACC0,$IN23_1,${S4} + umlal2 $ACC3,$IN23_1,${R2} + umlal2 $ACC2,$IN23_1,${R1} + umlal2 $ACC4,$IN23_1,${R3} + umlal2 $ACC1,$IN23_1,${R0} + + dup $IN23_4,${IN23_4}[0] + umlal2 $ACC3,$IN23_3,${R0} + umlal2 $ACC4,$IN23_3,${R1} + umlal2 $ACC0,$IN23_3,${S2} + umlal2 $ACC1,$IN23_3,${S3} + umlal2 $ACC2,$IN23_3,${S4} + + umlal2 $ACC3,$IN23_4,${S4} + umlal2 $ACC0,$IN23_4,${S1} + umlal2 $ACC4,$IN23_4,${R0} + umlal2 $ACC1,$IN23_4,${S2} + umlal2 $ACC2,$IN23_4,${S3} + + b.eq .Lshort_tail + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4:r^3 and accumulate + + add $IN01_0,$IN01_0,$H0 + umlal $ACC3,$IN01_2,${R1} + umlal $ACC0,$IN01_2,${S3} + umlal $ACC4,$IN01_2,${R2} + umlal $ACC1,$IN01_2,${S4} + umlal $ACC2,$IN01_2,${R0} + + add $IN01_1,$IN01_1,$H1 + umlal $ACC3,$IN01_0,${R3} + umlal $ACC0,$IN01_0,${R0} + umlal $ACC4,$IN01_0,${R4} + umlal $ACC1,$IN01_0,${R1} + umlal $ACC2,$IN01_0,${R2} + + add $IN01_3,$IN01_3,$H3 + umlal $ACC3,$IN01_1,${R2} + umlal $ACC0,$IN01_1,${S4} + umlal $ACC4,$IN01_1,${R3} + umlal $ACC1,$IN01_1,${R0} + umlal $ACC2,$IN01_1,${R1} + + add $IN01_4,$IN01_4,$H4 + umlal $ACC3,$IN01_3,${R0} + umlal $ACC0,$IN01_3,${S2} + umlal $ACC4,$IN01_3,${R1} + umlal $ACC1,$IN01_3,${S3} + umlal $ACC2,$IN01_3,${S4} + + umlal $ACC3,$IN01_4,${S4} + umlal $ACC0,$IN01_4,${S1} + umlal $ACC4,$IN01_4,${R0} + umlal $ACC1,$IN01_4,${S2} + umlal $ACC2,$IN01_4,${S3} + +.Lshort_tail: + //////////////////////////////////////////////////////////////// + // horizontal add + + addp $ACC3,$ACC3,$ACC3 + ldp d8,d9,[sp,#16] // meet ABI requirements + addp $ACC0,$ACC0,$ACC0 + ldp d10,d11,[sp,#32] + addp $ACC4,$ACC4,$ACC4 + ldp d12,d13,[sp,#48] + addp $ACC1,$ACC1,$ACC1 + ldp d14,d15,[sp,#64] + addp $ACC2,$ACC2,$ACC2 + + //////////////////////////////////////////////////////////////// + // lazy reduction, but without narrowing + + ushr $T0.2d,$ACC3,#26 + and $ACC3,$ACC3,$MASK.2d + ushr $T1.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + + add $ACC4,$ACC4,$T0.2d // h3 -> h4 + add $ACC1,$ACC1,$T1.2d // h0 -> h1 + + ushr $T0.2d,$ACC4,#26 + and $ACC4,$ACC4,$MASK.2d + ushr $T1.2d,$ACC1,#26 + and $ACC1,$ACC1,$MASK.2d + add $ACC2,$ACC2,$T1.2d // h1 -> h2 + + add $ACC0,$ACC0,$T0.2d + shl $T0.2d,$T0.2d,#2 + ushr $T1.2d,$ACC2,#26 + and $ACC2,$ACC2,$MASK.2d + add $ACC0,$ACC0,$T0.2d // h4 -> h0 + add $ACC3,$ACC3,$T1.2d // h2 -> h3 + + ushr $T0.2d,$ACC0,#26 + and $ACC0,$ACC0,$MASK.2d + ushr $T1.2d,$ACC3,#26 + and $ACC3,$ACC3,$MASK.2d + add $ACC1,$ACC1,$T0.2d // h0 -> h1 + add $ACC4,$ACC4,$T1.2d // h3 -> h4 + + //////////////////////////////////////////////////////////////// + // write the result, can be partially reduced + + st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16 + st1 {$ACC4}[0],[$ctx] + +.Lno_data_neon: + ldr x29,[sp],#80 + ret +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.type poly1305_emit_neon,%function +.align 5 +poly1305_emit_neon: + ldr $is_base2_26,[$ctx,#24] + cbz $is_base2_26,poly1305_emit + + ldp w10,w11,[$ctx] // load hash value base 2^26 + ldp w12,w13,[$ctx,#8] + ldr w14,[$ctx,#16] + + add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr $h1,x12,#12 + adds $h0,$h0,x12,lsl#52 + add $h1,$h1,x13,lsl#14 + adc $h1,$h1,xzr + lsr $h2,x14,#24 + adds $h1,$h1,x14,lsl#40 + adc $h2,$h2,xzr // can be partially reduced... + + ldp $t0,$t1,[$nonce] // load nonce + + and $d0,$h2,#-4 // ... so reduce + add $d0,$d0,$h2,lsr#2 + and $h2,$h2,#3 + adds $h0,$h0,$d0 + adcs $h1,$h1,xzr + adc $h2,$h2,xzr + + adds $d0,$h0,#5 // compare to modulus + adcs $d1,$h1,xzr + adc $d2,$h2,xzr + + tst $d2,#-4 // see if it's carried/borrowed + + csel $h0,$h0,$d0,eq + csel $h1,$h1,$d1,eq + +#ifdef __AARCH64EB__ + ror $t0,$t0,#32 // flip nonce words + ror $t1,$t1,#32 +#endif + adds $h0,$h0,$t0 // accumulate nonce + adc $h1,$h1,$t1 +#ifdef __AARCH64EB__ + rev $h0,$h0 // flip output bytes + rev $h1,$h1 +#endif + stp $h0,$h1,[$mac] // write result + + ret +.size poly1305_emit_neon,.-poly1305_emit_neon +#endif + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap_P: +#ifdef __ILP32__ +.long OPENSSL_armcap_P-. +#else +.quad OPENSSL_armcap_P-. +#endif +#endif +.align 2 +___ + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach (split("\n",$code)) { + s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or + s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or + (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or + (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or + (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or + (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or + (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1)); + + s/\.[124]([sd])\[/.$1\[/; + + print $_,"\n"; +} +close STDOUT; diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-donna32.c b/net/wireguard/crypto/zinc/poly1305/poly1305-donna32.c new file mode 100644 index 000000000000..527ccc3b59cc --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-donna32.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is based in part on Andrew Moon's poly1305-donna, which is in the + * public domain. + */ + +struct poly1305_internal { + u32 h[5]; + u32 r[5]; + u32 s[4]; +}; + +static void poly1305_init_generic(void *ctx, const u8 key[16]) +{ + struct poly1305_internal *st = (struct poly1305_internal *)ctx; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff; + st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03; + st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff; + st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff; + st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff; + + /* s = 5*r */ + st->s[0] = st->r[1] * 5; + st->s[1] = st->r[2] * 5; + st->s[2] = st->r[3] * 5; + st->s[3] = st->r[4] * 5; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; +} + +static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len, + const u32 padbit) +{ + struct poly1305_internal *st = (struct poly1305_internal *)ctx; + const u32 hibit = padbit << 24; + u32 r0, r1, r2, r3, r4; + u32 s1, s2, s3, s4; + u32 h0, h1, h2, h3, h4; + u64 d0, d1, d2, d3, d4; + u32 c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = st->s[0]; + s2 = st->s[1]; + s3 = st->s[2]; + s4 = st->s[3]; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (len >= POLY1305_BLOCK_SIZE) { + /* h += m[i] */ + h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff; + h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff; + h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff; + h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff; + h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit; + + /* h *= r */ + d0 = ((u64)h0 * r0) + ((u64)h1 * s4) + + ((u64)h2 * s3) + ((u64)h3 * s2) + + ((u64)h4 * s1); + d1 = ((u64)h0 * r1) + ((u64)h1 * r0) + + ((u64)h2 * s4) + ((u64)h3 * s3) + + ((u64)h4 * s2); + d2 = ((u64)h0 * r2) + ((u64)h1 * r1) + + ((u64)h2 * r0) + ((u64)h3 * s4) + + ((u64)h4 * s3); + d3 = ((u64)h0 * r3) + ((u64)h1 * r2) + + ((u64)h2 * r1) + ((u64)h3 * r0) + + ((u64)h4 * s4); + d4 = ((u64)h0 * r4) + ((u64)h1 * r3) + + ((u64)h2 * r2) + ((u64)h3 * r1) + + ((u64)h4 * r0); + + /* (partial) h %= p */ + c = (u32)(d0 >> 26); + h0 = (u32)d0 & 0x3ffffff; + d1 += c; + c = (u32)(d1 >> 26); + h1 = (u32)d1 & 0x3ffffff; + d2 += c; + c = (u32)(d2 >> 26); + h2 = (u32)d2 & 0x3ffffff; + d3 += c; + c = (u32)(d3 >> 26); + h3 = (u32)d3 & 0x3ffffff; + d4 += c; + c = (u32)(d4 >> 26); + h4 = (u32)d4 & 0x3ffffff; + h0 += c * 5; + c = (h0 >> 26); + h0 = h0 & 0x3ffffff; + h1 += c; + + input += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} + +static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4]) +{ + struct poly1305_internal *st = (struct poly1305_internal *)ctx; + u32 h0, h1, h2, h3, h4, c; + u32 g0, g1, g2, g3, g4; + u64 f; + u32 mask; + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; + h1 = h1 & 0x3ffffff; + h2 += c; + c = h2 >> 26; + h2 = h2 & 0x3ffffff; + h3 += c; + c = h3 >> 26; + h3 = h3 & 0x3ffffff; + h4 += c; + c = h4 >> 26; + h4 = h4 & 0x3ffffff; + h0 += c * 5; + c = h0 >> 26; + h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = g0 >> 26; + g0 &= 0x3ffffff; + g1 = h1 + c; + c = g1 >> 26; + g1 &= 0x3ffffff; + g2 = h2 + c; + c = g2 >> 26; + g2 &= 0x3ffffff; + g3 = h3 + c; + c = g3 >> 26; + g3 &= 0x3ffffff; + g4 = h4 + c - (1UL << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + nonce) % (2^128) */ + f = (u64)h0 + nonce[0]; + h0 = (u32)f; + f = (u64)h1 + nonce[1] + (f >> 32); + h1 = (u32)f; + f = (u64)h2 + nonce[2] + (f >> 32); + h2 = (u32)f; + f = (u64)h3 + nonce[3] + (f >> 32); + h3 = (u32)f; + + put_unaligned_le32(h0, &mac[0]); + put_unaligned_le32(h1, &mac[4]); + put_unaligned_le32(h2, &mac[8]); + put_unaligned_le32(h3, &mac[12]); +} diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-donna64.c b/net/wireguard/crypto/zinc/poly1305/poly1305-donna64.c new file mode 100644 index 000000000000..131f1dda1b1d --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-donna64.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This is based in part on Andrew Moon's poly1305-donna, which is in the + * public domain. + */ + +typedef __uint128_t u128; + +struct poly1305_internal { + u64 r[3]; + u64 h[3]; + u64 s[2]; +}; + +static void poly1305_init_generic(void *ctx, const u8 key[16]) +{ + struct poly1305_internal *st = (struct poly1305_internal *)ctx; + u64 t0, t1; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + t0 = get_unaligned_le64(&key[0]); + t1 = get_unaligned_le64(&key[8]); + + st->r[0] = t0 & 0xffc0fffffffULL; + st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL; + st->r[2] = ((t1 >> 24)) & 0x00ffffffc0fULL; + + /* s = 20*r */ + st->s[0] = st->r[1] * 20; + st->s[1] = st->r[2] * 20; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; +} + +static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len, + const u32 padbit) +{ + struct poly1305_internal *st = (struct poly1305_internal *)ctx; + const u64 hibit = ((u64)padbit) << 40; + u64 r0, r1, r2; + u64 s1, s2; + u64 h0, h1, h2; + u64 c; + u128 d0, d1, d2, d; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + + s1 = st->s[0]; + s2 = st->s[1]; + + while (len >= POLY1305_BLOCK_SIZE) { + u64 t0, t1; + + /* h += m[i] */ + t0 = get_unaligned_le64(&input[0]); + t1 = get_unaligned_le64(&input[8]); + + h0 += t0 & 0xfffffffffffULL; + h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL; + h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit; + + /* h *= r */ + d0 = (u128)h0 * r0; + d = (u128)h1 * s2; + d0 += d; + d = (u128)h2 * s1; + d0 += d; + d1 = (u128)h0 * r1; + d = (u128)h1 * r0; + d1 += d; + d = (u128)h2 * s2; + d1 += d; + d2 = (u128)h0 * r2; + d = (u128)h1 * r1; + d2 += d; + d = (u128)h2 * r0; + d2 += d; + + /* (partial) h %= p */ + c = (u64)(d0 >> 44); + h0 = (u64)d0 & 0xfffffffffffULL; + d1 += c; + c = (u64)(d1 >> 44); + h1 = (u64)d1 & 0xfffffffffffULL; + d2 += c; + c = (u64)(d2 >> 42); + h2 = (u64)d2 & 0x3ffffffffffULL; + h0 += c * 5; + c = h0 >> 44; + h0 = h0 & 0xfffffffffffULL; + h1 += c; + + input += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; +} + +static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4]) +{ + struct poly1305_internal *st = (struct poly1305_internal *)ctx; + u64 h0, h1, h2, c; + u64 g0, g1, g2; + u64 t0, t1; + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + + c = h1 >> 44; + h1 &= 0xfffffffffffULL; + h2 += c; + c = h2 >> 42; + h2 &= 0x3ffffffffffULL; + h0 += c * 5; + c = h0 >> 44; + h0 &= 0xfffffffffffULL; + h1 += c; + c = h1 >> 44; + h1 &= 0xfffffffffffULL; + h2 += c; + c = h2 >> 42; + h2 &= 0x3ffffffffffULL; + h0 += c * 5; + c = h0 >> 44; + h0 &= 0xfffffffffffULL; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = g0 >> 44; + g0 &= 0xfffffffffffULL; + g1 = h1 + c; + c = g1 >> 44; + g1 &= 0xfffffffffffULL; + g2 = h2 + c - (1ULL << 42); + + /* select h if h < p, or h + -p if h >= p */ + c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1; + g0 &= c; + g1 &= c; + g2 &= c; + c = ~c; + h0 = (h0 & c) | g0; + h1 = (h1 & c) | g1; + h2 = (h2 & c) | g2; + + /* h = (h + nonce) */ + t0 = ((u64)nonce[1] << 32) | nonce[0]; + t1 = ((u64)nonce[3] << 32) | nonce[2]; + + h0 += t0 & 0xfffffffffffULL; + c = h0 >> 44; + h0 &= 0xfffffffffffULL; + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c; + c = h1 >> 44; + h1 &= 0xfffffffffffULL; + h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c; + h2 &= 0x3ffffffffffULL; + + /* mac = h % (2^128) */ + h0 = h0 | (h1 << 44); + h1 = (h1 >> 20) | (h2 << 24); + + put_unaligned_le64(h0, &mac[0]); + put_unaligned_le64(h1, &mac[8]); +} diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-mips-glue.c b/net/wireguard/crypto/zinc/poly1305/poly1305-mips-glue.c new file mode 100644 index 000000000000..a540e9c4eee8 --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-mips-glue.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]); +asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]); + +static bool *const poly1305_nobs[] __initconst = { }; +static void __init poly1305_fpu_init(void) +{ +} + +static inline bool poly1305_init_arch(void *ctx, + const u8 key[POLY1305_KEY_SIZE]) +{ + poly1305_init_mips(ctx, key); + return true; +} + +static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp, + size_t len, const u32 padbit, + simd_context_t *simd_context) +{ + poly1305_blocks_mips(ctx, inp, len, padbit); + return true; +} + +static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4], + simd_context_t *simd_context) +{ + poly1305_emit_mips(ctx, mac, nonce); + return true; +} diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-mips.S b/net/wireguard/crypto/zinc/poly1305/poly1305-mips.S new file mode 100644 index 000000000000..4291c156815b --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-mips.S @@ -0,0 +1,407 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2016-2018 René van Dorst All Rights Reserved. + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MSB 0 +#define LSB 3 +#else +#define MSB 3 +#define LSB 0 +#endif + +#define POLY1305_BLOCK_SIZE 16 +.text +#define H0 $t0 +#define H1 $t1 +#define H2 $t2 +#define H3 $t3 +#define H4 $t4 + +#define R0 $t5 +#define R1 $t6 +#define R2 $t7 +#define R3 $t8 + +#define O0 $s0 +#define O1 $s4 +#define O2 $v1 +#define O3 $t9 +#define O4 $s5 + +#define S1 $s1 +#define S2 $s2 +#define S3 $s3 + +#define SC $at +#define CA $v0 + +/* Input arguments */ +#define poly $a0 +#define src $a1 +#define srclen $a2 +#define hibit $a3 + +/* Location in the opaque buffer + * R[0..3], CA, H[0..4] + */ +#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0) +#define PTR_POLY1305_CA (16 ) ## ($a0) +#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0) + +#define POLY1305_BLOCK_SIZE 16 +#define POLY1305_STACK_SIZE 32 + +.set noat +.align 4 +.globl poly1305_blocks_mips +.ent poly1305_blocks_mips +poly1305_blocks_mips: + .frame $sp, POLY1305_STACK_SIZE, $ra + /* srclen &= 0xFFFFFFF0 */ + ins srclen, $zero, 0, 4 + + addiu $sp, -(POLY1305_STACK_SIZE) + + /* check srclen >= 16 bytes */ + beqz srclen, .Lpoly1305_blocks_mips_end + + /* Calculate last round based on src address pointer. + * last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0) + */ + addu srclen, src + + lw R0, PTR_POLY1305_R(0) + lw R1, PTR_POLY1305_R(1) + lw R2, PTR_POLY1305_R(2) + lw R3, PTR_POLY1305_R(3) + + /* store the used save registers. */ + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + + /* load Hx and Carry */ + lw CA, PTR_POLY1305_CA + lw H0, PTR_POLY1305_H(0) + lw H1, PTR_POLY1305_H(1) + lw H2, PTR_POLY1305_H(2) + lw H3, PTR_POLY1305_H(3) + lw H4, PTR_POLY1305_H(4) + + /* Sx = Rx + (Rx >> 2) */ + srl S1, R1, 2 + srl S2, R2, 2 + srl S3, R3, 2 + addu S1, R1 + addu S2, R2 + addu S3, R3 + + addiu SC, $zero, 1 + +.Lpoly1305_loop: + lwl O0, 0+MSB(src) + lwl O1, 4+MSB(src) + lwl O2, 8+MSB(src) + lwl O3,12+MSB(src) + lwr O0, 0+LSB(src) + lwr O1, 4+LSB(src) + lwr O2, 8+LSB(src) + lwr O3,12+LSB(src) + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + wsbh O0 + wsbh O1 + wsbh O2 + wsbh O3 + rotr O0, 16 + rotr O1, 16 + rotr O2, 16 + rotr O3, 16 +#endif + + /* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */ + addu H0, CA + sltu CA, H0, CA + addu O0, H0 + sltu H0, O0, H0 + addu CA, H0 + + /* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */ + addu H1, CA + sltu CA, H1, CA + addu O1, H1 + sltu H1, O1, H1 + addu CA, H1 + + /* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */ + addu H2, CA + sltu CA, H2, CA + addu O2, H2 + sltu H2, O2, H2 + addu CA, H2 + + /* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */ + addu H3, CA + sltu CA, H3, CA + addu O3, H3 + sltu H3, O3, H3 + addu CA, H3 + + /* h4 += (u32)(d3 >> 32) + padbit; */ + addu H4, hibit + addu O4, H4, CA + + /* D0 */ + multu O0, R0 + maddu O1, S3 + maddu O2, S2 + maddu O3, S1 + mfhi CA + mflo H0 + + /* D1 */ + multu O0, R1 + maddu O1, R0 + maddu O2, S3 + maddu O3, S2 + maddu O4, S1 + maddu CA, SC + mfhi CA + mflo H1 + + /* D2 */ + multu O0, R2 + maddu O1, R1 + maddu O2, R0 + maddu O3, S3 + maddu O4, S2 + maddu CA, SC + mfhi CA + mflo H2 + + /* D4 */ + mul H4, O4, R0 + + /* D3 */ + multu O0, R3 + maddu O1, R2 + maddu O2, R1 + maddu O3, R0 + maddu O4, S3 + maddu CA, SC + mfhi CA + mflo H3 + + addiu src, POLY1305_BLOCK_SIZE + + /* h4 += (u32)(d3 >> 32); */ + addu O4, H4, CA + /* h4 &= 3 */ + andi H4, O4, 3 + /* c = (h4 >> 2) + (h4 & ~3U); */ + srl CA, O4, 2 + ins O4, $zero, 0, 2 + + addu CA, O4 + + /* able to do a 16 byte block. */ + bne src, srclen, .Lpoly1305_loop + + /* restore the used save registers. */ + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + + /* store Hx and Carry */ + sw CA, PTR_POLY1305_CA + sw H0, PTR_POLY1305_H(0) + sw H1, PTR_POLY1305_H(1) + sw H2, PTR_POLY1305_H(2) + sw H3, PTR_POLY1305_H(3) + sw H4, PTR_POLY1305_H(4) + +.Lpoly1305_blocks_mips_end: + addiu $sp, POLY1305_STACK_SIZE + + /* Jump Back */ + jr $ra +.end poly1305_blocks_mips +.set at + +/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */ +#define MAC $a1 +#define NONCE $a2 + +#define G0 $t5 +#define G1 $t6 +#define G2 $t7 +#define G3 $t8 +#define G4 $t9 + +.set noat +.align 4 +.globl poly1305_emit_mips +.ent poly1305_emit_mips +poly1305_emit_mips: + /* load Hx and Carry */ + lw CA, PTR_POLY1305_CA + lw H0, PTR_POLY1305_H(0) + lw H1, PTR_POLY1305_H(1) + lw H2, PTR_POLY1305_H(2) + lw H3, PTR_POLY1305_H(3) + lw H4, PTR_POLY1305_H(4) + + /* Add left over carry */ + addu H0, CA + sltu CA, H0, CA + addu H1, CA + sltu CA, H1, CA + addu H2, CA + sltu CA, H2, CA + addu H3, CA + sltu CA, H3, CA + addu H4, CA + + /* compare to modulus by computing h + -p */ + addiu G0, H0, 5 + sltu CA, G0, H0 + addu G1, H1, CA + sltu CA, G1, H1 + addu G2, H2, CA + sltu CA, G2, H2 + addu G3, H3, CA + sltu CA, G3, H3 + addu G4, H4, CA + + srl SC, G4, 2 + + /* if there was carry into 131st bit, h3:h0 = g3:g0 */ + movn H0, G0, SC + movn H1, G1, SC + movn H2, G2, SC + movn H3, G3, SC + + lwl G0, 0+MSB(NONCE) + lwl G1, 4+MSB(NONCE) + lwl G2, 8+MSB(NONCE) + lwl G3,12+MSB(NONCE) + lwr G0, 0+LSB(NONCE) + lwr G1, 4+LSB(NONCE) + lwr G2, 8+LSB(NONCE) + lwr G3,12+LSB(NONCE) + + /* mac = (h + nonce) % (2^128) */ + addu H0, G0 + sltu CA, H0, G0 + + /* H1 */ + addu H1, CA + sltu CA, H1, CA + addu H1, G1 + sltu G1, H1, G1 + addu CA, G1 + + /* H2 */ + addu H2, CA + sltu CA, H2, CA + addu H2, G2 + sltu G2, H2, G2 + addu CA, G2 + + /* H3 */ + addu H3, CA + addu H3, G3 + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + wsbh H0 + wsbh H1 + wsbh H2 + wsbh H3 + rotr H0, 16 + rotr H1, 16 + rotr H2, 16 + rotr H3, 16 +#endif + + /* store MAC */ + swl H0, 0+MSB(MAC) + swl H1, 4+MSB(MAC) + swl H2, 8+MSB(MAC) + swl H3,12+MSB(MAC) + swr H0, 0+LSB(MAC) + swr H1, 4+LSB(MAC) + swr H2, 8+LSB(MAC) + swr H3,12+LSB(MAC) + + jr $ra +.end poly1305_emit_mips + +#define PR0 $t0 +#define PR1 $t1 +#define PR2 $t2 +#define PR3 $t3 +#define PT0 $t4 + +/* Input arguments CTX=$a0, KEY=$a1 */ + +.align 4 +.globl poly1305_init_mips +.ent poly1305_init_mips +poly1305_init_mips: + lwl PR0, 0+MSB($a1) + lwl PR1, 4+MSB($a1) + lwl PR2, 8+MSB($a1) + lwl PR3,12+MSB($a1) + lwr PR0, 0+LSB($a1) + lwr PR1, 4+LSB($a1) + lwr PR2, 8+LSB($a1) + lwr PR3,12+LSB($a1) + + /* store Hx and Carry */ + sw $zero, PTR_POLY1305_CA + sw $zero, PTR_POLY1305_H(0) + sw $zero, PTR_POLY1305_H(1) + sw $zero, PTR_POLY1305_H(2) + sw $zero, PTR_POLY1305_H(3) + sw $zero, PTR_POLY1305_H(4) + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + wsbh PR0 + wsbh PR1 + wsbh PR2 + wsbh PR3 + rotr PR0, 16 + rotr PR1, 16 + rotr PR2, 16 + rotr PR3, 16 +#endif + + lui PT0, 0x0FFF + ori PT0, 0xFFFC + + /* AND 0x0fffffff; */ + ext PR0, PR0, 0, (32-4) + + /* AND 0x0ffffffc; */ + and PR1, PT0 + and PR2, PT0 + and PR3, PT0 + + /* store Rx */ + sw PR0, PTR_POLY1305_R(0) + sw PR1, PTR_POLY1305_R(1) + sw PR2, PTR_POLY1305_R(2) + sw PR3, PTR_POLY1305_R(3) + + /* Jump Back */ + jr $ra +.end poly1305_init_mips diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-mips64.pl b/net/wireguard/crypto/zinc/poly1305/poly1305-mips64.pl new file mode 100644 index 000000000000..d30a03d79177 --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-mips64.pl @@ -0,0 +1,467 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# Poly1305 hash for MIPS64. +# +# May 2016 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc +# R1x000 5.64/+120% (big-endian) +# Octeon II 3.80/+280% (little-endian) + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp [o32 can be +# excluded from the rule, because it's specified volatile]; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +# +# +###################################################################### + +$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 + +die "MIPS64 only" unless ($flavour =~ /64|n32/i); + +$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; + +($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ + defined(_MIPS_ARCH_MIPS64R6)) \\ + && !defined(_MIPS_ARCH_MIPS64R2) +# define _MIPS_ARCH_MIPS64R2 +#endif + +#if defined(_MIPS_ARCH_MIPS64R6) +# define dmultu(rs,rt) +# define mflo(rd,rs,rt) dmulu rd,rs,rt +# define mfhi(rd,rs,rt) dmuhu rd,rs,rt +#else +# define dmultu(rs,rt) dmultu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 7 +#else +# define MSB 7 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS64R6) + ld $in0,0($inp) + ld $in1,8($inp) +#else + ldl $in0,0+MSB($inp) + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + li $tmp0,1 + dsll $tmp0,32 + daddiu $tmp0,-63 + dsll $tmp0,28 + daddiu $tmp0,-1 # 0ffffffc0fffffff + + and $in0,$tmp0 + daddiu $tmp0,-3 # 0ffffffc0ffffffc + and $in1,$tmp0 + + sd $in0,24($ctx) + dsrl $tmp0,$in1,2 + sd $in1,32($ctx) + daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $v0,0 # return 0 + jr $ra +.end poly1305_init +___ +{ +my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) = + ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); + +$code.=<<___; +.align 5 +.globl poly1305_blocks +.ent poly1305_blocks +poly1305_blocks: + .set noreorder + dsrl $len,4 # number of complete blocks + bnez $len,poly1305_blocks_internal + nop + jr $ra + nop +.end poly1305_blocks + +.align 5 +.ent poly1305_blocks_internal +poly1305_blocks_internal: + .frame $sp,6*8,$ra + .mask $SAVED_REGS_MASK,-8 + .set noreorder + dsubu $sp,6*8 + sd $s5,40($sp) + sd $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sd $s3,24($sp) + sd $s2,16($sp) + sd $s1,8($sp) + sd $s0,0($sp) +___ +$code.=<<___; + .set reorder + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $s1,40($ctx) + +.Loop: +#if defined(_MIPS_ARCH_MIPS64R6) + ld $in0,0($inp) # load input + ld $in1,8($inp) +#else + ldl $in0,0+MSB($inp) # load input + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif + daddiu $len,-1 + daddiu $inp,16 +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + daddu $h0,$in0 # accumulate input + daddu $h1,$in1 + sltu $tmp0,$h0,$in0 + sltu $tmp1,$h1,$in1 + daddu $h1,$tmp0 + + dmultu ($r0,$h0) # h0*r0 + daddu $h2,$padbit + sltu $tmp0,$h1,$tmp0 + mflo ($d0,$r0,$h0) + mfhi ($d1,$r0,$h0) + + dmultu ($s1,$h1) # h1*5*r1 + daddu $tmp0,$tmp1 + daddu $h2,$tmp0 + mflo ($tmp0,$s1,$h1) + mfhi ($tmp1,$s1,$h1) + + dmultu ($r1,$h0) # h0*r1 + daddu $d0,$tmp0 + daddu $d1,$tmp1 + mflo ($tmp2,$r1,$h0) + mfhi ($d2,$r1,$h0) + sltu $tmp0,$d0,$tmp0 + daddu $d1,$tmp0 + + dmultu ($r0,$h1) # h1*r0 + daddu $d1,$tmp2 + sltu $tmp2,$d1,$tmp2 + mflo ($tmp0,$r0,$h1) + mfhi ($tmp1,$r0,$h1) + daddu $d2,$tmp2 + + dmultu ($s1,$h2) # h2*5*r1 + daddu $d1,$tmp0 + daddu $d2,$tmp1 + mflo ($tmp2,$s1,$h2) + + dmultu ($r0,$h2) # h2*r0 + sltu $tmp0,$d1,$tmp0 + daddu $d2,$tmp0 + mflo ($tmp3,$r0,$h2) + + daddu $d1,$tmp2 + daddu $d2,$tmp3 + sltu $tmp2,$d1,$tmp2 + daddu $d2,$tmp2 + + li $tmp0,-4 # final reduction + and $tmp0,$d2 + dsrl $tmp1,$d2,2 + andi $h2,$d2,3 + daddu $tmp0,$tmp1 + daddu $h0,$d0,$tmp0 + sltu $tmp0,$h0,$tmp0 + daddu $h1,$d1,$tmp0 + sltu $tmp0,$h1,$tmp0 + daddu $h2,$h2,$tmp0 + + bnez $len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + .set noreorder + ld $s5,40($sp) # epilogue + ld $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue + ld $s3,24($sp) + ld $s2,16($sp) + ld $s1,8($sp) + ld $s0,0($sp) +___ +$code.=<<___; + jr $ra + daddu $sp,6*8 +.end poly1305_blocks_internal +___ +} +{ +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + ld $tmp2,16($ctx) + + daddiu $in0,$tmp0,5 # compare to modulus + sltiu $tmp3,$in0,5 + daddu $in1,$tmp1,$tmp3 + sltu $tmp3,$in1,$tmp3 + daddu $tmp2,$tmp2,$tmp3 + + dsrl $tmp2,2 # see if it carried/borrowed + dsubu $tmp2,$zero,$tmp2 + nor $tmp3,$zero,$tmp2 + + and $in0,$tmp2 + and $tmp0,$tmp3 + and $in1,$tmp2 + and $tmp1,$tmp3 + or $in0,$tmp0 + or $in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + dsll $tmp1,32 + dsll $tmp3,32 + or $tmp0,$tmp1 + or $tmp2,$tmp3 + + daddu $in0,$tmp0 # accumulate nonce + daddu $in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + daddu $in1,$tmp0 + + dsrl $tmp0,$in0,8 # write mac value + dsrl $tmp1,$in0,16 + dsrl $tmp2,$in0,24 + sb $in0,0($mac) + dsrl $tmp3,$in0,32 + sb $tmp0,1($mac) + dsrl $tmp0,$in0,40 + sb $tmp1,2($mac) + dsrl $tmp1,$in0,48 + sb $tmp2,3($mac) + dsrl $tmp2,$in0,56 + sb $tmp3,4($mac) + dsrl $tmp3,$in1,8 + sb $tmp0,5($mac) + dsrl $tmp0,$in1,16 + sb $tmp1,6($mac) + dsrl $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + dsrl $tmp2,$in1,32 + sb $tmp3,9($mac) + dsrl $tmp3,$in1,40 + sb $tmp0,10($mac) + dsrl $tmp0,$in1,48 + sb $tmp1,11($mac) + dsrl $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.align 2 +___ +} + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +$output=pop and open STDOUT,">$output"; +print $code; +close STDOUT; + diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64-glue.c b/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64-glue.c new file mode 100644 index 000000000000..ce48a42f7654 --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64-glue.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include + +asmlinkage void poly1305_init_x86_64(void *ctx, + const u8 key[POLY1305_KEY_SIZE]); +asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, + const size_t len, const u32 padbit); + +static bool poly1305_use_avx __ro_after_init; +static bool poly1305_use_avx2 __ro_after_init; +static bool poly1305_use_avx512 __ro_after_init; +static bool *const poly1305_nobs[] __initconst = { + &poly1305_use_avx, &poly1305_use_avx2, &poly1305_use_avx512 }; + +static void __init poly1305_fpu_init(void) +{ + poly1305_use_avx = + boot_cpu_has(X86_FEATURE_AVX) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); + poly1305_use_avx2 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); +#ifndef COMPAT_CANNOT_USE_AVX512 + poly1305_use_avx512 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm. */ + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X; +#endif +} + +static inline bool poly1305_init_arch(void *ctx, + const u8 key[POLY1305_KEY_SIZE]) +{ + poly1305_init_x86_64(ctx, key); + return true; +} + +struct poly1305_arch_internal { + union { + struct { + u32 h[5]; + u32 is_base2_26; + }; + u64 hs[3]; + }; + u64 r[2]; + u64 pad; + struct { u32 r2, r1, r4, r3; } rn[9]; +}; + +/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit + * the unfortunate situation of using AVX and then having to go back to scalar + * -- because the user is silly and has called the update function from two + * separate contexts -- then we need to convert back to the original base before + * proceeding. It is possible to reason that the initial reduction below is + * sufficient given the implementation invariants. However, for an avoidance of + * doubt and because this is not performance critical, we do the full reduction + * anyway. + */ +static void convert_to_base2_64(void *ctx) +{ + struct poly1305_arch_internal *state = ctx; + u32 cy; + + if (!state->is_base2_26) + return; + + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; + state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; + state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); + state->hs[2] = state->h[4] >> 24; +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) + cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); + state->hs[2] &= 3; + state->hs[0] += cy; + state->hs[1] += (cy = ULT(state->hs[0], cy)); + state->hs[2] += ULT(state->hs[1], cy); +#undef ULT + state->is_base2_26 = 0; +} + +static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp, + size_t len, const u32 padbit, + simd_context_t *simd_context) +{ + struct poly1305_arch_internal *state = ctx; + + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || + PAGE_SIZE % POLY1305_BLOCK_SIZE); + + if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx || + (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || + !simd_use(simd_context)) { + convert_to_base2_64(ctx); + poly1305_blocks_x86_64(ctx, inp, len, padbit); + return true; + } + + for (;;) { + const size_t bytes = min_t(size_t, len, PAGE_SIZE); + + if (IS_ENABLED(CONFIG_AS_AVX512) && poly1305_use_avx512) + poly1305_blocks_avx512(ctx, inp, bytes, padbit); + else if (IS_ENABLED(CONFIG_AS_AVX2) && poly1305_use_avx2) + poly1305_blocks_avx2(ctx, inp, bytes, padbit); + else + poly1305_blocks_avx(ctx, inp, bytes, padbit); + len -= bytes; + if (!len) + break; + inp += bytes; + simd_relax(simd_context); + } + + return true; +} + +static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4], + simd_context_t *simd_context) +{ + struct poly1305_arch_internal *state = ctx; + + if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx || + !state->is_base2_26 || !simd_use(simd_context)) { + convert_to_base2_64(ctx); + poly1305_emit_x86_64(ctx, mac, nonce); + } else + poly1305_emit_avx(ctx, mac, nonce); + return true; +} diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64.pl b/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64.pl new file mode 100644 index 000000000000..f994855cdbe2 --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305-x86_64.pl @@ -0,0 +1,4266 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Copyright (C) 2017-2018 Samuel Neves . All Rights Reserved. +# Copyright (C) 2017-2019 Jason A. Donenfeld . All Rights Reserved. +# Copyright (C) 2006-2017 CRYPTOGAMS by . All Rights Reserved. +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements Poly1305 hash for x86_64. +# +# March 2015 +# +# Initial release. +# +# December 2016 +# +# Add AVX512F+VL+BW code path. +# +# November 2017 +# +# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be +# executed even on Knights Landing. Trigger for modification was +# observation that AVX512 code paths can negatively affect overall +# Skylake-X system performance. Since we are likely to suppress +# AVX512F capability flag [at least on Skylake-X], conversion serves +# as kind of "investment protection". Note that next *lake processor, +# Cannonlake, has AVX512IFMA code path to execute... +# +# Numbers are cycles per processed byte with poly1305_blocks alone, +# measured with rdtsc at fixed clock frequency. +# +# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512 +# P4 4.46/+120% - +# Core 2 2.41/+90% - +# Westmere 1.88/+120% - +# Sandy Bridge 1.39/+140% 1.10 +# Haswell 1.14/+175% 1.11 0.65 +# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35] +# Silvermont 2.83/+95% - +# Knights L 3.60/? 1.65 1.10 0.41(***) +# Goldmont 1.70/+180% - +# VIA Nano 1.82/+150% - +# Sledgehammer 1.38/+160% - +# Bulldozer 2.30/+130% 0.97 +# Ryzen 1.15/+200% 1.08 1.18 +# +# (*) improvement coefficients relative to clang are more modest and +# are ~50% on most processors, in both cases we are comparing to +# __int128 code; +# (**) SSE2 implementation was attempted, but among non-AVX processors +# it was faster than integer-only code only on older Intel P4 and +# Core processors, 50-30%, less newer processor is, but slower on +# contemporary ones, for example almost 2x slower on Atom, and as +# former are naturally disappearing, SSE2 is deemed unnecessary; +# (***) strangely enough performance seems to vary from core to core, +# listed result is best case; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); +$kernel=0; $kernel=1 if (!$flavour && !$output); + +if (!$kernel) { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or + die "can't locate x86_64-xlate.pl"; + + open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; + *STDOUT=*OUT; + + if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25); + } + + if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { + $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12); + $avx += 1 if ($1==2.11 && $2>=8); + } + + if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + } + + if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); + } +} else { + $avx = 4; # The kernel uses ifdefs for this. +} + +sub declare_function() { + my ($name, $align, $nargs) = @_; + if($kernel) { + $code .= ".align $align\n"; + $code .= "SYM_FUNC_START($name)\n"; + $code .= ".L$name:\n"; + } else { + $code .= ".globl $name\n"; + $code .= ".type $name,\@function,$nargs\n"; + $code .= ".align $align\n"; + $code .= "$name:\n"; + } +} + +sub end_function() { + my ($name) = @_; + if($kernel) { + $code .= "SYM_FUNC_END($name)\n"; + } else { + $code .= ".size $name,.-$name\n"; + } +} + +$code.=<<___ if $kernel; +#include +___ + +if ($avx) { +$code.=<<___ if $kernel; +.section .rodata +___ +$code.=<<___; +.align 64 +.Lconst: +.Lmask24: +.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 +.L129: +.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 +.Lmask26: +.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 +.Lpermd_avx2: +.long 2,2,2,3,2,0,2,1 +.Lpermd_avx512: +.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 + +.L2_44_inp_permd: +.long 0,1,1,2,2,3,7,7 +.L2_44_inp_shift: +.quad 0,12,24,64 +.L2_44_mask: +.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff +.L2_44_shift_rgt: +.quad 44,44,42,64 +.L2_44_shift_lft: +.quad 8,8,10,64 + +.align 64 +.Lx_mask44: +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.Lx_mask42: +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +___ +} +$code.=<<___ if (!$kernel); +.asciz "Poly1305 for x86_64, CRYPTOGAMS by " +.align 16 +___ + +my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); +my ($mac,$nonce)=($inp,$len); # *_emit arguments +my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13"); +my ($h0,$h1,$h2)=("%r14","%rbx","%r10"); + +sub poly1305_iteration { +# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 +# output: $h0-$h2 *= $r0-$r1 +$code.=<<___; + mulq $h0 # h0*r1 + mov %rax,$d2 + mov $r0,%rax + mov %rdx,$d3 + + mulq $h0 # h0*r0 + mov %rax,$h0 # future $h0 + mov $r0,%rax + mov %rdx,$d1 + + mulq $h1 # h1*r0 + add %rax,$d2 + mov $s1,%rax + adc %rdx,$d3 + + mulq $h1 # h1*s1 + mov $h2,$h1 # borrow $h1 + add %rax,$h0 + adc %rdx,$d1 + + imulq $s1,$h1 # h2*s1 + add $h1,$d2 + mov $d1,$h1 + adc \$0,$d3 + + imulq $r0,$h2 # h2*r0 + add $d2,$h1 + mov \$-4,%rax # mask value + adc $h2,$d3 + + and $d3,%rax # last reduction step + mov $d3,$h2 + shr \$2,$d3 + and \$3,$h2 + add $d3,%rax + add %rax,$h0 + adc \$0,$h1 + adc \$0,$h2 +___ +} + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int64 h[3]; # current hash value base 2^64 +# unsigned __int64 r[2]; # key value base 2^64 + +$code.=<<___; +.text +___ +$code.=<<___ if (!$kernel); +.extern OPENSSL_ia32cap_P + +.globl poly1305_init_x86_64 +.hidden poly1305_init_x86_64 +.globl poly1305_blocks_x86_64 +.hidden poly1305_blocks_x86_64 +.globl poly1305_emit_x86_64 +.hidden poly1305_emit_x86_64 +___ +&declare_function("poly1305_init_x86_64", 32, 3); +$code.=<<___; + xor %rax,%rax + mov %rax,0($ctx) # initialize hash value + mov %rax,8($ctx) + mov %rax,16($ctx) + + cmp \$0,$inp + je .Lno_key +___ +$code.=<<___ if (!$kernel); + lea poly1305_blocks_x86_64(%rip),%r10 + lea poly1305_emit_x86_64(%rip),%r11 +___ +$code.=<<___ if (!$kernel && $avx); + mov OPENSSL_ia32cap_P+4(%rip),%r9 + lea poly1305_blocks_avx(%rip),%rax + lea poly1305_emit_avx(%rip),%rcx + bt \$`60-32`,%r9 # AVX? + cmovc %rax,%r10 + cmovc %rcx,%r11 +___ +$code.=<<___ if (!$kernel && $avx>1); + lea poly1305_blocks_avx2(%rip),%rax + bt \$`5+32`,%r9 # AVX2? + cmovc %rax,%r10 +___ +$code.=<<___ if (!$kernel && $avx>3); + mov \$`(1<<31|1<<21|1<<16)`,%rax + shr \$32,%r9 + and %rax,%r9 + cmp %rax,%r9 + je .Linit_base2_44 +___ +$code.=<<___; + mov \$0x0ffffffc0fffffff,%rax + mov \$0x0ffffffc0ffffffc,%rcx + and 0($inp),%rax + and 8($inp),%rcx + mov %rax,24($ctx) + mov %rcx,32($ctx) +___ +$code.=<<___ if (!$kernel && $flavour !~ /elf32/); + mov %r10,0(%rdx) + mov %r11,8(%rdx) +___ +$code.=<<___ if (!$kernel && $flavour =~ /elf32/); + mov %r10d,0(%rdx) + mov %r11d,4(%rdx) +___ +$code.=<<___; + mov \$1,%eax +.Lno_key: + ret +___ +&end_function("poly1305_init_x86_64"); + +&declare_function("poly1305_blocks_x86_64", 32, 4); +$code.=<<___; +.cfi_startproc +.Lblocks: + shr \$4,$len + jz .Lno_data # too short + + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 + push $ctx +.cfi_push $ctx +.Lblocks_body: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2 + + mov $s1,$r1 + shr \$2,$s1 + mov $r1,%rax + add $r1,$s1 # s1 = r1 + (r1 >> 2) + jmp .Loop + +.align 32 +.Loop: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 +___ + + &poly1305_iteration(); + +$code.=<<___; + mov $r1,%rax + dec %r15 # len-=16 + jnz .Loop + + mov 0(%rsp),$ctx +.cfi_restore $ctx + + mov $h0,0($ctx) # store hash value + mov $h1,8($ctx) + mov $h2,16($ctx) + + mov 8(%rsp),%r15 +.cfi_restore %r15 + mov 16(%rsp),%r14 +.cfi_restore %r14 + mov 24(%rsp),%r13 +.cfi_restore %r13 + mov 32(%rsp),%r12 +.cfi_restore %r12 + mov 40(%rsp),%rbx +.cfi_restore %rbx + lea 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lno_data: +.Lblocks_epilogue: + ret +.cfi_endproc +___ +&end_function("poly1305_blocks_x86_64"); + +&declare_function("poly1305_emit_x86_64", 32, 3); +$code.=<<___; +.Lemit: + mov 0($ctx),%r8 # load hash value + mov 8($ctx),%r9 + mov 16($ctx),%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + ret +___ +&end_function("poly1305_emit_x86_64"); +if ($avx) { + +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX\n"; +} + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int32 h[5]; # current hash value base 2^26 +# unsigned __int32 is_base2_26; +# unsigned __int64 r[2]; # key value base 2^64 +# unsigned __int64 pad; +# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9]; +# +# where r^n are base 2^26 digits of degrees of multiplier key. There are +# 5 digits, but last four are interleaved with multiples of 5, totalling +# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4. + +my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) = + map("%xmm$_",(0..15)); + +$code.=<<___; +.type __poly1305_block,\@abi-omnipotent +.align 32 +__poly1305_block: + push $ctx +___ + &poly1305_iteration(); +$code.=<<___; + pop $ctx + ret +.size __poly1305_block,.-__poly1305_block + +.type __poly1305_init_avx,\@abi-omnipotent +.align 32 +__poly1305_init_avx: + push %rbp + mov %rsp,%rbp + mov $r0,$h0 + mov $r1,$h1 + xor $h2,$h2 + + lea 48+64($ctx),$ctx # size optimization + + mov $r1,%rax + call __poly1305_block # r^2 + + mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26 + mov \$0x3ffffff,%edx + mov $h0,$d1 + and $h0#d,%eax + mov $r0,$d2 + and $r0#d,%edx + mov %eax,`16*0+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*0+4-64`($ctx) + shr \$26,$d2 + + mov \$0x3ffffff,%eax + mov \$0x3ffffff,%edx + and $d1#d,%eax + and $d2#d,%edx + mov %eax,`16*1+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*1+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*2+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*2+4-64`($ctx) + shr \$26,$d2 + + mov $h1,%rax + mov $r1,%rdx + shl \$12,%rax + shl \$12,%rdx + or $d1,%rax + or $d2,%rdx + and \$0x3ffffff,%eax + and \$0x3ffffff,%edx + mov %eax,`16*3+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*3+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*4+0-64`($ctx) + mov $h1,$d1 + mov %edx,`16*4+4-64`($ctx) + mov $r1,$d2 + + mov \$0x3ffffff,%eax + mov \$0x3ffffff,%edx + shr \$14,$d1 + shr \$14,$d2 + and $d1#d,%eax + and $d2#d,%edx + mov %eax,`16*5+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*5+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*6+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*6+4-64`($ctx) + shr \$26,$d2 + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+0-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d2#d,`16*7+4-64`($ctx) + lea ($d2,$d2,4),$d2 # *5 + mov $d1#d,`16*8+0-64`($ctx) + mov $d2#d,`16*8+4-64`($ctx) + + mov $r1,%rax + call __poly1305_block # r^3 + + mov \$0x3ffffff,%eax # save r^3 base 2^26 + mov $h0,$d1 + and $h0#d,%eax + shr \$26,$d1 + mov %eax,`16*0+12-64`($ctx) + + mov \$0x3ffffff,%edx + and $d1#d,%edx + mov %edx,`16*1+12-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*2+12-64`($ctx) + + mov $h1,%rax + shl \$12,%rax + or $d1,%rax + and \$0x3ffffff,%eax + mov %eax,`16*3+12-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov $h1,$d1 + mov %eax,`16*4+12-64`($ctx) + + mov \$0x3ffffff,%edx + shr \$14,$d1 + and $d1#d,%edx + mov %edx,`16*5+12-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*6+12-64`($ctx) + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+12-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d1#d,`16*8+12-64`($ctx) + + mov $r1,%rax + call __poly1305_block # r^4 + + mov \$0x3ffffff,%eax # save r^4 base 2^26 + mov $h0,$d1 + and $h0#d,%eax + shr \$26,$d1 + mov %eax,`16*0+8-64`($ctx) + + mov \$0x3ffffff,%edx + and $d1#d,%edx + mov %edx,`16*1+8-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*2+8-64`($ctx) + + mov $h1,%rax + shl \$12,%rax + or $d1,%rax + and \$0x3ffffff,%eax + mov %eax,`16*3+8-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov $h1,$d1 + mov %eax,`16*4+8-64`($ctx) + + mov \$0x3ffffff,%edx + shr \$14,$d1 + and $d1#d,%edx + mov %edx,`16*5+8-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*6+8-64`($ctx) + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+8-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d1#d,`16*8+8-64`($ctx) + + lea -48-64($ctx),$ctx # size [de-]optimization + pop %rbp + ret +.size __poly1305_init_avx,.-__poly1305_init_avx +___ + +&declare_function("poly1305_blocks_avx", 32, 4); +$code.=<<___; +.cfi_startproc + mov 20($ctx),%r8d # is_base2_26 + cmp \$128,$len + jae .Lblocks_avx + test %r8d,%r8d + jz .Lblocks + +.Lblocks_avx: + and \$-16,$len + jz .Lno_data_avx + + vzeroupper + + test %r8d,%r8d + jz .Lbase2_64_avx + + test \$31,$len + jz .Leven_avx + + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lblocks_avx_body: + + mov $len,%r15 # reassign $len + + mov 0($ctx),$d1 # load hash value + mov 8($ctx),$d2 + mov 16($ctx),$h2#d + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + ################################# base 2^26 -> base 2^64 + mov $d1#d,$h0#d + and \$`-1*(1<<31)`,$d1 + mov $d2,$r1 # borrow $r1 + mov $d2#d,$h1#d + and \$`-1*(1<<31)`,$d2 + + shr \$6,$d1 + shl \$52,$r1 + add $d1,$h0 + shr \$12,$h1 + shr \$18,$d2 + add $r1,$h0 + adc $d2,$h1 + + mov $h2,$d1 + shl \$40,$d1 + shr \$24,$h2 + add $d1,$h1 + adc \$0,$h2 # can be partially reduced... + + mov \$-4,$d2 # ... so reduce + mov $h2,$d1 + and $h2,$d2 + shr \$2,$d1 + and \$3,$h2 + add $d2,$d1 # =*5 + add $d1,$h0 + adc \$0,$h1 + adc \$0,$h2 + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + + call __poly1305_block + + test $padbit,$padbit # if $padbit is zero, + jz .Lstore_base2_64_avx # store hash in base 2^64 format + + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$r0 + mov $h1,$r1 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$r0 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $r0,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$r1 + and \$0x3ffffff,$h1 # h[3] + or $r1,$h2 # h[4] + + sub \$16,%r15 + jz .Lstore_base2_26_avx + + vmovd %rax#d,$H0 + vmovd %rdx#d,$H1 + vmovd $h0#d,$H2 + vmovd $h1#d,$H3 + vmovd $h2#d,$H4 + jmp .Lproceed_avx + +.align 32 +.Lstore_base2_64_avx: + mov $h0,0($ctx) + mov $h1,8($ctx) + mov $h2,16($ctx) # note that is_base2_26 is zeroed + jmp .Ldone_avx + +.align 16 +.Lstore_base2_26_avx: + mov %rax#d,0($ctx) # store hash value base 2^26 + mov %rdx#d,4($ctx) + mov $h0#d,8($ctx) + mov $h1#d,12($ctx) + mov $h2#d,16($ctx) +.align 16 +.Ldone_avx: + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lno_data_avx: +.Lblocks_avx_epilogue: + ret +.cfi_endproc + +.align 32 +.Lbase2_64_avx: +.cfi_startproc + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lbase2_64_avx_body: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2#d + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + test \$31,$len + jz .Linit_avx + + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + +.Linit_avx: + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$d1 + mov $h1,$d2 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$d1 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $d1,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$d2 + and \$0x3ffffff,$h1 # h[3] + or $d2,$h2 # h[4] + + vmovd %rax#d,$H0 + vmovd %rdx#d,$H1 + vmovd $h0#d,$H2 + vmovd $h1#d,$H3 + vmovd $h2#d,$H4 + movl \$1,20($ctx) # set is_base2_26 + + call __poly1305_init_avx + +.Lproceed_avx: + mov %r15,$len + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lbase2_64_avx_epilogue: + jmp .Ldo_avx +.cfi_endproc + +.align 32 +.Leven_avx: +.cfi_startproc + vmovd 4*0($ctx),$H0 # load hash value + vmovd 4*1($ctx),$H1 + vmovd 4*2($ctx),$H2 + vmovd 4*3($ctx),$H3 + vmovd 4*4($ctx),$H4 + +.Ldo_avx: +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + and \$-32,%rsp + sub \$-8,%rsp + lea -0x58(%rsp),%r11 + sub \$0x178,%rsp + +___ +$code.=<<___ if ($win64); + lea -0xf8(%rsp),%r11 + sub \$0x218,%rsp + vmovdqa %xmm6,0x50(%r11) + vmovdqa %xmm7,0x60(%r11) + vmovdqa %xmm8,0x70(%r11) + vmovdqa %xmm9,0x80(%r11) + vmovdqa %xmm10,0x90(%r11) + vmovdqa %xmm11,0xa0(%r11) + vmovdqa %xmm12,0xb0(%r11) + vmovdqa %xmm13,0xc0(%r11) + vmovdqa %xmm14,0xd0(%r11) + vmovdqa %xmm15,0xe0(%r11) +.Ldo_avx_body: +___ +$code.=<<___; + sub \$64,$len + lea -32($inp),%rax + cmovc %rax,$inp + + vmovdqu `16*3`($ctx),$D4 # preload r0^2 + lea `16*3+64`($ctx),$ctx # size optimization + lea .Lconst(%rip),%rcx + + ################################################################ + # load input + vmovdqu 16*2($inp),$T0 + vmovdqu 16*3($inp),$T1 + vmovdqa 64(%rcx),$MASK # .Lmask26 + + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + + vpsrlq \$40,$T4,$T4 # 4 + vpsrlq \$26,$T0,$T1 + vpand $MASK,$T0,$T0 # 0 + vpsrlq \$4,$T3,$T2 + vpand $MASK,$T1,$T1 # 1 + vpsrlq \$30,$T3,$T3 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + jbe .Lskip_loop_avx + + # expand and copy pre-calculated table to stack + vmovdqu `16*1-64`($ctx),$D1 + vmovdqu `16*2-64`($ctx),$D2 + vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434 + vpshufd \$0x44,$D4,$D0 # xx12 -> 1212 + vmovdqa $D3,-0x90(%r11) + vmovdqa $D0,0x00(%rsp) + vpshufd \$0xEE,$D1,$D4 + vmovdqu `16*3-64`($ctx),$D0 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D4,-0x80(%r11) + vmovdqa $D1,0x10(%rsp) + vpshufd \$0xEE,$D2,$D3 + vmovdqu `16*4-64`($ctx),$D1 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D3,-0x70(%r11) + vmovdqa $D2,0x20(%rsp) + vpshufd \$0xEE,$D0,$D4 + vmovdqu `16*5-64`($ctx),$D2 + vpshufd \$0x44,$D0,$D0 + vmovdqa $D4,-0x60(%r11) + vmovdqa $D0,0x30(%rsp) + vpshufd \$0xEE,$D1,$D3 + vmovdqu `16*6-64`($ctx),$D0 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D3,-0x50(%r11) + vmovdqa $D1,0x40(%rsp) + vpshufd \$0xEE,$D2,$D4 + vmovdqu `16*7-64`($ctx),$D1 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D4,-0x40(%r11) + vmovdqa $D2,0x50(%rsp) + vpshufd \$0xEE,$D0,$D3 + vmovdqu `16*8-64`($ctx),$D2 + vpshufd \$0x44,$D0,$D0 + vmovdqa $D3,-0x30(%r11) + vmovdqa $D0,0x60(%rsp) + vpshufd \$0xEE,$D1,$D4 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D4,-0x20(%r11) + vmovdqa $D1,0x70(%rsp) + vpshufd \$0xEE,$D2,$D3 + vmovdqa 0x00(%rsp),$D4 # preload r0^2 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D3,-0x10(%r11) + vmovdqa $D2,0x80(%rsp) + + jmp .Loop_avx + +.align 32 +.Loop_avx: + ################################################################ + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + # \___________________/ + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + # \___________________/ \____________________/ + # + # Note that we start with inp[2:3]*r^2. This is because it + # doesn't depend on reduction in previous iteration. + ################################################################ + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # though note that $Tx and $Hx are "reversed" in this section, + # and $D4 is preloaded with r0^2... + + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 + vmovdqa $H2,0x20(%r11) # offload hash + vpmuludq $T2,$D4,$D2 # d3 = h2*r0 + vmovdqa 0x10(%rsp),$H2 # r1^2 + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 + + vmovdqa $H0,0x00(%r11) # + vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1 + vmovdqa $H1,0x10(%r11) # + vpmuludq $T3,$H2,$H1 # h3*r1 + vpaddq $H0,$D0,$D0 # d0 += h4*s1 + vpaddq $H1,$D4,$D4 # d4 += h3*r1 + vmovdqa $H3,0x30(%r11) # + vpmuludq $T2,$H2,$H0 # h2*r1 + vpmuludq $T1,$H2,$H1 # h1*r1 + vpaddq $H0,$D3,$D3 # d3 += h2*r1 + vmovdqa 0x30(%rsp),$H3 # r2^2 + vpaddq $H1,$D2,$D2 # d2 += h1*r1 + vmovdqa $H4,0x40(%r11) # + vpmuludq $T0,$H2,$H2 # h0*r1 + vpmuludq $T2,$H3,$H0 # h2*r2 + vpaddq $H2,$D1,$D1 # d1 += h0*r1 + + vmovdqa 0x40(%rsp),$H4 # s2^2 + vpaddq $H0,$D4,$D4 # d4 += h2*r2 + vpmuludq $T1,$H3,$H1 # h1*r2 + vpmuludq $T0,$H3,$H3 # h0*r2 + vpaddq $H1,$D3,$D3 # d3 += h1*r2 + vmovdqa 0x50(%rsp),$H2 # r3^2 + vpaddq $H3,$D2,$D2 # d2 += h0*r2 + vpmuludq $T4,$H4,$H0 # h4*s2 + vpmuludq $T3,$H4,$H4 # h3*s2 + vpaddq $H0,$D1,$D1 # d1 += h4*s2 + vmovdqa 0x60(%rsp),$H3 # s3^2 + vpaddq $H4,$D0,$D0 # d0 += h3*s2 + + vmovdqa 0x80(%rsp),$H4 # s4^2 + vpmuludq $T1,$H2,$H1 # h1*r3 + vpmuludq $T0,$H2,$H2 # h0*r3 + vpaddq $H1,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $T4,$H3,$H0 # h4*s3 + vpmuludq $T3,$H3,$H1 # h3*s3 + vpaddq $H0,$D2,$D2 # d2 += h4*s3 + vmovdqu 16*0($inp),$H0 # load input + vpaddq $H1,$D1,$D1 # d1 += h3*s3 + vpmuludq $T2,$H3,$H3 # h2*s3 + vpmuludq $T2,$H4,$T2 # h2*s4 + vpaddq $H3,$D0,$D0 # d0 += h2*s3 + + vmovdqu 16*1($inp),$H1 # + vpaddq $T2,$D1,$D1 # d1 += h2*s4 + vpmuludq $T3,$H4,$T3 # h3*s4 + vpmuludq $T4,$H4,$T4 # h4*s4 + vpsrldq \$6,$H0,$H2 # splat input + vpaddq $T3,$D2,$D2 # d2 += h3*s4 + vpaddq $T4,$D3,$D3 # d3 += h4*s4 + vpsrldq \$6,$H1,$H3 # + vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4 + vpmuludq $T1,$H4,$T0 # h1*s4 + vpunpckhqdq $H1,$H0,$H4 # 4 + vpaddq $T4,$D4,$D4 # d4 += h0*r4 + vmovdqa -0x90(%r11),$T4 # r0^4 + vpaddq $T0,$D0,$D0 # d0 += h1*s4 + + vpunpcklqdq $H1,$H0,$H0 # 0:1 + vpunpcklqdq $H3,$H2,$H3 # 2:3 + + #vpsrlq \$40,$H4,$H4 # 4 + vpsrldq \$`40/8`,$H4,$H4 # 4 + vpsrlq \$26,$H0,$H1 + vpand $MASK,$H0,$H0 # 0 + vpsrlq \$4,$H3,$H2 + vpand $MASK,$H1,$H1 # 1 + vpand 0(%rcx),$H4,$H4 # .Lmask24 + vpsrlq \$30,$H3,$H3 + vpand $MASK,$H2,$H2 # 2 + vpand $MASK,$H3,$H3 # 3 + vpor 32(%rcx),$H4,$H4 # padbit, yes, always + + vpaddq 0x00(%r11),$H0,$H0 # add hash value + vpaddq 0x10(%r11),$H1,$H1 + vpaddq 0x20(%r11),$H2,$H2 + vpaddq 0x30(%r11),$H3,$H3 + vpaddq 0x40(%r11),$H4,$H4 + + lea 16*2($inp),%rax + lea 16*4($inp),$inp + sub \$64,$len + cmovc %rax,$inp + + ################################################################ + # Now we accumulate (inp[0:1]+hash)*r^4 + ################################################################ + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + vpmuludq $H0,$T4,$T0 # h0*r0 + vpmuludq $H1,$T4,$T1 # h1*r0 + vpaddq $T0,$D0,$D0 + vpaddq $T1,$D1,$D1 + vmovdqa -0x80(%r11),$T2 # r1^4 + vpmuludq $H2,$T4,$T0 # h2*r0 + vpmuludq $H3,$T4,$T1 # h3*r0 + vpaddq $T0,$D2,$D2 + vpaddq $T1,$D3,$D3 + vpmuludq $H4,$T4,$T4 # h4*r0 + vpmuludq -0x70(%r11),$H4,$T0 # h4*s1 + vpaddq $T4,$D4,$D4 + + vpaddq $T0,$D0,$D0 # d0 += h4*s1 + vpmuludq $H2,$T2,$T1 # h2*r1 + vpmuludq $H3,$T2,$T0 # h3*r1 + vpaddq $T1,$D3,$D3 # d3 += h2*r1 + vmovdqa -0x60(%r11),$T3 # r2^4 + vpaddq $T0,$D4,$D4 # d4 += h3*r1 + vpmuludq $H1,$T2,$T1 # h1*r1 + vpmuludq $H0,$T2,$T2 # h0*r1 + vpaddq $T1,$D2,$D2 # d2 += h1*r1 + vpaddq $T2,$D1,$D1 # d1 += h0*r1 + + vmovdqa -0x50(%r11),$T4 # s2^4 + vpmuludq $H2,$T3,$T0 # h2*r2 + vpmuludq $H1,$T3,$T1 # h1*r2 + vpaddq $T0,$D4,$D4 # d4 += h2*r2 + vpaddq $T1,$D3,$D3 # d3 += h1*r2 + vmovdqa -0x40(%r11),$T2 # r3^4 + vpmuludq $H0,$T3,$T3 # h0*r2 + vpmuludq $H4,$T4,$T0 # h4*s2 + vpaddq $T3,$D2,$D2 # d2 += h0*r2 + vpaddq $T0,$D1,$D1 # d1 += h4*s2 + vmovdqa -0x30(%r11),$T3 # s3^4 + vpmuludq $H3,$T4,$T4 # h3*s2 + vpmuludq $H1,$T2,$T1 # h1*r3 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + + vmovdqa -0x10(%r11),$T4 # s4^4 + vpaddq $T1,$D4,$D4 # d4 += h1*r3 + vpmuludq $H0,$T2,$T2 # h0*r3 + vpmuludq $H4,$T3,$T0 # h4*s3 + vpaddq $T2,$D3,$D3 # d3 += h0*r3 + vpaddq $T0,$D2,$D2 # d2 += h4*s3 + vmovdqu 16*2($inp),$T0 # load input + vpmuludq $H3,$T3,$T2 # h3*s3 + vpmuludq $H2,$T3,$T3 # h2*s3 + vpaddq $T2,$D1,$D1 # d1 += h3*s3 + vmovdqu 16*3($inp),$T1 # + vpaddq $T3,$D0,$D0 # d0 += h2*s3 + + vpmuludq $H2,$T4,$H2 # h2*s4 + vpmuludq $H3,$T4,$H3 # h3*s4 + vpsrldq \$6,$T0,$T2 # splat input + vpaddq $H2,$D1,$D1 # d1 += h2*s4 + vpmuludq $H4,$T4,$H4 # h4*s4 + vpsrldq \$6,$T1,$T3 # + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 + vpmuludq -0x20(%r11),$H0,$H4 # h0*r4 + vpmuludq $H1,$T4,$H0 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + + #vpsrlq \$40,$T4,$T4 # 4 + vpsrldq \$`40/8`,$T4,$T4 # 4 + vpsrlq \$26,$T0,$T1 + vmovdqa 0x00(%rsp),$D4 # preload r0^2 + vpand $MASK,$T0,$T0 # 0 + vpsrlq \$4,$T3,$T2 + vpand $MASK,$T1,$T1 # 1 + vpand 0(%rcx),$T4,$T4 # .Lmask24 + vpsrlq \$30,$T3,$T3 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + ################################################################ + # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + # and P. Schwabe + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D0 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D0,$H0,$H0 + vpsllq \$2,$D0,$D0 + vpaddq $D0,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + ja .Loop_avx + +.Lskip_loop_avx: + ################################################################ + # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2 + add \$32,$len + jnz .Long_tail_avx + + vpaddq $H2,$T2,$T2 + vpaddq $H0,$T0,$T0 + vpaddq $H1,$T1,$T1 + vpaddq $H3,$T3,$T3 + vpaddq $H4,$T4,$T4 + +.Long_tail_avx: + vmovdqa $H2,0x20(%r11) + vmovdqa $H0,0x00(%r11) + vmovdqa $H1,0x10(%r11) + vmovdqa $H3,0x30(%r11) + vmovdqa $H4,0x40(%r11) + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + vpmuludq $T2,$D4,$D2 # d2 = h2*r0 + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 + vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 + + vpmuludq $T3,$H2,$H0 # h3*r1 + vpaddq $H0,$D4,$D4 # d4 += h3*r1 + vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n + vpmuludq $T2,$H2,$H1 # h2*r1 + vpaddq $H1,$D3,$D3 # d3 += h2*r1 + vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n + vpmuludq $T1,$H2,$H0 # h1*r1 + vpaddq $H0,$D2,$D2 # d2 += h1*r1 + vpmuludq $T0,$H2,$H2 # h0*r1 + vpaddq $H2,$D1,$D1 # d1 += h0*r1 + vpmuludq $T4,$H3,$H3 # h4*s1 + vpaddq $H3,$D0,$D0 # d0 += h4*s1 + + vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n + vpmuludq $T2,$H4,$H1 # h2*r2 + vpaddq $H1,$D4,$D4 # d4 += h2*r2 + vpmuludq $T1,$H4,$H0 # h1*r2 + vpaddq $H0,$D3,$D3 # d3 += h1*r2 + vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n + vpmuludq $T0,$H4,$H4 # h0*r2 + vpaddq $H4,$D2,$D2 # d2 += h0*r2 + vpmuludq $T4,$H2,$H1 # h4*s2 + vpaddq $H1,$D1,$D1 # d1 += h4*s2 + vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n + vpmuludq $T3,$H2,$H2 # h3*s2 + vpaddq $H2,$D0,$D0 # d0 += h3*s2 + + vpmuludq $T1,$H3,$H0 # h1*r3 + vpaddq $H0,$D4,$D4 # d4 += h1*r3 + vpmuludq $T0,$H3,$H3 # h0*r3 + vpaddq $H3,$D3,$D3 # d3 += h0*r3 + vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n + vpmuludq $T4,$H4,$H1 # h4*s3 + vpaddq $H1,$D2,$D2 # d2 += h4*s3 + vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n + vpmuludq $T3,$H4,$H0 # h3*s3 + vpaddq $H0,$D1,$D1 # d1 += h3*s3 + vpmuludq $T2,$H4,$H4 # h2*s3 + vpaddq $H4,$D0,$D0 # d0 += h2*s3 + + vpmuludq $T0,$H2,$H2 # h0*r4 + vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4 + vpmuludq $T4,$H3,$H1 # h4*s4 + vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4 + vpmuludq $T3,$H3,$H0 # h3*s4 + vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4 + vpmuludq $T2,$H3,$H1 # h2*s4 + vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4 + vpmuludq $T1,$H3,$H3 # h1*s4 + vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4 + + jz .Lshort_tail_avx + + vmovdqu 16*0($inp),$H0 # load input + vmovdqu 16*1($inp),$H1 + + vpsrldq \$6,$H0,$H2 # splat input + vpsrldq \$6,$H1,$H3 + vpunpckhqdq $H1,$H0,$H4 # 4 + vpunpcklqdq $H1,$H0,$H0 # 0:1 + vpunpcklqdq $H3,$H2,$H3 # 2:3 + + vpsrlq \$40,$H4,$H4 # 4 + vpsrlq \$26,$H0,$H1 + vpand $MASK,$H0,$H0 # 0 + vpsrlq \$4,$H3,$H2 + vpand $MASK,$H1,$H1 # 1 + vpsrlq \$30,$H3,$H3 + vpand $MASK,$H2,$H2 # 2 + vpand $MASK,$H3,$H3 # 3 + vpor 32(%rcx),$H4,$H4 # padbit, yes, always + + vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4 + vpaddq 0x00(%r11),$H0,$H0 + vpaddq 0x10(%r11),$H1,$H1 + vpaddq 0x20(%r11),$H2,$H2 + vpaddq 0x30(%r11),$H3,$H3 + vpaddq 0x40(%r11),$H4,$H4 + + ################################################################ + # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate + + vpmuludq $H0,$T4,$T0 # h0*r0 + vpaddq $T0,$D0,$D0 # d0 += h0*r0 + vpmuludq $H1,$T4,$T1 # h1*r0 + vpaddq $T1,$D1,$D1 # d1 += h1*r0 + vpmuludq $H2,$T4,$T0 # h2*r0 + vpaddq $T0,$D2,$D2 # d2 += h2*r0 + vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n + vpmuludq $H3,$T4,$T1 # h3*r0 + vpaddq $T1,$D3,$D3 # d3 += h3*r0 + vpmuludq $H4,$T4,$T4 # h4*r0 + vpaddq $T4,$D4,$D4 # d4 += h4*r0 + + vpmuludq $H3,$T2,$T0 # h3*r1 + vpaddq $T0,$D4,$D4 # d4 += h3*r1 + vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1 + vpmuludq $H2,$T2,$T1 # h2*r1 + vpaddq $T1,$D3,$D3 # d3 += h2*r1 + vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2 + vpmuludq $H1,$T2,$T0 # h1*r1 + vpaddq $T0,$D2,$D2 # d2 += h1*r1 + vpmuludq $H0,$T2,$T2 # h0*r1 + vpaddq $T2,$D1,$D1 # d1 += h0*r1 + vpmuludq $H4,$T3,$T3 # h4*s1 + vpaddq $T3,$D0,$D0 # d0 += h4*s1 + + vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2 + vpmuludq $H2,$T4,$T1 # h2*r2 + vpaddq $T1,$D4,$D4 # d4 += h2*r2 + vpmuludq $H1,$T4,$T0 # h1*r2 + vpaddq $T0,$D3,$D3 # d3 += h1*r2 + vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3 + vpmuludq $H0,$T4,$T4 # h0*r2 + vpaddq $T4,$D2,$D2 # d2 += h0*r2 + vpmuludq $H4,$T2,$T1 # h4*s2 + vpaddq $T1,$D1,$D1 # d1 += h4*s2 + vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3 + vpmuludq $H3,$T2,$T2 # h3*s2 + vpaddq $T2,$D0,$D0 # d0 += h3*s2 + + vpmuludq $H1,$T3,$T0 # h1*r3 + vpaddq $T0,$D4,$D4 # d4 += h1*r3 + vpmuludq $H0,$T3,$T3 # h0*r3 + vpaddq $T3,$D3,$D3 # d3 += h0*r3 + vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4 + vpmuludq $H4,$T4,$T1 # h4*s3 + vpaddq $T1,$D2,$D2 # d2 += h4*s3 + vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4 + vpmuludq $H3,$T4,$T0 # h3*s3 + vpaddq $T0,$D1,$D1 # d1 += h3*s3 + vpmuludq $H2,$T4,$T4 # h2*s3 + vpaddq $T4,$D0,$D0 # d0 += h2*s3 + + vpmuludq $H0,$T2,$T2 # h0*r4 + vpaddq $T2,$D4,$D4 # d4 += h0*r4 + vpmuludq $H4,$T3,$T1 # h4*s4 + vpaddq $T1,$D3,$D3 # d3 += h4*s4 + vpmuludq $H3,$T3,$T0 # h3*s4 + vpaddq $T0,$D2,$D2 # d2 += h3*s4 + vpmuludq $H2,$T3,$T1 # h2*s4 + vpaddq $T1,$D1,$D1 # d1 += h2*s4 + vpmuludq $H1,$T3,$T3 # h1*s4 + vpaddq $T3,$D0,$D0 # d0 += h1*s4 + +.Lshort_tail_avx: + ################################################################ + # horizontal addition + + vpsrldq \$8,$D4,$T4 + vpsrldq \$8,$D3,$T3 + vpsrldq \$8,$D1,$T1 + vpsrldq \$8,$D0,$T0 + vpsrldq \$8,$D2,$T2 + vpaddq $T3,$D3,$D3 + vpaddq $T4,$D4,$D4 + vpaddq $T0,$D0,$D0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$D2,$D2 + + ################################################################ + # lazy reduction + + vpsrlq \$26,$D3,$H3 + vpand $MASK,$D3,$D3 + vpaddq $H3,$D4,$D4 # h3 -> h4 + + vpsrlq \$26,$D0,$H0 + vpand $MASK,$D0,$D0 + vpaddq $H0,$D1,$D1 # h0 -> h1 + + vpsrlq \$26,$D4,$H4 + vpand $MASK,$D4,$D4 + + vpsrlq \$26,$D1,$H1 + vpand $MASK,$D1,$D1 + vpaddq $H1,$D2,$D2 # h1 -> h2 + + vpaddq $H4,$D0,$D0 + vpsllq \$2,$H4,$H4 + vpaddq $H4,$D0,$D0 # h4 -> h0 + + vpsrlq \$26,$D2,$H2 + vpand $MASK,$D2,$D2 + vpaddq $H2,$D3,$D3 # h2 -> h3 + + vpsrlq \$26,$D0,$H0 + vpand $MASK,$D0,$D0 + vpaddq $H0,$D1,$D1 # h0 -> h1 + + vpsrlq \$26,$D3,$H3 + vpand $MASK,$D3,$D3 + vpaddq $H3,$D4,$D4 # h3 -> h4 + + vmovd $D0,`4*0-48-64`($ctx) # save partially reduced + vmovd $D1,`4*1-48-64`($ctx) + vmovd $D2,`4*2-48-64`($ctx) + vmovd $D3,`4*3-48-64`($ctx) + vmovd $D4,`4*4-48-64`($ctx) +___ +$code.=<<___ if ($win64); + vmovdqa 0x50(%r11),%xmm6 + vmovdqa 0x60(%r11),%xmm7 + vmovdqa 0x70(%r11),%xmm8 + vmovdqa 0x80(%r11),%xmm9 + vmovdqa 0x90(%r11),%xmm10 + vmovdqa 0xa0(%r11),%xmm11 + vmovdqa 0xb0(%r11),%xmm12 + vmovdqa 0xc0(%r11),%xmm13 + vmovdqa 0xd0(%r11),%xmm14 + vmovdqa 0xe0(%r11),%xmm15 + lea 0xf8(%r11),%rsp +.Ldo_avx_epilogue: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + vzeroupper + ret +.cfi_endproc +___ +&end_function("poly1305_blocks_avx"); + +&declare_function("poly1305_emit_avx", 32, 3); +$code.=<<___; + cmpl \$0,20($ctx) # is_base2_26? + je .Lemit + + mov 0($ctx),%eax # load hash value base 2^26 + mov 4($ctx),%ecx + mov 8($ctx),%r8d + mov 12($ctx),%r11d + mov 16($ctx),%r10d + + shl \$26,%rcx # base 2^26 -> base 2^64 + mov %r8,%r9 + shl \$52,%r8 + add %rcx,%rax + shr \$12,%r9 + add %rax,%r8 # h0 + adc \$0,%r9 + + shl \$14,%r11 + mov %r10,%rax + shr \$24,%r10 + add %r11,%r9 + shl \$40,%rax + add %rax,%r9 # h1 + adc \$0,%r10 # h2 + + mov %r10,%rax # could be partially reduced, so reduce + mov %r10,%rcx + and \$3,%r10 + shr \$2,%rax + and \$-4,%rcx + add %rcx,%rax + add %rax,%r8 + adc \$0,%r9 + adc \$0,%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + ret +___ +&end_function("poly1305_emit_avx"); + +if ($kernel) { + $code .= "#endif\n"; +} + +if ($avx>1) { + +if ($kernel) { + $code .= "#ifdef CONFIG_AS_AVX2\n"; +} + +my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = + map("%ymm$_",(0..15)); +my $S4=$MASK; + +sub poly1305_blocks_avxN { + my ($avx512) = @_; + my $suffix = $avx512 ? "_avx512" : ""; +$code.=<<___; +.cfi_startproc + mov 20($ctx),%r8d # is_base2_26 + cmp \$128,$len + jae .Lblocks_avx2$suffix + test %r8d,%r8d + jz .Lblocks + +.Lblocks_avx2$suffix: + and \$-16,$len + jz .Lno_data_avx2$suffix + + vzeroupper + + test %r8d,%r8d + jz .Lbase2_64_avx2$suffix + + test \$63,$len + jz .Leven_avx2$suffix + + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lblocks_avx2_body$suffix: + + mov $len,%r15 # reassign $len + + mov 0($ctx),$d1 # load hash value + mov 8($ctx),$d2 + mov 16($ctx),$h2#d + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + ################################# base 2^26 -> base 2^64 + mov $d1#d,$h0#d + and \$`-1*(1<<31)`,$d1 + mov $d2,$r1 # borrow $r1 + mov $d2#d,$h1#d + and \$`-1*(1<<31)`,$d2 + + shr \$6,$d1 + shl \$52,$r1 + add $d1,$h0 + shr \$12,$h1 + shr \$18,$d2 + add $r1,$h0 + adc $d2,$h1 + + mov $h2,$d1 + shl \$40,$d1 + shr \$24,$h2 + add $d1,$h1 + adc \$0,$h2 # can be partially reduced... + + mov \$-4,$d2 # ... so reduce + mov $h2,$d1 + and $h2,$d2 + shr \$2,$d1 + and \$3,$h2 + add $d2,$d1 # =*5 + add $d1,$h0 + adc \$0,$h1 + adc \$0,$h2 + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + +.Lbase2_26_pre_avx2$suffix: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + mov $r1,%rax + + test \$63,%r15 + jnz .Lbase2_26_pre_avx2$suffix + + test $padbit,$padbit # if $padbit is zero, + jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format + + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$r0 + mov $h1,$r1 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$r0 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $r0,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$r1 + and \$0x3ffffff,$h1 # h[3] + or $r1,$h2 # h[4] + + test %r15,%r15 + jz .Lstore_base2_26_avx2$suffix + + vmovd %rax#d,%x#$H0 + vmovd %rdx#d,%x#$H1 + vmovd $h0#d,%x#$H2 + vmovd $h1#d,%x#$H3 + vmovd $h2#d,%x#$H4 + jmp .Lproceed_avx2$suffix + +.align 32 +.Lstore_base2_64_avx2$suffix: + mov $h0,0($ctx) + mov $h1,8($ctx) + mov $h2,16($ctx) # note that is_base2_26 is zeroed + jmp .Ldone_avx2$suffix + +.align 16 +.Lstore_base2_26_avx2$suffix: + mov %rax#d,0($ctx) # store hash value base 2^26 + mov %rdx#d,4($ctx) + mov $h0#d,8($ctx) + mov $h1#d,12($ctx) + mov $h2#d,16($ctx) +.align 16 +.Ldone_avx2$suffix: + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lno_data_avx2$suffix: +.Lblocks_avx2_epilogue$suffix: + ret +.cfi_endproc + +.align 32 +.Lbase2_64_avx2$suffix: +.cfi_startproc + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lbase2_64_avx2_body$suffix: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2#d + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + test \$63,$len + jz .Linit_avx2$suffix + +.Lbase2_64_pre_avx2$suffix: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + mov $r1,%rax + + test \$63,%r15 + jnz .Lbase2_64_pre_avx2$suffix + +.Linit_avx2$suffix: + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$d1 + mov $h1,$d2 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$d1 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $d1,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$d2 + and \$0x3ffffff,$h1 # h[3] + or $d2,$h2 # h[4] + + vmovd %rax#d,%x#$H0 + vmovd %rdx#d,%x#$H1 + vmovd $h0#d,%x#$H2 + vmovd $h1#d,%x#$H3 + vmovd $h2#d,%x#$H4 + movl \$1,20($ctx) # set is_base2_26 + + call __poly1305_init_avx + +.Lproceed_avx2$suffix: + mov %r15,$len # restore $len +___ +$code.=<<___ if (!$kernel); + mov OPENSSL_ia32cap_P+8(%rip),%r9d + mov \$`(1<<31|1<<30|1<<16)`,%r11d +___ +$code.=<<___; + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lbase2_64_avx2_epilogue$suffix: + jmp .Ldo_avx2$suffix +.cfi_endproc + +.align 32 +.Leven_avx2$suffix: +.cfi_startproc +___ +$code.=<<___ if (!$kernel); + mov OPENSSL_ia32cap_P+8(%rip),%r9d +___ +$code.=<<___; + vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 + vmovd 4*1($ctx),%x#$H1 + vmovd 4*2($ctx),%x#$H2 + vmovd 4*3($ctx),%x#$H3 + vmovd 4*4($ctx),%x#$H4 + +.Ldo_avx2$suffix: +___ +$code.=<<___ if (!$kernel && $avx>2); + cmp \$512,$len + jb .Lskip_avx512 + and %r11d,%r9d + test \$`1<<16`,%r9d # check for AVX512F + jnz .Lblocks_avx512 +.Lskip_avx512$suffix: +___ +$code.=<<___ if ($avx > 2 && $avx512 && $kernel); + cmp \$512,$len + jae .Lblocks_avx512 +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + sub \$0x128,%rsp +___ +$code.=<<___ if ($win64); + lea 8(%rsp),%r10 + sub \$0x1c8,%rsp + vmovdqa %xmm6,-0xb0(%r10) + vmovdqa %xmm7,-0xa0(%r10) + vmovdqa %xmm8,-0x90(%r10) + vmovdqa %xmm9,-0x80(%r10) + vmovdqa %xmm10,-0x70(%r10) + vmovdqa %xmm11,-0x60(%r10) + vmovdqa %xmm12,-0x50(%r10) + vmovdqa %xmm13,-0x40(%r10) + vmovdqa %xmm14,-0x30(%r10) + vmovdqa %xmm15,-0x20(%r10) +.Ldo_avx2_body$suffix: +___ +$code.=<<___; + lea .Lconst(%rip),%rcx + lea 48+64($ctx),$ctx # size optimization + vmovdqa 96(%rcx),$T0 # .Lpermd_avx2 + + # expand and copy pre-calculated table to stack + vmovdqu `16*0-64`($ctx),%x#$T2 + and \$-512,%rsp + vmovdqu `16*1-64`($ctx),%x#$T3 + vmovdqu `16*2-64`($ctx),%x#$T4 + vmovdqu `16*3-64`($ctx),%x#$D0 + vmovdqu `16*4-64`($ctx),%x#$D1 + vmovdqu `16*5-64`($ctx),%x#$D2 + lea 0x90(%rsp),%rax # size optimization + vmovdqu `16*6-64`($ctx),%x#$D3 + vpermd $T2,$T0,$T2 # 00003412 -> 14243444 + vmovdqu `16*7-64`($ctx),%x#$D4 + vpermd $T3,$T0,$T3 + vmovdqu `16*8-64`($ctx),%x#$MASK + vpermd $T4,$T0,$T4 + vmovdqa $T2,0x00(%rsp) + vpermd $D0,$T0,$D0 + vmovdqa $T3,0x20-0x90(%rax) + vpermd $D1,$T0,$D1 + vmovdqa $T4,0x40-0x90(%rax) + vpermd $D2,$T0,$D2 + vmovdqa $D0,0x60-0x90(%rax) + vpermd $D3,$T0,$D3 + vmovdqa $D1,0x80-0x90(%rax) + vpermd $D4,$T0,$D4 + vmovdqa $D2,0xa0-0x90(%rax) + vpermd $MASK,$T0,$MASK + vmovdqa $D3,0xc0-0x90(%rax) + vmovdqa $D4,0xe0-0x90(%rax) + vmovdqa $MASK,0x100-0x90(%rax) + vmovdqa 64(%rcx),$MASK # .Lmask26 + + ################################################################ + # load input + vmovdqu 16*0($inp),%x#$T0 + vmovdqu 16*1($inp),%x#$T1 + vinserti128 \$1,16*2($inp),$T0,$T0 + vinserti128 \$1,16*3($inp),$T1,$T1 + lea 16*4($inp),$inp + + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpunpcklqdq $T3,$T2,$T2 # 2:3 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + + vpsrlq \$30,$T2,$T3 + vpsrlq \$4,$T2,$T2 + vpsrlq \$26,$T0,$T1 + vpsrlq \$40,$T4,$T4 # 4 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T0,$T0 # 0 + vpand $MASK,$T1,$T1 # 1 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + vpaddq $H2,$T2,$H2 # accumulate input + sub \$64,$len + jz .Ltail_avx2$suffix + jmp .Loop_avx2$suffix + +.align 32 +.Loop_avx2$suffix: + ################################################################ + # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 + # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 + # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2 + # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1 + # \________/\__________/ + ################################################################ + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + vmovdqa `32*0`(%rsp),$T0 # r0^4 + vpaddq $H1,$T1,$H1 + vmovdqa `32*1`(%rsp),$T1 # r1^4 + vpaddq $H3,$T3,$H3 + vmovdqa `32*3`(%rsp),$T2 # r2^4 + vpaddq $H4,$T4,$H4 + vmovdqa `32*6-0x90`(%rax),$T3 # s3^4 + vmovdqa `32*8-0x90`(%rax),$S4 # s4^4 + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # however, as h2 is "chronologically" first one available pull + # corresponding operations up, so it's + # + # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4 + # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4 + + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + + vpmuludq $H0,$T1,$T4 # h0*r1 + vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp + vpaddq $T4,$D1,$D1 # d1 += h0*r1 + vpaddq $H2,$D2,$D2 # d2 += h1*r1 + vpmuludq $H3,$T1,$T4 # h3*r1 + vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1 + vpaddq $T4,$D4,$D4 # d4 += h3*r1 + vpaddq $H2,$D0,$D0 # d0 += h4*s1 + vmovdqa `32*4-0x90`(%rax),$T1 # s2 + + vpmuludq $H0,$T0,$T4 # h0*r0 + vpmuludq $H1,$T0,$H2 # h1*r0 + vpaddq $T4,$D0,$D0 # d0 += h0*r0 + vpaddq $H2,$D1,$D1 # d1 += h1*r0 + vpmuludq $H3,$T0,$T4 # h3*r0 + vpmuludq $H4,$T0,$H2 # h4*r0 + vmovdqu 16*0($inp),%x#$T0 # load input + vpaddq $T4,$D3,$D3 # d3 += h3*r0 + vpaddq $H2,$D4,$D4 # d4 += h4*r0 + vinserti128 \$1,16*2($inp),$T0,$T0 + + vpmuludq $H3,$T1,$T4 # h3*s2 + vpmuludq $H4,$T1,$H2 # h4*s2 + vmovdqu 16*1($inp),%x#$T1 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + vpaddq $H2,$D1,$D1 # d1 += h4*s2 + vmovdqa `32*5-0x90`(%rax),$H2 # r3 + vpmuludq $H1,$T2,$T4 # h1*r2 + vpmuludq $H0,$T2,$T2 # h0*r2 + vpaddq $T4,$D3,$D3 # d3 += h1*r2 + vpaddq $T2,$D2,$D2 # d2 += h0*r2 + vinserti128 \$1,16*3($inp),$T1,$T1 + lea 16*4($inp),$inp + + vpmuludq $H1,$H2,$T4 # h1*r3 + vpmuludq $H0,$H2,$H2 # h0*r3 + vpsrldq \$6,$T0,$T2 # splat input + vpaddq $T4,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $H3,$T3,$T4 # h3*s3 + vpmuludq $H4,$T3,$H2 # h4*s3 + vpsrldq \$6,$T1,$T3 + vpaddq $T4,$D1,$D1 # d1 += h3*s3 + vpaddq $H2,$D2,$D2 # d2 += h4*s3 + vpunpckhqdq $T1,$T0,$T4 # 4 + + vpmuludq $H3,$S4,$H3 # h3*s4 + vpmuludq $H4,$S4,$H4 # h4*s4 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4 + vpmuludq $H1,$S4,$H0 # h1*s4 + vmovdqa 64(%rcx),$MASK # .Lmask26 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + ################################################################ + # lazy reduction (interleaved with tail of input splat) + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$4,$T3,$T2 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpand $MASK,$T2,$T2 # 2 + vpsrlq \$26,$T0,$T1 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpaddq $T2,$H2,$H2 # modulo-scheduled + vpsrlq \$30,$T3,$T3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$40,$T4,$T4 # 4 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpand $MASK,$T0,$T0 # 0 + vpand $MASK,$T1,$T1 # 1 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + sub \$64,$len + jnz .Loop_avx2$suffix + + .byte 0x66,0x90 +.Ltail_avx2$suffix: + ################################################################ + # while above multiplications were by r^4 in all lanes, in last + # iteration we multiply least significant lane by r^4 and most + # significant one by r, so copy of above except that references + # to the precomputed table are displaced by 4... + + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + vmovdqu `32*0+4`(%rsp),$T0 # r0^4 + vpaddq $H1,$T1,$H1 + vmovdqu `32*1+4`(%rsp),$T1 # r1^4 + vpaddq $H3,$T3,$H3 + vmovdqu `32*3+4`(%rsp),$T2 # r2^4 + vpaddq $H4,$T4,$H4 + vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4 + vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4 + + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + + vpmuludq $H0,$T1,$T4 # h0*r1 + vpmuludq $H1,$T1,$H2 # h1*r1 + vpaddq $T4,$D1,$D1 # d1 += h0*r1 + vpaddq $H2,$D2,$D2 # d2 += h1*r1 + vpmuludq $H3,$T1,$T4 # h3*r1 + vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1 + vpaddq $T4,$D4,$D4 # d4 += h3*r1 + vpaddq $H2,$D0,$D0 # d0 += h4*s1 + + vpmuludq $H0,$T0,$T4 # h0*r0 + vpmuludq $H1,$T0,$H2 # h1*r0 + vpaddq $T4,$D0,$D0 # d0 += h0*r0 + vmovdqu `32*4+4-0x90`(%rax),$T1 # s2 + vpaddq $H2,$D1,$D1 # d1 += h1*r0 + vpmuludq $H3,$T0,$T4 # h3*r0 + vpmuludq $H4,$T0,$H2 # h4*r0 + vpaddq $T4,$D3,$D3 # d3 += h3*r0 + vpaddq $H2,$D4,$D4 # d4 += h4*r0 + + vpmuludq $H3,$T1,$T4 # h3*s2 + vpmuludq $H4,$T1,$H2 # h4*s2 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + vpaddq $H2,$D1,$D1 # d1 += h4*s2 + vmovdqu `32*5+4-0x90`(%rax),$H2 # r3 + vpmuludq $H1,$T2,$T4 # h1*r2 + vpmuludq $H0,$T2,$T2 # h0*r2 + vpaddq $T4,$D3,$D3 # d3 += h1*r2 + vpaddq $T2,$D2,$D2 # d2 += h0*r2 + + vpmuludq $H1,$H2,$T4 # h1*r3 + vpmuludq $H0,$H2,$H2 # h0*r3 + vpaddq $T4,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $H3,$T3,$T4 # h3*s3 + vpmuludq $H4,$T3,$H2 # h4*s3 + vpaddq $T4,$D1,$D1 # d1 += h3*s3 + vpaddq $H2,$D2,$D2 # d2 += h4*s3 + + vpmuludq $H3,$S4,$H3 # h3*s4 + vpmuludq $H4,$S4,$H4 # h4*s4 + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 + vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4 + vpmuludq $H1,$S4,$H0 # h1*s4 + vmovdqa 64(%rcx),$MASK # .Lmask26 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + ################################################################ + # horizontal addition + + vpsrldq \$8,$D1,$T1 + vpsrldq \$8,$H2,$T2 + vpsrldq \$8,$H3,$T3 + vpsrldq \$8,$H4,$T4 + vpsrldq \$8,$H0,$T0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$H2,$H2 + vpaddq $T3,$H3,$H3 + vpaddq $T4,$H4,$H4 + vpaddq $T0,$H0,$H0 + + vpermq \$0x2,$H3,$T3 + vpermq \$0x2,$H4,$T4 + vpermq \$0x2,$H0,$T0 + vpermq \$0x2,$D1,$T1 + vpermq \$0x2,$H2,$T2 + vpaddq $T3,$H3,$H3 + vpaddq $T4,$H4,$H4 + vpaddq $T0,$H0,$H0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$H2,$H2 + + ################################################################ + # lazy reduction + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced + vmovd %x#$H1,`4*1-48-64`($ctx) + vmovd %x#$H2,`4*2-48-64`($ctx) + vmovd %x#$H3,`4*3-48-64`($ctx) + vmovd %x#$H4,`4*4-48-64`($ctx) +___ +$code.=<<___ if ($win64); + vmovdqa -0xb0(%r10),%xmm6 + vmovdqa -0xa0(%r10),%xmm7 + vmovdqa -0x90(%r10),%xmm8 + vmovdqa -0x80(%r10),%xmm9 + vmovdqa -0x70(%r10),%xmm10 + vmovdqa -0x60(%r10),%xmm11 + vmovdqa -0x50(%r10),%xmm12 + vmovdqa -0x40(%r10),%xmm13 + vmovdqa -0x30(%r10),%xmm14 + vmovdqa -0x20(%r10),%xmm15 + lea -8(%r10),%rsp +.Ldo_avx2_epilogue$suffix: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + vzeroupper + ret +.cfi_endproc +___ +if($avx > 2 && $avx512) { +my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); +my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); +my $PADBIT="%zmm30"; + +map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain +map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4)); +map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); +map(s/%y/%z/,($MASK)); + +$code.=<<___; +.cfi_startproc +.Lblocks_avx512: + mov \$15,%eax + kmovw %eax,%k2 +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + sub \$0x128,%rsp +___ +$code.=<<___ if ($win64); + lea 8(%rsp),%r10 + sub \$0x1c8,%rsp + vmovdqa %xmm6,-0xb0(%r10) + vmovdqa %xmm7,-0xa0(%r10) + vmovdqa %xmm8,-0x90(%r10) + vmovdqa %xmm9,-0x80(%r10) + vmovdqa %xmm10,-0x70(%r10) + vmovdqa %xmm11,-0x60(%r10) + vmovdqa %xmm12,-0x50(%r10) + vmovdqa %xmm13,-0x40(%r10) + vmovdqa %xmm14,-0x30(%r10) + vmovdqa %xmm15,-0x20(%r10) +.Ldo_avx512_body: +___ +$code.=<<___; + lea .Lconst(%rip),%rcx + lea 48+64($ctx),$ctx # size optimization + vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2 + + # expand pre-calculated table + vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0} + and \$-512,%rsp + vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1} + mov \$0x20,%rax + vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1} + vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2} + vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2} + vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3} + vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3} + vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4} + vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4} + vpermd $D0,$T2,$R0 # 00003412 -> 14243444 + vpbroadcastq 64(%rcx),$MASK # .Lmask26 + vpermd $D1,$T2,$R1 + vpermd $T0,$T2,$S1 + vpermd $D2,$T2,$R2 + vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0 + vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304 + vpermd $T1,$T2,$S2 + vmovdqu64 $R1,0x00(%rsp,%rax){%k2} + vpsrlq \$32,$R1,$T1 + vpermd $D3,$T2,$R3 + vmovdqa64 $S1,0x40(%rsp){%k2} + vpermd $T3,$T2,$S3 + vpermd $D4,$T2,$R4 + vmovdqu64 $R2,0x40(%rsp,%rax){%k2} + vpermd $T4,$T2,$S4 + vmovdqa64 $S2,0x80(%rsp){%k2} + vmovdqu64 $R3,0x80(%rsp,%rax){%k2} + vmovdqa64 $S3,0xc0(%rsp){%k2} + vmovdqu64 $R4,0xc0(%rsp,%rax){%k2} + vmovdqa64 $S4,0x100(%rsp){%k2} + + ################################################################ + # calculate 5th through 8th powers of the key + # + # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1 + # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2 + # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3 + # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4 + # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0 + + vpmuludq $T0,$R0,$D0 # d0 = r0'*r0 + vpmuludq $T0,$R1,$D1 # d1 = r0'*r1 + vpmuludq $T0,$R2,$D2 # d2 = r0'*r2 + vpmuludq $T0,$R3,$D3 # d3 = r0'*r3 + vpmuludq $T0,$R4,$D4 # d4 = r0'*r4 + vpsrlq \$32,$R2,$T2 + + vpmuludq $T1,$S4,$M0 + vpmuludq $T1,$R0,$M1 + vpmuludq $T1,$R1,$M2 + vpmuludq $T1,$R2,$M3 + vpmuludq $T1,$R3,$M4 + vpsrlq \$32,$R3,$T3 + vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4 + vpaddq $M1,$D1,$D1 # d1 += r1'*r0 + vpaddq $M2,$D2,$D2 # d2 += r1'*r1 + vpaddq $M3,$D3,$D3 # d3 += r1'*r2 + vpaddq $M4,$D4,$D4 # d4 += r1'*r3 + + vpmuludq $T2,$S3,$M0 + vpmuludq $T2,$S4,$M1 + vpmuludq $T2,$R1,$M3 + vpmuludq $T2,$R2,$M4 + vpmuludq $T2,$R0,$M2 + vpsrlq \$32,$R4,$T4 + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r3 + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4 + vpaddq $M3,$D3,$D3 # d3 += r2'*r1 + vpaddq $M4,$D4,$D4 # d4 += r2'*r2 + vpaddq $M2,$D2,$D2 # d2 += r2'*r0 + + vpmuludq $T3,$S2,$M0 + vpmuludq $T3,$R0,$M3 + vpmuludq $T3,$R1,$M4 + vpmuludq $T3,$S3,$M1 + vpmuludq $T3,$S4,$M2 + vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2 + vpaddq $M3,$D3,$D3 # d3 += r3'*r0 + vpaddq $M4,$D4,$D4 # d4 += r3'*r1 + vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3 + vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4 + + vpmuludq $T4,$S4,$M3 + vpmuludq $T4,$R0,$M4 + vpmuludq $T4,$S1,$M0 + vpmuludq $T4,$S2,$M1 + vpmuludq $T4,$S3,$M2 + vpaddq $M3,$D3,$D3 # d3 += r2'*5*r4 + vpaddq $M4,$D4,$D4 # d4 += r2'*r0 + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r1 + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r2 + vpaddq $M2,$D2,$D2 # d2 += r2'*5*r3 + + ################################################################ + # load input + vmovdqu64 16*0($inp),%z#$T3 + vmovdqu64 16*4($inp),%z#$T4 + lea 16*8($inp),$inp + + ################################################################ + # lazy reduction + + vpsrlq \$26,$D3,$M3 + vpandq $MASK,$D3,$D3 + vpaddq $M3,$D4,$D4 # d3 -> d4 + + vpsrlq \$26,$D0,$M0 + vpandq $MASK,$D0,$D0 + vpaddq $M0,$D1,$D1 # d0 -> d1 + + vpsrlq \$26,$D4,$M4 + vpandq $MASK,$D4,$D4 + + vpsrlq \$26,$D1,$M1 + vpandq $MASK,$D1,$D1 + vpaddq $M1,$D2,$D2 # d1 -> d2 + + vpaddq $M4,$D0,$D0 + vpsllq \$2,$M4,$M4 + vpaddq $M4,$D0,$D0 # d4 -> d0 + + vpsrlq \$26,$D2,$M2 + vpandq $MASK,$D2,$D2 + vpaddq $M2,$D3,$D3 # d2 -> d3 + + vpsrlq \$26,$D0,$M0 + vpandq $MASK,$D0,$D0 + vpaddq $M0,$D1,$D1 # d0 -> d1 + + vpsrlq \$26,$D3,$M3 + vpandq $MASK,$D3,$D3 + vpaddq $M3,$D4,$D4 # d3 -> d4 + + ################################################################ + # at this point we have 14243444 in $R0-$S4 and 05060708 in + # $D0-$D4, ... + + vpunpcklqdq $T4,$T3,$T0 # transpose input + vpunpckhqdq $T4,$T3,$T4 + + # ... since input 64-bit lanes are ordered as 73625140, we could + # "vperm" it to 76543210 (here and in each loop iteration), *or* + # we could just flow along, hence the goal for $R0-$S4 is + # 1858286838784888 ... + + vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512: + mov \$0x7777,%eax + kmovw %eax,%k1 + + vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4--- + vpermd $R1,$M0,$R1 + vpermd $R2,$M0,$R2 + vpermd $R3,$M0,$R3 + vpermd $R4,$M0,$R4 + + vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888 + vpermd $D1,$M0,${R1}{%k1} + vpermd $D2,$M0,${R2}{%k1} + vpermd $D3,$M0,${R3}{%k1} + vpermd $D4,$M0,${R4}{%k1} + + vpslld \$2,$R1,$S1 # *5 + vpslld \$2,$R2,$S2 + vpslld \$2,$R3,$S3 + vpslld \$2,$R4,$S4 + vpaddd $R1,$S1,$S1 + vpaddd $R2,$S2,$S2 + vpaddd $R3,$S3,$S3 + vpaddd $R4,$S4,$S4 + + vpbroadcastq 32(%rcx),$PADBIT # .L129 + + vpsrlq \$52,$T0,$T2 # splat input + vpsllq \$12,$T4,$T3 + vporq $T3,$T2,$T2 + vpsrlq \$26,$T0,$T1 + vpsrlq \$14,$T4,$T3 + vpsrlq \$40,$T4,$T4 # 4 + vpandq $MASK,$T2,$T2 # 2 + vpandq $MASK,$T0,$T0 # 0 + #vpandq $MASK,$T1,$T1 # 1 + #vpandq $MASK,$T3,$T3 # 3 + #vporq $PADBIT,$T4,$T4 # padbit, yes, always + + vpaddq $H2,$T2,$H2 # accumulate input + sub \$192,$len + jbe .Ltail_avx512 + jmp .Loop_avx512 + +.align 32 +.Loop_avx512: + ################################################################ + # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8 + # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7 + # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6 + # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5 + # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4 + # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3 + # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2 + # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1 + # \________/\___________/ + ################################################################ + #vpaddq $H2,$T2,$H2 # accumulate input + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # however, as h2 is "chronologically" first one available pull + # corresponding operations up, so it's + # + # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4 + # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0 + # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1 + # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2 + # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3 + + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 + vpaddq $H0,$T0,$H0 + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 + vpandq $MASK,$T1,$T1 # 1 + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 + vpandq $MASK,$T3,$T3 # 3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + vporq $PADBIT,$T4,$T4 # padbit, yes, always + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 + vpaddq $H1,$T1,$H1 # accumulate input + vpaddq $H3,$T3,$H3 + vpaddq $H4,$T4,$H4 + + vmovdqu64 16*0($inp),$T3 # load input + vmovdqu64 16*4($inp),$T4 + lea 16*8($inp),$inp + vpmuludq $H0,$R3,$M3 + vpmuludq $H0,$R4,$M4 + vpmuludq $H0,$R0,$M0 + vpmuludq $H0,$R1,$M1 + vpaddq $M3,$D3,$D3 # d3 += h0*r3 + vpaddq $M4,$D4,$D4 # d4 += h0*r4 + vpaddq $M0,$D0,$D0 # d0 += h0*r0 + vpaddq $M1,$D1,$D1 # d1 += h0*r1 + + vpmuludq $H1,$R2,$M3 + vpmuludq $H1,$R3,$M4 + vpmuludq $H1,$S4,$M0 + vpmuludq $H0,$R2,$M2 + vpaddq $M3,$D3,$D3 # d3 += h1*r2 + vpaddq $M4,$D4,$D4 # d4 += h1*r3 + vpaddq $M0,$D0,$D0 # d0 += h1*s4 + vpaddq $M2,$D2,$D2 # d2 += h0*r2 + + vpunpcklqdq $T4,$T3,$T0 # transpose input + vpunpckhqdq $T4,$T3,$T4 + + vpmuludq $H3,$R0,$M3 + vpmuludq $H3,$R1,$M4 + vpmuludq $H1,$R0,$M1 + vpmuludq $H1,$R1,$M2 + vpaddq $M3,$D3,$D3 # d3 += h3*r0 + vpaddq $M4,$D4,$D4 # d4 += h3*r1 + vpaddq $M1,$D1,$D1 # d1 += h1*r0 + vpaddq $M2,$D2,$D2 # d2 += h1*r1 + + vpmuludq $H4,$S4,$M3 + vpmuludq $H4,$R0,$M4 + vpmuludq $H3,$S2,$M0 + vpmuludq $H3,$S3,$M1 + vpaddq $M3,$D3,$D3 # d3 += h4*s4 + vpmuludq $H3,$S4,$M2 + vpaddq $M4,$D4,$D4 # d4 += h4*r0 + vpaddq $M0,$D0,$D0 # d0 += h3*s2 + vpaddq $M1,$D1,$D1 # d1 += h3*s3 + vpaddq $M2,$D2,$D2 # d2 += h3*s4 + + vpmuludq $H4,$S1,$M0 + vpmuludq $H4,$S2,$M1 + vpmuludq $H4,$S3,$M2 + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 + vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 + vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 + + ################################################################ + # lazy reduction (interleaved with input splat) + + vpsrlq \$52,$T0,$T2 # splat input + vpsllq \$12,$T4,$T3 + + vpsrlq \$26,$D3,$H3 + vpandq $MASK,$D3,$D3 + vpaddq $H3,$D4,$H4 # h3 -> h4 + + vporq $T3,$T2,$T2 + + vpsrlq \$26,$H0,$D0 + vpandq $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpandq $MASK,$T2,$T2 # 2 + + vpsrlq \$26,$H4,$D4 + vpandq $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpandq $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpaddq $T2,$H2,$H2 # modulo-scheduled + vpsrlq \$26,$T0,$T1 + + vpsrlq \$26,$H2,$D2 + vpandq $MASK,$H2,$H2 + vpaddq $D2,$D3,$H3 # h2 -> h3 + + vpsrlq \$14,$T4,$T3 + + vpsrlq \$26,$H0,$D0 + vpandq $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$40,$T4,$T4 # 4 + + vpsrlq \$26,$H3,$D3 + vpandq $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpandq $MASK,$T0,$T0 # 0 + #vpandq $MASK,$T1,$T1 # 1 + #vpandq $MASK,$T3,$T3 # 3 + #vporq $PADBIT,$T4,$T4 # padbit, yes, always + + sub \$128,$len + ja .Loop_avx512 + +.Ltail_avx512: + ################################################################ + # while above multiplications were by r^8 in all lanes, in last + # iteration we multiply least significant lane by r^8 and most + # significant one by r, that's why table gets shifted... + + vpsrlq \$32,$R0,$R0 # 0105020603070408 + vpsrlq \$32,$R1,$R1 + vpsrlq \$32,$R2,$R2 + vpsrlq \$32,$S3,$S3 + vpsrlq \$32,$S4,$S4 + vpsrlq \$32,$R3,$R3 + vpsrlq \$32,$R4,$R4 + vpsrlq \$32,$S1,$S1 + vpsrlq \$32,$S2,$S2 + + ################################################################ + # load either next or last 64 byte of input + lea ($inp,$len),$inp + + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 + vpandq $MASK,$T1,$T1 # 1 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + vpandq $MASK,$T3,$T3 # 3 + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 + vporq $PADBIT,$T4,$T4 # padbit, yes, always + vpaddq $H1,$T1,$H1 # accumulate input + vpaddq $H3,$T3,$H3 + vpaddq $H4,$T4,$H4 + + vmovdqu 16*0($inp),%x#$T0 + vpmuludq $H0,$R3,$M3 + vpmuludq $H0,$R4,$M4 + vpmuludq $H0,$R0,$M0 + vpmuludq $H0,$R1,$M1 + vpaddq $M3,$D3,$D3 # d3 += h0*r3 + vpaddq $M4,$D4,$D4 # d4 += h0*r4 + vpaddq $M0,$D0,$D0 # d0 += h0*r0 + vpaddq $M1,$D1,$D1 # d1 += h0*r1 + + vmovdqu 16*1($inp),%x#$T1 + vpmuludq $H1,$R2,$M3 + vpmuludq $H1,$R3,$M4 + vpmuludq $H1,$S4,$M0 + vpmuludq $H0,$R2,$M2 + vpaddq $M3,$D3,$D3 # d3 += h1*r2 + vpaddq $M4,$D4,$D4 # d4 += h1*r3 + vpaddq $M0,$D0,$D0 # d0 += h1*s4 + vpaddq $M2,$D2,$D2 # d2 += h0*r2 + + vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0 + vpmuludq $H3,$R0,$M3 + vpmuludq $H3,$R1,$M4 + vpmuludq $H1,$R0,$M1 + vpmuludq $H1,$R1,$M2 + vpaddq $M3,$D3,$D3 # d3 += h3*r0 + vpaddq $M4,$D4,$D4 # d4 += h3*r1 + vpaddq $M1,$D1,$D1 # d1 += h1*r0 + vpaddq $M2,$D2,$D2 # d2 += h1*r1 + + vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1 + vpmuludq $H4,$S4,$M3 + vpmuludq $H4,$R0,$M4 + vpmuludq $H3,$S2,$M0 + vpmuludq $H3,$S3,$M1 + vpmuludq $H3,$S4,$M2 + vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4 + vpaddq $M4,$D4,$D4 # d4 += h4*r0 + vpaddq $M0,$D0,$D0 # d0 += h3*s2 + vpaddq $M1,$D1,$D1 # d1 += h3*s3 + vpaddq $M2,$D2,$D2 # d2 += h3*s4 + + vpmuludq $H4,$S1,$M0 + vpmuludq $H4,$S2,$M1 + vpmuludq $H4,$S3,$M2 + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 + vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 + vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 + + ################################################################ + # horizontal addition + + mov \$1,%eax + vpermq \$0xb1,$H3,$D3 + vpermq \$0xb1,$D4,$H4 + vpermq \$0xb1,$H0,$D0 + vpermq \$0xb1,$H1,$D1 + vpermq \$0xb1,$H2,$D2 + vpaddq $D3,$H3,$H3 + vpaddq $D4,$H4,$H4 + vpaddq $D0,$H0,$H0 + vpaddq $D1,$H1,$H1 + vpaddq $D2,$H2,$H2 + + kmovw %eax,%k3 + vpermq \$0x2,$H3,$D3 + vpermq \$0x2,$H4,$D4 + vpermq \$0x2,$H0,$D0 + vpermq \$0x2,$H1,$D1 + vpermq \$0x2,$H2,$D2 + vpaddq $D3,$H3,$H3 + vpaddq $D4,$H4,$H4 + vpaddq $D0,$H0,$H0 + vpaddq $D1,$H1,$H1 + vpaddq $D2,$H2,$H2 + + vextracti64x4 \$0x1,$H3,%y#$D3 + vextracti64x4 \$0x1,$H4,%y#$D4 + vextracti64x4 \$0x1,$H0,%y#$D0 + vextracti64x4 \$0x1,$H1,%y#$D1 + vextracti64x4 \$0x1,$H2,%y#$D2 + vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case + vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2 + vpaddq $D0,$H0,${H0}{%k3}{z} + vpaddq $D1,$H1,${H1}{%k3}{z} + vpaddq $D2,$H2,${H2}{%k3}{z} +___ +map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT)); +map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK)); +$code.=<<___; + ################################################################ + # lazy reduction (interleaved with input splat) + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpunpcklqdq $T3,$T2,$T2 # 2:3 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpsrlq \$30,$T2,$T3 + vpsrlq \$4,$T2,$T2 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpsrlq \$26,$T0,$T1 + vpsrlq \$40,$T4,$T4 # 4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T0,$T0 # 0 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2 + vpand $MASK,$T1,$T1 # 1 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + vpaddq $D3,$H4,$H4 # h3 -> h4 + + lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 + add \$64,$len + jnz .Ltail_avx2$suffix + + vpsubq $T2,$H2,$H2 # undo input accumulation + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced + vmovd %x#$H1,`4*1-48-64`($ctx) + vmovd %x#$H2,`4*2-48-64`($ctx) + vmovd %x#$H3,`4*3-48-64`($ctx) + vmovd %x#$H4,`4*4-48-64`($ctx) + vzeroall +___ +$code.=<<___ if ($win64); + movdqa -0xb0(%r10),%xmm6 + movdqa -0xa0(%r10),%xmm7 + movdqa -0x90(%r10),%xmm8 + movdqa -0x80(%r10),%xmm9 + movdqa -0x70(%r10),%xmm10 + movdqa -0x60(%r10),%xmm11 + movdqa -0x50(%r10),%xmm12 + movdqa -0x40(%r10),%xmm13 + movdqa -0x30(%r10),%xmm14 + movdqa -0x20(%r10),%xmm15 + lea -8(%r10),%rsp +.Ldo_avx512_epilogue: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + ret +.cfi_endproc +___ + +} + +} + +&declare_function("poly1305_blocks_avx2", 32, 4); +poly1305_blocks_avxN(0); +&end_function("poly1305_blocks_avx2"); + +if($kernel) { + $code .= "#endif\n"; +} + +####################################################################### +if ($avx>2) { +# On entry we have input length divisible by 64. But since inner loop +# processes 128 bytes per iteration, cases when length is not divisible +# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this +# reason stack layout is kept identical to poly1305_blocks_avx2. If not +# for this tail, we wouldn't have to even allocate stack frame... + +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX512\n"; +} + +&declare_function("poly1305_blocks_avx512", 32, 4); +poly1305_blocks_avxN(1); +&end_function("poly1305_blocks_avx512"); + +if ($kernel) { + $code .= "#endif\n"; +} + +if (!$kernel && $avx>3) { +######################################################################## +# VPMADD52 version using 2^44 radix. +# +# One can argue that base 2^52 would be more natural. Well, even though +# some operations would be more natural, one has to recognize couple of +# things. Base 2^52 doesn't provide advantage over base 2^44 if you look +# at amount of multiply-n-accumulate operations. Secondly, it makes it +# impossible to pre-compute multiples of 5 [referred to as s[]/sN in +# reference implementations], which means that more such operations +# would have to be performed in inner loop, which in turn makes critical +# path longer. In other words, even though base 2^44 reduction might +# look less elegant, overall critical path is actually shorter... + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int64 h[3]; # current hash value base 2^44 +# unsigned __int64 s[2]; # key value*20 base 2^44 +# unsigned __int64 r[3]; # key value base 2^44 +# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4]; +# # r^n positions reflect +# # placement in register, not +# # memory, R[3] is R[1]*20 + +$code.=<<___; +.type poly1305_init_base2_44,\@function,3 +.align 32 +poly1305_init_base2_44: + xor %rax,%rax + mov %rax,0($ctx) # initialize hash value + mov %rax,8($ctx) + mov %rax,16($ctx) + +.Linit_base2_44: + lea poly1305_blocks_vpmadd52(%rip),%r10 + lea poly1305_emit_base2_44(%rip),%r11 + + mov \$0x0ffffffc0fffffff,%rax + mov \$0x0ffffffc0ffffffc,%rcx + and 0($inp),%rax + mov \$0x00000fffffffffff,%r8 + and 8($inp),%rcx + mov \$0x00000fffffffffff,%r9 + and %rax,%r8 + shrd \$44,%rcx,%rax + mov %r8,40($ctx) # r0 + and %r9,%rax + shr \$24,%rcx + mov %rax,48($ctx) # r1 + lea (%rax,%rax,4),%rax # *5 + mov %rcx,56($ctx) # r2 + shl \$2,%rax # magic <<2 + lea (%rcx,%rcx,4),%rcx # *5 + shl \$2,%rcx # magic <<2 + mov %rax,24($ctx) # s1 + mov %rcx,32($ctx) # s2 + movq \$-1,64($ctx) # write impossible value +___ +$code.=<<___ if ($flavour !~ /elf32/); + mov %r10,0(%rdx) + mov %r11,8(%rdx) +___ +$code.=<<___ if ($flavour =~ /elf32/); + mov %r10d,0(%rdx) + mov %r11d,4(%rdx) +___ +$code.=<<___; + mov \$1,%eax + ret +.size poly1305_init_base2_44,.-poly1305_init_base2_44 +___ +{ +my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17)); +my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21)); +my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52,\@function,4 +.align 32 +poly1305_blocks_vpmadd52: + shr \$4,$len + jz .Lno_data_vpmadd52 # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + + # if powers of the key are not calculated yet, process up to 3 + # blocks with this single-block subroutine, otherwise ensure that + # length is divisible by 2 blocks and pass the rest down to next + # subroutine... + + mov \$3,%rax + mov \$1,%r10 + cmp \$4,$len # is input long + cmovae %r10,%rax + test %r8,%r8 # is power value impossible? + cmovns %r10,%rax + + and $len,%rax # is input of favourable length? + jz .Lblocks_vpmadd52_4x + + sub %rax,$len + mov \$7,%r10d + mov \$1,%r11d + kmovw %r10d,%k7 + lea .L2_44_inp_permd(%rip),%r10 + kmovw %r11d,%k1 + + vmovq $padbit,%x#$PAD + vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd + vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift + vpermq \$0xcf,$PAD,$PAD + vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask + + vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value + vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys + vmovdqu64 32($ctx),${r1r0s2}{%k7}{z} + vmovdqu64 24($ctx),${r0s2s1}{%k7}{z} + + vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt + vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft + + jmp .Loop_vpmadd52 + +.align 32 +.Loop_vpmadd52: + vmovdqu32 0($inp),%x#$T0 # load input as ----3210 + lea 16($inp),$inp + + vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110 + vpsrlvq $inp_shift,$T0,$T0 + vpandq $reduc_mask,$T0,$T0 + vporq $PAD,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo # accumulate input + + vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value + vpermq \$0b01010101,$Dlo,${H1}{%k7}{z} + vpermq \$0b10101010,$Dlo,${H2}{%k7}{z} + + vpxord $Dlo,$Dlo,$Dlo + vpxord $Dhi,$Dhi,$Dhi + + vpmadd52luq $r2r1r0,$H0,$Dlo + vpmadd52huq $r2r1r0,$H0,$Dhi + + vpmadd52luq $r1r0s2,$H1,$Dlo + vpmadd52huq $r1r0s2,$H1,$Dhi + + vpmadd52luq $r0s2s1,$H2,$Dlo + vpmadd52huq $r0s2s1,$H2,$Dhi + + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword + vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword + vpandq $reduc_mask,$Dlo,$Dlo + + vpaddq $T0,$Dhi,$Dhi + + vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword + + vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-) + + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word + vpandq $reduc_mask,$Dlo,$Dlo + + vpermq \$0b10010011,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo + + vpermq \$0b10010011,$Dlo,${T0}{%k1}{z} + + vpaddq $T0,$Dlo,$Dlo + vpsllq \$2,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo + + dec %rax # len-=16 + jnz .Loop_vpmadd52 + + vmovdqu64 $Dlo,0($ctx){%k7} # store hash value + + test $len,$len + jnz .Lblocks_vpmadd52_4x + +.Lno_data_vpmadd52: + ret +.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 +___ +} +{ +######################################################################## +# As implied by its name 4x subroutine processes 4 blocks in parallel +# (but handles even 4*n+2 blocks lengths). It takes up to 4th key power +# and is handled in 256-bit %ymm registers. + +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52_4x,\@function,4 +.align 32 +poly1305_blocks_vpmadd52_4x: + shr \$4,$len + jz .Lno_data_vpmadd52_4x # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + +.Lblocks_vpmadd52_4x: + vpbroadcastq $padbit,$PAD + + vmovdqa64 .Lx_mask44(%rip),$mask44 + mov \$5,%eax + vmovdqa64 .Lx_mask42(%rip),$mask42 + kmovw %eax,%k1 # used in 2x path + + test %r8,%r8 # is power value impossible? + js .Linit_vpmadd52 # if it is, then init R[4] + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + + test \$3,$len # is length 4*n+2? + jnz .Lblocks_vpmadd52_2x_do + +.Lblocks_vpmadd52_4x_do: + vpbroadcastq 64($ctx),$R0 # load 4th power of the key + vpbroadcastq 96($ctx),$R1 + vpbroadcastq 128($ctx),$R2 + vpbroadcastq 160($ctx),$S1 + +.Lblocks_vpmadd52_4x_key_loaded: + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + test \$7,$len # is len 8*n? + jz .Lblocks_vpmadd52_8x + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*2($inp),$T3 + lea 16*4($inp),$inp + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as 3-1-2-0 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + sub \$4,$len + jz .Ltail_vpmadd52_4x + jmp .Loop_vpmadd52_4x + ud2 + +.align 32 +.Linit_vpmadd52: + vmovq 24($ctx),%x#$S1 # load key + vmovq 56($ctx),%x#$H2 + vmovq 32($ctx),%x#$S2 + vmovq 40($ctx),%x#$R0 + vmovq 48($ctx),%x#$R1 + + vmovdqa $R0,$H0 + vmovdqa $R1,$H1 + vmovdqa $H2,$R2 + + mov \$2,%eax + +.Lmul_init_vpmadd52: + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + dec %eax + jz .Ldone_init_vpmadd52 + + vpunpcklqdq $R1,$H1,$R1 # 1,2 + vpbroadcastq %x#$H1,%x#$H1 # 2,2 + vpunpcklqdq $R2,$H2,$R2 + vpbroadcastq %x#$H2,%x#$H2 + vpunpcklqdq $R0,$H0,$R0 + vpbroadcastq %x#$H0,%x#$H0 + + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R1,$S1,$S1 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S1,$S1 + vpsllq \$2,$S2,$S2 + + jmp .Lmul_init_vpmadd52 + ud2 + +.align 32 +.Ldone_init_vpmadd52: + vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4 + vinserti128 \$1,%x#$R2,$H2,$R2 + vinserti128 \$1,%x#$R0,$H0,$R0 + + vpermq \$0b11011000,$R1,$R1 # 1,3,2,4 + vpermq \$0b11011000,$R2,$R2 + vpermq \$0b11011000,$R0,$R0 + + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 + vpaddq $R1,$S1,$S1 + vpsllq \$2,$S1,$S1 + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + + test \$3,$len # is length 4*n+2? + jnz .Ldone_init_vpmadd52_2x + + vmovdqu64 $R0,64($ctx) # save key powers + vpbroadcastq %x#$R0,$R0 # broadcast 4th power + vmovdqu64 $R1,96($ctx) + vpbroadcastq %x#$R1,$R1 + vmovdqu64 $R2,128($ctx) + vpbroadcastq %x#$R2,$R2 + vmovdqu64 $S1,160($ctx) + vpbroadcastq %x#$S1,$S1 + + jmp .Lblocks_vpmadd52_4x_key_loaded + ud2 + +.align 32 +.Ldone_init_vpmadd52_2x: + vmovdqu64 $R0,64($ctx) # save key powers + vpsrldq \$8,$R0,$R0 # 0-1-0-2 + vmovdqu64 $R1,96($ctx) + vpsrldq \$8,$R1,$R1 + vmovdqu64 $R2,128($ctx) + vpsrldq \$8,$R2,$R2 + vmovdqu64 $S1,160($ctx) + vpsrldq \$8,$S1,$S1 + jmp .Lblocks_vpmadd52_2x_key_loaded + ud2 + +.align 32 +.Lblocks_vpmadd52_2x_do: + vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers + vmovdqu64 160+8($ctx),${S1}{%k1}{z} + vmovdqu64 64+8($ctx),${R0}{%k1}{z} + vmovdqu64 96+8($ctx),${R1}{%k1}{z} + +.Lblocks_vpmadd52_2x_key_loaded: + vmovdqu64 16*0($inp),$T2 # load data + vpxorq $T3,$T3,$T3 + lea 16*2($inp),$inp + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as x-1-x-0 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + jmp .Ltail_vpmadd52_2x + ud2 + +.align 32 +.Loop_vpmadd52_4x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*2($inp),$T3 + lea 16*4($inp),$inp + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction (interleaved with data splat) + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpsrlq \$24,$T3,$T2 + vporq $PAD,$T2,$T2 + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + sub \$4,$len # len-=64 + jnz .Loop_vpmadd52_4x + +.Ltail_vpmadd52_4x: + vmovdqu64 128($ctx),$R2 # load all key powers + vmovdqu64 160($ctx),$S1 + vmovdqu64 64($ctx),$R0 + vmovdqu64 96($ctx),$R1 + +.Ltail_vpmadd52_2x: + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # horizontal addition + + mov \$1,%eax + kmovw %eax,%k1 + vpsrldq \$8,$D0lo,$T0 + vpsrldq \$8,$D0hi,$H0 + vpsrldq \$8,$D1lo,$T1 + vpsrldq \$8,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpsrldq \$8,$D2lo,$T2 + vpsrldq \$8,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vpermq \$0x2,$D0lo,$T0 + vpermq \$0x2,$D0hi,$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vpermq \$0x2,$D1lo,$T1 + vpermq \$0x2,$D1hi,$H1 + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} + vpermq \$0x2,$D2lo,$T2 + vpermq \$0x2,$D2hi,$H2 + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + # at this point $len is + # either 4*n+2 or 0... + sub \$2,$len # len-=32 + ja .Lblocks_vpmadd52_4x_do + + vmovq %x#$H0,0($ctx) + vmovq %x#$H1,8($ctx) + vmovq %x#$H2,16($ctx) + vzeroall + +.Lno_data_vpmadd52_4x: + ret +.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x +___ +} +{ +######################################################################## +# As implied by its name 8x subroutine processes 8 blocks in parallel... +# This is intermediate version, as it's used only in cases when input +# length is either 8*n, 8*n+1 or 8*n+2... + +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); +my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52_8x,\@function,4 +.align 32 +poly1305_blocks_vpmadd52_8x: + shr \$4,$len + jz .Lno_data_vpmadd52_8x # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + + vmovdqa64 .Lx_mask44(%rip),$mask44 + vmovdqa64 .Lx_mask42(%rip),$mask42 + + test %r8,%r8 # is power value impossible? + js .Linit_vpmadd52 # if it is, then init R[4] + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + +.Lblocks_vpmadd52_8x: + ################################################################ + # fist we calculate more key powers + + vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers + vmovdqu64 160($ctx),$S1 + vmovdqu64 64($ctx),$R0 + vmovdqu64 96($ctx),$R1 + + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + vpbroadcastq %x#$R2,$RR2 # broadcast 4th power + vpbroadcastq %x#$R0,$RR0 + vpbroadcastq %x#$R1,$RR1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $RR2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $RR2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $RR2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $RR2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $RR2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $RR2,$R0,$D2hi + + vpmadd52luq $RR0,$R0,$D0lo + vpmadd52huq $RR0,$R0,$D0hi + vpmadd52luq $RR0,$R1,$D1lo + vpmadd52huq $RR0,$R1,$D1hi + vpmadd52luq $RR0,$R2,$D2lo + vpmadd52huq $RR0,$R2,$D2hi + + vpmadd52luq $RR1,$S2,$D0lo + vpmadd52huq $RR1,$S2,$D0hi + vpmadd52luq $RR1,$R0,$D1lo + vpmadd52huq $RR1,$R0,$D1hi + vpmadd52luq $RR1,$R1,$D2lo + vpmadd52huq $RR1,$R1,$D2hi + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$RR0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$RR1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$RR2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$RR0,$RR0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$RR0,$RR0 + + vpsrlq \$44,$RR0,$tmp # additional step + vpandq $mask44,$RR0,$RR0 + + vpaddq $tmp,$RR1,$RR1 + + ################################################################ + # At this point Rx holds 1324 powers, RRx - 5768, and the goal + # is 15263748, which reflects how data is loaded... + + vpunpcklqdq $R2,$RR2,$T2 # 3748 + vpunpckhqdq $R2,$RR2,$R2 # 1526 + vpunpcklqdq $R0,$RR0,$T0 + vpunpckhqdq $R0,$RR0,$R0 + vpunpcklqdq $R1,$RR1,$T1 + vpunpckhqdq $R1,$RR1,$R1 +___ +######## switch to %zmm +map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); +map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); +map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); +map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2); + +$code.=<<___; + vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748 + vshufi64x2 \$0x44,$R0,$T0,$RR0 + vshufi64x2 \$0x44,$R1,$T1,$RR1 + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*4($inp),$T3 + lea 16*8($inp),$inp + + vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4 + vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4 + vpaddq $RR2,$SS2,$SS2 + vpaddq $RR1,$SS1,$SS1 + vpsllq \$2,$SS2,$SS2 + vpsllq \$2,$SS1,$SS1 + + vpbroadcastq $padbit,$PAD + vpbroadcastq %x#$mask44,$mask44 + vpbroadcastq %x#$mask42,$mask42 + + vpbroadcastq %x#$SS1,$S1 # broadcast 8th power + vpbroadcastq %x#$SS2,$S2 + vpbroadcastq %x#$RR0,$R0 + vpbroadcastq %x#$RR1,$R1 + vpbroadcastq %x#$RR2,$R2 + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as 73625140 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + sub \$8,$len + jz .Ltail_vpmadd52_8x + jmp .Loop_vpmadd52_8x + +.align 32 +.Loop_vpmadd52_8x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*4($inp),$T3 + lea 16*8($inp),$inp + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction (interleaved with data splat) + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpsrlq \$24,$T3,$T2 + vporq $PAD,$T2,$T2 + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + sub \$8,$len # len-=128 + jnz .Loop_vpmadd52_8x + +.Ltail_vpmadd52_8x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$SS1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$SS1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$SS2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$SS2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$RR0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$RR0,$D2hi + + vpmadd52luq $H0,$RR0,$D0lo + vpmadd52huq $H0,$RR0,$D0hi + vpmadd52luq $H0,$RR1,$D1lo + vpmadd52huq $H0,$RR1,$D1hi + vpmadd52luq $H0,$RR2,$D2lo + vpmadd52huq $H0,$RR2,$D2hi + + vpmadd52luq $H1,$SS2,$D0lo + vpmadd52huq $H1,$SS2,$D0hi + vpmadd52luq $H1,$RR0,$D1lo + vpmadd52huq $H1,$RR0,$D1hi + vpmadd52luq $H1,$RR1,$D2lo + vpmadd52huq $H1,$RR1,$D2hi + + ################################################################ + # horizontal addition + + mov \$1,%eax + kmovw %eax,%k1 + vpsrldq \$8,$D0lo,$T0 + vpsrldq \$8,$D0hi,$H0 + vpsrldq \$8,$D1lo,$T1 + vpsrldq \$8,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpsrldq \$8,$D2lo,$T2 + vpsrldq \$8,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vpermq \$0x2,$D0lo,$T0 + vpermq \$0x2,$D0hi,$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vpermq \$0x2,$D1lo,$T1 + vpermq \$0x2,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpermq \$0x2,$D2lo,$T2 + vpermq \$0x2,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vextracti64x4 \$1,$D0lo,%y#$T0 + vextracti64x4 \$1,$D0hi,%y#$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vextracti64x4 \$1,$D1lo,%y#$T1 + vextracti64x4 \$1,$D1hi,%y#$H1 + vextracti64x4 \$1,$D2lo,%y#$T2 + vextracti64x4 \$1,$D2hi,%y#$H2 +___ +######## switch back to %ymm +map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); +map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); +map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); + +$code.=<<___; + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + ################################################################ + + vmovq %x#$H0,0($ctx) + vmovq %x#$H1,8($ctx) + vmovq %x#$H2,16($ctx) + vzeroall + +.Lno_data_vpmadd52_8x: + ret +.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x +___ +} +$code.=<<___; +.type poly1305_emit_base2_44,\@function,3 +.align 32 +poly1305_emit_base2_44: + mov 0($ctx),%r8 # load hash value + mov 8($ctx),%r9 + mov 16($ctx),%r10 + + mov %r9,%rax + shr \$20,%r9 + shl \$44,%rax + mov %r10,%rcx + shr \$40,%r10 + shl \$24,%rcx + + add %rax,%r8 + adc %rcx,%r9 + adc \$0,%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + ret +.size poly1305_emit_base2_44,.-poly1305_emit_base2_44 +___ +} } } +} + +if (!$kernel) +{ # chacha20-poly1305 helpers +my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order +$code.=<<___; +.globl xor128_encrypt_n_pad +.type xor128_encrypt_n_pad,\@abi-omnipotent +.align 16 +xor128_encrypt_n_pad: + sub $otp,$inp + sub $otp,$out + mov $len,%r10 # put len aside + shr \$4,$len # len / 16 + jz .Ltail_enc + nop +.Loop_enc_xmm: + movdqu ($inp,$otp),%xmm0 + pxor ($otp),%xmm0 + movdqu %xmm0,($out,$otp) + movdqa %xmm0,($otp) + lea 16($otp),$otp + dec $len + jnz .Loop_enc_xmm + + and \$15,%r10 # len % 16 + jz .Ldone_enc + +.Ltail_enc: + mov \$16,$len + sub %r10,$len + xor %eax,%eax +.Loop_enc_byte: + mov ($inp,$otp),%al + xor ($otp),%al + mov %al,($out,$otp) + mov %al,($otp) + lea 1($otp),$otp + dec %r10 + jnz .Loop_enc_byte + + xor %eax,%eax +.Loop_enc_pad: + mov %al,($otp) + lea 1($otp),$otp + dec $len + jnz .Loop_enc_pad + +.Ldone_enc: + mov $otp,%rax + ret +.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad + +.globl xor128_decrypt_n_pad +.type xor128_decrypt_n_pad,\@abi-omnipotent +.align 16 +xor128_decrypt_n_pad: + sub $otp,$inp + sub $otp,$out + mov $len,%r10 # put len aside + shr \$4,$len # len / 16 + jz .Ltail_dec + nop +.Loop_dec_xmm: + movdqu ($inp,$otp),%xmm0 + movdqa ($otp),%xmm1 + pxor %xmm0,%xmm1 + movdqu %xmm1,($out,$otp) + movdqa %xmm0,($otp) + lea 16($otp),$otp + dec $len + jnz .Loop_dec_xmm + + pxor %xmm1,%xmm1 + and \$15,%r10 # len % 16 + jz .Ldone_dec + +.Ltail_dec: + mov \$16,$len + sub %r10,$len + xor %eax,%eax + xor %r11,%r11 +.Loop_dec_byte: + mov ($inp,$otp),%r11b + mov ($otp),%al + xor %r11b,%al + mov %al,($out,$otp) + mov %r11b,($otp) + lea 1($otp),$otp + dec %r10 + jnz .Loop_dec_byte + + xor %eax,%eax +.Loop_dec_pad: + mov %al,($otp) + lea 1($otp),$otp + dec $len + jnz .Loop_dec_pad + +.Ldone_dec: + mov $otp,%rax + ret +.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad +___ +} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lcommon_seh_tail + + lea 48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R14 + + jmp .Lcommon_seh_tail +.size se_handler,.-se_handler + +.type avx_handler,\@abi-omnipotent +.align 16 +avx_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 208($context),%rax # pull context->R11 + + lea 0x50(%rax),%rsi + lea 0xf8(%rax),%rax + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size avx_handler,.-avx_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_end_poly1305_init_x86_64 + .rva .LSEH_info_poly1305_init_x86_64 + + .rva .LSEH_begin_poly1305_blocks_x86_64 + .rva .LSEH_end_poly1305_blocks_x86_64 + .rva .LSEH_info_poly1305_blocks_x86_64 + + .rva .LSEH_begin_poly1305_emit_x86_64 + .rva .LSEH_end_poly1305_emit_x86_64 + .rva .LSEH_info_poly1305_emit_x86_64 +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_poly1305_blocks_avx + .rva .Lbase2_64_avx + .rva .LSEH_info_poly1305_blocks_avx_1 + + .rva .Lbase2_64_avx + .rva .Leven_avx + .rva .LSEH_info_poly1305_blocks_avx_2 + + .rva .Leven_avx + .rva .LSEH_end_poly1305_blocks_avx + .rva .LSEH_info_poly1305_blocks_avx_3 + + .rva .LSEH_begin_poly1305_emit_avx + .rva .LSEH_end_poly1305_emit_avx + .rva .LSEH_info_poly1305_emit_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_poly1305_blocks_avx2 + .rva .Lbase2_64_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_1 + + .rva .Lbase2_64_avx2 + .rva .Leven_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_2 + + .rva .Leven_avx2 + .rva .LSEH_end_poly1305_blocks_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_3 +___ +$code.=<<___ if ($avx>2); + .rva .LSEH_begin_poly1305_blocks_avx512 + .rva .LSEH_end_poly1305_blocks_avx512 + .rva .LSEH_info_poly1305_blocks_avx512 +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_poly1305_init_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 + +.LSEH_info_poly1305_blocks_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_body,.Lblocks_epilogue + +.LSEH_info_poly1305_emit_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64 +___ +$code.=<<___ if ($avx); +.LSEH_info_poly1305_blocks_avx_1: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx_2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx_3: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_emit_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx +___ +$code.=<<___ if ($avx>1); +.LSEH_info_poly1305_blocks_avx2_1: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx2_2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx2_3: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[] +___ +$code.=<<___ if ($avx>2); +.LSEH_info_poly1305_blocks_avx512: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[] +___ +} + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach (split('\n',$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + s/%r([a-z]+)#d/%e$1/g; + s/%r([0-9]+)#d/%r$1d/g; + s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g; + + if ($kernel) { + s/(^\.type.*),[0-9]+$/\1/; + s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/; + next if /^\.cfi.*/; + } + + print $_,"\n"; +} +close STDOUT; diff --git a/net/wireguard/crypto/zinc/poly1305/poly1305.c b/net/wireguard/crypto/zinc/poly1305/poly1305.c new file mode 100644 index 000000000000..a54bc3309cf2 --- /dev/null +++ b/net/wireguard/crypto/zinc/poly1305/poly1305.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Implementation of the Poly1305 message authenticator. + * + * Information: https://cr.yp.to/mac.html + */ + +#include +#include "../selftest/run.h" + +#include +#include +#include +#include +#include + +#if defined(CONFIG_ZINC_ARCH_X86_64) +#include "poly1305-x86_64-glue.c" +#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64) +#include "poly1305-arm-glue.c" +#elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64) +#include "poly1305-mips-glue.c" +#else +static inline bool poly1305_init_arch(void *ctx, + const u8 key[POLY1305_KEY_SIZE]) +{ + return false; +} +static inline bool poly1305_blocks_arch(void *ctx, const u8 *input, + size_t len, const u32 padbit, + simd_context_t *simd_context) +{ + return false; +} +static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4], + simd_context_t *simd_context) +{ + return false; +} +static bool *const poly1305_nobs[] __initconst = { }; +static void __init poly1305_fpu_init(void) +{ +} +#endif + +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) +#include "poly1305-donna64.c" +#else +#include "poly1305-donna32.c" +#endif + +void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]) +{ + ctx->nonce[0] = get_unaligned_le32(&key[16]); + ctx->nonce[1] = get_unaligned_le32(&key[20]); + ctx->nonce[2] = get_unaligned_le32(&key[24]); + ctx->nonce[3] = get_unaligned_le32(&key[28]); + + if (!poly1305_init_arch(ctx->opaque, key)) + poly1305_init_generic(ctx->opaque, key); + + ctx->num = 0; +} + +static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len, + const u32 padbit, + simd_context_t *simd_context) +{ + if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context)) + poly1305_blocks_generic(ctx, input, len, padbit); +} + +static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE], + const u32 nonce[4], + simd_context_t *simd_context) +{ + if (!poly1305_emit_arch(ctx, mac, nonce, simd_context)) + poly1305_emit_generic(ctx, mac, nonce); +} + +void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len, + simd_context_t *simd_context) +{ + const size_t num = ctx->num; + size_t rem; + + if (num) { + rem = POLY1305_BLOCK_SIZE - num; + if (len < rem) { + memcpy(ctx->data + num, input, len); + ctx->num = num + len; + return; + } + memcpy(ctx->data + num, input, rem); + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, + simd_context); + input += rem; + len -= rem; + } + + rem = len % POLY1305_BLOCK_SIZE; + len -= rem; + + if (len >= POLY1305_BLOCK_SIZE) { + poly1305_blocks(ctx->opaque, input, len, 1, simd_context); + input += len; + } + + if (rem) + memcpy(ctx->data, input, rem); + + ctx->num = rem; +} + +void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], + simd_context_t *simd_context) +{ + size_t num = ctx->num; + + if (num) { + ctx->data[num++] = 1; + while (num < POLY1305_BLOCK_SIZE) + ctx->data[num++] = 0; + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0, + simd_context); + } + + poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context); + + memzero_explicit(ctx, sizeof(*ctx)); +} + +#include "../selftest/poly1305.c" + +static bool nosimd __initdata = false; + +#ifndef COMPAT_ZINC_IS_A_MODULE +int __init poly1305_mod_init(void) +#else +static int __init mod_init(void) +#endif +{ + if (!nosimd) + poly1305_fpu_init(); + if (!selftest_run("poly1305", poly1305_selftest, poly1305_nobs, + ARRAY_SIZE(poly1305_nobs))) + return -ENOTRECOVERABLE; + return 0; +} + +#ifdef COMPAT_ZINC_IS_A_MODULE +static void __exit mod_exit(void) +{ +} + +module_param(nosimd, bool, 0); +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Poly1305 one-time authenticator"); +MODULE_AUTHOR("Jason A. Donenfeld "); +#endif diff --git a/net/wireguard/crypto/zinc/selftest/blake2s.c b/net/wireguard/crypto/zinc/selftest/blake2s.c new file mode 100644 index 000000000000..1b5c210dc7a8 --- /dev/null +++ b/net/wireguard/crypto/zinc/selftest/blake2s.c @@ -0,0 +1,2090 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { + { 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94, + 0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c, + 0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e, + 0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9 }, + { 0xe3, 0x4d, 0x74, 0xdb, 0xaf, 0x4f, 0xf4, 0xc6, + 0xab, 0xd8, 0x71, 0xcc, 0x22, 0x04, 0x51, 0xd2, + 0xea, 0x26, 0x48, 0x84, 0x6c, 0x77, 0x57, 0xfb, + 0xaa, 0xc8, 0x2f, 0xe5, 0x1a, 0xd6, 0x4b, 0xea }, + { 0xdd, 0xad, 0x9a, 0xb1, 0x5d, 0xac, 0x45, 0x49, + 0xba, 0x42, 0xf4, 0x9d, 0x26, 0x24, 0x96, 0xbe, + 0xf6, 0xc0, 0xba, 0xe1, 0xdd, 0x34, 0x2a, 0x88, + 0x08, 0xf8, 0xea, 0x26, 0x7c, 0x6e, 0x21, 0x0c }, + { 0xe8, 0xf9, 0x1c, 0x6e, 0xf2, 0x32, 0xa0, 0x41, + 0x45, 0x2a, 0xb0, 0xe1, 0x49, 0x07, 0x0c, 0xdd, + 0x7d, 0xd1, 0x76, 0x9e, 0x75, 0xb3, 0xa5, 0x92, + 0x1b, 0xe3, 0x78, 0x76, 0xc4, 0x5c, 0x99, 0x00 }, + { 0x0c, 0xc7, 0x0e, 0x00, 0x34, 0x8b, 0x86, 0xba, + 0x29, 0x44, 0xd0, 0xc3, 0x20, 0x38, 0xb2, 0x5c, + 0x55, 0x58, 0x4f, 0x90, 0xdf, 0x23, 0x04, 0xf5, + 0x5f, 0xa3, 0x32, 0xaf, 0x5f, 0xb0, 0x1e, 0x20 }, + { 0xec, 0x19, 0x64, 0x19, 0x10, 0x87, 0xa4, 0xfe, + 0x9d, 0xf1, 0xc7, 0x95, 0x34, 0x2a, 0x02, 0xff, + 0xc1, 0x91, 0xa5, 0xb2, 0x51, 0x76, 0x48, 0x56, + 0xae, 0x5b, 0x8b, 0x57, 0x69, 0xf0, 0xc6, 0xcd }, + { 0xe1, 0xfa, 0x51, 0x61, 0x8d, 0x7d, 0xf4, 0xeb, + 0x70, 0xcf, 0x0d, 0x5a, 0x9e, 0x90, 0x6f, 0x80, + 0x6e, 0x9d, 0x19, 0xf7, 0xf4, 0xf0, 0x1e, 0x3b, + 0x62, 0x12, 0x88, 0xe4, 0x12, 0x04, 0x05, 0xd6 }, + { 0x59, 0x80, 0x01, 0xfa, 0xfb, 0xe8, 0xf9, 0x4e, + 0xc6, 0x6d, 0xc8, 0x27, 0xd0, 0x12, 0xcf, 0xcb, + 0xba, 0x22, 0x28, 0x56, 0x9f, 0x44, 0x8e, 0x89, + 0xea, 0x22, 0x08, 0xc8, 0xbf, 0x76, 0x92, 0x93 }, + { 0xc7, 0xe8, 0x87, 0xb5, 0x46, 0x62, 0x36, 0x35, + 0xe9, 0x3e, 0x04, 0x95, 0x59, 0x8f, 0x17, 0x26, + 0x82, 0x19, 0x96, 0xc2, 0x37, 0x77, 0x05, 0xb9, + 0x3a, 0x1f, 0x63, 0x6f, 0x87, 0x2b, 0xfa, 0x2d }, + { 0xc3, 0x15, 0xa4, 0x37, 0xdd, 0x28, 0x06, 0x2a, + 0x77, 0x0d, 0x48, 0x19, 0x67, 0x13, 0x6b, 0x1b, + 0x5e, 0xb8, 0x8b, 0x21, 0xee, 0x53, 0xd0, 0x32, + 0x9c, 0x58, 0x97, 0x12, 0x6e, 0x9d, 0xb0, 0x2c }, + { 0xbb, 0x47, 0x3d, 0xed, 0xdc, 0x05, 0x5f, 0xea, + 0x62, 0x28, 0xf2, 0x07, 0xda, 0x57, 0x53, 0x47, + 0xbb, 0x00, 0x40, 0x4c, 0xd3, 0x49, 0xd3, 0x8c, + 0x18, 0x02, 0x63, 0x07, 0xa2, 0x24, 0xcb, 0xff }, + { 0x68, 0x7e, 0x18, 0x73, 0xa8, 0x27, 0x75, 0x91, + 0xbb, 0x33, 0xd9, 0xad, 0xf9, 0xa1, 0x39, 0x12, + 0xef, 0xef, 0xe5, 0x57, 0xca, 0xfc, 0x39, 0xa7, + 0x95, 0x26, 0x23, 0xe4, 0x72, 0x55, 0xf1, 0x6d }, + { 0x1a, 0xc7, 0xba, 0x75, 0x4d, 0x6e, 0x2f, 0x94, + 0xe0, 0xe8, 0x6c, 0x46, 0xbf, 0xb2, 0x62, 0xab, + 0xbb, 0x74, 0xf4, 0x50, 0xef, 0x45, 0x6d, 0x6b, + 0x4d, 0x97, 0xaa, 0x80, 0xce, 0x6d, 0xa7, 0x67 }, + { 0x01, 0x2c, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6b, + 0x5d, 0x94, 0x94, 0x47, 0x7d, 0x4b, 0x68, 0x7d, + 0x15, 0xb9, 0x6e, 0xb6, 0x9c, 0x0e, 0x80, 0x74, + 0xa8, 0x51, 0x6f, 0x31, 0x22, 0x4b, 0x5c, 0x98 }, + { 0x91, 0xff, 0xd2, 0x6c, 0xfa, 0x4d, 0xa5, 0x13, + 0x4c, 0x7e, 0xa2, 0x62, 0xf7, 0x88, 0x9c, 0x32, + 0x9f, 0x61, 0xf6, 0xa6, 0x57, 0x22, 0x5c, 0xc2, + 0x12, 0xf4, 0x00, 0x56, 0xd9, 0x86, 0xb3, 0xf4 }, + { 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21, + 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67, + 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04, + 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d }, + { 0xef, 0xc0, 0x4c, 0xdc, 0x39, 0x1c, 0x7e, 0x91, + 0x19, 0xbd, 0x38, 0x66, 0x8a, 0x53, 0x4e, 0x65, + 0xfe, 0x31, 0x03, 0x6d, 0x6a, 0x62, 0x11, 0x2e, + 0x44, 0xeb, 0xeb, 0x11, 0xf9, 0xc5, 0x70, 0x80 }, + { 0x99, 0x2c, 0xf5, 0xc0, 0x53, 0x44, 0x2a, 0x5f, + 0xbc, 0x4f, 0xaf, 0x58, 0x3e, 0x04, 0xe5, 0x0b, + 0xb7, 0x0d, 0x2f, 0x39, 0xfb, 0xb6, 0xa5, 0x03, + 0xf8, 0x9e, 0x56, 0xa6, 0x3e, 0x18, 0x57, 0x8a }, + { 0x38, 0x64, 0x0e, 0x9f, 0x21, 0x98, 0x3e, 0x67, + 0xb5, 0x39, 0xca, 0xcc, 0xae, 0x5e, 0xcf, 0x61, + 0x5a, 0xe2, 0x76, 0x4f, 0x75, 0xa0, 0x9c, 0x9c, + 0x59, 0xb7, 0x64, 0x83, 0xc1, 0xfb, 0xc7, 0x35 }, + { 0x21, 0x3d, 0xd3, 0x4c, 0x7e, 0xfe, 0x4f, 0xb2, + 0x7a, 0x6b, 0x35, 0xf6, 0xb4, 0x00, 0x0d, 0x1f, + 0xe0, 0x32, 0x81, 0xaf, 0x3c, 0x72, 0x3e, 0x5c, + 0x9f, 0x94, 0x74, 0x7a, 0x5f, 0x31, 0xcd, 0x3b }, + { 0xec, 0x24, 0x6e, 0xee, 0xb9, 0xce, 0xd3, 0xf7, + 0xad, 0x33, 0xed, 0x28, 0x66, 0x0d, 0xd9, 0xbb, + 0x07, 0x32, 0x51, 0x3d, 0xb4, 0xe2, 0xfa, 0x27, + 0x8b, 0x60, 0xcd, 0xe3, 0x68, 0x2a, 0x4c, 0xcd }, + { 0xac, 0x9b, 0x61, 0xd4, 0x46, 0x64, 0x8c, 0x30, + 0x05, 0xd7, 0x89, 0x2b, 0xf3, 0xa8, 0x71, 0x9f, + 0x4c, 0x81, 0x81, 0xcf, 0xdc, 0xbc, 0x2b, 0x79, + 0xfe, 0xf1, 0x0a, 0x27, 0x9b, 0x91, 0x10, 0x95 }, + { 0x7b, 0xf8, 0xb2, 0x29, 0x59, 0xe3, 0x4e, 0x3a, + 0x43, 0xf7, 0x07, 0x92, 0x23, 0xe8, 0x3a, 0x97, + 0x54, 0x61, 0x7d, 0x39, 0x1e, 0x21, 0x3d, 0xfd, + 0x80, 0x8e, 0x41, 0xb9, 0xbe, 0xad, 0x4c, 0xe7 }, + { 0x68, 0xd4, 0xb5, 0xd4, 0xfa, 0x0e, 0x30, 0x2b, + 0x64, 0xcc, 0xc5, 0xaf, 0x79, 0x29, 0x13, 0xac, + 0x4c, 0x88, 0xec, 0x95, 0xc0, 0x7d, 0xdf, 0x40, + 0x69, 0x42, 0x56, 0xeb, 0x88, 0xce, 0x9f, 0x3d }, + { 0xb2, 0xc2, 0x42, 0x0f, 0x05, 0xf9, 0xab, 0xe3, + 0x63, 0x15, 0x91, 0x93, 0x36, 0xb3, 0x7e, 0x4e, + 0x0f, 0xa3, 0x3f, 0xf7, 0xe7, 0x6a, 0x49, 0x27, + 0x67, 0x00, 0x6f, 0xdb, 0x5d, 0x93, 0x54, 0x62 }, + { 0x13, 0x4f, 0x61, 0xbb, 0xd0, 0xbb, 0xb6, 0x9a, + 0xed, 0x53, 0x43, 0x90, 0x45, 0x51, 0xa3, 0xe6, + 0xc1, 0xaa, 0x7d, 0xcd, 0xd7, 0x7e, 0x90, 0x3e, + 0x70, 0x23, 0xeb, 0x7c, 0x60, 0x32, 0x0a, 0xa7 }, + { 0x46, 0x93, 0xf9, 0xbf, 0xf7, 0xd4, 0xf3, 0x98, + 0x6a, 0x7d, 0x17, 0x6e, 0x6e, 0x06, 0xf7, 0x2a, + 0xd1, 0x49, 0x0d, 0x80, 0x5c, 0x99, 0xe2, 0x53, + 0x47, 0xb8, 0xde, 0x77, 0xb4, 0xdb, 0x6d, 0x9b }, + { 0x85, 0x3e, 0x26, 0xf7, 0x41, 0x95, 0x3b, 0x0f, + 0xd5, 0xbd, 0xb4, 0x24, 0xe8, 0xab, 0x9e, 0x8b, + 0x37, 0x50, 0xea, 0xa8, 0xef, 0x61, 0xe4, 0x79, + 0x02, 0xc9, 0x1e, 0x55, 0x4e, 0x9c, 0x73, 0xb9 }, + { 0xf7, 0xde, 0x53, 0x63, 0x61, 0xab, 0xaa, 0x0e, + 0x15, 0x81, 0x56, 0xcf, 0x0e, 0xa4, 0xf6, 0x3a, + 0x99, 0xb5, 0xe4, 0x05, 0x4f, 0x8f, 0xa4, 0xc9, + 0xd4, 0x5f, 0x62, 0x85, 0xca, 0xd5, 0x56, 0x94 }, + { 0x4c, 0x23, 0x06, 0x08, 0x86, 0x0a, 0x99, 0xae, + 0x8d, 0x7b, 0xd5, 0xc2, 0xcc, 0x17, 0xfa, 0x52, + 0x09, 0x6b, 0x9a, 0x61, 0xbe, 0xdb, 0x17, 0xcb, + 0x76, 0x17, 0x86, 0x4a, 0xd2, 0x9c, 0xa7, 0xa6 }, + { 0xae, 0xb9, 0x20, 0xea, 0x87, 0x95, 0x2d, 0xad, + 0xb1, 0xfb, 0x75, 0x92, 0x91, 0xe3, 0x38, 0x81, + 0x39, 0xa8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6e, + 0xd8, 0x47, 0x52, 0xe9, 0x3c, 0x25, 0x0c, 0x2a }, + { 0xab, 0xa4, 0xad, 0x9b, 0x48, 0x0b, 0x9d, 0xf3, + 0xd0, 0x8c, 0xa5, 0xe8, 0x7b, 0x0c, 0x24, 0x40, + 0xd4, 0xe4, 0xea, 0x21, 0x22, 0x4c, 0x2e, 0xb4, + 0x2c, 0xba, 0xe4, 0x69, 0xd0, 0x89, 0xb9, 0x31 }, + { 0x05, 0x82, 0x56, 0x07, 0xd7, 0xfd, 0xf2, 0xd8, + 0x2e, 0xf4, 0xc3, 0xc8, 0xc2, 0xae, 0xa9, 0x61, + 0xad, 0x98, 0xd6, 0x0e, 0xdf, 0xf7, 0xd0, 0x18, + 0x98, 0x3e, 0x21, 0x20, 0x4c, 0x0d, 0x93, 0xd1 }, + { 0xa7, 0x42, 0xf8, 0xb6, 0xaf, 0x82, 0xd8, 0xa6, + 0xca, 0x23, 0x57, 0xc5, 0xf1, 0xcf, 0x91, 0xde, + 0xfb, 0xd0, 0x66, 0x26, 0x7d, 0x75, 0xc0, 0x48, + 0xb3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 }, + { 0x2b, 0xca, 0xc8, 0x95, 0x99, 0x00, 0x0b, 0x42, + 0xc9, 0x5a, 0xe2, 0x38, 0x35, 0xa7, 0x13, 0x70, + 0x4e, 0xd7, 0x97, 0x89, 0xc8, 0x4f, 0xef, 0x14, + 0x9a, 0x87, 0x4f, 0xf7, 0x33, 0xf0, 0x17, 0xa2 }, + { 0xac, 0x1e, 0xd0, 0x7d, 0x04, 0x8f, 0x10, 0x5a, + 0x9e, 0x5b, 0x7a, 0xb8, 0x5b, 0x09, 0xa4, 0x92, + 0xd5, 0xba, 0xff, 0x14, 0xb8, 0xbf, 0xb0, 0xe9, + 0xfd, 0x78, 0x94, 0x86, 0xee, 0xa2, 0xb9, 0x74 }, + { 0xe4, 0x8d, 0x0e, 0xcf, 0xaf, 0x49, 0x7d, 0x5b, + 0x27, 0xc2, 0x5d, 0x99, 0xe1, 0x56, 0xcb, 0x05, + 0x79, 0xd4, 0x40, 0xd6, 0xe3, 0x1f, 0xb6, 0x24, + 0x73, 0x69, 0x6d, 0xbf, 0x95, 0xe0, 0x10, 0xe4 }, + { 0x12, 0xa9, 0x1f, 0xad, 0xf8, 0xb2, 0x16, 0x44, + 0xfd, 0x0f, 0x93, 0x4f, 0x3c, 0x4a, 0x8f, 0x62, + 0xba, 0x86, 0x2f, 0xfd, 0x20, 0xe8, 0xe9, 0x61, + 0x15, 0x4c, 0x15, 0xc1, 0x38, 0x84, 0xed, 0x3d }, + { 0x7c, 0xbe, 0xe9, 0x6e, 0x13, 0x98, 0x97, 0xdc, + 0x98, 0xfb, 0xef, 0x3b, 0xe8, 0x1a, 0xd4, 0xd9, + 0x64, 0xd2, 0x35, 0xcb, 0x12, 0x14, 0x1f, 0xb6, + 0x67, 0x27, 0xe6, 0xe5, 0xdf, 0x73, 0xa8, 0x78 }, + { 0xeb, 0xf6, 0x6a, 0xbb, 0x59, 0x7a, 0xe5, 0x72, + 0xa7, 0x29, 0x7c, 0xb0, 0x87, 0x1e, 0x35, 0x5a, + 0xcc, 0xaf, 0xad, 0x83, 0x77, 0xb8, 0xe7, 0x8b, + 0xf1, 0x64, 0xce, 0x2a, 0x18, 0xde, 0x4b, 0xaf }, + { 0x71, 0xb9, 0x33, 0xb0, 0x7e, 0x4f, 0xf7, 0x81, + 0x8c, 0xe0, 0x59, 0xd0, 0x08, 0x82, 0x9e, 0x45, + 0x3c, 0x6f, 0xf0, 0x2e, 0xc0, 0xa7, 0xdb, 0x39, + 0x3f, 0xc2, 0xd8, 0x70, 0xf3, 0x7a, 0x72, 0x86 }, + { 0x7c, 0xf7, 0xc5, 0x13, 0x31, 0x22, 0x0b, 0x8d, + 0x3e, 0xba, 0xed, 0x9c, 0x29, 0x39, 0x8a, 0x16, + 0xd9, 0x81, 0x56, 0xe2, 0x61, 0x3c, 0xb0, 0x88, + 0xf2, 0xb0, 0xe0, 0x8a, 0x1b, 0xe4, 0xcf, 0x4f }, + { 0x3e, 0x41, 0xa1, 0x08, 0xe0, 0xf6, 0x4a, 0xd2, + 0x76, 0xb9, 0x79, 0xe1, 0xce, 0x06, 0x82, 0x79, + 0xe1, 0x6f, 0x7b, 0xc7, 0xe4, 0xaa, 0x1d, 0x21, + 0x1e, 0x17, 0xb8, 0x11, 0x61, 0xdf, 0x16, 0x02 }, + { 0x88, 0x65, 0x02, 0xa8, 0x2a, 0xb4, 0x7b, 0xa8, + 0xd8, 0x67, 0x10, 0xaa, 0x9d, 0xe3, 0xd4, 0x6e, + 0xa6, 0x5c, 0x47, 0xaf, 0x6e, 0xe8, 0xde, 0x45, + 0x0c, 0xce, 0xb8, 0xb1, 0x1b, 0x04, 0x5f, 0x50 }, + { 0xc0, 0x21, 0xbc, 0x5f, 0x09, 0x54, 0xfe, 0xe9, + 0x4f, 0x46, 0xea, 0x09, 0x48, 0x7e, 0x10, 0xa8, + 0x48, 0x40, 0xd0, 0x2f, 0x64, 0x81, 0x0b, 0xc0, + 0x8d, 0x9e, 0x55, 0x1f, 0x7d, 0x41, 0x68, 0x14 }, + { 0x20, 0x30, 0x51, 0x6e, 0x8a, 0x5f, 0xe1, 0x9a, + 0xe7, 0x9c, 0x33, 0x6f, 0xce, 0x26, 0x38, 0x2a, + 0x74, 0x9d, 0x3f, 0xd0, 0xec, 0x91, 0xe5, 0x37, + 0xd4, 0xbd, 0x23, 0x58, 0xc1, 0x2d, 0xfb, 0x22 }, + { 0x55, 0x66, 0x98, 0xda, 0xc8, 0x31, 0x7f, 0xd3, + 0x6d, 0xfb, 0xdf, 0x25, 0xa7, 0x9c, 0xb1, 0x12, + 0xd5, 0x42, 0x58, 0x60, 0x60, 0x5c, 0xba, 0xf5, + 0x07, 0xf2, 0x3b, 0xf7, 0xe9, 0xf4, 0x2a, 0xfe }, + { 0x2f, 0x86, 0x7b, 0xa6, 0x77, 0x73, 0xfd, 0xc3, + 0xe9, 0x2f, 0xce, 0xd9, 0x9a, 0x64, 0x09, 0xad, + 0x39, 0xd0, 0xb8, 0x80, 0xfd, 0xe8, 0xf1, 0x09, + 0xa8, 0x17, 0x30, 0xc4, 0x45, 0x1d, 0x01, 0x78 }, + { 0x17, 0x2e, 0xc2, 0x18, 0xf1, 0x19, 0xdf, 0xae, + 0x98, 0x89, 0x6d, 0xff, 0x29, 0xdd, 0x98, 0x76, + 0xc9, 0x4a, 0xf8, 0x74, 0x17, 0xf9, 0xae, 0x4c, + 0x70, 0x14, 0xbb, 0x4e, 0x4b, 0x96, 0xaf, 0xc7 }, + { 0x3f, 0x85, 0x81, 0x4a, 0x18, 0x19, 0x5f, 0x87, + 0x9a, 0xa9, 0x62, 0xf9, 0x5d, 0x26, 0xbd, 0x82, + 0xa2, 0x78, 0xf2, 0xb8, 0x23, 0x20, 0x21, 0x8f, + 0x6b, 0x3b, 0xd6, 0xf7, 0xf6, 0x67, 0xa6, 0xd9 }, + { 0x1b, 0x61, 0x8f, 0xba, 0xa5, 0x66, 0xb3, 0xd4, + 0x98, 0xc1, 0x2e, 0x98, 0x2c, 0x9e, 0xc5, 0x2e, + 0x4d, 0xa8, 0x5a, 0x8c, 0x54, 0xf3, 0x8f, 0x34, + 0xc0, 0x90, 0x39, 0x4f, 0x23, 0xc1, 0x84, 0xc1 }, + { 0x0c, 0x75, 0x8f, 0xb5, 0x69, 0x2f, 0xfd, 0x41, + 0xa3, 0x57, 0x5d, 0x0a, 0xf0, 0x0c, 0xc7, 0xfb, + 0xf2, 0xcb, 0xe5, 0x90, 0x5a, 0x58, 0x32, 0x3a, + 0x88, 0xae, 0x42, 0x44, 0xf6, 0xe4, 0xc9, 0x93 }, + { 0xa9, 0x31, 0x36, 0x0c, 0xad, 0x62, 0x8c, 0x7f, + 0x12, 0xa6, 0xc1, 0xc4, 0xb7, 0x53, 0xb0, 0xf4, + 0x06, 0x2a, 0xef, 0x3c, 0xe6, 0x5a, 0x1a, 0xe3, + 0xf1, 0x93, 0x69, 0xda, 0xdf, 0x3a, 0xe2, 0x3d }, + { 0xcb, 0xac, 0x7d, 0x77, 0x3b, 0x1e, 0x3b, 0x3c, + 0x66, 0x91, 0xd7, 0xab, 0xb7, 0xe9, 0xdf, 0x04, + 0x5c, 0x8b, 0xa1, 0x92, 0x68, 0xde, 0xd1, 0x53, + 0x20, 0x7f, 0x5e, 0x80, 0x43, 0x52, 0xec, 0x5d }, + { 0x23, 0xa1, 0x96, 0xd3, 0x80, 0x2e, 0xd3, 0xc1, + 0xb3, 0x84, 0x01, 0x9a, 0x82, 0x32, 0x58, 0x40, + 0xd3, 0x2f, 0x71, 0x95, 0x0c, 0x45, 0x80, 0xb0, + 0x34, 0x45, 0xe0, 0x89, 0x8e, 0x14, 0x05, 0x3c }, + { 0xf4, 0x49, 0x54, 0x70, 0xf2, 0x26, 0xc8, 0xc2, + 0x14, 0xbe, 0x08, 0xfd, 0xfa, 0xd4, 0xbc, 0x4a, + 0x2a, 0x9d, 0xbe, 0xa9, 0x13, 0x6a, 0x21, 0x0d, + 0xf0, 0xd4, 0xb6, 0x49, 0x29, 0xe6, 0xfc, 0x14 }, + { 0xe2, 0x90, 0xdd, 0x27, 0x0b, 0x46, 0x7f, 0x34, + 0xab, 0x1c, 0x00, 0x2d, 0x34, 0x0f, 0xa0, 0x16, + 0x25, 0x7f, 0xf1, 0x9e, 0x58, 0x33, 0xfd, 0xbb, + 0xf2, 0xcb, 0x40, 0x1c, 0x3b, 0x28, 0x17, 0xde }, + { 0x9f, 0xc7, 0xb5, 0xde, 0xd3, 0xc1, 0x50, 0x42, + 0xb2, 0xa6, 0x58, 0x2d, 0xc3, 0x9b, 0xe0, 0x16, + 0xd2, 0x4a, 0x68, 0x2d, 0x5e, 0x61, 0xad, 0x1e, + 0xff, 0x9c, 0x63, 0x30, 0x98, 0x48, 0xf7, 0x06 }, + { 0x8c, 0xca, 0x67, 0xa3, 0x6d, 0x17, 0xd5, 0xe6, + 0x34, 0x1c, 0xb5, 0x92, 0xfd, 0x7b, 0xef, 0x99, + 0x26, 0xc9, 0xe3, 0xaa, 0x10, 0x27, 0xea, 0x11, + 0xa7, 0xd8, 0xbd, 0x26, 0x0b, 0x57, 0x6e, 0x04 }, + { 0x40, 0x93, 0x92, 0xf5, 0x60, 0xf8, 0x68, 0x31, + 0xda, 0x43, 0x73, 0xee, 0x5e, 0x00, 0x74, 0x26, + 0x05, 0x95, 0xd7, 0xbc, 0x24, 0x18, 0x3b, 0x60, + 0xed, 0x70, 0x0d, 0x45, 0x83, 0xd3, 0xf6, 0xf0 }, + { 0x28, 0x02, 0x16, 0x5d, 0xe0, 0x90, 0x91, 0x55, + 0x46, 0xf3, 0x39, 0x8c, 0xd8, 0x49, 0x16, 0x4a, + 0x19, 0xf9, 0x2a, 0xdb, 0xc3, 0x61, 0xad, 0xc9, + 0x9b, 0x0f, 0x20, 0xc8, 0xea, 0x07, 0x10, 0x54 }, + { 0xad, 0x83, 0x91, 0x68, 0xd9, 0xf8, 0xa4, 0xbe, + 0x95, 0xba, 0x9e, 0xf9, 0xa6, 0x92, 0xf0, 0x72, + 0x56, 0xae, 0x43, 0xfe, 0x6f, 0x98, 0x64, 0xe2, + 0x90, 0x69, 0x1b, 0x02, 0x56, 0xce, 0x50, 0xa9 }, + { 0x75, 0xfd, 0xaa, 0x50, 0x38, 0xc2, 0x84, 0xb8, + 0x6d, 0x6e, 0x8a, 0xff, 0xe8, 0xb2, 0x80, 0x7e, + 0x46, 0x7b, 0x86, 0x60, 0x0e, 0x79, 0xaf, 0x36, + 0x89, 0xfb, 0xc0, 0x63, 0x28, 0xcb, 0xf8, 0x94 }, + { 0xe5, 0x7c, 0xb7, 0x94, 0x87, 0xdd, 0x57, 0x90, + 0x24, 0x32, 0xb2, 0x50, 0x73, 0x38, 0x13, 0xbd, + 0x96, 0xa8, 0x4e, 0xfc, 0xe5, 0x9f, 0x65, 0x0f, + 0xac, 0x26, 0xe6, 0x69, 0x6a, 0xef, 0xaf, 0xc3 }, + { 0x56, 0xf3, 0x4e, 0x8b, 0x96, 0x55, 0x7e, 0x90, + 0xc1, 0xf2, 0x4b, 0x52, 0xd0, 0xc8, 0x9d, 0x51, + 0x08, 0x6a, 0xcf, 0x1b, 0x00, 0xf6, 0x34, 0xcf, + 0x1d, 0xde, 0x92, 0x33, 0xb8, 0xea, 0xaa, 0x3e }, + { 0x1b, 0x53, 0xee, 0x94, 0xaa, 0xf3, 0x4e, 0x4b, + 0x15, 0x9d, 0x48, 0xde, 0x35, 0x2c, 0x7f, 0x06, + 0x61, 0xd0, 0xa4, 0x0e, 0xdf, 0xf9, 0x5a, 0x0b, + 0x16, 0x39, 0xb4, 0x09, 0x0e, 0x97, 0x44, 0x72 }, + { 0x05, 0x70, 0x5e, 0x2a, 0x81, 0x75, 0x7c, 0x14, + 0xbd, 0x38, 0x3e, 0xa9, 0x8d, 0xda, 0x54, 0x4e, + 0xb1, 0x0e, 0x6b, 0xc0, 0x7b, 0xae, 0x43, 0x5e, + 0x25, 0x18, 0xdb, 0xe1, 0x33, 0x52, 0x53, 0x75 }, + { 0xd8, 0xb2, 0x86, 0x6e, 0x8a, 0x30, 0x9d, 0xb5, + 0x3e, 0x52, 0x9e, 0xc3, 0x29, 0x11, 0xd8, 0x2f, + 0x5c, 0xa1, 0x6c, 0xff, 0x76, 0x21, 0x68, 0x91, + 0xa9, 0x67, 0x6a, 0xa3, 0x1a, 0xaa, 0x6c, 0x42 }, + { 0xf5, 0x04, 0x1c, 0x24, 0x12, 0x70, 0xeb, 0x04, + 0xc7, 0x1e, 0xc2, 0xc9, 0x5d, 0x4c, 0x38, 0xd8, + 0x03, 0xb1, 0x23, 0x7b, 0x0f, 0x29, 0xfd, 0x4d, + 0xb3, 0xeb, 0x39, 0x76, 0x69, 0xe8, 0x86, 0x99 }, + { 0x9a, 0x4c, 0xe0, 0x77, 0xc3, 0x49, 0x32, 0x2f, + 0x59, 0x5e, 0x0e, 0xe7, 0x9e, 0xd0, 0xda, 0x5f, + 0xab, 0x66, 0x75, 0x2c, 0xbf, 0xef, 0x8f, 0x87, + 0xd0, 0xe9, 0xd0, 0x72, 0x3c, 0x75, 0x30, 0xdd }, + { 0x65, 0x7b, 0x09, 0xf3, 0xd0, 0xf5, 0x2b, 0x5b, + 0x8f, 0x2f, 0x97, 0x16, 0x3a, 0x0e, 0xdf, 0x0c, + 0x04, 0xf0, 0x75, 0x40, 0x8a, 0x07, 0xbb, 0xeb, + 0x3a, 0x41, 0x01, 0xa8, 0x91, 0x99, 0x0d, 0x62 }, + { 0x1e, 0x3f, 0x7b, 0xd5, 0xa5, 0x8f, 0xa5, 0x33, + 0x34, 0x4a, 0xa8, 0xed, 0x3a, 0xc1, 0x22, 0xbb, + 0x9e, 0x70, 0xd4, 0xef, 0x50, 0xd0, 0x04, 0x53, + 0x08, 0x21, 0x94, 0x8f, 0x5f, 0xe6, 0x31, 0x5a }, + { 0x80, 0xdc, 0xcf, 0x3f, 0xd8, 0x3d, 0xfd, 0x0d, + 0x35, 0xaa, 0x28, 0x58, 0x59, 0x22, 0xab, 0x89, + 0xd5, 0x31, 0x39, 0x97, 0x67, 0x3e, 0xaf, 0x90, + 0x5c, 0xea, 0x9c, 0x0b, 0x22, 0x5c, 0x7b, 0x5f }, + { 0x8a, 0x0d, 0x0f, 0xbf, 0x63, 0x77, 0xd8, 0x3b, + 0xb0, 0x8b, 0x51, 0x4b, 0x4b, 0x1c, 0x43, 0xac, + 0xc9, 0x5d, 0x75, 0x17, 0x14, 0xf8, 0x92, 0x56, + 0x45, 0xcb, 0x6b, 0xc8, 0x56, 0xca, 0x15, 0x0a }, + { 0x9f, 0xa5, 0xb4, 0x87, 0x73, 0x8a, 0xd2, 0x84, + 0x4c, 0xc6, 0x34, 0x8a, 0x90, 0x19, 0x18, 0xf6, + 0x59, 0xa3, 0xb8, 0x9e, 0x9c, 0x0d, 0xfe, 0xea, + 0xd3, 0x0d, 0xd9, 0x4b, 0xcf, 0x42, 0xef, 0x8e }, + { 0x80, 0x83, 0x2c, 0x4a, 0x16, 0x77, 0xf5, 0xea, + 0x25, 0x60, 0xf6, 0x68, 0xe9, 0x35, 0x4d, 0xd3, + 0x69, 0x97, 0xf0, 0x37, 0x28, 0xcf, 0xa5, 0x5e, + 0x1b, 0x38, 0x33, 0x7c, 0x0c, 0x9e, 0xf8, 0x18 }, + { 0xab, 0x37, 0xdd, 0xb6, 0x83, 0x13, 0x7e, 0x74, + 0x08, 0x0d, 0x02, 0x6b, 0x59, 0x0b, 0x96, 0xae, + 0x9b, 0xb4, 0x47, 0x72, 0x2f, 0x30, 0x5a, 0x5a, + 0xc5, 0x70, 0xec, 0x1d, 0xf9, 0xb1, 0x74, 0x3c }, + { 0x3e, 0xe7, 0x35, 0xa6, 0x94, 0xc2, 0x55, 0x9b, + 0x69, 0x3a, 0xa6, 0x86, 0x29, 0x36, 0x1e, 0x15, + 0xd1, 0x22, 0x65, 0xad, 0x6a, 0x3d, 0xed, 0xf4, + 0x88, 0xb0, 0xb0, 0x0f, 0xac, 0x97, 0x54, 0xba }, + { 0xd6, 0xfc, 0xd2, 0x32, 0x19, 0xb6, 0x47, 0xe4, + 0xcb, 0xd5, 0xeb, 0x2d, 0x0a, 0xd0, 0x1e, 0xc8, + 0x83, 0x8a, 0x4b, 0x29, 0x01, 0xfc, 0x32, 0x5c, + 0xc3, 0x70, 0x19, 0x81, 0xca, 0x6c, 0x88, 0x8b }, + { 0x05, 0x20, 0xec, 0x2f, 0x5b, 0xf7, 0xa7, 0x55, + 0xda, 0xcb, 0x50, 0xc6, 0xbf, 0x23, 0x3e, 0x35, + 0x15, 0x43, 0x47, 0x63, 0xdb, 0x01, 0x39, 0xcc, + 0xd9, 0xfa, 0xef, 0xbb, 0x82, 0x07, 0x61, 0x2d }, + { 0xaf, 0xf3, 0xb7, 0x5f, 0x3f, 0x58, 0x12, 0x64, + 0xd7, 0x66, 0x16, 0x62, 0xb9, 0x2f, 0x5a, 0xd3, + 0x7c, 0x1d, 0x32, 0xbd, 0x45, 0xff, 0x81, 0xa4, + 0xed, 0x8a, 0xdc, 0x9e, 0xf3, 0x0d, 0xd9, 0x89 }, + { 0xd0, 0xdd, 0x65, 0x0b, 0xef, 0xd3, 0xba, 0x63, + 0xdc, 0x25, 0x10, 0x2c, 0x62, 0x7c, 0x92, 0x1b, + 0x9c, 0xbe, 0xb0, 0xb1, 0x30, 0x68, 0x69, 0x35, + 0xb5, 0xc9, 0x27, 0xcb, 0x7c, 0xcd, 0x5e, 0x3b }, + { 0xe1, 0x14, 0x98, 0x16, 0xb1, 0x0a, 0x85, 0x14, + 0xfb, 0x3e, 0x2c, 0xab, 0x2c, 0x08, 0xbe, 0xe9, + 0xf7, 0x3c, 0xe7, 0x62, 0x21, 0x70, 0x12, 0x46, + 0xa5, 0x89, 0xbb, 0xb6, 0x73, 0x02, 0xd8, 0xa9 }, + { 0x7d, 0xa3, 0xf4, 0x41, 0xde, 0x90, 0x54, 0x31, + 0x7e, 0x72, 0xb5, 0xdb, 0xf9, 0x79, 0xda, 0x01, + 0xe6, 0xbc, 0xee, 0xbb, 0x84, 0x78, 0xea, 0xe6, + 0xa2, 0x28, 0x49, 0xd9, 0x02, 0x92, 0x63, 0x5c }, + { 0x12, 0x30, 0xb1, 0xfc, 0x8a, 0x7d, 0x92, 0x15, + 0xed, 0xc2, 0xd4, 0xa2, 0xde, 0xcb, 0xdd, 0x0a, + 0x6e, 0x21, 0x6c, 0x92, 0x42, 0x78, 0xc9, 0x1f, + 0xc5, 0xd1, 0x0e, 0x7d, 0x60, 0x19, 0x2d, 0x94 }, + { 0x57, 0x50, 0xd7, 0x16, 0xb4, 0x80, 0x8f, 0x75, + 0x1f, 0xeb, 0xc3, 0x88, 0x06, 0xba, 0x17, 0x0b, + 0xf6, 0xd5, 0x19, 0x9a, 0x78, 0x16, 0xbe, 0x51, + 0x4e, 0x3f, 0x93, 0x2f, 0xbe, 0x0c, 0xb8, 0x71 }, + { 0x6f, 0xc5, 0x9b, 0x2f, 0x10, 0xfe, 0xba, 0x95, + 0x4a, 0xa6, 0x82, 0x0b, 0x3c, 0xa9, 0x87, 0xee, + 0x81, 0xd5, 0xcc, 0x1d, 0xa3, 0xc6, 0x3c, 0xe8, + 0x27, 0x30, 0x1c, 0x56, 0x9d, 0xfb, 0x39, 0xce }, + { 0xc7, 0xc3, 0xfe, 0x1e, 0xeb, 0xdc, 0x7b, 0x5a, + 0x93, 0x93, 0x26, 0xe8, 0xdd, 0xb8, 0x3e, 0x8b, + 0xf2, 0xb7, 0x80, 0xb6, 0x56, 0x78, 0xcb, 0x62, + 0xf2, 0x08, 0xb0, 0x40, 0xab, 0xdd, 0x35, 0xe2 }, + { 0x0c, 0x75, 0xc1, 0xa1, 0x5c, 0xf3, 0x4a, 0x31, + 0x4e, 0xe4, 0x78, 0xf4, 0xa5, 0xce, 0x0b, 0x8a, + 0x6b, 0x36, 0x52, 0x8e, 0xf7, 0xa8, 0x20, 0x69, + 0x6c, 0x3e, 0x42, 0x46, 0xc5, 0xa1, 0x58, 0x64 }, + { 0x21, 0x6d, 0xc1, 0x2a, 0x10, 0x85, 0x69, 0xa3, + 0xc7, 0xcd, 0xde, 0x4a, 0xed, 0x43, 0xa6, 0xc3, + 0x30, 0x13, 0x9d, 0xda, 0x3c, 0xcc, 0x4a, 0x10, + 0x89, 0x05, 0xdb, 0x38, 0x61, 0x89, 0x90, 0x50 }, + { 0xa5, 0x7b, 0xe6, 0xae, 0x67, 0x56, 0xf2, 0x8b, + 0x02, 0xf5, 0x9d, 0xad, 0xf7, 0xe0, 0xd7, 0xd8, + 0x80, 0x7f, 0x10, 0xfa, 0x15, 0xce, 0xd1, 0xad, + 0x35, 0x85, 0x52, 0x1a, 0x1d, 0x99, 0x5a, 0x89 }, + { 0x81, 0x6a, 0xef, 0x87, 0x59, 0x53, 0x71, 0x6c, + 0xd7, 0xa5, 0x81, 0xf7, 0x32, 0xf5, 0x3d, 0xd4, + 0x35, 0xda, 0xb6, 0x6d, 0x09, 0xc3, 0x61, 0xd2, + 0xd6, 0x59, 0x2d, 0xe1, 0x77, 0x55, 0xd8, 0xa8 }, + { 0x9a, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3b, 0x6e, + 0xa9, 0x7e, 0x6a, 0x73, 0x8f, 0x9d, 0x10, 0xfb, + 0x3d, 0x0b, 0x43, 0xae, 0x0e, 0x8b, 0x7d, 0x81, + 0x23, 0xea, 0x76, 0xce, 0x97, 0x98, 0x9c, 0x7e }, + { 0x8d, 0xae, 0xdb, 0x9a, 0x27, 0x15, 0x29, 0xdb, + 0xb7, 0xdc, 0x3b, 0x60, 0x7f, 0xe5, 0xeb, 0x2d, + 0x32, 0x11, 0x77, 0x07, 0x58, 0xdd, 0x3b, 0x0a, + 0x35, 0x93, 0xd2, 0xd7, 0x95, 0x4e, 0x2d, 0x5b }, + { 0x16, 0xdb, 0xc0, 0xaa, 0x5d, 0xd2, 0xc7, 0x74, + 0xf5, 0x05, 0x10, 0x0f, 0x73, 0x37, 0x86, 0xd8, + 0xa1, 0x75, 0xfc, 0xbb, 0xb5, 0x9c, 0x43, 0xe1, + 0xfb, 0xff, 0x3e, 0x1e, 0xaf, 0x31, 0xcb, 0x4a }, + { 0x86, 0x06, 0xcb, 0x89, 0x9c, 0x6a, 0xea, 0xf5, + 0x1b, 0x9d, 0xb0, 0xfe, 0x49, 0x24, 0xa9, 0xfd, + 0x5d, 0xab, 0xc1, 0x9f, 0x88, 0x26, 0xf2, 0xbc, + 0x1c, 0x1d, 0x7d, 0xa1, 0x4d, 0x2c, 0x2c, 0x99 }, + { 0x84, 0x79, 0x73, 0x1a, 0xed, 0xa5, 0x7b, 0xd3, + 0x7e, 0xad, 0xb5, 0x1a, 0x50, 0x7e, 0x30, 0x7f, + 0x3b, 0xd9, 0x5e, 0x69, 0xdb, 0xca, 0x94, 0xf3, + 0xbc, 0x21, 0x72, 0x60, 0x66, 0xad, 0x6d, 0xfd }, + { 0x58, 0x47, 0x3a, 0x9e, 0xa8, 0x2e, 0xfa, 0x3f, + 0x3b, 0x3d, 0x8f, 0xc8, 0x3e, 0xd8, 0x86, 0x31, + 0x27, 0xb3, 0x3a, 0xe8, 0xde, 0xae, 0x63, 0x07, + 0x20, 0x1e, 0xdb, 0x6d, 0xde, 0x61, 0xde, 0x29 }, + { 0x9a, 0x92, 0x55, 0xd5, 0x3a, 0xf1, 0x16, 0xde, + 0x8b, 0xa2, 0x7c, 0xe3, 0x5b, 0x4c, 0x7e, 0x15, + 0x64, 0x06, 0x57, 0xa0, 0xfc, 0xb8, 0x88, 0xc7, + 0x0d, 0x95, 0x43, 0x1d, 0xac, 0xd8, 0xf8, 0x30 }, + { 0x9e, 0xb0, 0x5f, 0xfb, 0xa3, 0x9f, 0xd8, 0x59, + 0x6a, 0x45, 0x49, 0x3e, 0x18, 0xd2, 0x51, 0x0b, + 0xf3, 0xef, 0x06, 0x5c, 0x51, 0xd6, 0xe1, 0x3a, + 0xbe, 0x66, 0xaa, 0x57, 0xe0, 0x5c, 0xfd, 0xb7 }, + { 0x81, 0xdc, 0xc3, 0xa5, 0x05, 0xea, 0xce, 0x3f, + 0x87, 0x9d, 0x8f, 0x70, 0x27, 0x76, 0x77, 0x0f, + 0x9d, 0xf5, 0x0e, 0x52, 0x1d, 0x14, 0x28, 0xa8, + 0x5d, 0xaf, 0x04, 0xf9, 0xad, 0x21, 0x50, 0xe0 }, + { 0xe3, 0xe3, 0xc4, 0xaa, 0x3a, 0xcb, 0xbc, 0x85, + 0x33, 0x2a, 0xf9, 0xd5, 0x64, 0xbc, 0x24, 0x16, + 0x5e, 0x16, 0x87, 0xf6, 0xb1, 0xad, 0xcb, 0xfa, + 0xe7, 0x7a, 0x8f, 0x03, 0xc7, 0x2a, 0xc2, 0x8c }, + { 0x67, 0x46, 0xc8, 0x0b, 0x4e, 0xb5, 0x6a, 0xea, + 0x45, 0xe6, 0x4e, 0x72, 0x89, 0xbb, 0xa3, 0xed, + 0xbf, 0x45, 0xec, 0xf8, 0x20, 0x64, 0x81, 0xff, + 0x63, 0x02, 0x12, 0x29, 0x84, 0xcd, 0x52, 0x6a }, + { 0x2b, 0x62, 0x8e, 0x52, 0x76, 0x4d, 0x7d, 0x62, + 0xc0, 0x86, 0x8b, 0x21, 0x23, 0x57, 0xcd, 0xd1, + 0x2d, 0x91, 0x49, 0x82, 0x2f, 0x4e, 0x98, 0x45, + 0xd9, 0x18, 0xa0, 0x8d, 0x1a, 0xe9, 0x90, 0xc0 }, + { 0xe4, 0xbf, 0xe8, 0x0d, 0x58, 0xc9, 0x19, 0x94, + 0x61, 0x39, 0x09, 0xdc, 0x4b, 0x1a, 0x12, 0x49, + 0x68, 0x96, 0xc0, 0x04, 0xaf, 0x7b, 0x57, 0x01, + 0x48, 0x3d, 0xe4, 0x5d, 0x28, 0x23, 0xd7, 0x8e }, + { 0xeb, 0xb4, 0xba, 0x15, 0x0c, 0xef, 0x27, 0x34, + 0x34, 0x5b, 0x5d, 0x64, 0x1b, 0xbe, 0xd0, 0x3a, + 0x21, 0xea, 0xfa, 0xe9, 0x33, 0xc9, 0x9e, 0x00, + 0x92, 0x12, 0xef, 0x04, 0x57, 0x4a, 0x85, 0x30 }, + { 0x39, 0x66, 0xec, 0x73, 0xb1, 0x54, 0xac, 0xc6, + 0x97, 0xac, 0x5c, 0xf5, 0xb2, 0x4b, 0x40, 0xbd, + 0xb0, 0xdb, 0x9e, 0x39, 0x88, 0x36, 0xd7, 0x6d, + 0x4b, 0x88, 0x0e, 0x3b, 0x2a, 0xf1, 0xaa, 0x27 }, + { 0xef, 0x7e, 0x48, 0x31, 0xb3, 0xa8, 0x46, 0x36, + 0x51, 0x8d, 0x6e, 0x4b, 0xfc, 0xe6, 0x4a, 0x43, + 0xdb, 0x2a, 0x5d, 0xda, 0x9c, 0xca, 0x2b, 0x44, + 0xf3, 0x90, 0x33, 0xbd, 0xc4, 0x0d, 0x62, 0x43 }, + { 0x7a, 0xbf, 0x6a, 0xcf, 0x5c, 0x8e, 0x54, 0x9d, + 0xdb, 0xb1, 0x5a, 0xe8, 0xd8, 0xb3, 0x88, 0xc1, + 0xc1, 0x97, 0xe6, 0x98, 0x73, 0x7c, 0x97, 0x85, + 0x50, 0x1e, 0xd1, 0xf9, 0x49, 0x30, 0xb7, 0xd9 }, + { 0x88, 0x01, 0x8d, 0xed, 0x66, 0x81, 0x3f, 0x0c, + 0xa9, 0x5d, 0xef, 0x47, 0x4c, 0x63, 0x06, 0x92, + 0x01, 0x99, 0x67, 0xb9, 0xe3, 0x68, 0x88, 0xda, + 0xdd, 0x94, 0x12, 0x47, 0x19, 0xb6, 0x82, 0xf6 }, + { 0x39, 0x30, 0x87, 0x6b, 0x9f, 0xc7, 0x52, 0x90, + 0x36, 0xb0, 0x08, 0xb1, 0xb8, 0xbb, 0x99, 0x75, + 0x22, 0xa4, 0x41, 0x63, 0x5a, 0x0c, 0x25, 0xec, + 0x02, 0xfb, 0x6d, 0x90, 0x26, 0xe5, 0x5a, 0x97 }, + { 0x0a, 0x40, 0x49, 0xd5, 0x7e, 0x83, 0x3b, 0x56, + 0x95, 0xfa, 0xc9, 0x3d, 0xd1, 0xfb, 0xef, 0x31, + 0x66, 0xb4, 0x4b, 0x12, 0xad, 0x11, 0x24, 0x86, + 0x62, 0x38, 0x3a, 0xe0, 0x51, 0xe1, 0x58, 0x27 }, + { 0x81, 0xdc, 0xc0, 0x67, 0x8b, 0xb6, 0xa7, 0x65, + 0xe4, 0x8c, 0x32, 0x09, 0x65, 0x4f, 0xe9, 0x00, + 0x89, 0xce, 0x44, 0xff, 0x56, 0x18, 0x47, 0x7e, + 0x39, 0xab, 0x28, 0x64, 0x76, 0xdf, 0x05, 0x2b }, + { 0xe6, 0x9b, 0x3a, 0x36, 0xa4, 0x46, 0x19, 0x12, + 0xdc, 0x08, 0x34, 0x6b, 0x11, 0xdd, 0xcb, 0x9d, + 0xb7, 0x96, 0xf8, 0x85, 0xfd, 0x01, 0x93, 0x6e, + 0x66, 0x2f, 0xe2, 0x92, 0x97, 0xb0, 0x99, 0xa4 }, + { 0x5a, 0xc6, 0x50, 0x3b, 0x0d, 0x8d, 0xa6, 0x91, + 0x76, 0x46, 0xe6, 0xdc, 0xc8, 0x7e, 0xdc, 0x58, + 0xe9, 0x42, 0x45, 0x32, 0x4c, 0xc2, 0x04, 0xf4, + 0xdd, 0x4a, 0xf0, 0x15, 0x63, 0xac, 0xd4, 0x27 }, + { 0xdf, 0x6d, 0xda, 0x21, 0x35, 0x9a, 0x30, 0xbc, + 0x27, 0x17, 0x80, 0x97, 0x1c, 0x1a, 0xbd, 0x56, + 0xa6, 0xef, 0x16, 0x7e, 0x48, 0x08, 0x87, 0x88, + 0x8e, 0x73, 0xa8, 0x6d, 0x3b, 0xf6, 0x05, 0xe9 }, + { 0xe8, 0xe6, 0xe4, 0x70, 0x71, 0xe7, 0xb7, 0xdf, + 0x25, 0x80, 0xf2, 0x25, 0xcf, 0xbb, 0xed, 0xf8, + 0x4c, 0xe6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xd3, + 0x30, 0x97, 0xe4, 0xb7, 0xdc, 0x57, 0x11, 0x07 }, + { 0x53, 0xe4, 0x0e, 0xad, 0x62, 0x05, 0x1e, 0x19, + 0xcb, 0x9b, 0xa8, 0x13, 0x3e, 0x3e, 0x5c, 0x1c, + 0xe0, 0x0d, 0xdc, 0xad, 0x8a, 0xcf, 0x34, 0x2a, + 0x22, 0x43, 0x60, 0xb0, 0xac, 0xc1, 0x47, 0x77 }, + { 0x9c, 0xcd, 0x53, 0xfe, 0x80, 0xbe, 0x78, 0x6a, + 0xa9, 0x84, 0x63, 0x84, 0x62, 0xfb, 0x28, 0xaf, + 0xdf, 0x12, 0x2b, 0x34, 0xd7, 0x8f, 0x46, 0x87, + 0xec, 0x63, 0x2b, 0xb1, 0x9d, 0xe2, 0x37, 0x1a }, + { 0xcb, 0xd4, 0x80, 0x52, 0xc4, 0x8d, 0x78, 0x84, + 0x66, 0xa3, 0xe8, 0x11, 0x8c, 0x56, 0xc9, 0x7f, + 0xe1, 0x46, 0xe5, 0x54, 0x6f, 0xaa, 0xf9, 0x3e, + 0x2b, 0xc3, 0xc4, 0x7e, 0x45, 0x93, 0x97, 0x53 }, + { 0x25, 0x68, 0x83, 0xb1, 0x4e, 0x2a, 0xf4, 0x4d, + 0xad, 0xb2, 0x8e, 0x1b, 0x34, 0xb2, 0xac, 0x0f, + 0x0f, 0x4c, 0x91, 0xc3, 0x4e, 0xc9, 0x16, 0x9e, + 0x29, 0x03, 0x61, 0x58, 0xac, 0xaa, 0x95, 0xb9 }, + { 0x44, 0x71, 0xb9, 0x1a, 0xb4, 0x2d, 0xb7, 0xc4, + 0xdd, 0x84, 0x90, 0xab, 0x95, 0xa2, 0xee, 0x8d, + 0x04, 0xe3, 0xef, 0x5c, 0x3d, 0x6f, 0xc7, 0x1a, + 0xc7, 0x4b, 0x2b, 0x26, 0x91, 0x4d, 0x16, 0x41 }, + { 0xa5, 0xeb, 0x08, 0x03, 0x8f, 0x8f, 0x11, 0x55, + 0xed, 0x86, 0xe6, 0x31, 0x90, 0x6f, 0xc1, 0x30, + 0x95, 0xf6, 0xbb, 0xa4, 0x1d, 0xe5, 0xd4, 0xe7, + 0x95, 0x75, 0x8e, 0xc8, 0xc8, 0xdf, 0x8a, 0xf1 }, + { 0xdc, 0x1d, 0xb6, 0x4e, 0xd8, 0xb4, 0x8a, 0x91, + 0x0e, 0x06, 0x0a, 0x6b, 0x86, 0x63, 0x74, 0xc5, + 0x78, 0x78, 0x4e, 0x9a, 0xc4, 0x9a, 0xb2, 0x77, + 0x40, 0x92, 0xac, 0x71, 0x50, 0x19, 0x34, 0xac }, + { 0x28, 0x54, 0x13, 0xb2, 0xf2, 0xee, 0x87, 0x3d, + 0x34, 0x31, 0x9e, 0xe0, 0xbb, 0xfb, 0xb9, 0x0f, + 0x32, 0xda, 0x43, 0x4c, 0xc8, 0x7e, 0x3d, 0xb5, + 0xed, 0x12, 0x1b, 0xb3, 0x98, 0xed, 0x96, 0x4b }, + { 0x02, 0x16, 0xe0, 0xf8, 0x1f, 0x75, 0x0f, 0x26, + 0xf1, 0x99, 0x8b, 0xc3, 0x93, 0x4e, 0x3e, 0x12, + 0x4c, 0x99, 0x45, 0xe6, 0x85, 0xa6, 0x0b, 0x25, + 0xe8, 0xfb, 0xd9, 0x62, 0x5a, 0xb6, 0xb5, 0x99 }, + { 0x38, 0xc4, 0x10, 0xf5, 0xb9, 0xd4, 0x07, 0x20, + 0x50, 0x75, 0x5b, 0x31, 0xdc, 0xa8, 0x9f, 0xd5, + 0x39, 0x5c, 0x67, 0x85, 0xee, 0xb3, 0xd7, 0x90, + 0xf3, 0x20, 0xff, 0x94, 0x1c, 0x5a, 0x93, 0xbf }, + { 0xf1, 0x84, 0x17, 0xb3, 0x9d, 0x61, 0x7a, 0xb1, + 0xc1, 0x8f, 0xdf, 0x91, 0xeb, 0xd0, 0xfc, 0x6d, + 0x55, 0x16, 0xbb, 0x34, 0xcf, 0x39, 0x36, 0x40, + 0x37, 0xbc, 0xe8, 0x1f, 0xa0, 0x4c, 0xec, 0xb1 }, + { 0x1f, 0xa8, 0x77, 0xde, 0x67, 0x25, 0x9d, 0x19, + 0x86, 0x3a, 0x2a, 0x34, 0xbc, 0xc6, 0x96, 0x2a, + 0x2b, 0x25, 0xfc, 0xbf, 0x5c, 0xbe, 0xcd, 0x7e, + 0xde, 0x8f, 0x1f, 0xa3, 0x66, 0x88, 0xa7, 0x96 }, + { 0x5b, 0xd1, 0x69, 0xe6, 0x7c, 0x82, 0xc2, 0xc2, + 0xe9, 0x8e, 0xf7, 0x00, 0x8b, 0xdf, 0x26, 0x1f, + 0x2d, 0xdf, 0x30, 0xb1, 0xc0, 0x0f, 0x9e, 0x7f, + 0x27, 0x5b, 0xb3, 0xe8, 0xa2, 0x8d, 0xc9, 0xa2 }, + { 0xc8, 0x0a, 0xbe, 0xeb, 0xb6, 0x69, 0xad, 0x5d, + 0xee, 0xb5, 0xf5, 0xec, 0x8e, 0xa6, 0xb7, 0xa0, + 0x5d, 0xdf, 0x7d, 0x31, 0xec, 0x4c, 0x0a, 0x2e, + 0xe2, 0x0b, 0x0b, 0x98, 0xca, 0xec, 0x67, 0x46 }, + { 0xe7, 0x6d, 0x3f, 0xbd, 0xa5, 0xba, 0x37, 0x4e, + 0x6b, 0xf8, 0xe5, 0x0f, 0xad, 0xc3, 0xbb, 0xb9, + 0xba, 0x5c, 0x20, 0x6e, 0xbd, 0xec, 0x89, 0xa3, + 0xa5, 0x4c, 0xf3, 0xdd, 0x84, 0xa0, 0x70, 0x16 }, + { 0x7b, 0xba, 0x9d, 0xc5, 0xb5, 0xdb, 0x20, 0x71, + 0xd1, 0x77, 0x52, 0xb1, 0x04, 0x4c, 0x1e, 0xce, + 0xd9, 0x6a, 0xaf, 0x2d, 0xd4, 0x6e, 0x9b, 0x43, + 0x37, 0x50, 0xe8, 0xea, 0x0d, 0xcc, 0x18, 0x70 }, + { 0xf2, 0x9b, 0x1b, 0x1a, 0xb9, 0xba, 0xb1, 0x63, + 0x01, 0x8e, 0xe3, 0xda, 0x15, 0x23, 0x2c, 0xca, + 0x78, 0xec, 0x52, 0xdb, 0xc3, 0x4e, 0xda, 0x5b, + 0x82, 0x2e, 0xc1, 0xd8, 0x0f, 0xc2, 0x1b, 0xd0 }, + { 0x9e, 0xe3, 0xe3, 0xe7, 0xe9, 0x00, 0xf1, 0xe1, + 0x1d, 0x30, 0x8c, 0x4b, 0x2b, 0x30, 0x76, 0xd2, + 0x72, 0xcf, 0x70, 0x12, 0x4f, 0x9f, 0x51, 0xe1, + 0xda, 0x60, 0xf3, 0x78, 0x46, 0xcd, 0xd2, 0xf4 }, + { 0x70, 0xea, 0x3b, 0x01, 0x76, 0x92, 0x7d, 0x90, + 0x96, 0xa1, 0x85, 0x08, 0xcd, 0x12, 0x3a, 0x29, + 0x03, 0x25, 0x92, 0x0a, 0x9d, 0x00, 0xa8, 0x9b, + 0x5d, 0xe0, 0x42, 0x73, 0xfb, 0xc7, 0x6b, 0x85 }, + { 0x67, 0xde, 0x25, 0xc0, 0x2a, 0x4a, 0xab, 0xa2, + 0x3b, 0xdc, 0x97, 0x3c, 0x8b, 0xb0, 0xb5, 0x79, + 0x6d, 0x47, 0xcc, 0x06, 0x59, 0xd4, 0x3d, 0xff, + 0x1f, 0x97, 0xde, 0x17, 0x49, 0x63, 0xb6, 0x8e }, + { 0xb2, 0x16, 0x8e, 0x4e, 0x0f, 0x18, 0xb0, 0xe6, + 0x41, 0x00, 0xb5, 0x17, 0xed, 0x95, 0x25, 0x7d, + 0x73, 0xf0, 0x62, 0x0d, 0xf8, 0x85, 0xc1, 0x3d, + 0x2e, 0xcf, 0x79, 0x36, 0x7b, 0x38, 0x4c, 0xee }, + { 0x2e, 0x7d, 0xec, 0x24, 0x28, 0x85, 0x3b, 0x2c, + 0x71, 0x76, 0x07, 0x45, 0x54, 0x1f, 0x7a, 0xfe, + 0x98, 0x25, 0xb5, 0xdd, 0x77, 0xdf, 0x06, 0x51, + 0x1d, 0x84, 0x41, 0xa9, 0x4b, 0xac, 0xc9, 0x27 }, + { 0xca, 0x9f, 0xfa, 0xc4, 0xc4, 0x3f, 0x0b, 0x48, + 0x46, 0x1d, 0xc5, 0xc2, 0x63, 0xbe, 0xa3, 0xf6, + 0xf0, 0x06, 0x11, 0xce, 0xac, 0xab, 0xf6, 0xf8, + 0x95, 0xba, 0x2b, 0x01, 0x01, 0xdb, 0xb6, 0x8d }, + { 0x74, 0x10, 0xd4, 0x2d, 0x8f, 0xd1, 0xd5, 0xe9, + 0xd2, 0xf5, 0x81, 0x5c, 0xb9, 0x34, 0x17, 0x99, + 0x88, 0x28, 0xef, 0x3c, 0x42, 0x30, 0xbf, 0xbd, + 0x41, 0x2d, 0xf0, 0xa4, 0xa7, 0xa2, 0x50, 0x7a }, + { 0x50, 0x10, 0xf6, 0x84, 0x51, 0x6d, 0xcc, 0xd0, + 0xb6, 0xee, 0x08, 0x52, 0xc2, 0x51, 0x2b, 0x4d, + 0xc0, 0x06, 0x6c, 0xf0, 0xd5, 0x6f, 0x35, 0x30, + 0x29, 0x78, 0xdb, 0x8a, 0xe3, 0x2c, 0x6a, 0x81 }, + { 0xac, 0xaa, 0xb5, 0x85, 0xf7, 0xb7, 0x9b, 0x71, + 0x99, 0x35, 0xce, 0xb8, 0x95, 0x23, 0xdd, 0xc5, + 0x48, 0x27, 0xf7, 0x5c, 0x56, 0x88, 0x38, 0x56, + 0x15, 0x4a, 0x56, 0xcd, 0xcd, 0x5e, 0xe9, 0x88 }, + { 0x66, 0x6d, 0xe5, 0xd1, 0x44, 0x0f, 0xee, 0x73, + 0x31, 0xaa, 0xf0, 0x12, 0x3a, 0x62, 0xef, 0x2d, + 0x8b, 0xa5, 0x74, 0x53, 0xa0, 0x76, 0x96, 0x35, + 0xac, 0x6c, 0xd0, 0x1e, 0x63, 0x3f, 0x77, 0x12 }, + { 0xa6, 0xf9, 0x86, 0x58, 0xf6, 0xea, 0xba, 0xf9, + 0x02, 0xd8, 0xb3, 0x87, 0x1a, 0x4b, 0x10, 0x1d, + 0x16, 0x19, 0x6e, 0x8a, 0x4b, 0x24, 0x1e, 0x15, + 0x58, 0xfe, 0x29, 0x96, 0x6e, 0x10, 0x3e, 0x8d }, + { 0x89, 0x15, 0x46, 0xa8, 0xb2, 0x9f, 0x30, 0x47, + 0xdd, 0xcf, 0xe5, 0xb0, 0x0e, 0x45, 0xfd, 0x55, + 0x75, 0x63, 0x73, 0x10, 0x5e, 0xa8, 0x63, 0x7d, + 0xfc, 0xff, 0x54, 0x7b, 0x6e, 0xa9, 0x53, 0x5f }, + { 0x18, 0xdf, 0xbc, 0x1a, 0xc5, 0xd2, 0x5b, 0x07, + 0x61, 0x13, 0x7d, 0xbd, 0x22, 0xc1, 0x7c, 0x82, + 0x9d, 0x0f, 0x0e, 0xf1, 0xd8, 0x23, 0x44, 0xe9, + 0xc8, 0x9c, 0x28, 0x66, 0x94, 0xda, 0x24, 0xe8 }, + { 0xb5, 0x4b, 0x9b, 0x67, 0xf8, 0xfe, 0xd5, 0x4b, + 0xbf, 0x5a, 0x26, 0x66, 0xdb, 0xdf, 0x4b, 0x23, + 0xcf, 0xf1, 0xd1, 0xb6, 0xf4, 0xaf, 0xc9, 0x85, + 0xb2, 0xe6, 0xd3, 0x30, 0x5a, 0x9f, 0xf8, 0x0f }, + { 0x7d, 0xb4, 0x42, 0xe1, 0x32, 0xba, 0x59, 0xbc, + 0x12, 0x89, 0xaa, 0x98, 0xb0, 0xd3, 0xe8, 0x06, + 0x00, 0x4f, 0x8e, 0xc1, 0x28, 0x11, 0xaf, 0x1e, + 0x2e, 0x33, 0xc6, 0x9b, 0xfd, 0xe7, 0x29, 0xe1 }, + { 0x25, 0x0f, 0x37, 0xcd, 0xc1, 0x5e, 0x81, 0x7d, + 0x2f, 0x16, 0x0d, 0x99, 0x56, 0xc7, 0x1f, 0xe3, + 0xeb, 0x5d, 0xb7, 0x45, 0x56, 0xe4, 0xad, 0xf9, + 0xa4, 0xff, 0xaf, 0xba, 0x74, 0x01, 0x03, 0x96 }, + { 0x4a, 0xb8, 0xa3, 0xdd, 0x1d, 0xdf, 0x8a, 0xd4, + 0x3d, 0xab, 0x13, 0xa2, 0x7f, 0x66, 0xa6, 0x54, + 0x4f, 0x29, 0x05, 0x97, 0xfa, 0x96, 0x04, 0x0e, + 0x0e, 0x1d, 0xb9, 0x26, 0x3a, 0xa4, 0x79, 0xf8 }, + { 0xee, 0x61, 0x72, 0x7a, 0x07, 0x66, 0xdf, 0x93, + 0x9c, 0xcd, 0xc8, 0x60, 0x33, 0x40, 0x44, 0xc7, + 0x9a, 0x3c, 0x9b, 0x15, 0x62, 0x00, 0xbc, 0x3a, + 0xa3, 0x29, 0x73, 0x48, 0x3d, 0x83, 0x41, 0xae }, + { 0x3f, 0x68, 0xc7, 0xec, 0x63, 0xac, 0x11, 0xeb, + 0xb9, 0x8f, 0x94, 0xb3, 0x39, 0xb0, 0x5c, 0x10, + 0x49, 0x84, 0xfd, 0xa5, 0x01, 0x03, 0x06, 0x01, + 0x44, 0xe5, 0xa2, 0xbf, 0xcc, 0xc9, 0xda, 0x95 }, + { 0x05, 0x6f, 0x29, 0x81, 0x6b, 0x8a, 0xf8, 0xf5, + 0x66, 0x82, 0xbc, 0x4d, 0x7c, 0xf0, 0x94, 0x11, + 0x1d, 0xa7, 0x73, 0x3e, 0x72, 0x6c, 0xd1, 0x3d, + 0x6b, 0x3e, 0x8e, 0xa0, 0x3e, 0x92, 0xa0, 0xd5 }, + { 0xf5, 0xec, 0x43, 0xa2, 0x8a, 0xcb, 0xef, 0xf1, + 0xf3, 0x31, 0x8a, 0x5b, 0xca, 0xc7, 0xc6, 0x6d, + 0xdb, 0x52, 0x30, 0xb7, 0x9d, 0xb2, 0xd1, 0x05, + 0xbc, 0xbe, 0x15, 0xf3, 0xc1, 0x14, 0x8d, 0x69 }, + { 0x2a, 0x69, 0x60, 0xad, 0x1d, 0x8d, 0xd5, 0x47, + 0x55, 0x5c, 0xfb, 0xd5, 0xe4, 0x60, 0x0f, 0x1e, + 0xaa, 0x1c, 0x8e, 0xda, 0x34, 0xde, 0x03, 0x74, + 0xec, 0x4a, 0x26, 0xea, 0xaa, 0xa3, 0x3b, 0x4e }, + { 0xdc, 0xc1, 0xea, 0x7b, 0xaa, 0xb9, 0x33, 0x84, + 0xf7, 0x6b, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54, + 0x74, 0x2f, 0x7b, 0x96, 0xd6, 0xb4, 0xc1, 0x20, + 0x16, 0x5c, 0x04, 0xa6, 0xc4, 0xf5, 0xce, 0x10 }, + { 0x13, 0xd5, 0xdf, 0x17, 0x92, 0x21, 0x37, 0x9c, + 0x6a, 0x78, 0xc0, 0x7c, 0x79, 0x3f, 0xf5, 0x34, + 0x87, 0xca, 0xe6, 0xbf, 0x9f, 0xe8, 0x82, 0x54, + 0x1a, 0xb0, 0xe7, 0x35, 0xe3, 0xea, 0xda, 0x3b }, + { 0x8c, 0x59, 0xe4, 0x40, 0x76, 0x41, 0xa0, 0x1e, + 0x8f, 0xf9, 0x1f, 0x99, 0x80, 0xdc, 0x23, 0x6f, + 0x4e, 0xcd, 0x6f, 0xcf, 0x52, 0x58, 0x9a, 0x09, + 0x9a, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xe1 }, + { 0x83, 0x3b, 0x1a, 0xc6, 0xa2, 0x51, 0xfd, 0x08, + 0xfd, 0x6d, 0x90, 0x8f, 0xea, 0x2a, 0x4e, 0xe1, + 0xe0, 0x40, 0xbc, 0xa9, 0x3f, 0xc1, 0xa3, 0x8e, + 0xc3, 0x82, 0x0e, 0x0c, 0x10, 0xbd, 0x82, 0xea }, + { 0xa2, 0x44, 0xf9, 0x27, 0xf3, 0xb4, 0x0b, 0x8f, + 0x6c, 0x39, 0x15, 0x70, 0xc7, 0x65, 0x41, 0x8f, + 0x2f, 0x6e, 0x70, 0x8e, 0xac, 0x90, 0x06, 0xc5, + 0x1a, 0x7f, 0xef, 0xf4, 0xaf, 0x3b, 0x2b, 0x9e }, + { 0x3d, 0x99, 0xed, 0x95, 0x50, 0xcf, 0x11, 0x96, + 0xe6, 0xc4, 0xd2, 0x0c, 0x25, 0x96, 0x20, 0xf8, + 0x58, 0xc3, 0xd7, 0x03, 0x37, 0x4c, 0x12, 0x8c, + 0xe7, 0xb5, 0x90, 0x31, 0x0c, 0x83, 0x04, 0x6d }, + { 0x2b, 0x35, 0xc4, 0x7d, 0x7b, 0x87, 0x76, 0x1f, + 0x0a, 0xe4, 0x3a, 0xc5, 0x6a, 0xc2, 0x7b, 0x9f, + 0x25, 0x83, 0x03, 0x67, 0xb5, 0x95, 0xbe, 0x8c, + 0x24, 0x0e, 0x94, 0x60, 0x0c, 0x6e, 0x33, 0x12 }, + { 0x5d, 0x11, 0xed, 0x37, 0xd2, 0x4d, 0xc7, 0x67, + 0x30, 0x5c, 0xb7, 0xe1, 0x46, 0x7d, 0x87, 0xc0, + 0x65, 0xac, 0x4b, 0xc8, 0xa4, 0x26, 0xde, 0x38, + 0x99, 0x1f, 0xf5, 0x9a, 0xa8, 0x73, 0x5d, 0x02 }, + { 0xb8, 0x36, 0x47, 0x8e, 0x1c, 0xa0, 0x64, 0x0d, + 0xce, 0x6f, 0xd9, 0x10, 0xa5, 0x09, 0x62, 0x72, + 0xc8, 0x33, 0x09, 0x90, 0xcd, 0x97, 0x86, 0x4a, + 0xc2, 0xbf, 0x14, 0xef, 0x6b, 0x23, 0x91, 0x4a }, + { 0x91, 0x00, 0xf9, 0x46, 0xd6, 0xcc, 0xde, 0x3a, + 0x59, 0x7f, 0x90, 0xd3, 0x9f, 0xc1, 0x21, 0x5b, + 0xad, 0xdc, 0x74, 0x13, 0x64, 0x3d, 0x85, 0xc2, + 0x1c, 0x3e, 0xee, 0x5d, 0x2d, 0xd3, 0x28, 0x94 }, + { 0xda, 0x70, 0xee, 0xdd, 0x23, 0xe6, 0x63, 0xaa, + 0x1a, 0x74, 0xb9, 0x76, 0x69, 0x35, 0xb4, 0x79, + 0x22, 0x2a, 0x72, 0xaf, 0xba, 0x5c, 0x79, 0x51, + 0x58, 0xda, 0xd4, 0x1a, 0x3b, 0xd7, 0x7e, 0x40 }, + { 0xf0, 0x67, 0xed, 0x6a, 0x0d, 0xbd, 0x43, 0xaa, + 0x0a, 0x92, 0x54, 0xe6, 0x9f, 0xd6, 0x6b, 0xdd, + 0x8a, 0xcb, 0x87, 0xde, 0x93, 0x6c, 0x25, 0x8c, + 0xfb, 0x02, 0x28, 0x5f, 0x2c, 0x11, 0xfa, 0x79 }, + { 0x71, 0x5c, 0x99, 0xc7, 0xd5, 0x75, 0x80, 0xcf, + 0x97, 0x53, 0xb4, 0xc1, 0xd7, 0x95, 0xe4, 0x5a, + 0x83, 0xfb, 0xb2, 0x28, 0xc0, 0xd3, 0x6f, 0xbe, + 0x20, 0xfa, 0xf3, 0x9b, 0xdd, 0x6d, 0x4e, 0x85 }, + { 0xe4, 0x57, 0xd6, 0xad, 0x1e, 0x67, 0xcb, 0x9b, + 0xbd, 0x17, 0xcb, 0xd6, 0x98, 0xfa, 0x6d, 0x7d, + 0xae, 0x0c, 0x9b, 0x7a, 0xd6, 0xcb, 0xd6, 0x53, + 0x96, 0x34, 0xe3, 0x2a, 0x71, 0x9c, 0x84, 0x92 }, + { 0xec, 0xe3, 0xea, 0x81, 0x03, 0xe0, 0x24, 0x83, + 0xc6, 0x4a, 0x70, 0xa4, 0xbd, 0xce, 0xe8, 0xce, + 0xb6, 0x27, 0x8f, 0x25, 0x33, 0xf3, 0xf4, 0x8d, + 0xbe, 0xed, 0xfb, 0xa9, 0x45, 0x31, 0xd4, 0xae }, + { 0x38, 0x8a, 0xa5, 0xd3, 0x66, 0x7a, 0x97, 0xc6, + 0x8d, 0x3d, 0x56, 0xf8, 0xf3, 0xee, 0x8d, 0x3d, + 0x36, 0x09, 0x1f, 0x17, 0xfe, 0x5d, 0x1b, 0x0d, + 0x5d, 0x84, 0xc9, 0x3b, 0x2f, 0xfe, 0x40, 0xbd }, + { 0x8b, 0x6b, 0x31, 0xb9, 0xad, 0x7c, 0x3d, 0x5c, + 0xd8, 0x4b, 0xf9, 0x89, 0x47, 0xb9, 0xcd, 0xb5, + 0x9d, 0xf8, 0xa2, 0x5f, 0xf7, 0x38, 0x10, 0x10, + 0x13, 0xbe, 0x4f, 0xd6, 0x5e, 0x1d, 0xd1, 0xa3 }, + { 0x06, 0x62, 0x91, 0xf6, 0xbb, 0xd2, 0x5f, 0x3c, + 0x85, 0x3d, 0xb7, 0xd8, 0xb9, 0x5c, 0x9a, 0x1c, + 0xfb, 0x9b, 0xf1, 0xc1, 0xc9, 0x9f, 0xb9, 0x5a, + 0x9b, 0x78, 0x69, 0xd9, 0x0f, 0x1c, 0x29, 0x03 }, + { 0xa7, 0x07, 0xef, 0xbc, 0xcd, 0xce, 0xed, 0x42, + 0x96, 0x7a, 0x66, 0xf5, 0x53, 0x9b, 0x93, 0xed, + 0x75, 0x60, 0xd4, 0x67, 0x30, 0x40, 0x16, 0xc4, + 0x78, 0x0d, 0x77, 0x55, 0xa5, 0x65, 0xd4, 0xc4 }, + { 0x38, 0xc5, 0x3d, 0xfb, 0x70, 0xbe, 0x7e, 0x79, + 0x2b, 0x07, 0xa6, 0xa3, 0x5b, 0x8a, 0x6a, 0x0a, + 0xba, 0x02, 0xc5, 0xc5, 0xf3, 0x8b, 0xaf, 0x5c, + 0x82, 0x3f, 0xdf, 0xd9, 0xe4, 0x2d, 0x65, 0x7e }, + { 0xf2, 0x91, 0x13, 0x86, 0x50, 0x1d, 0x9a, 0xb9, + 0xd7, 0x20, 0xcf, 0x8a, 0xd1, 0x05, 0x03, 0xd5, + 0x63, 0x4b, 0xf4, 0xb7, 0xd1, 0x2b, 0x56, 0xdf, + 0xb7, 0x4f, 0xec, 0xc6, 0xe4, 0x09, 0x3f, 0x68 }, + { 0xc6, 0xf2, 0xbd, 0xd5, 0x2b, 0x81, 0xe6, 0xe4, + 0xf6, 0x59, 0x5a, 0xbd, 0x4d, 0x7f, 0xb3, 0x1f, + 0x65, 0x11, 0x69, 0xd0, 0x0f, 0xf3, 0x26, 0x92, + 0x6b, 0x34, 0x94, 0x7b, 0x28, 0xa8, 0x39, 0x59 }, + { 0x29, 0x3d, 0x94, 0xb1, 0x8c, 0x98, 0xbb, 0x32, + 0x23, 0x36, 0x6b, 0x8c, 0xe7, 0x4c, 0x28, 0xfb, + 0xdf, 0x28, 0xe1, 0xf8, 0x4a, 0x33, 0x50, 0xb0, + 0xeb, 0x2d, 0x18, 0x04, 0xa5, 0x77, 0x57, 0x9b }, + { 0x2c, 0x2f, 0xa5, 0xc0, 0xb5, 0x15, 0x33, 0x16, + 0x5b, 0xc3, 0x75, 0xc2, 0x2e, 0x27, 0x81, 0x76, + 0x82, 0x70, 0xa3, 0x83, 0x98, 0x5d, 0x13, 0xbd, + 0x6b, 0x67, 0xb6, 0xfd, 0x67, 0xf8, 0x89, 0xeb }, + { 0xca, 0xa0, 0x9b, 0x82, 0xb7, 0x25, 0x62, 0xe4, + 0x3f, 0x4b, 0x22, 0x75, 0xc0, 0x91, 0x91, 0x8e, + 0x62, 0x4d, 0x91, 0x16, 0x61, 0xcc, 0x81, 0x1b, + 0xb5, 0xfa, 0xec, 0x51, 0xf6, 0x08, 0x8e, 0xf7 }, + { 0x24, 0x76, 0x1e, 0x45, 0xe6, 0x74, 0x39, 0x53, + 0x79, 0xfb, 0x17, 0x72, 0x9c, 0x78, 0xcb, 0x93, + 0x9e, 0x6f, 0x74, 0xc5, 0xdf, 0xfb, 0x9c, 0x96, + 0x1f, 0x49, 0x59, 0x82, 0xc3, 0xed, 0x1f, 0xe3 }, + { 0x55, 0xb7, 0x0a, 0x82, 0x13, 0x1e, 0xc9, 0x48, + 0x88, 0xd7, 0xab, 0x54, 0xa7, 0xc5, 0x15, 0x25, + 0x5c, 0x39, 0x38, 0xbb, 0x10, 0xbc, 0x78, 0x4d, + 0xc9, 0xb6, 0x7f, 0x07, 0x6e, 0x34, 0x1a, 0x73 }, + { 0x6a, 0xb9, 0x05, 0x7b, 0x97, 0x7e, 0xbc, 0x3c, + 0xa4, 0xd4, 0xce, 0x74, 0x50, 0x6c, 0x25, 0xcc, + 0xcd, 0xc5, 0x66, 0x49, 0x7c, 0x45, 0x0b, 0x54, + 0x15, 0xa3, 0x94, 0x86, 0xf8, 0x65, 0x7a, 0x03 }, + { 0x24, 0x06, 0x6d, 0xee, 0xe0, 0xec, 0xee, 0x15, + 0xa4, 0x5f, 0x0a, 0x32, 0x6d, 0x0f, 0x8d, 0xbc, + 0x79, 0x76, 0x1e, 0xbb, 0x93, 0xcf, 0x8c, 0x03, + 0x77, 0xaf, 0x44, 0x09, 0x78, 0xfc, 0xf9, 0x94 }, + { 0x20, 0x00, 0x0d, 0x3f, 0x66, 0xba, 0x76, 0x86, + 0x0d, 0x5a, 0x95, 0x06, 0x88, 0xb9, 0xaa, 0x0d, + 0x76, 0xcf, 0xea, 0x59, 0xb0, 0x05, 0xd8, 0x59, + 0x91, 0x4b, 0x1a, 0x46, 0x65, 0x3a, 0x93, 0x9b }, + { 0xb9, 0x2d, 0xaa, 0x79, 0x60, 0x3e, 0x3b, 0xdb, + 0xc3, 0xbf, 0xe0, 0xf4, 0x19, 0xe4, 0x09, 0xb2, + 0xea, 0x10, 0xdc, 0x43, 0x5b, 0xee, 0xfe, 0x29, + 0x59, 0xda, 0x16, 0x89, 0x5d, 0x5d, 0xca, 0x1c }, + { 0xe9, 0x47, 0x94, 0x87, 0x05, 0xb2, 0x06, 0xd5, + 0x72, 0xb0, 0xe8, 0xf6, 0x2f, 0x66, 0xa6, 0x55, + 0x1c, 0xbd, 0x6b, 0xc3, 0x05, 0xd2, 0x6c, 0xe7, + 0x53, 0x9a, 0x12, 0xf9, 0xaa, 0xdf, 0x75, 0x71 }, + { 0x3d, 0x67, 0xc1, 0xb3, 0xf9, 0xb2, 0x39, 0x10, + 0xe3, 0xd3, 0x5e, 0x6b, 0x0f, 0x2c, 0xcf, 0x44, + 0xa0, 0xb5, 0x40, 0xa4, 0x5c, 0x18, 0xba, 0x3c, + 0x36, 0x26, 0x4d, 0xd4, 0x8e, 0x96, 0xaf, 0x6a }, + { 0xc7, 0x55, 0x8b, 0xab, 0xda, 0x04, 0xbc, 0xcb, + 0x76, 0x4d, 0x0b, 0xbf, 0x33, 0x58, 0x42, 0x51, + 0x41, 0x90, 0x2d, 0x22, 0x39, 0x1d, 0x9f, 0x8c, + 0x59, 0x15, 0x9f, 0xec, 0x9e, 0x49, 0xb1, 0x51 }, + { 0x0b, 0x73, 0x2b, 0xb0, 0x35, 0x67, 0x5a, 0x50, + 0xff, 0x58, 0xf2, 0xc2, 0x42, 0xe4, 0x71, 0x0a, + 0xec, 0xe6, 0x46, 0x70, 0x07, 0x9c, 0x13, 0x04, + 0x4c, 0x79, 0xc9, 0xb7, 0x49, 0x1f, 0x70, 0x00 }, + { 0xd1, 0x20, 0xb5, 0xef, 0x6d, 0x57, 0xeb, 0xf0, + 0x6e, 0xaf, 0x96, 0xbc, 0x93, 0x3c, 0x96, 0x7b, + 0x16, 0xcb, 0xe6, 0xe2, 0xbf, 0x00, 0x74, 0x1c, + 0x30, 0xaa, 0x1c, 0x54, 0xba, 0x64, 0x80, 0x1f }, + { 0x58, 0xd2, 0x12, 0xad, 0x6f, 0x58, 0xae, 0xf0, + 0xf8, 0x01, 0x16, 0xb4, 0x41, 0xe5, 0x7f, 0x61, + 0x95, 0xbf, 0xef, 0x26, 0xb6, 0x14, 0x63, 0xed, + 0xec, 0x11, 0x83, 0xcd, 0xb0, 0x4f, 0xe7, 0x6d }, + { 0xb8, 0x83, 0x6f, 0x51, 0xd1, 0xe2, 0x9b, 0xdf, + 0xdb, 0xa3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8b, + 0x8f, 0xad, 0x62, 0x74, 0x73, 0xed, 0xec, 0xef, + 0x7e, 0xae, 0xfe, 0xe8, 0x37, 0xc7, 0x40, 0x03 }, + { 0xc5, 0x47, 0xa3, 0xc1, 0x24, 0xae, 0x56, 0x85, + 0xff, 0xa7, 0xb8, 0xed, 0xaf, 0x96, 0xec, 0x86, + 0xf8, 0xb2, 0xd0, 0xd5, 0x0c, 0xee, 0x8b, 0xe3, + 0xb1, 0xf0, 0xc7, 0x67, 0x63, 0x06, 0x9d, 0x9c }, + { 0x5d, 0x16, 0x8b, 0x76, 0x9a, 0x2f, 0x67, 0x85, + 0x3d, 0x62, 0x95, 0xf7, 0x56, 0x8b, 0xe4, 0x0b, + 0xb7, 0xa1, 0x6b, 0x8d, 0x65, 0xba, 0x87, 0x63, + 0x5d, 0x19, 0x78, 0xd2, 0xab, 0x11, 0xba, 0x2a }, + { 0xa2, 0xf6, 0x75, 0xdc, 0x73, 0x02, 0x63, 0x8c, + 0xb6, 0x02, 0x01, 0x06, 0x4c, 0xa5, 0x50, 0x77, + 0x71, 0x4d, 0x71, 0xfe, 0x09, 0x6a, 0x31, 0x5f, + 0x2f, 0xe7, 0x40, 0x12, 0x77, 0xca, 0xa5, 0xaf }, + { 0xc8, 0xaa, 0xb5, 0xcd, 0x01, 0x60, 0xae, 0x78, + 0xcd, 0x2e, 0x8a, 0xc5, 0xfb, 0x0e, 0x09, 0x3c, + 0xdb, 0x5c, 0x4b, 0x60, 0x52, 0xa0, 0xa9, 0x7b, + 0xb0, 0x42, 0x16, 0x82, 0x6f, 0xa7, 0xa4, 0x37 }, + { 0xff, 0x68, 0xca, 0x40, 0x35, 0xbf, 0xeb, 0x43, + 0xfb, 0xf1, 0x45, 0xfd, 0xdd, 0x5e, 0x43, 0xf1, + 0xce, 0xa5, 0x4f, 0x11, 0xf7, 0xbe, 0xe1, 0x30, + 0x58, 0xf0, 0x27, 0x32, 0x9a, 0x4a, 0x5f, 0xa4 }, + { 0x1d, 0x4e, 0x54, 0x87, 0xae, 0x3c, 0x74, 0x0f, + 0x2b, 0xa6, 0xe5, 0x41, 0xac, 0x91, 0xbc, 0x2b, + 0xfc, 0xd2, 0x99, 0x9c, 0x51, 0x8d, 0x80, 0x7b, + 0x42, 0x67, 0x48, 0x80, 0x3a, 0x35, 0x0f, 0xd4 }, + { 0x6d, 0x24, 0x4e, 0x1a, 0x06, 0xce, 0x4e, 0xf5, + 0x78, 0xdd, 0x0f, 0x63, 0xaf, 0xf0, 0x93, 0x67, + 0x06, 0x73, 0x51, 0x19, 0xca, 0x9c, 0x8d, 0x22, + 0xd8, 0x6c, 0x80, 0x14, 0x14, 0xab, 0x97, 0x41 }, + { 0xde, 0xcf, 0x73, 0x29, 0xdb, 0xcc, 0x82, 0x7b, + 0x8f, 0xc5, 0x24, 0xc9, 0x43, 0x1e, 0x89, 0x98, + 0x02, 0x9e, 0xce, 0x12, 0xce, 0x93, 0xb7, 0xb2, + 0xf3, 0xe7, 0x69, 0xa9, 0x41, 0xfb, 0x8c, 0xea }, + { 0x2f, 0xaf, 0xcc, 0x0f, 0x2e, 0x63, 0xcb, 0xd0, + 0x77, 0x55, 0xbe, 0x7b, 0x75, 0xec, 0xea, 0x0a, + 0xdf, 0xf9, 0xaa, 0x5e, 0xde, 0x2a, 0x52, 0xfd, + 0xab, 0x4d, 0xfd, 0x03, 0x74, 0xcd, 0x48, 0x3f }, + { 0xaa, 0x85, 0x01, 0x0d, 0xd4, 0x6a, 0x54, 0x6b, + 0x53, 0x5e, 0xf4, 0xcf, 0x5f, 0x07, 0xd6, 0x51, + 0x61, 0xe8, 0x98, 0x28, 0xf3, 0xa7, 0x7d, 0xb7, + 0xb9, 0xb5, 0x6f, 0x0d, 0xf5, 0x9a, 0xae, 0x45 }, + { 0x07, 0xe8, 0xe1, 0xee, 0x73, 0x2c, 0xb0, 0xd3, + 0x56, 0xc9, 0xc0, 0xd1, 0x06, 0x9c, 0x89, 0xd1, + 0x7a, 0xdf, 0x6a, 0x9a, 0x33, 0x4f, 0x74, 0x5e, + 0xc7, 0x86, 0x73, 0x32, 0x54, 0x8c, 0xa8, 0xe9 }, + { 0x0e, 0x01, 0xe8, 0x1c, 0xad, 0xa8, 0x16, 0x2b, + 0xfd, 0x5f, 0x8a, 0x8c, 0x81, 0x8a, 0x6c, 0x69, + 0xfe, 0xdf, 0x02, 0xce, 0xb5, 0x20, 0x85, 0x23, + 0xcb, 0xe5, 0x31, 0x3b, 0x89, 0xca, 0x10, 0x53 }, + { 0x6b, 0xb6, 0xc6, 0x47, 0x26, 0x55, 0x08, 0x43, + 0x99, 0x85, 0x2e, 0x00, 0x24, 0x9f, 0x8c, 0xb2, + 0x47, 0x89, 0x6d, 0x39, 0x2b, 0x02, 0xd7, 0x3b, + 0x7f, 0x0d, 0xd8, 0x18, 0xe1, 0xe2, 0x9b, 0x07 }, + { 0x42, 0xd4, 0x63, 0x6e, 0x20, 0x60, 0xf0, 0x8f, + 0x41, 0xc8, 0x82, 0xe7, 0x6b, 0x39, 0x6b, 0x11, + 0x2e, 0xf6, 0x27, 0xcc, 0x24, 0xc4, 0x3d, 0xd5, + 0xf8, 0x3a, 0x1d, 0x1a, 0x7e, 0xad, 0x71, 0x1a }, + { 0x48, 0x58, 0xc9, 0xa1, 0x88, 0xb0, 0x23, 0x4f, + 0xb9, 0xa8, 0xd4, 0x7d, 0x0b, 0x41, 0x33, 0x65, + 0x0a, 0x03, 0x0b, 0xd0, 0x61, 0x1b, 0x87, 0xc3, + 0x89, 0x2e, 0x94, 0x95, 0x1f, 0x8d, 0xf8, 0x52 }, + { 0x3f, 0xab, 0x3e, 0x36, 0x98, 0x8d, 0x44, 0x5a, + 0x51, 0xc8, 0x78, 0x3e, 0x53, 0x1b, 0xe3, 0xa0, + 0x2b, 0xe4, 0x0c, 0xd0, 0x47, 0x96, 0xcf, 0xb6, + 0x1d, 0x40, 0x34, 0x74, 0x42, 0xd3, 0xf7, 0x94 }, + { 0xeb, 0xab, 0xc4, 0x96, 0x36, 0xbd, 0x43, 0x3d, + 0x2e, 0xc8, 0xf0, 0xe5, 0x18, 0x73, 0x2e, 0xf8, + 0xfa, 0x21, 0xd4, 0xd0, 0x71, 0xcc, 0x3b, 0xc4, + 0x6c, 0xd7, 0x9f, 0xa3, 0x8a, 0x28, 0xb8, 0x10 }, + { 0xa1, 0xd0, 0x34, 0x35, 0x23, 0xb8, 0x93, 0xfc, + 0xa8, 0x4f, 0x47, 0xfe, 0xb4, 0xa6, 0x4d, 0x35, + 0x0a, 0x17, 0xd8, 0xee, 0xf5, 0x49, 0x7e, 0xce, + 0x69, 0x7d, 0x02, 0xd7, 0x91, 0x78, 0xb5, 0x91 }, + { 0x26, 0x2e, 0xbf, 0xd9, 0x13, 0x0b, 0x7d, 0x28, + 0x76, 0x0d, 0x08, 0xef, 0x8b, 0xfd, 0x3b, 0x86, + 0xcd, 0xd3, 0xb2, 0x11, 0x3d, 0x2c, 0xae, 0xf7, + 0xea, 0x95, 0x1a, 0x30, 0x3d, 0xfa, 0x38, 0x46 }, + { 0xf7, 0x61, 0x58, 0xed, 0xd5, 0x0a, 0x15, 0x4f, + 0xa7, 0x82, 0x03, 0xed, 0x23, 0x62, 0x93, 0x2f, + 0xcb, 0x82, 0x53, 0xaa, 0xe3, 0x78, 0x90, 0x3e, + 0xde, 0xd1, 0xe0, 0x3f, 0x70, 0x21, 0xa2, 0x57 }, + { 0x26, 0x17, 0x8e, 0x95, 0x0a, 0xc7, 0x22, 0xf6, + 0x7a, 0xe5, 0x6e, 0x57, 0x1b, 0x28, 0x4c, 0x02, + 0x07, 0x68, 0x4a, 0x63, 0x34, 0xa1, 0x77, 0x48, + 0xa9, 0x4d, 0x26, 0x0b, 0xc5, 0xf5, 0x52, 0x74 }, + { 0xc3, 0x78, 0xd1, 0xe4, 0x93, 0xb4, 0x0e, 0xf1, + 0x1f, 0xe6, 0xa1, 0x5d, 0x9c, 0x27, 0x37, 0xa3, + 0x78, 0x09, 0x63, 0x4c, 0x5a, 0xba, 0xd5, 0xb3, + 0x3d, 0x7e, 0x39, 0x3b, 0x4a, 0xe0, 0x5d, 0x03 }, + { 0x98, 0x4b, 0xd8, 0x37, 0x91, 0x01, 0xbe, 0x8f, + 0xd8, 0x06, 0x12, 0xd8, 0xea, 0x29, 0x59, 0xa7, + 0x86, 0x5e, 0xc9, 0x71, 0x85, 0x23, 0x55, 0x01, + 0x07, 0xae, 0x39, 0x38, 0xdf, 0x32, 0x01, 0x1b }, + { 0xc6, 0xf2, 0x5a, 0x81, 0x2a, 0x14, 0x48, 0x58, + 0xac, 0x5c, 0xed, 0x37, 0xa9, 0x3a, 0x9f, 0x47, + 0x59, 0xba, 0x0b, 0x1c, 0x0f, 0xdc, 0x43, 0x1d, + 0xce, 0x35, 0xf9, 0xec, 0x1f, 0x1f, 0x4a, 0x99 }, + { 0x92, 0x4c, 0x75, 0xc9, 0x44, 0x24, 0xff, 0x75, + 0xe7, 0x4b, 0x8b, 0x4e, 0x94, 0x35, 0x89, 0x58, + 0xb0, 0x27, 0xb1, 0x71, 0xdf, 0x5e, 0x57, 0x89, + 0x9a, 0xd0, 0xd4, 0xda, 0xc3, 0x73, 0x53, 0xb6 }, + { 0x0a, 0xf3, 0x58, 0x92, 0xa6, 0x3f, 0x45, 0x93, + 0x1f, 0x68, 0x46, 0xed, 0x19, 0x03, 0x61, 0xcd, + 0x07, 0x30, 0x89, 0xe0, 0x77, 0x16, 0x57, 0x14, + 0xb5, 0x0b, 0x81, 0xa2, 0xe3, 0xdd, 0x9b, 0xa1 }, + { 0xcc, 0x80, 0xce, 0xfb, 0x26, 0xc3, 0xb2, 0xb0, + 0xda, 0xef, 0x23, 0x3e, 0x60, 0x6d, 0x5f, 0xfc, + 0x80, 0xfa, 0x17, 0x42, 0x7d, 0x18, 0xe3, 0x04, + 0x89, 0x67, 0x3e, 0x06, 0xef, 0x4b, 0x87, 0xf7 }, + { 0xc2, 0xf8, 0xc8, 0x11, 0x74, 0x47, 0xf3, 0x97, + 0x8b, 0x08, 0x18, 0xdc, 0xf6, 0xf7, 0x01, 0x16, + 0xac, 0x56, 0xfd, 0x18, 0x4d, 0xd1, 0x27, 0x84, + 0x94, 0xe1, 0x03, 0xfc, 0x6d, 0x74, 0xa8, 0x87 }, + { 0xbd, 0xec, 0xf6, 0xbf, 0xc1, 0xba, 0x0d, 0xf6, + 0xe8, 0x62, 0xc8, 0x31, 0x99, 0x22, 0x07, 0x79, + 0x6a, 0xcc, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28, + 0xc0, 0x6e, 0x7a, 0x51, 0xe0, 0x90, 0x09, 0x8f }, + { 0x24, 0xd1, 0xa2, 0x6e, 0x3d, 0xab, 0x02, 0xfe, + 0x45, 0x72, 0xd2, 0xaa, 0x7d, 0xbd, 0x3e, 0xc3, + 0x0f, 0x06, 0x93, 0xdb, 0x26, 0xf2, 0x73, 0xd0, + 0xab, 0x2c, 0xb0, 0xc1, 0x3b, 0x5e, 0x64, 0x51 }, + { 0xec, 0x56, 0xf5, 0x8b, 0x09, 0x29, 0x9a, 0x30, + 0x0b, 0x14, 0x05, 0x65, 0xd7, 0xd3, 0xe6, 0x87, + 0x82, 0xb6, 0xe2, 0xfb, 0xeb, 0x4b, 0x7e, 0xa9, + 0x7a, 0xc0, 0x57, 0x98, 0x90, 0x61, 0xdd, 0x3f }, + { 0x11, 0xa4, 0x37, 0xc1, 0xab, 0xa3, 0xc1, 0x19, + 0xdd, 0xfa, 0xb3, 0x1b, 0x3e, 0x8c, 0x84, 0x1d, + 0xee, 0xeb, 0x91, 0x3e, 0xf5, 0x7f, 0x7e, 0x48, + 0xf2, 0xc9, 0xcf, 0x5a, 0x28, 0xfa, 0x42, 0xbc }, + { 0x53, 0xc7, 0xe6, 0x11, 0x4b, 0x85, 0x0a, 0x2c, + 0xb4, 0x96, 0xc9, 0xb3, 0xc6, 0x9a, 0x62, 0x3e, + 0xae, 0xa2, 0xcb, 0x1d, 0x33, 0xdd, 0x81, 0x7e, + 0x47, 0x65, 0xed, 0xaa, 0x68, 0x23, 0xc2, 0x28 }, + { 0x15, 0x4c, 0x3e, 0x96, 0xfe, 0xe5, 0xdb, 0x14, + 0xf8, 0x77, 0x3e, 0x18, 0xaf, 0x14, 0x85, 0x79, + 0x13, 0x50, 0x9d, 0xa9, 0x99, 0xb4, 0x6c, 0xdd, + 0x3d, 0x4c, 0x16, 0x97, 0x60, 0xc8, 0x3a, 0xd2 }, + { 0x40, 0xb9, 0x91, 0x6f, 0x09, 0x3e, 0x02, 0x7a, + 0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20, + 0x47, 0x2f, 0xbc, 0xf6, 0x8f, 0x70, 0x1d, 0x1b, + 0x68, 0x06, 0x32, 0xe6, 0x99, 0x6b, 0xde, 0xd3 }, + { 0x24, 0xc4, 0xcb, 0xba, 0x07, 0x11, 0x98, 0x31, + 0xa7, 0x26, 0xb0, 0x53, 0x05, 0xd9, 0x6d, 0xa0, + 0x2f, 0xf8, 0xb1, 0x48, 0xf0, 0xda, 0x44, 0x0f, + 0xe2, 0x33, 0xbc, 0xaa, 0x32, 0xc7, 0x2f, 0x6f }, + { 0x5d, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xb7, + 0x83, 0x68, 0x96, 0x88, 0xab, 0xbf, 0x8e, 0xcf, + 0x25, 0x94, 0xa9, 0x6a, 0x08, 0xf2, 0xbf, 0xec, + 0x6c, 0xe0, 0x57, 0x44, 0x65, 0xdd, 0xed, 0x71 }, + { 0x04, 0x3b, 0x97, 0xe3, 0x36, 0xee, 0x6f, 0xdb, + 0xbe, 0x2b, 0x50, 0xf2, 0x2a, 0xf8, 0x32, 0x75, + 0xa4, 0x08, 0x48, 0x05, 0xd2, 0xd5, 0x64, 0x59, + 0x62, 0x45, 0x4b, 0x6c, 0x9b, 0x80, 0x53, 0xa0 }, + { 0x56, 0x48, 0x35, 0xcb, 0xae, 0xa7, 0x74, 0x94, + 0x85, 0x68, 0xbe, 0x36, 0xcf, 0x52, 0xfc, 0xdd, + 0x83, 0x93, 0x4e, 0xb0, 0xa2, 0x75, 0x12, 0xdb, + 0xe3, 0xe2, 0xdb, 0x47, 0xb9, 0xe6, 0x63, 0x5a }, + { 0xf2, 0x1c, 0x33, 0xf4, 0x7b, 0xde, 0x40, 0xa2, + 0xa1, 0x01, 0xc9, 0xcd, 0xe8, 0x02, 0x7a, 0xaf, + 0x61, 0xa3, 0x13, 0x7d, 0xe2, 0x42, 0x2b, 0x30, + 0x03, 0x5a, 0x04, 0xc2, 0x70, 0x89, 0x41, 0x83 }, + { 0x9d, 0xb0, 0xef, 0x74, 0xe6, 0x6c, 0xbb, 0x84, + 0x2e, 0xb0, 0xe0, 0x73, 0x43, 0xa0, 0x3c, 0x5c, + 0x56, 0x7e, 0x37, 0x2b, 0x3f, 0x23, 0xb9, 0x43, + 0xc7, 0x88, 0xa4, 0xf2, 0x50, 0xf6, 0x78, 0x91 }, + { 0xab, 0x8d, 0x08, 0x65, 0x5f, 0xf1, 0xd3, 0xfe, + 0x87, 0x58, 0xd5, 0x62, 0x23, 0x5f, 0xd2, 0x3e, + 0x7c, 0xf9, 0xdc, 0xaa, 0xd6, 0x58, 0x87, 0x2a, + 0x49, 0xe5, 0xd3, 0x18, 0x3b, 0x6c, 0xce, 0xbd }, + { 0x6f, 0x27, 0xf7, 0x7e, 0x7b, 0xcf, 0x46, 0xa1, + 0xe9, 0x63, 0xad, 0xe0, 0x30, 0x97, 0x33, 0x54, + 0x30, 0x31, 0xdc, 0xcd, 0xd4, 0x7c, 0xaa, 0xc1, + 0x74, 0xd7, 0xd2, 0x7c, 0xe8, 0x07, 0x7e, 0x8b }, + { 0xe3, 0xcd, 0x54, 0xda, 0x7e, 0x44, 0x4c, 0xaa, + 0x62, 0x07, 0x56, 0x95, 0x25, 0xa6, 0x70, 0xeb, + 0xae, 0x12, 0x78, 0xde, 0x4e, 0x3f, 0xe2, 0x68, + 0x4b, 0x3e, 0x33, 0xf5, 0xef, 0x90, 0xcc, 0x1b }, + { 0xb2, 0xc3, 0xe3, 0x3a, 0x51, 0xd2, 0x2c, 0x4c, + 0x08, 0xfc, 0x09, 0x89, 0xc8, 0x73, 0xc9, 0xcc, + 0x41, 0x50, 0x57, 0x9b, 0x1e, 0x61, 0x63, 0xfa, + 0x69, 0x4a, 0xd5, 0x1d, 0x53, 0xd7, 0x12, 0xdc }, + { 0xbe, 0x7f, 0xda, 0x98, 0x3e, 0x13, 0x18, 0x9b, + 0x4c, 0x77, 0xe0, 0xa8, 0x09, 0x20, 0xb6, 0xe0, + 0xe0, 0xea, 0x80, 0xc3, 0xb8, 0x4d, 0xbe, 0x7e, + 0x71, 0x17, 0xd2, 0x53, 0xf4, 0x81, 0x12, 0xf4 }, + { 0xb6, 0x00, 0x8c, 0x28, 0xfa, 0xe0, 0x8a, 0xa4, + 0x27, 0xe5, 0xbd, 0x3a, 0xad, 0x36, 0xf1, 0x00, + 0x21, 0xf1, 0x6c, 0x77, 0xcf, 0xea, 0xbe, 0xd0, + 0x7f, 0x97, 0xcc, 0x7d, 0xc1, 0xf1, 0x28, 0x4a }, + { 0x6e, 0x4e, 0x67, 0x60, 0xc5, 0x38, 0xf2, 0xe9, + 0x7b, 0x3a, 0xdb, 0xfb, 0xbc, 0xde, 0x57, 0xf8, + 0x96, 0x6b, 0x7e, 0xa8, 0xfc, 0xb5, 0xbf, 0x7e, + 0xfe, 0xc9, 0x13, 0xfd, 0x2a, 0x2b, 0x0c, 0x55 }, + { 0x4a, 0xe5, 0x1f, 0xd1, 0x83, 0x4a, 0xa5, 0xbd, + 0x9a, 0x6f, 0x7e, 0xc3, 0x9f, 0xc6, 0x63, 0x33, + 0x8d, 0xc5, 0xd2, 0xe2, 0x07, 0x61, 0x56, 0x6d, + 0x90, 0xcc, 0x68, 0xb1, 0xcb, 0x87, 0x5e, 0xd8 }, + { 0xb6, 0x73, 0xaa, 0xd7, 0x5a, 0xb1, 0xfd, 0xb5, + 0x40, 0x1a, 0xbf, 0xa1, 0xbf, 0x89, 0xf3, 0xad, + 0xd2, 0xeb, 0xc4, 0x68, 0xdf, 0x36, 0x24, 0xa4, + 0x78, 0xf4, 0xfe, 0x85, 0x9d, 0x8d, 0x55, 0xe2 }, + { 0x13, 0xc9, 0x47, 0x1a, 0x98, 0x55, 0x91, 0x35, + 0x39, 0x83, 0x66, 0x60, 0x39, 0x8d, 0xa0, 0xf3, + 0xf9, 0x9a, 0xda, 0x08, 0x47, 0x9c, 0x69, 0xd1, + 0xb7, 0xfc, 0xaa, 0x34, 0x61, 0xdd, 0x7e, 0x59 }, + { 0x2c, 0x11, 0xf4, 0xa7, 0xf9, 0x9a, 0x1d, 0x23, + 0xa5, 0x8b, 0xb6, 0x36, 0x35, 0x0f, 0xe8, 0x49, + 0xf2, 0x9c, 0xba, 0xc1, 0xb2, 0xa1, 0x11, 0x2d, + 0x9f, 0x1e, 0xd5, 0xbc, 0x5b, 0x31, 0x3c, 0xcd }, + { 0xc7, 0xd3, 0xc0, 0x70, 0x6b, 0x11, 0xae, 0x74, + 0x1c, 0x05, 0xa1, 0xef, 0x15, 0x0d, 0xd6, 0x5b, + 0x54, 0x94, 0xd6, 0xd5, 0x4c, 0x9a, 0x86, 0xe2, + 0x61, 0x78, 0x54, 0xe6, 0xae, 0xee, 0xbb, 0xd9 }, + { 0x19, 0x4e, 0x10, 0xc9, 0x38, 0x93, 0xaf, 0xa0, + 0x64, 0xc3, 0xac, 0x04, 0xc0, 0xdd, 0x80, 0x8d, + 0x79, 0x1c, 0x3d, 0x4b, 0x75, 0x56, 0xe8, 0x9d, + 0x8d, 0x9c, 0xb2, 0x25, 0xc4, 0xb3, 0x33, 0x39 }, + { 0x6f, 0xc4, 0x98, 0x8b, 0x8f, 0x78, 0x54, 0x6b, + 0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8f, 0x13, + 0x4b, 0x6a, 0x48, 0x2e, 0x69, 0x94, 0xb3, 0xd4, + 0x83, 0x17, 0xbf, 0x08, 0xdb, 0x29, 0x21, 0x85 }, + { 0x56, 0x65, 0xbe, 0xb8, 0xb0, 0x95, 0x55, 0x25, + 0x81, 0x3b, 0x59, 0x81, 0xcd, 0x14, 0x2e, 0xd4, + 0xd0, 0x3f, 0xba, 0x38, 0xa6, 0xf3, 0xe5, 0xad, + 0x26, 0x8e, 0x0c, 0xc2, 0x70, 0xd1, 0xcd, 0x11 }, + { 0xb8, 0x83, 0xd6, 0x8f, 0x5f, 0xe5, 0x19, 0x36, + 0x43, 0x1b, 0xa4, 0x25, 0x67, 0x38, 0x05, 0x3b, + 0x1d, 0x04, 0x26, 0xd4, 0xcb, 0x64, 0xb1, 0x6e, + 0x83, 0xba, 0xdc, 0x5e, 0x9f, 0xbe, 0x3b, 0x81 }, + { 0x53, 0xe7, 0xb2, 0x7e, 0xa5, 0x9c, 0x2f, 0x6d, + 0xbb, 0x50, 0x76, 0x9e, 0x43, 0x55, 0x4d, 0xf3, + 0x5a, 0xf8, 0x9f, 0x48, 0x22, 0xd0, 0x46, 0x6b, + 0x00, 0x7d, 0xd6, 0xf6, 0xde, 0xaf, 0xff, 0x02 }, + { 0x1f, 0x1a, 0x02, 0x29, 0xd4, 0x64, 0x0f, 0x01, + 0x90, 0x15, 0x88, 0xd9, 0xde, 0xc2, 0x2d, 0x13, + 0xfc, 0x3e, 0xb3, 0x4a, 0x61, 0xb3, 0x29, 0x38, + 0xef, 0xbf, 0x53, 0x34, 0xb2, 0x80, 0x0a, 0xfa }, + { 0xc2, 0xb4, 0x05, 0xaf, 0xa0, 0xfa, 0x66, 0x68, + 0x85, 0x2a, 0xee, 0x4d, 0x88, 0x04, 0x08, 0x53, + 0xfa, 0xb8, 0x00, 0xe7, 0x2b, 0x57, 0x58, 0x14, + 0x18, 0xe5, 0x50, 0x6f, 0x21, 0x4c, 0x7d, 0x1f }, + { 0xc0, 0x8a, 0xa1, 0xc2, 0x86, 0xd7, 0x09, 0xfd, + 0xc7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xc8, + 0x95, 0xba, 0x01, 0x10, 0x14, 0x24, 0x7e, 0x4e, + 0xfa, 0x8d, 0x07, 0xe7, 0x8f, 0xec, 0x69, 0x5c }, + { 0xf0, 0x3f, 0x57, 0x89, 0xd3, 0x33, 0x6b, 0x80, + 0xd0, 0x02, 0xd5, 0x9f, 0xdf, 0x91, 0x8b, 0xdb, + 0x77, 0x5b, 0x00, 0x95, 0x6e, 0xd5, 0x52, 0x8e, + 0x86, 0xaa, 0x99, 0x4a, 0xcb, 0x38, 0xfe, 0x2d } +}; + +static const u8 blake2s_keyed_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { + { 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b, + 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b, + 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a, + 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49 }, + { 0x40, 0xd1, 0x5f, 0xee, 0x7c, 0x32, 0x88, 0x30, + 0x16, 0x6a, 0xc3, 0xf9, 0x18, 0x65, 0x0f, 0x80, + 0x7e, 0x7e, 0x01, 0xe1, 0x77, 0x25, 0x8c, 0xdc, + 0x0a, 0x39, 0xb1, 0x1f, 0x59, 0x80, 0x66, 0xf1 }, + { 0x6b, 0xb7, 0x13, 0x00, 0x64, 0x4c, 0xd3, 0x99, + 0x1b, 0x26, 0xcc, 0xd4, 0xd2, 0x74, 0xac, 0xd1, + 0xad, 0xea, 0xb8, 0xb1, 0xd7, 0x91, 0x45, 0x46, + 0xc1, 0x19, 0x8b, 0xbe, 0x9f, 0xc9, 0xd8, 0x03 }, + { 0x1d, 0x22, 0x0d, 0xbe, 0x2e, 0xe1, 0x34, 0x66, + 0x1f, 0xdf, 0x6d, 0x9e, 0x74, 0xb4, 0x17, 0x04, + 0x71, 0x05, 0x56, 0xf2, 0xf6, 0xe5, 0xa0, 0x91, + 0xb2, 0x27, 0x69, 0x74, 0x45, 0xdb, 0xea, 0x6b }, + { 0xf6, 0xc3, 0xfb, 0xad, 0xb4, 0xcc, 0x68, 0x7a, + 0x00, 0x64, 0xa5, 0xbe, 0x6e, 0x79, 0x1b, 0xec, + 0x63, 0xb8, 0x68, 0xad, 0x62, 0xfb, 0xa6, 0x1b, + 0x37, 0x57, 0xef, 0x9c, 0xa5, 0x2e, 0x05, 0xb2 }, + { 0x49, 0xc1, 0xf2, 0x11, 0x88, 0xdf, 0xd7, 0x69, + 0xae, 0xa0, 0xe9, 0x11, 0xdd, 0x6b, 0x41, 0xf1, + 0x4d, 0xab, 0x10, 0x9d, 0x2b, 0x85, 0x97, 0x7a, + 0xa3, 0x08, 0x8b, 0x5c, 0x70, 0x7e, 0x85, 0x98 }, + { 0xfd, 0xd8, 0x99, 0x3d, 0xcd, 0x43, 0xf6, 0x96, + 0xd4, 0x4f, 0x3c, 0xea, 0x0f, 0xf3, 0x53, 0x45, + 0x23, 0x4e, 0xc8, 0xee, 0x08, 0x3e, 0xb3, 0xca, + 0xda, 0x01, 0x7c, 0x7f, 0x78, 0xc1, 0x71, 0x43 }, + { 0xe6, 0xc8, 0x12, 0x56, 0x37, 0x43, 0x8d, 0x09, + 0x05, 0xb7, 0x49, 0xf4, 0x65, 0x60, 0xac, 0x89, + 0xfd, 0x47, 0x1c, 0xf8, 0x69, 0x2e, 0x28, 0xfa, + 0xb9, 0x82, 0xf7, 0x3f, 0x01, 0x9b, 0x83, 0xa9 }, + { 0x19, 0xfc, 0x8c, 0xa6, 0x97, 0x9d, 0x60, 0xe6, + 0xed, 0xd3, 0xb4, 0x54, 0x1e, 0x2f, 0x96, 0x7c, + 0xed, 0x74, 0x0d, 0xf6, 0xec, 0x1e, 0xae, 0xbb, + 0xfe, 0x81, 0x38, 0x32, 0xe9, 0x6b, 0x29, 0x74 }, + { 0xa6, 0xad, 0x77, 0x7c, 0xe8, 0x81, 0xb5, 0x2b, + 0xb5, 0xa4, 0x42, 0x1a, 0xb6, 0xcd, 0xd2, 0xdf, + 0xba, 0x13, 0xe9, 0x63, 0x65, 0x2d, 0x4d, 0x6d, + 0x12, 0x2a, 0xee, 0x46, 0x54, 0x8c, 0x14, 0xa7 }, + { 0xf5, 0xc4, 0xb2, 0xba, 0x1a, 0x00, 0x78, 0x1b, + 0x13, 0xab, 0xa0, 0x42, 0x52, 0x42, 0xc6, 0x9c, + 0xb1, 0x55, 0x2f, 0x3f, 0x71, 0xa9, 0xa3, 0xbb, + 0x22, 0xb4, 0xa6, 0xb4, 0x27, 0x7b, 0x46, 0xdd }, + { 0xe3, 0x3c, 0x4c, 0x9b, 0xd0, 0xcc, 0x7e, 0x45, + 0xc8, 0x0e, 0x65, 0xc7, 0x7f, 0xa5, 0x99, 0x7f, + 0xec, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9e, + 0x68, 0xa9, 0x42, 0x38, 0x91, 0xe8, 0x22, 0xa3 }, + { 0xfb, 0xa1, 0x61, 0x69, 0xb2, 0xc3, 0xee, 0x10, + 0x5b, 0xe6, 0xe1, 0xe6, 0x50, 0xe5, 0xcb, 0xf4, + 0x07, 0x46, 0xb6, 0x75, 0x3d, 0x03, 0x6a, 0xb5, + 0x51, 0x79, 0x01, 0x4a, 0xd7, 0xef, 0x66, 0x51 }, + { 0xf5, 0xc4, 0xbe, 0xc6, 0xd6, 0x2f, 0xc6, 0x08, + 0xbf, 0x41, 0xcc, 0x11, 0x5f, 0x16, 0xd6, 0x1c, + 0x7e, 0xfd, 0x3f, 0xf6, 0xc6, 0x56, 0x92, 0xbb, + 0xe0, 0xaf, 0xff, 0xb1, 0xfe, 0xde, 0x74, 0x75 }, + { 0xa4, 0x86, 0x2e, 0x76, 0xdb, 0x84, 0x7f, 0x05, + 0xba, 0x17, 0xed, 0xe5, 0xda, 0x4e, 0x7f, 0x91, + 0xb5, 0x92, 0x5c, 0xf1, 0xad, 0x4b, 0xa1, 0x27, + 0x32, 0xc3, 0x99, 0x57, 0x42, 0xa5, 0xcd, 0x6e }, + { 0x65, 0xf4, 0xb8, 0x60, 0xcd, 0x15, 0xb3, 0x8e, + 0xf8, 0x14, 0xa1, 0xa8, 0x04, 0x31, 0x4a, 0x55, + 0xbe, 0x95, 0x3c, 0xaa, 0x65, 0xfd, 0x75, 0x8a, + 0xd9, 0x89, 0xff, 0x34, 0xa4, 0x1c, 0x1e, 0xea }, + { 0x19, 0xba, 0x23, 0x4f, 0x0a, 0x4f, 0x38, 0x63, + 0x7d, 0x18, 0x39, 0xf9, 0xd9, 0xf7, 0x6a, 0xd9, + 0x1c, 0x85, 0x22, 0x30, 0x71, 0x43, 0xc9, 0x7d, + 0x5f, 0x93, 0xf6, 0x92, 0x74, 0xce, 0xc9, 0xa7 }, + { 0x1a, 0x67, 0x18, 0x6c, 0xa4, 0xa5, 0xcb, 0x8e, + 0x65, 0xfc, 0xa0, 0xe2, 0xec, 0xbc, 0x5d, 0xdc, + 0x14, 0xae, 0x38, 0x1b, 0xb8, 0xbf, 0xfe, 0xb9, + 0xe0, 0xa1, 0x03, 0x44, 0x9e, 0x3e, 0xf0, 0x3c }, + { 0xaf, 0xbe, 0xa3, 0x17, 0xb5, 0xa2, 0xe8, 0x9c, + 0x0b, 0xd9, 0x0c, 0xcf, 0x5d, 0x7f, 0xd0, 0xed, + 0x57, 0xfe, 0x58, 0x5e, 0x4b, 0xe3, 0x27, 0x1b, + 0x0a, 0x6b, 0xf0, 0xf5, 0x78, 0x6b, 0x0f, 0x26 }, + { 0xf1, 0xb0, 0x15, 0x58, 0xce, 0x54, 0x12, 0x62, + 0xf5, 0xec, 0x34, 0x29, 0x9d, 0x6f, 0xb4, 0x09, + 0x00, 0x09, 0xe3, 0x43, 0x4b, 0xe2, 0xf4, 0x91, + 0x05, 0xcf, 0x46, 0xaf, 0x4d, 0x2d, 0x41, 0x24 }, + { 0x13, 0xa0, 0xa0, 0xc8, 0x63, 0x35, 0x63, 0x5e, + 0xaa, 0x74, 0xca, 0x2d, 0x5d, 0x48, 0x8c, 0x79, + 0x7b, 0xbb, 0x4f, 0x47, 0xdc, 0x07, 0x10, 0x50, + 0x15, 0xed, 0x6a, 0x1f, 0x33, 0x09, 0xef, 0xce }, + { 0x15, 0x80, 0xaf, 0xee, 0xbe, 0xbb, 0x34, 0x6f, + 0x94, 0xd5, 0x9f, 0xe6, 0x2d, 0xa0, 0xb7, 0x92, + 0x37, 0xea, 0xd7, 0xb1, 0x49, 0x1f, 0x56, 0x67, + 0xa9, 0x0e, 0x45, 0xed, 0xf6, 0xca, 0x8b, 0x03 }, + { 0x20, 0xbe, 0x1a, 0x87, 0x5b, 0x38, 0xc5, 0x73, + 0xdd, 0x7f, 0xaa, 0xa0, 0xde, 0x48, 0x9d, 0x65, + 0x5c, 0x11, 0xef, 0xb6, 0xa5, 0x52, 0x69, 0x8e, + 0x07, 0xa2, 0xd3, 0x31, 0xb5, 0xf6, 0x55, 0xc3 }, + { 0xbe, 0x1f, 0xe3, 0xc4, 0xc0, 0x40, 0x18, 0xc5, + 0x4c, 0x4a, 0x0f, 0x6b, 0x9a, 0x2e, 0xd3, 0xc5, + 0x3a, 0xbe, 0x3a, 0x9f, 0x76, 0xb4, 0xd2, 0x6d, + 0xe5, 0x6f, 0xc9, 0xae, 0x95, 0x05, 0x9a, 0x99 }, + { 0xe3, 0xe3, 0xac, 0xe5, 0x37, 0xeb, 0x3e, 0xdd, + 0x84, 0x63, 0xd9, 0xad, 0x35, 0x82, 0xe1, 0x3c, + 0xf8, 0x65, 0x33, 0xff, 0xde, 0x43, 0xd6, 0x68, + 0xdd, 0x2e, 0x93, 0xbb, 0xdb, 0xd7, 0x19, 0x5a }, + { 0x11, 0x0c, 0x50, 0xc0, 0xbf, 0x2c, 0x6e, 0x7a, + 0xeb, 0x7e, 0x43, 0x5d, 0x92, 0xd1, 0x32, 0xab, + 0x66, 0x55, 0x16, 0x8e, 0x78, 0xa2, 0xde, 0xcd, + 0xec, 0x33, 0x30, 0x77, 0x76, 0x84, 0xd9, 0xc1 }, + { 0xe9, 0xba, 0x8f, 0x50, 0x5c, 0x9c, 0x80, 0xc0, + 0x86, 0x66, 0xa7, 0x01, 0xf3, 0x36, 0x7e, 0x6c, + 0xc6, 0x65, 0xf3, 0x4b, 0x22, 0xe7, 0x3c, 0x3c, + 0x04, 0x17, 0xeb, 0x1c, 0x22, 0x06, 0x08, 0x2f }, + { 0x26, 0xcd, 0x66, 0xfc, 0xa0, 0x23, 0x79, 0xc7, + 0x6d, 0xf1, 0x23, 0x17, 0x05, 0x2b, 0xca, 0xfd, + 0x6c, 0xd8, 0xc3, 0xa7, 0xb8, 0x90, 0xd8, 0x05, + 0xf3, 0x6c, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3a }, + { 0x21, 0x3f, 0x35, 0x96, 0xd6, 0xe3, 0xa5, 0xd0, + 0xe9, 0x93, 0x2c, 0xd2, 0x15, 0x91, 0x46, 0x01, + 0x5e, 0x2a, 0xbc, 0x94, 0x9f, 0x47, 0x29, 0xee, + 0x26, 0x32, 0xfe, 0x1e, 0xdb, 0x78, 0xd3, 0x37 }, + { 0x10, 0x15, 0xd7, 0x01, 0x08, 0xe0, 0x3b, 0xe1, + 0xc7, 0x02, 0xfe, 0x97, 0x25, 0x36, 0x07, 0xd1, + 0x4a, 0xee, 0x59, 0x1f, 0x24, 0x13, 0xea, 0x67, + 0x87, 0x42, 0x7b, 0x64, 0x59, 0xff, 0x21, 0x9a }, + { 0x3c, 0xa9, 0x89, 0xde, 0x10, 0xcf, 0xe6, 0x09, + 0x90, 0x94, 0x72, 0xc8, 0xd3, 0x56, 0x10, 0x80, + 0x5b, 0x2f, 0x97, 0x77, 0x34, 0xcf, 0x65, 0x2c, + 0xc6, 0x4b, 0x3b, 0xfc, 0x88, 0x2d, 0x5d, 0x89 }, + { 0xb6, 0x15, 0x6f, 0x72, 0xd3, 0x80, 0xee, 0x9e, + 0xa6, 0xac, 0xd1, 0x90, 0x46, 0x4f, 0x23, 0x07, + 0xa5, 0xc1, 0x79, 0xef, 0x01, 0xfd, 0x71, 0xf9, + 0x9f, 0x2d, 0x0f, 0x7a, 0x57, 0x36, 0x0a, 0xea }, + { 0xc0, 0x3b, 0xc6, 0x42, 0xb2, 0x09, 0x59, 0xcb, + 0xe1, 0x33, 0xa0, 0x30, 0x3e, 0x0c, 0x1a, 0xbf, + 0xf3, 0xe3, 0x1e, 0xc8, 0xe1, 0xa3, 0x28, 0xec, + 0x85, 0x65, 0xc3, 0x6d, 0xec, 0xff, 0x52, 0x65 }, + { 0x2c, 0x3e, 0x08, 0x17, 0x6f, 0x76, 0x0c, 0x62, + 0x64, 0xc3, 0xa2, 0xcd, 0x66, 0xfe, 0xc6, 0xc3, + 0xd7, 0x8d, 0xe4, 0x3f, 0xc1, 0x92, 0x45, 0x7b, + 0x2a, 0x4a, 0x66, 0x0a, 0x1e, 0x0e, 0xb2, 0x2b }, + { 0xf7, 0x38, 0xc0, 0x2f, 0x3c, 0x1b, 0x19, 0x0c, + 0x51, 0x2b, 0x1a, 0x32, 0xde, 0xab, 0xf3, 0x53, + 0x72, 0x8e, 0x0e, 0x9a, 0xb0, 0x34, 0x49, 0x0e, + 0x3c, 0x34, 0x09, 0x94, 0x6a, 0x97, 0xae, 0xec }, + { 0x8b, 0x18, 0x80, 0xdf, 0x30, 0x1c, 0xc9, 0x63, + 0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92, + 0x87, 0xff, 0x7f, 0xe3, 0x1c, 0x49, 0xea, 0x6e, + 0xbd, 0x9e, 0x48, 0xbd, 0xee, 0xe4, 0x97, 0xc5 }, + { 0x1e, 0x75, 0xcb, 0x21, 0xc6, 0x09, 0x89, 0x02, + 0x03, 0x75, 0xf1, 0xa7, 0xa2, 0x42, 0x83, 0x9f, + 0x0b, 0x0b, 0x68, 0x97, 0x3a, 0x4c, 0x2a, 0x05, + 0xcf, 0x75, 0x55, 0xed, 0x5a, 0xae, 0xc4, 0xc1 }, + { 0x62, 0xbf, 0x8a, 0x9c, 0x32, 0xa5, 0xbc, 0xcf, + 0x29, 0x0b, 0x6c, 0x47, 0x4d, 0x75, 0xb2, 0xa2, + 0xa4, 0x09, 0x3f, 0x1a, 0x9e, 0x27, 0x13, 0x94, + 0x33, 0xa8, 0xf2, 0xb3, 0xbc, 0xe7, 0xb8, 0xd7 }, + { 0x16, 0x6c, 0x83, 0x50, 0xd3, 0x17, 0x3b, 0x5e, + 0x70, 0x2b, 0x78, 0x3d, 0xfd, 0x33, 0xc6, 0x6e, + 0xe0, 0x43, 0x27, 0x42, 0xe9, 0xb9, 0x2b, 0x99, + 0x7f, 0xd2, 0x3c, 0x60, 0xdc, 0x67, 0x56, 0xca }, + { 0x04, 0x4a, 0x14, 0xd8, 0x22, 0xa9, 0x0c, 0xac, + 0xf2, 0xf5, 0xa1, 0x01, 0x42, 0x8a, 0xdc, 0x8f, + 0x41, 0x09, 0x38, 0x6c, 0xcb, 0x15, 0x8b, 0xf9, + 0x05, 0xc8, 0x61, 0x8b, 0x8e, 0xe2, 0x4e, 0xc3 }, + { 0x38, 0x7d, 0x39, 0x7e, 0xa4, 0x3a, 0x99, 0x4b, + 0xe8, 0x4d, 0x2d, 0x54, 0x4a, 0xfb, 0xe4, 0x81, + 0xa2, 0x00, 0x0f, 0x55, 0x25, 0x26, 0x96, 0xbb, + 0xa2, 0xc5, 0x0c, 0x8e, 0xbd, 0x10, 0x13, 0x47 }, + { 0x56, 0xf8, 0xcc, 0xf1, 0xf8, 0x64, 0x09, 0xb4, + 0x6c, 0xe3, 0x61, 0x66, 0xae, 0x91, 0x65, 0x13, + 0x84, 0x41, 0x57, 0x75, 0x89, 0xdb, 0x08, 0xcb, + 0xc5, 0xf6, 0x6c, 0xa2, 0x97, 0x43, 0xb9, 0xfd }, + { 0x97, 0x06, 0xc0, 0x92, 0xb0, 0x4d, 0x91, 0xf5, + 0x3d, 0xff, 0x91, 0xfa, 0x37, 0xb7, 0x49, 0x3d, + 0x28, 0xb5, 0x76, 0xb5, 0xd7, 0x10, 0x46, 0x9d, + 0xf7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xfc, 0x03 }, + { 0x87, 0x79, 0x68, 0x68, 0x6c, 0x06, 0x8c, 0xe2, + 0xf7, 0xe2, 0xad, 0xcf, 0xf6, 0x8b, 0xf8, 0x74, + 0x8e, 0xdf, 0x3c, 0xf8, 0x62, 0xcf, 0xb4, 0xd3, + 0x94, 0x7a, 0x31, 0x06, 0x95, 0x80, 0x54, 0xe3 }, + { 0x88, 0x17, 0xe5, 0x71, 0x98, 0x79, 0xac, 0xf7, + 0x02, 0x47, 0x87, 0xec, 0xcd, 0xb2, 0x71, 0x03, + 0x55, 0x66, 0xcf, 0xa3, 0x33, 0xe0, 0x49, 0x40, + 0x7c, 0x01, 0x78, 0xcc, 0xc5, 0x7a, 0x5b, 0x9f }, + { 0x89, 0x38, 0x24, 0x9e, 0x4b, 0x50, 0xca, 0xda, + 0xcc, 0xdf, 0x5b, 0x18, 0x62, 0x13, 0x26, 0xcb, + 0xb1, 0x52, 0x53, 0xe3, 0x3a, 0x20, 0xf5, 0x63, + 0x6e, 0x99, 0x5d, 0x72, 0x47, 0x8d, 0xe4, 0x72 }, + { 0xf1, 0x64, 0xab, 0xba, 0x49, 0x63, 0xa4, 0x4d, + 0x10, 0x72, 0x57, 0xe3, 0x23, 0x2d, 0x90, 0xac, + 0xa5, 0xe6, 0x6a, 0x14, 0x08, 0x24, 0x8c, 0x51, + 0x74, 0x1e, 0x99, 0x1d, 0xb5, 0x22, 0x77, 0x56 }, + { 0xd0, 0x55, 0x63, 0xe2, 0xb1, 0xcb, 0xa0, 0xc4, + 0xa2, 0xa1, 0xe8, 0xbd, 0xe3, 0xa1, 0xa0, 0xd9, + 0xf5, 0xb4, 0x0c, 0x85, 0xa0, 0x70, 0xd6, 0xf5, + 0xfb, 0x21, 0x06, 0x6e, 0xad, 0x5d, 0x06, 0x01 }, + { 0x03, 0xfb, 0xb1, 0x63, 0x84, 0xf0, 0xa3, 0x86, + 0x6f, 0x4c, 0x31, 0x17, 0x87, 0x76, 0x66, 0xef, + 0xbf, 0x12, 0x45, 0x97, 0x56, 0x4b, 0x29, 0x3d, + 0x4a, 0xab, 0x0d, 0x26, 0x9f, 0xab, 0xdd, 0xfa }, + { 0x5f, 0xa8, 0x48, 0x6a, 0xc0, 0xe5, 0x29, 0x64, + 0xd1, 0x88, 0x1b, 0xbe, 0x33, 0x8e, 0xb5, 0x4b, + 0xe2, 0xf7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20, + 0x57, 0xb4, 0xda, 0x04, 0xba, 0x8b, 0x34, 0x75 }, + { 0xcd, 0xfa, 0xbc, 0xee, 0x46, 0x91, 0x11, 0x11, + 0x23, 0x6a, 0x31, 0x70, 0x8b, 0x25, 0x39, 0xd7, + 0x1f, 0xc2, 0x11, 0xd9, 0xb0, 0x9c, 0x0d, 0x85, + 0x30, 0xa1, 0x1e, 0x1d, 0xbf, 0x6e, 0xed, 0x01 }, + { 0x4f, 0x82, 0xde, 0x03, 0xb9, 0x50, 0x47, 0x93, + 0xb8, 0x2a, 0x07, 0xa0, 0xbd, 0xcd, 0xff, 0x31, + 0x4d, 0x75, 0x9e, 0x7b, 0x62, 0xd2, 0x6b, 0x78, + 0x49, 0x46, 0xb0, 0xd3, 0x6f, 0x91, 0x6f, 0x52 }, + { 0x25, 0x9e, 0xc7, 0xf1, 0x73, 0xbc, 0xc7, 0x6a, + 0x09, 0x94, 0xc9, 0x67, 0xb4, 0xf5, 0xf0, 0x24, + 0xc5, 0x60, 0x57, 0xfb, 0x79, 0xc9, 0x65, 0xc4, + 0xfa, 0xe4, 0x18, 0x75, 0xf0, 0x6a, 0x0e, 0x4c }, + { 0x19, 0x3c, 0xc8, 0xe7, 0xc3, 0xe0, 0x8b, 0xb3, + 0x0f, 0x54, 0x37, 0xaa, 0x27, 0xad, 0xe1, 0xf1, + 0x42, 0x36, 0x9b, 0x24, 0x6a, 0x67, 0x5b, 0x23, + 0x83, 0xe6, 0xda, 0x9b, 0x49, 0xa9, 0x80, 0x9e }, + { 0x5c, 0x10, 0x89, 0x6f, 0x0e, 0x28, 0x56, 0xb2, + 0xa2, 0xee, 0xe0, 0xfe, 0x4a, 0x2c, 0x16, 0x33, + 0x56, 0x5d, 0x18, 0xf0, 0xe9, 0x3e, 0x1f, 0xab, + 0x26, 0xc3, 0x73, 0xe8, 0xf8, 0x29, 0x65, 0x4d }, + { 0xf1, 0x60, 0x12, 0xd9, 0x3f, 0x28, 0x85, 0x1a, + 0x1e, 0xb9, 0x89, 0xf5, 0xd0, 0xb4, 0x3f, 0x3f, + 0x39, 0xca, 0x73, 0xc9, 0xa6, 0x2d, 0x51, 0x81, + 0xbf, 0xf2, 0x37, 0x53, 0x6b, 0xd3, 0x48, 0xc3 }, + { 0x29, 0x66, 0xb3, 0xcf, 0xae, 0x1e, 0x44, 0xea, + 0x99, 0x6d, 0xc5, 0xd6, 0x86, 0xcf, 0x25, 0xfa, + 0x05, 0x3f, 0xb6, 0xf6, 0x72, 0x01, 0xb9, 0xe4, + 0x6e, 0xad, 0xe8, 0x5d, 0x0a, 0xd6, 0xb8, 0x06 }, + { 0xdd, 0xb8, 0x78, 0x24, 0x85, 0xe9, 0x00, 0xbc, + 0x60, 0xbc, 0xf4, 0xc3, 0x3a, 0x6f, 0xd5, 0x85, + 0x68, 0x0c, 0xc6, 0x83, 0xd5, 0x16, 0xef, 0xa0, + 0x3e, 0xb9, 0x98, 0x5f, 0xad, 0x87, 0x15, 0xfb }, + { 0x4c, 0x4d, 0x6e, 0x71, 0xae, 0xa0, 0x57, 0x86, + 0x41, 0x31, 0x48, 0xfc, 0x7a, 0x78, 0x6b, 0x0e, + 0xca, 0xf5, 0x82, 0xcf, 0xf1, 0x20, 0x9f, 0x5a, + 0x80, 0x9f, 0xba, 0x85, 0x04, 0xce, 0x66, 0x2c }, + { 0xfb, 0x4c, 0x5e, 0x86, 0xd7, 0xb2, 0x22, 0x9b, + 0x99, 0xb8, 0xba, 0x6d, 0x94, 0xc2, 0x47, 0xef, + 0x96, 0x4a, 0xa3, 0xa2, 0xba, 0xe8, 0xed, 0xc7, + 0x75, 0x69, 0xf2, 0x8d, 0xbb, 0xff, 0x2d, 0x4e }, + { 0xe9, 0x4f, 0x52, 0x6d, 0xe9, 0x01, 0x96, 0x33, + 0xec, 0xd5, 0x4a, 0xc6, 0x12, 0x0f, 0x23, 0x95, + 0x8d, 0x77, 0x18, 0xf1, 0xe7, 0x71, 0x7b, 0xf3, + 0x29, 0x21, 0x1a, 0x4f, 0xae, 0xed, 0x4e, 0x6d }, + { 0xcb, 0xd6, 0x66, 0x0a, 0x10, 0xdb, 0x3f, 0x23, + 0xf7, 0xa0, 0x3d, 0x4b, 0x9d, 0x40, 0x44, 0xc7, + 0x93, 0x2b, 0x28, 0x01, 0xac, 0x89, 0xd6, 0x0b, + 0xc9, 0xeb, 0x92, 0xd6, 0x5a, 0x46, 0xc2, 0xa0 }, + { 0x88, 0x18, 0xbb, 0xd3, 0xdb, 0x4d, 0xc1, 0x23, + 0xb2, 0x5c, 0xbb, 0xa5, 0xf5, 0x4c, 0x2b, 0xc4, + 0xb3, 0xfc, 0xf9, 0xbf, 0x7d, 0x7a, 0x77, 0x09, + 0xf4, 0xae, 0x58, 0x8b, 0x26, 0x7c, 0x4e, 0xce }, + { 0xc6, 0x53, 0x82, 0x51, 0x3f, 0x07, 0x46, 0x0d, + 0xa3, 0x98, 0x33, 0xcb, 0x66, 0x6c, 0x5e, 0xd8, + 0x2e, 0x61, 0xb9, 0xe9, 0x98, 0xf4, 0xb0, 0xc4, + 0x28, 0x7c, 0xee, 0x56, 0xc3, 0xcc, 0x9b, 0xcd }, + { 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66, + 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26, + 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab, + 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4 }, + { 0x21, 0xfe, 0x0c, 0xeb, 0x00, 0x52, 0xbe, 0x7f, + 0xb0, 0xf0, 0x04, 0x18, 0x7c, 0xac, 0xd7, 0xde, + 0x67, 0xfa, 0x6e, 0xb0, 0x93, 0x8d, 0x92, 0x76, + 0x77, 0xf2, 0x39, 0x8c, 0x13, 0x23, 0x17, 0xa8 }, + { 0x2e, 0xf7, 0x3f, 0x3c, 0x26, 0xf1, 0x2d, 0x93, + 0x88, 0x9f, 0x3c, 0x78, 0xb6, 0xa6, 0x6c, 0x1d, + 0x52, 0xb6, 0x49, 0xdc, 0x9e, 0x85, 0x6e, 0x2c, + 0x17, 0x2e, 0xa7, 0xc5, 0x8a, 0xc2, 0xb5, 0xe3 }, + { 0x38, 0x8a, 0x3c, 0xd5, 0x6d, 0x73, 0x86, 0x7a, + 0xbb, 0x5f, 0x84, 0x01, 0x49, 0x2b, 0x6e, 0x26, + 0x81, 0xeb, 0x69, 0x85, 0x1e, 0x76, 0x7f, 0xd8, + 0x42, 0x10, 0xa5, 0x60, 0x76, 0xfb, 0x3d, 0xd3 }, + { 0xaf, 0x53, 0x3e, 0x02, 0x2f, 0xc9, 0x43, 0x9e, + 0x4e, 0x3c, 0xb8, 0x38, 0xec, 0xd1, 0x86, 0x92, + 0x23, 0x2a, 0xdf, 0x6f, 0xe9, 0x83, 0x95, 0x26, + 0xd3, 0xc3, 0xdd, 0x1b, 0x71, 0x91, 0x0b, 0x1a }, + { 0x75, 0x1c, 0x09, 0xd4, 0x1a, 0x93, 0x43, 0x88, + 0x2a, 0x81, 0xcd, 0x13, 0xee, 0x40, 0x81, 0x8d, + 0x12, 0xeb, 0x44, 0xc6, 0xc7, 0xf4, 0x0d, 0xf1, + 0x6e, 0x4a, 0xea, 0x8f, 0xab, 0x91, 0x97, 0x2a }, + { 0x5b, 0x73, 0xdd, 0xb6, 0x8d, 0x9d, 0x2b, 0x0a, + 0xa2, 0x65, 0xa0, 0x79, 0x88, 0xd6, 0xb8, 0x8a, + 0xe9, 0xaa, 0xc5, 0x82, 0xaf, 0x83, 0x03, 0x2f, + 0x8a, 0x9b, 0x21, 0xa2, 0xe1, 0xb7, 0xbf, 0x18 }, + { 0x3d, 0xa2, 0x91, 0x26, 0xc7, 0xc5, 0xd7, 0xf4, + 0x3e, 0x64, 0x24, 0x2a, 0x79, 0xfe, 0xaa, 0x4e, + 0xf3, 0x45, 0x9c, 0xde, 0xcc, 0xc8, 0x98, 0xed, + 0x59, 0xa9, 0x7f, 0x6e, 0xc9, 0x3b, 0x9d, 0xab }, + { 0x56, 0x6d, 0xc9, 0x20, 0x29, 0x3d, 0xa5, 0xcb, + 0x4f, 0xe0, 0xaa, 0x8a, 0xbd, 0xa8, 0xbb, 0xf5, + 0x6f, 0x55, 0x23, 0x13, 0xbf, 0xf1, 0x90, 0x46, + 0x64, 0x1e, 0x36, 0x15, 0xc1, 0xe3, 0xed, 0x3f }, + { 0x41, 0x15, 0xbe, 0xa0, 0x2f, 0x73, 0xf9, 0x7f, + 0x62, 0x9e, 0x5c, 0x55, 0x90, 0x72, 0x0c, 0x01, + 0xe7, 0xe4, 0x49, 0xae, 0x2a, 0x66, 0x97, 0xd4, + 0xd2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xf9 }, + { 0x4c, 0xe0, 0x8f, 0x47, 0x62, 0x46, 0x8a, 0x76, + 0x70, 0x01, 0x21, 0x64, 0x87, 0x8d, 0x68, 0x34, + 0x0c, 0x52, 0xa3, 0x5e, 0x66, 0xc1, 0x88, 0x4d, + 0x5c, 0x86, 0x48, 0x89, 0xab, 0xc9, 0x66, 0x77 }, + { 0x81, 0xea, 0x0b, 0x78, 0x04, 0x12, 0x4e, 0x0c, + 0x22, 0xea, 0x5f, 0xc7, 0x11, 0x04, 0xa2, 0xaf, + 0xcb, 0x52, 0xa1, 0xfa, 0x81, 0x6f, 0x3e, 0xcb, + 0x7d, 0xcb, 0x5d, 0x9d, 0xea, 0x17, 0x86, 0xd0 }, + { 0xfe, 0x36, 0x27, 0x33, 0xb0, 0x5f, 0x6b, 0xed, + 0xaf, 0x93, 0x79, 0xd7, 0xf7, 0x93, 0x6e, 0xde, + 0x20, 0x9b, 0x1f, 0x83, 0x23, 0xc3, 0x92, 0x25, + 0x49, 0xd9, 0xe7, 0x36, 0x81, 0xb5, 0xdb, 0x7b }, + { 0xef, 0xf3, 0x7d, 0x30, 0xdf, 0xd2, 0x03, 0x59, + 0xbe, 0x4e, 0x73, 0xfd, 0xf4, 0x0d, 0x27, 0x73, + 0x4b, 0x3d, 0xf9, 0x0a, 0x97, 0xa5, 0x5e, 0xd7, + 0x45, 0x29, 0x72, 0x94, 0xca, 0x85, 0xd0, 0x9f }, + { 0x17, 0x2f, 0xfc, 0x67, 0x15, 0x3d, 0x12, 0xe0, + 0xca, 0x76, 0xa8, 0xb6, 0xcd, 0x5d, 0x47, 0x31, + 0x88, 0x5b, 0x39, 0xce, 0x0c, 0xac, 0x93, 0xa8, + 0x97, 0x2a, 0x18, 0x00, 0x6c, 0x8b, 0x8b, 0xaf }, + { 0xc4, 0x79, 0x57, 0xf1, 0xcc, 0x88, 0xe8, 0x3e, + 0xf9, 0x44, 0x58, 0x39, 0x70, 0x9a, 0x48, 0x0a, + 0x03, 0x6b, 0xed, 0x5f, 0x88, 0xac, 0x0f, 0xcc, + 0x8e, 0x1e, 0x70, 0x3f, 0xfa, 0xac, 0x13, 0x2c }, + { 0x30, 0xf3, 0x54, 0x83, 0x70, 0xcf, 0xdc, 0xed, + 0xa5, 0xc3, 0x7b, 0x56, 0x9b, 0x61, 0x75, 0xe7, + 0x99, 0xee, 0xf1, 0xa6, 0x2a, 0xaa, 0x94, 0x32, + 0x45, 0xae, 0x76, 0x69, 0xc2, 0x27, 0xa7, 0xb5 }, + { 0xc9, 0x5d, 0xcb, 0x3c, 0xf1, 0xf2, 0x7d, 0x0e, + 0xef, 0x2f, 0x25, 0xd2, 0x41, 0x38, 0x70, 0x90, + 0x4a, 0x87, 0x7c, 0x4a, 0x56, 0xc2, 0xde, 0x1e, + 0x83, 0xe2, 0xbc, 0x2a, 0xe2, 0xe4, 0x68, 0x21 }, + { 0xd5, 0xd0, 0xb5, 0xd7, 0x05, 0x43, 0x4c, 0xd4, + 0x6b, 0x18, 0x57, 0x49, 0xf6, 0x6b, 0xfb, 0x58, + 0x36, 0xdc, 0xdf, 0x6e, 0xe5, 0x49, 0xa2, 0xb7, + 0xa4, 0xae, 0xe7, 0xf5, 0x80, 0x07, 0xca, 0xaf }, + { 0xbb, 0xc1, 0x24, 0xa7, 0x12, 0xf1, 0x5d, 0x07, + 0xc3, 0x00, 0xe0, 0x5b, 0x66, 0x83, 0x89, 0xa4, + 0x39, 0xc9, 0x17, 0x77, 0xf7, 0x21, 0xf8, 0x32, + 0x0c, 0x1c, 0x90, 0x78, 0x06, 0x6d, 0x2c, 0x7e }, + { 0xa4, 0x51, 0xb4, 0x8c, 0x35, 0xa6, 0xc7, 0x85, + 0x4c, 0xfa, 0xae, 0x60, 0x26, 0x2e, 0x76, 0x99, + 0x08, 0x16, 0x38, 0x2a, 0xc0, 0x66, 0x7e, 0x5a, + 0x5c, 0x9e, 0x1b, 0x46, 0xc4, 0x34, 0x2d, 0xdf }, + { 0xb0, 0xd1, 0x50, 0xfb, 0x55, 0xe7, 0x78, 0xd0, + 0x11, 0x47, 0xf0, 0xb5, 0xd8, 0x9d, 0x99, 0xec, + 0xb2, 0x0f, 0xf0, 0x7e, 0x5e, 0x67, 0x60, 0xd6, + 0xb6, 0x45, 0xeb, 0x5b, 0x65, 0x4c, 0x62, 0x2b }, + { 0x34, 0xf7, 0x37, 0xc0, 0xab, 0x21, 0x99, 0x51, + 0xee, 0xe8, 0x9a, 0x9f, 0x8d, 0xac, 0x29, 0x9c, + 0x9d, 0x4c, 0x38, 0xf3, 0x3f, 0xa4, 0x94, 0xc5, + 0xc6, 0xee, 0xfc, 0x92, 0xb6, 0xdb, 0x08, 0xbc }, + { 0x1a, 0x62, 0xcc, 0x3a, 0x00, 0x80, 0x0d, 0xcb, + 0xd9, 0x98, 0x91, 0x08, 0x0c, 0x1e, 0x09, 0x84, + 0x58, 0x19, 0x3a, 0x8c, 0xc9, 0xf9, 0x70, 0xea, + 0x99, 0xfb, 0xef, 0xf0, 0x03, 0x18, 0xc2, 0x89 }, + { 0xcf, 0xce, 0x55, 0xeb, 0xaf, 0xc8, 0x40, 0xd7, + 0xae, 0x48, 0x28, 0x1c, 0x7f, 0xd5, 0x7e, 0xc8, + 0xb4, 0x82, 0xd4, 0xb7, 0x04, 0x43, 0x74, 0x95, + 0x49, 0x5a, 0xc4, 0x14, 0xcf, 0x4a, 0x37, 0x4b }, + { 0x67, 0x46, 0xfa, 0xcf, 0x71, 0x14, 0x6d, 0x99, + 0x9d, 0xab, 0xd0, 0x5d, 0x09, 0x3a, 0xe5, 0x86, + 0x64, 0x8d, 0x1e, 0xe2, 0x8e, 0x72, 0x61, 0x7b, + 0x99, 0xd0, 0xf0, 0x08, 0x6e, 0x1e, 0x45, 0xbf }, + { 0x57, 0x1c, 0xed, 0x28, 0x3b, 0x3f, 0x23, 0xb4, + 0xe7, 0x50, 0xbf, 0x12, 0xa2, 0xca, 0xf1, 0x78, + 0x18, 0x47, 0xbd, 0x89, 0x0e, 0x43, 0x60, 0x3c, + 0xdc, 0x59, 0x76, 0x10, 0x2b, 0x7b, 0xb1, 0x1b }, + { 0xcf, 0xcb, 0x76, 0x5b, 0x04, 0x8e, 0x35, 0x02, + 0x2c, 0x5d, 0x08, 0x9d, 0x26, 0xe8, 0x5a, 0x36, + 0xb0, 0x05, 0xa2, 0xb8, 0x04, 0x93, 0xd0, 0x3a, + 0x14, 0x4e, 0x09, 0xf4, 0x09, 0xb6, 0xaf, 0xd1 }, + { 0x40, 0x50, 0xc7, 0xa2, 0x77, 0x05, 0xbb, 0x27, + 0xf4, 0x20, 0x89, 0xb2, 0x99, 0xf3, 0xcb, 0xe5, + 0x05, 0x4e, 0xad, 0x68, 0x72, 0x7e, 0x8e, 0xf9, + 0x31, 0x8c, 0xe6, 0xf2, 0x5c, 0xd6, 0xf3, 0x1d }, + { 0x18, 0x40, 0x70, 0xbd, 0x5d, 0x26, 0x5f, 0xbd, + 0xc1, 0x42, 0xcd, 0x1c, 0x5c, 0xd0, 0xd7, 0xe4, + 0x14, 0xe7, 0x03, 0x69, 0xa2, 0x66, 0xd6, 0x27, + 0xc8, 0xfb, 0xa8, 0x4f, 0xa5, 0xe8, 0x4c, 0x34 }, + { 0x9e, 0xdd, 0xa9, 0xa4, 0x44, 0x39, 0x02, 0xa9, + 0x58, 0x8c, 0x0d, 0x0c, 0xcc, 0x62, 0xb9, 0x30, + 0x21, 0x84, 0x79, 0xa6, 0x84, 0x1e, 0x6f, 0xe7, + 0xd4, 0x30, 0x03, 0xf0, 0x4b, 0x1f, 0xd6, 0x43 }, + { 0xe4, 0x12, 0xfe, 0xef, 0x79, 0x08, 0x32, 0x4a, + 0x6d, 0xa1, 0x84, 0x16, 0x29, 0xf3, 0x5d, 0x3d, + 0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xec, 0x57, + 0xc6, 0x14, 0x83, 0x6b, 0x63, 0xd3, 0x07, 0x63 }, + { 0x1a, 0x2b, 0x8e, 0xdf, 0xf3, 0xf9, 0xac, 0xc1, + 0x55, 0x4f, 0xcb, 0xae, 0x3c, 0xf1, 0xd6, 0x29, + 0x8c, 0x64, 0x62, 0xe2, 0x2e, 0x5e, 0xb0, 0x25, + 0x96, 0x84, 0xf8, 0x35, 0x01, 0x2b, 0xd1, 0x3f }, + { 0x28, 0x8c, 0x4a, 0xd9, 0xb9, 0x40, 0x97, 0x62, + 0xea, 0x07, 0xc2, 0x4a, 0x41, 0xf0, 0x4f, 0x69, + 0xa7, 0xd7, 0x4b, 0xee, 0x2d, 0x95, 0x43, 0x53, + 0x74, 0xbd, 0xe9, 0x46, 0xd7, 0x24, 0x1c, 0x7b }, + { 0x80, 0x56, 0x91, 0xbb, 0x28, 0x67, 0x48, 0xcf, + 0xb5, 0x91, 0xd3, 0xae, 0xbe, 0x7e, 0x6f, 0x4e, + 0x4d, 0xc6, 0xe2, 0x80, 0x8c, 0x65, 0x14, 0x3c, + 0xc0, 0x04, 0xe4, 0xeb, 0x6f, 0xd0, 0x9d, 0x43 }, + { 0xd4, 0xac, 0x8d, 0x3a, 0x0a, 0xfc, 0x6c, 0xfa, + 0x7b, 0x46, 0x0a, 0xe3, 0x00, 0x1b, 0xae, 0xb3, + 0x6d, 0xad, 0xb3, 0x7d, 0xa0, 0x7d, 0x2e, 0x8a, + 0xc9, 0x18, 0x22, 0xdf, 0x34, 0x8a, 0xed, 0x3d }, + { 0xc3, 0x76, 0x61, 0x70, 0x14, 0xd2, 0x01, 0x58, + 0xbc, 0xed, 0x3d, 0x3b, 0xa5, 0x52, 0xb6, 0xec, + 0xcf, 0x84, 0xe6, 0x2a, 0xa3, 0xeb, 0x65, 0x0e, + 0x90, 0x02, 0x9c, 0x84, 0xd1, 0x3e, 0xea, 0x69 }, + { 0xc4, 0x1f, 0x09, 0xf4, 0x3c, 0xec, 0xae, 0x72, + 0x93, 0xd6, 0x00, 0x7c, 0xa0, 0xa3, 0x57, 0x08, + 0x7d, 0x5a, 0xe5, 0x9b, 0xe5, 0x00, 0xc1, 0xcd, + 0x5b, 0x28, 0x9e, 0xe8, 0x10, 0xc7, 0xb0, 0x82 }, + { 0x03, 0xd1, 0xce, 0xd1, 0xfb, 0xa5, 0xc3, 0x91, + 0x55, 0xc4, 0x4b, 0x77, 0x65, 0xcb, 0x76, 0x0c, + 0x78, 0x70, 0x8d, 0xcf, 0xc8, 0x0b, 0x0b, 0xd8, + 0xad, 0xe3, 0xa5, 0x6d, 0xa8, 0x83, 0x0b, 0x29 }, + { 0x09, 0xbd, 0xe6, 0xf1, 0x52, 0x21, 0x8d, 0xc9, + 0x2c, 0x41, 0xd7, 0xf4, 0x53, 0x87, 0xe6, 0x3e, + 0x58, 0x69, 0xd8, 0x07, 0xec, 0x70, 0xb8, 0x21, + 0x40, 0x5d, 0xbd, 0x88, 0x4b, 0x7f, 0xcf, 0x4b }, + { 0x71, 0xc9, 0x03, 0x6e, 0x18, 0x17, 0x9b, 0x90, + 0xb3, 0x7d, 0x39, 0xe9, 0xf0, 0x5e, 0xb8, 0x9c, + 0xc5, 0xfc, 0x34, 0x1f, 0xd7, 0xc4, 0x77, 0xd0, + 0xd7, 0x49, 0x32, 0x85, 0xfa, 0xca, 0x08, 0xa4 }, + { 0x59, 0x16, 0x83, 0x3e, 0xbb, 0x05, 0xcd, 0x91, + 0x9c, 0xa7, 0xfe, 0x83, 0xb6, 0x92, 0xd3, 0x20, + 0x5b, 0xef, 0x72, 0x39, 0x2b, 0x2c, 0xf6, 0xbb, + 0x0a, 0x6d, 0x43, 0xf9, 0x94, 0xf9, 0x5f, 0x11 }, + { 0xf6, 0x3a, 0xab, 0x3e, 0xc6, 0x41, 0xb3, 0xb0, + 0x24, 0x96, 0x4c, 0x2b, 0x43, 0x7c, 0x04, 0xf6, + 0x04, 0x3c, 0x4c, 0x7e, 0x02, 0x79, 0x23, 0x99, + 0x95, 0x40, 0x19, 0x58, 0xf8, 0x6b, 0xbe, 0x54 }, + { 0xf1, 0x72, 0xb1, 0x80, 0xbf, 0xb0, 0x97, 0x40, + 0x49, 0x31, 0x20, 0xb6, 0x32, 0x6c, 0xbd, 0xc5, + 0x61, 0xe4, 0x77, 0xde, 0xf9, 0xbb, 0xcf, 0xd2, + 0x8c, 0xc8, 0xc1, 0xc5, 0xe3, 0x37, 0x9a, 0x31 }, + { 0xcb, 0x9b, 0x89, 0xcc, 0x18, 0x38, 0x1d, 0xd9, + 0x14, 0x1a, 0xde, 0x58, 0x86, 0x54, 0xd4, 0xe6, + 0xa2, 0x31, 0xd5, 0xbf, 0x49, 0xd4, 0xd5, 0x9a, + 0xc2, 0x7d, 0x86, 0x9c, 0xbe, 0x10, 0x0c, 0xf3 }, + { 0x7b, 0xd8, 0x81, 0x50, 0x46, 0xfd, 0xd8, 0x10, + 0xa9, 0x23, 0xe1, 0x98, 0x4a, 0xae, 0xbd, 0xcd, + 0xf8, 0x4d, 0x87, 0xc8, 0x99, 0x2d, 0x68, 0xb5, + 0xee, 0xb4, 0x60, 0xf9, 0x3e, 0xb3, 0xc8, 0xd7 }, + { 0x60, 0x7b, 0xe6, 0x68, 0x62, 0xfd, 0x08, 0xee, + 0x5b, 0x19, 0xfa, 0xca, 0xc0, 0x9d, 0xfd, 0xbc, + 0xd4, 0x0c, 0x31, 0x21, 0x01, 0xd6, 0x6e, 0x6e, + 0xbd, 0x2b, 0x84, 0x1f, 0x1b, 0x9a, 0x93, 0x25 }, + { 0x9f, 0xe0, 0x3b, 0xbe, 0x69, 0xab, 0x18, 0x34, + 0xf5, 0x21, 0x9b, 0x0d, 0xa8, 0x8a, 0x08, 0xb3, + 0x0a, 0x66, 0xc5, 0x91, 0x3f, 0x01, 0x51, 0x96, + 0x3c, 0x36, 0x05, 0x60, 0xdb, 0x03, 0x87, 0xb3 }, + { 0x90, 0xa8, 0x35, 0x85, 0x71, 0x7b, 0x75, 0xf0, + 0xe9, 0xb7, 0x25, 0xe0, 0x55, 0xee, 0xee, 0xb9, + 0xe7, 0xa0, 0x28, 0xea, 0x7e, 0x6c, 0xbc, 0x07, + 0xb2, 0x09, 0x17, 0xec, 0x03, 0x63, 0xe3, 0x8c }, + { 0x33, 0x6e, 0xa0, 0x53, 0x0f, 0x4a, 0x74, 0x69, + 0x12, 0x6e, 0x02, 0x18, 0x58, 0x7e, 0xbb, 0xde, + 0x33, 0x58, 0xa0, 0xb3, 0x1c, 0x29, 0xd2, 0x00, + 0xf7, 0xdc, 0x7e, 0xb1, 0x5c, 0x6a, 0xad, 0xd8 }, + { 0xa7, 0x9e, 0x76, 0xdc, 0x0a, 0xbc, 0xa4, 0x39, + 0x6f, 0x07, 0x47, 0xcd, 0x7b, 0x74, 0x8d, 0xf9, + 0x13, 0x00, 0x76, 0x26, 0xb1, 0xd6, 0x59, 0xda, + 0x0c, 0x1f, 0x78, 0xb9, 0x30, 0x3d, 0x01, 0xa3 }, + { 0x44, 0xe7, 0x8a, 0x77, 0x37, 0x56, 0xe0, 0x95, + 0x15, 0x19, 0x50, 0x4d, 0x70, 0x38, 0xd2, 0x8d, + 0x02, 0x13, 0xa3, 0x7e, 0x0c, 0xe3, 0x75, 0x37, + 0x17, 0x57, 0xbc, 0x99, 0x63, 0x11, 0xe3, 0xb8 }, + { 0x77, 0xac, 0x01, 0x2a, 0x3f, 0x75, 0x4d, 0xcf, + 0xea, 0xb5, 0xeb, 0x99, 0x6b, 0xe9, 0xcd, 0x2d, + 0x1f, 0x96, 0x11, 0x1b, 0x6e, 0x49, 0xf3, 0x99, + 0x4d, 0xf1, 0x81, 0xf2, 0x85, 0x69, 0xd8, 0x25 }, + { 0xce, 0x5a, 0x10, 0xdb, 0x6f, 0xcc, 0xda, 0xf1, + 0x40, 0xaa, 0xa4, 0xde, 0xd6, 0x25, 0x0a, 0x9c, + 0x06, 0xe9, 0x22, 0x2b, 0xc9, 0xf9, 0xf3, 0x65, + 0x8a, 0x4a, 0xff, 0x93, 0x5f, 0x2b, 0x9f, 0x3a }, + { 0xec, 0xc2, 0x03, 0xa7, 0xfe, 0x2b, 0xe4, 0xab, + 0xd5, 0x5b, 0xb5, 0x3e, 0x6e, 0x67, 0x35, 0x72, + 0xe0, 0x07, 0x8d, 0xa8, 0xcd, 0x37, 0x5e, 0xf4, + 0x30, 0xcc, 0x97, 0xf9, 0xf8, 0x00, 0x83, 0xaf }, + { 0x14, 0xa5, 0x18, 0x6d, 0xe9, 0xd7, 0xa1, 0x8b, + 0x04, 0x12, 0xb8, 0x56, 0x3e, 0x51, 0xcc, 0x54, + 0x33, 0x84, 0x0b, 0x4a, 0x12, 0x9a, 0x8f, 0xf9, + 0x63, 0xb3, 0x3a, 0x3c, 0x4a, 0xfe, 0x8e, 0xbb }, + { 0x13, 0xf8, 0xef, 0x95, 0xcb, 0x86, 0xe6, 0xa6, + 0x38, 0x93, 0x1c, 0x8e, 0x10, 0x76, 0x73, 0xeb, + 0x76, 0xba, 0x10, 0xd7, 0xc2, 0xcd, 0x70, 0xb9, + 0xd9, 0x92, 0x0b, 0xbe, 0xed, 0x92, 0x94, 0x09 }, + { 0x0b, 0x33, 0x8f, 0x4e, 0xe1, 0x2f, 0x2d, 0xfc, + 0xb7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xe0, 0xb0, + 0x63, 0x21, 0x52, 0x58, 0x1d, 0x13, 0x32, 0x51, + 0x6e, 0x4a, 0x2c, 0xab, 0x19, 0x42, 0xcc, 0xa4 }, + { 0xea, 0xab, 0x0e, 0xc3, 0x7b, 0x3b, 0x8a, 0xb7, + 0x96, 0xe9, 0xf5, 0x72, 0x38, 0xde, 0x14, 0xa2, + 0x64, 0xa0, 0x76, 0xf3, 0x88, 0x7d, 0x86, 0xe2, + 0x9b, 0xb5, 0x90, 0x6d, 0xb5, 0xa0, 0x0e, 0x02 }, + { 0x23, 0xcb, 0x68, 0xb8, 0xc0, 0xe6, 0xdc, 0x26, + 0xdc, 0x27, 0x76, 0x6d, 0xdc, 0x0a, 0x13, 0xa9, + 0x94, 0x38, 0xfd, 0x55, 0x61, 0x7a, 0xa4, 0x09, + 0x5d, 0x8f, 0x96, 0x97, 0x20, 0xc8, 0x72, 0xdf }, + { 0x09, 0x1d, 0x8e, 0xe3, 0x0d, 0x6f, 0x29, 0x68, + 0xd4, 0x6b, 0x68, 0x7d, 0xd6, 0x52, 0x92, 0x66, + 0x57, 0x42, 0xde, 0x0b, 0xb8, 0x3d, 0xcc, 0x00, + 0x04, 0xc7, 0x2c, 0xe1, 0x00, 0x07, 0xa5, 0x49 }, + { 0x7f, 0x50, 0x7a, 0xbc, 0x6d, 0x19, 0xba, 0x00, + 0xc0, 0x65, 0xa8, 0x76, 0xec, 0x56, 0x57, 0x86, + 0x88, 0x82, 0xd1, 0x8a, 0x22, 0x1b, 0xc4, 0x6c, + 0x7a, 0x69, 0x12, 0x54, 0x1f, 0x5b, 0xc7, 0xba }, + { 0xa0, 0x60, 0x7c, 0x24, 0xe1, 0x4e, 0x8c, 0x22, + 0x3d, 0xb0, 0xd7, 0x0b, 0x4d, 0x30, 0xee, 0x88, + 0x01, 0x4d, 0x60, 0x3f, 0x43, 0x7e, 0x9e, 0x02, + 0xaa, 0x7d, 0xaf, 0xa3, 0xcd, 0xfb, 0xad, 0x94 }, + { 0xdd, 0xbf, 0xea, 0x75, 0xcc, 0x46, 0x78, 0x82, + 0xeb, 0x34, 0x83, 0xce, 0x5e, 0x2e, 0x75, 0x6a, + 0x4f, 0x47, 0x01, 0xb7, 0x6b, 0x44, 0x55, 0x19, + 0xe8, 0x9f, 0x22, 0xd6, 0x0f, 0xa8, 0x6e, 0x06 }, + { 0x0c, 0x31, 0x1f, 0x38, 0xc3, 0x5a, 0x4f, 0xb9, + 0x0d, 0x65, 0x1c, 0x28, 0x9d, 0x48, 0x68, 0x56, + 0xcd, 0x14, 0x13, 0xdf, 0x9b, 0x06, 0x77, 0xf5, + 0x3e, 0xce, 0x2c, 0xd9, 0xe4, 0x77, 0xc6, 0x0a }, + { 0x46, 0xa7, 0x3a, 0x8d, 0xd3, 0xe7, 0x0f, 0x59, + 0xd3, 0x94, 0x2c, 0x01, 0xdf, 0x59, 0x9d, 0xef, + 0x78, 0x3c, 0x9d, 0xa8, 0x2f, 0xd8, 0x32, 0x22, + 0xcd, 0x66, 0x2b, 0x53, 0xdc, 0xe7, 0xdb, 0xdf }, + { 0xad, 0x03, 0x8f, 0xf9, 0xb1, 0x4d, 0xe8, 0x4a, + 0x80, 0x1e, 0x4e, 0x62, 0x1c, 0xe5, 0xdf, 0x02, + 0x9d, 0xd9, 0x35, 0x20, 0xd0, 0xc2, 0xfa, 0x38, + 0xbf, 0xf1, 0x76, 0xa8, 0xb1, 0xd1, 0x69, 0x8c }, + { 0xab, 0x70, 0xc5, 0xdf, 0xbd, 0x1e, 0xa8, 0x17, + 0xfe, 0xd0, 0xcd, 0x06, 0x72, 0x93, 0xab, 0xf3, + 0x19, 0xe5, 0xd7, 0x90, 0x1c, 0x21, 0x41, 0xd5, + 0xd9, 0x9b, 0x23, 0xf0, 0x3a, 0x38, 0xe7, 0x48 }, + { 0x1f, 0xff, 0xda, 0x67, 0x93, 0x2b, 0x73, 0xc8, + 0xec, 0xaf, 0x00, 0x9a, 0x34, 0x91, 0xa0, 0x26, + 0x95, 0x3b, 0xab, 0xfe, 0x1f, 0x66, 0x3b, 0x06, + 0x97, 0xc3, 0xc4, 0xae, 0x8b, 0x2e, 0x7d, 0xcb }, + { 0xb0, 0xd2, 0xcc, 0x19, 0x47, 0x2d, 0xd5, 0x7f, + 0x2b, 0x17, 0xef, 0xc0, 0x3c, 0x8d, 0x58, 0xc2, + 0x28, 0x3d, 0xbb, 0x19, 0xda, 0x57, 0x2f, 0x77, + 0x55, 0x85, 0x5a, 0xa9, 0x79, 0x43, 0x17, 0xa0 }, + { 0xa0, 0xd1, 0x9a, 0x6e, 0xe3, 0x39, 0x79, 0xc3, + 0x25, 0x51, 0x0e, 0x27, 0x66, 0x22, 0xdf, 0x41, + 0xf7, 0x15, 0x83, 0xd0, 0x75, 0x01, 0xb8, 0x70, + 0x71, 0x12, 0x9a, 0x0a, 0xd9, 0x47, 0x32, 0xa5 }, + { 0x72, 0x46, 0x42, 0xa7, 0x03, 0x2d, 0x10, 0x62, + 0xb8, 0x9e, 0x52, 0xbe, 0xa3, 0x4b, 0x75, 0xdf, + 0x7d, 0x8f, 0xe7, 0x72, 0xd9, 0xfe, 0x3c, 0x93, + 0xdd, 0xf3, 0xc4, 0x54, 0x5a, 0xb5, 0xa9, 0x9b }, + { 0xad, 0xe5, 0xea, 0xa7, 0xe6, 0x1f, 0x67, 0x2d, + 0x58, 0x7e, 0xa0, 0x3d, 0xae, 0x7d, 0x7b, 0x55, + 0x22, 0x9c, 0x01, 0xd0, 0x6b, 0xc0, 0xa5, 0x70, + 0x14, 0x36, 0xcb, 0xd1, 0x83, 0x66, 0xa6, 0x26 }, + { 0x01, 0x3b, 0x31, 0xeb, 0xd2, 0x28, 0xfc, 0xdd, + 0xa5, 0x1f, 0xab, 0xb0, 0x3b, 0xb0, 0x2d, 0x60, + 0xac, 0x20, 0xca, 0x21, 0x5a, 0xaf, 0xa8, 0x3b, + 0xdd, 0x85, 0x5e, 0x37, 0x55, 0xa3, 0x5f, 0x0b }, + { 0x33, 0x2e, 0xd4, 0x0b, 0xb1, 0x0d, 0xde, 0x3c, + 0x95, 0x4a, 0x75, 0xd7, 0xb8, 0x99, 0x9d, 0x4b, + 0x26, 0xa1, 0xc0, 0x63, 0xc1, 0xdc, 0x6e, 0x32, + 0xc1, 0xd9, 0x1b, 0xab, 0x7b, 0xbb, 0x7d, 0x16 }, + { 0xc7, 0xa1, 0x97, 0xb3, 0xa0, 0x5b, 0x56, 0x6b, + 0xcc, 0x9f, 0xac, 0xd2, 0x0e, 0x44, 0x1d, 0x6f, + 0x6c, 0x28, 0x60, 0xac, 0x96, 0x51, 0xcd, 0x51, + 0xd6, 0xb9, 0xd2, 0xcd, 0xee, 0xea, 0x03, 0x90 }, + { 0xbd, 0x9c, 0xf6, 0x4e, 0xa8, 0x95, 0x3c, 0x03, + 0x71, 0x08, 0xe6, 0xf6, 0x54, 0x91, 0x4f, 0x39, + 0x58, 0xb6, 0x8e, 0x29, 0xc1, 0x67, 0x00, 0xdc, + 0x18, 0x4d, 0x94, 0xa2, 0x17, 0x08, 0xff, 0x60 }, + { 0x88, 0x35, 0xb0, 0xac, 0x02, 0x11, 0x51, 0xdf, + 0x71, 0x64, 0x74, 0xce, 0x27, 0xce, 0x4d, 0x3c, + 0x15, 0xf0, 0xb2, 0xda, 0xb4, 0x80, 0x03, 0xcf, + 0x3f, 0x3e, 0xfd, 0x09, 0x45, 0x10, 0x6b, 0x9a }, + { 0x3b, 0xfe, 0xfa, 0x33, 0x01, 0xaa, 0x55, 0xc0, + 0x80, 0x19, 0x0c, 0xff, 0xda, 0x8e, 0xae, 0x51, + 0xd9, 0xaf, 0x48, 0x8b, 0x4c, 0x1f, 0x24, 0xc3, + 0xd9, 0xa7, 0x52, 0x42, 0xfd, 0x8e, 0xa0, 0x1d }, + { 0x08, 0x28, 0x4d, 0x14, 0x99, 0x3c, 0xd4, 0x7d, + 0x53, 0xeb, 0xae, 0xcf, 0x0d, 0xf0, 0x47, 0x8c, + 0xc1, 0x82, 0xc8, 0x9c, 0x00, 0xe1, 0x85, 0x9c, + 0x84, 0x85, 0x16, 0x86, 0xdd, 0xf2, 0xc1, 0xb7 }, + { 0x1e, 0xd7, 0xef, 0x9f, 0x04, 0xc2, 0xac, 0x8d, + 0xb6, 0xa8, 0x64, 0xdb, 0x13, 0x10, 0x87, 0xf2, + 0x70, 0x65, 0x09, 0x8e, 0x69, 0xc3, 0xfe, 0x78, + 0x71, 0x8d, 0x9b, 0x94, 0x7f, 0x4a, 0x39, 0xd0 }, + { 0xc1, 0x61, 0xf2, 0xdc, 0xd5, 0x7e, 0x9c, 0x14, + 0x39, 0xb3, 0x1a, 0x9d, 0xd4, 0x3d, 0x8f, 0x3d, + 0x7d, 0xd8, 0xf0, 0xeb, 0x7c, 0xfa, 0xc6, 0xfb, + 0x25, 0xa0, 0xf2, 0x8e, 0x30, 0x6f, 0x06, 0x61 }, + { 0xc0, 0x19, 0x69, 0xad, 0x34, 0xc5, 0x2c, 0xaf, + 0x3d, 0xc4, 0xd8, 0x0d, 0x19, 0x73, 0x5c, 0x29, + 0x73, 0x1a, 0xc6, 0xe7, 0xa9, 0x20, 0x85, 0xab, + 0x92, 0x50, 0xc4, 0x8d, 0xea, 0x48, 0xa3, 0xfc }, + { 0x17, 0x20, 0xb3, 0x65, 0x56, 0x19, 0xd2, 0xa5, + 0x2b, 0x35, 0x21, 0xae, 0x0e, 0x49, 0xe3, 0x45, + 0xcb, 0x33, 0x89, 0xeb, 0xd6, 0x20, 0x8a, 0xca, + 0xf9, 0xf1, 0x3f, 0xda, 0xcc, 0xa8, 0xbe, 0x49 }, + { 0x75, 0x62, 0x88, 0x36, 0x1c, 0x83, 0xe2, 0x4c, + 0x61, 0x7c, 0xf9, 0x5c, 0x90, 0x5b, 0x22, 0xd0, + 0x17, 0xcd, 0xc8, 0x6f, 0x0b, 0xf1, 0xd6, 0x58, + 0xf4, 0x75, 0x6c, 0x73, 0x79, 0x87, 0x3b, 0x7f }, + { 0xe7, 0xd0, 0xed, 0xa3, 0x45, 0x26, 0x93, 0xb7, + 0x52, 0xab, 0xcd, 0xa1, 0xb5, 0x5e, 0x27, 0x6f, + 0x82, 0x69, 0x8f, 0x5f, 0x16, 0x05, 0x40, 0x3e, + 0xff, 0x83, 0x0b, 0xea, 0x00, 0x71, 0xa3, 0x94 }, + { 0x2c, 0x82, 0xec, 0xaa, 0x6b, 0x84, 0x80, 0x3e, + 0x04, 0x4a, 0xf6, 0x31, 0x18, 0xaf, 0xe5, 0x44, + 0x68, 0x7c, 0xb6, 0xe6, 0xc7, 0xdf, 0x49, 0xed, + 0x76, 0x2d, 0xfd, 0x7c, 0x86, 0x93, 0xa1, 0xbc }, + { 0x61, 0x36, 0xcb, 0xf4, 0xb4, 0x41, 0x05, 0x6f, + 0xa1, 0xe2, 0x72, 0x24, 0x98, 0x12, 0x5d, 0x6d, + 0xed, 0x45, 0xe1, 0x7b, 0x52, 0x14, 0x39, 0x59, + 0xc7, 0xf4, 0xd4, 0xe3, 0x95, 0x21, 0x8a, 0xc2 }, + { 0x72, 0x1d, 0x32, 0x45, 0xaa, 0xfe, 0xf2, 0x7f, + 0x6a, 0x62, 0x4f, 0x47, 0x95, 0x4b, 0x6c, 0x25, + 0x50, 0x79, 0x52, 0x6f, 0xfa, 0x25, 0xe9, 0xff, + 0x77, 0xe5, 0xdc, 0xff, 0x47, 0x3b, 0x15, 0x97 }, + { 0x9d, 0xd2, 0xfb, 0xd8, 0xce, 0xf1, 0x6c, 0x35, + 0x3c, 0x0a, 0xc2, 0x11, 0x91, 0xd5, 0x09, 0xeb, + 0x28, 0xdd, 0x9e, 0x3e, 0x0d, 0x8c, 0xea, 0x5d, + 0x26, 0xca, 0x83, 0x93, 0x93, 0x85, 0x1c, 0x3a }, + { 0xb2, 0x39, 0x4c, 0xea, 0xcd, 0xeb, 0xf2, 0x1b, + 0xf9, 0xdf, 0x2c, 0xed, 0x98, 0xe5, 0x8f, 0x1c, + 0x3a, 0x4b, 0xbb, 0xff, 0x66, 0x0d, 0xd9, 0x00, + 0xf6, 0x22, 0x02, 0xd6, 0x78, 0x5c, 0xc4, 0x6e }, + { 0x57, 0x08, 0x9f, 0x22, 0x27, 0x49, 0xad, 0x78, + 0x71, 0x76, 0x5f, 0x06, 0x2b, 0x11, 0x4f, 0x43, + 0xba, 0x20, 0xec, 0x56, 0x42, 0x2a, 0x8b, 0x1e, + 0x3f, 0x87, 0x19, 0x2c, 0x0e, 0xa7, 0x18, 0xc6 }, + { 0xe4, 0x9a, 0x94, 0x59, 0x96, 0x1c, 0xd3, 0x3c, + 0xdf, 0x4a, 0xae, 0x1b, 0x10, 0x78, 0xa5, 0xde, + 0xa7, 0xc0, 0x40, 0xe0, 0xfe, 0xa3, 0x40, 0xc9, + 0x3a, 0x72, 0x48, 0x72, 0xfc, 0x4a, 0xf8, 0x06 }, + { 0xed, 0xe6, 0x7f, 0x72, 0x0e, 0xff, 0xd2, 0xca, + 0x9c, 0x88, 0x99, 0x41, 0x52, 0xd0, 0x20, 0x1d, + 0xee, 0x6b, 0x0a, 0x2d, 0x2c, 0x07, 0x7a, 0xca, + 0x6d, 0xae, 0x29, 0xf7, 0x3f, 0x8b, 0x63, 0x09 }, + { 0xe0, 0xf4, 0x34, 0xbf, 0x22, 0xe3, 0x08, 0x80, + 0x39, 0xc2, 0x1f, 0x71, 0x9f, 0xfc, 0x67, 0xf0, + 0xf2, 0xcb, 0x5e, 0x98, 0xa7, 0xa0, 0x19, 0x4c, + 0x76, 0xe9, 0x6b, 0xf4, 0xe8, 0xe1, 0x7e, 0x61 }, + { 0x27, 0x7c, 0x04, 0xe2, 0x85, 0x34, 0x84, 0xa4, + 0xeb, 0xa9, 0x10, 0xad, 0x33, 0x6d, 0x01, 0xb4, + 0x77, 0xb6, 0x7c, 0xc2, 0x00, 0xc5, 0x9f, 0x3c, + 0x8d, 0x77, 0xee, 0xf8, 0x49, 0x4f, 0x29, 0xcd }, + { 0x15, 0x6d, 0x57, 0x47, 0xd0, 0xc9, 0x9c, 0x7f, + 0x27, 0x09, 0x7d, 0x7b, 0x7e, 0x00, 0x2b, 0x2e, + 0x18, 0x5c, 0xb7, 0x2d, 0x8d, 0xd7, 0xeb, 0x42, + 0x4a, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9f }, + { 0x20, 0xdd, 0xd1, 0xed, 0x9b, 0x1c, 0xa8, 0x03, + 0x94, 0x6d, 0x64, 0xa8, 0x3a, 0xe4, 0x65, 0x9d, + 0xa6, 0x7f, 0xba, 0x7a, 0x1a, 0x3e, 0xdd, 0xb1, + 0xe1, 0x03, 0xc0, 0xf5, 0xe0, 0x3e, 0x3a, 0x2c }, + { 0xf0, 0xaf, 0x60, 0x4d, 0x3d, 0xab, 0xbf, 0x9a, + 0x0f, 0x2a, 0x7d, 0x3d, 0xda, 0x6b, 0xd3, 0x8b, + 0xba, 0x72, 0xc6, 0xd0, 0x9b, 0xe4, 0x94, 0xfc, + 0xef, 0x71, 0x3f, 0xf1, 0x01, 0x89, 0xb6, 0xe6 }, + { 0x98, 0x02, 0xbb, 0x87, 0xde, 0xf4, 0xcc, 0x10, + 0xc4, 0xa5, 0xfd, 0x49, 0xaa, 0x58, 0xdf, 0xe2, + 0xf3, 0xfd, 0xdb, 0x46, 0xb4, 0x70, 0x88, 0x14, + 0xea, 0xd8, 0x1d, 0x23, 0xba, 0x95, 0x13, 0x9b }, + { 0x4f, 0x8c, 0xe1, 0xe5, 0x1d, 0x2f, 0xe7, 0xf2, + 0x40, 0x43, 0xa9, 0x04, 0xd8, 0x98, 0xeb, 0xfc, + 0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xaa, + 0x09, 0x9b, 0x79, 0x5e, 0xcb, 0x35, 0xce, 0xdb }, + { 0xbd, 0xdc, 0x65, 0x14, 0xd7, 0xee, 0x6a, 0xce, + 0x0a, 0x4a, 0xc1, 0xd0, 0xe0, 0x68, 0x11, 0x22, + 0x88, 0xcb, 0xcf, 0x56, 0x04, 0x54, 0x64, 0x27, + 0x05, 0x63, 0x01, 0x77, 0xcb, 0xa6, 0x08, 0xbd }, + { 0xd6, 0x35, 0x99, 0x4f, 0x62, 0x91, 0x51, 0x7b, + 0x02, 0x81, 0xff, 0xdd, 0x49, 0x6a, 0xfa, 0x86, + 0x27, 0x12, 0xe5, 0xb3, 0xc4, 0xe5, 0x2e, 0x4c, + 0xd5, 0xfd, 0xae, 0x8c, 0x0e, 0x72, 0xfb, 0x08 }, + { 0x87, 0x8d, 0x9c, 0xa6, 0x00, 0xcf, 0x87, 0xe7, + 0x69, 0xcc, 0x30, 0x5c, 0x1b, 0x35, 0x25, 0x51, + 0x86, 0x61, 0x5a, 0x73, 0xa0, 0xda, 0x61, 0x3b, + 0x5f, 0x1c, 0x98, 0xdb, 0xf8, 0x12, 0x83, 0xea }, + { 0xa6, 0x4e, 0xbe, 0x5d, 0xc1, 0x85, 0xde, 0x9f, + 0xdd, 0xe7, 0x60, 0x7b, 0x69, 0x98, 0x70, 0x2e, + 0xb2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7d, + 0x2f, 0xa7, 0x2e, 0x87, 0xa4, 0x77, 0x02, 0xd6 }, + { 0xce, 0x50, 0xea, 0xb7, 0xb5, 0xeb, 0x52, 0xbd, + 0xc9, 0xad, 0x8e, 0x5a, 0x48, 0x0a, 0xb7, 0x80, + 0xca, 0x93, 0x20, 0xe4, 0x43, 0x60, 0xb1, 0xfe, + 0x37, 0xe0, 0x3f, 0x2f, 0x7a, 0xd7, 0xde, 0x01 }, + { 0xee, 0xdd, 0xb7, 0xc0, 0xdb, 0x6e, 0x30, 0xab, + 0xe6, 0x6d, 0x79, 0xe3, 0x27, 0x51, 0x1e, 0x61, + 0xfc, 0xeb, 0xbc, 0x29, 0xf1, 0x59, 0xb4, 0x0a, + 0x86, 0xb0, 0x46, 0xec, 0xf0, 0x51, 0x38, 0x23 }, + { 0x78, 0x7f, 0xc9, 0x34, 0x40, 0xc1, 0xec, 0x96, + 0xb5, 0xad, 0x01, 0xc1, 0x6c, 0xf7, 0x79, 0x16, + 0xa1, 0x40, 0x5f, 0x94, 0x26, 0x35, 0x6e, 0xc9, + 0x21, 0xd8, 0xdf, 0xf3, 0xea, 0x63, 0xb7, 0xe0 }, + { 0x7f, 0x0d, 0x5e, 0xab, 0x47, 0xee, 0xfd, 0xa6, + 0x96, 0xc0, 0xbf, 0x0f, 0xbf, 0x86, 0xab, 0x21, + 0x6f, 0xce, 0x46, 0x1e, 0x93, 0x03, 0xab, 0xa6, + 0xac, 0x37, 0x41, 0x20, 0xe8, 0x90, 0xe8, 0xdf }, + { 0xb6, 0x80, 0x04, 0xb4, 0x2f, 0x14, 0xad, 0x02, + 0x9f, 0x4c, 0x2e, 0x03, 0xb1, 0xd5, 0xeb, 0x76, + 0xd5, 0x71, 0x60, 0xe2, 0x64, 0x76, 0xd2, 0x11, + 0x31, 0xbe, 0xf2, 0x0a, 0xda, 0x7d, 0x27, 0xf4 }, + { 0xb0, 0xc4, 0xeb, 0x18, 0xae, 0x25, 0x0b, 0x51, + 0xa4, 0x13, 0x82, 0xea, 0xd9, 0x2d, 0x0d, 0xc7, + 0x45, 0x5f, 0x93, 0x79, 0xfc, 0x98, 0x84, 0x42, + 0x8e, 0x47, 0x70, 0x60, 0x8d, 0xb0, 0xfa, 0xec }, + { 0xf9, 0x2b, 0x7a, 0x87, 0x0c, 0x05, 0x9f, 0x4d, + 0x46, 0x46, 0x4c, 0x82, 0x4e, 0xc9, 0x63, 0x55, + 0x14, 0x0b, 0xdc, 0xe6, 0x81, 0x32, 0x2c, 0xc3, + 0xa9, 0x92, 0xff, 0x10, 0x3e, 0x3f, 0xea, 0x52 }, + { 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98, + 0xcc, 0x52, 0x5d, 0x4c, 0x4e, 0x14, 0x6e, 0xde, + 0xb3, 0x71, 0x26, 0x5f, 0xba, 0x19, 0x13, 0x3a, + 0x2c, 0x3d, 0x21, 0x59, 0x29, 0x8a, 0x17, 0x42 }, + { 0xf6, 0x62, 0x0e, 0x68, 0xd3, 0x7f, 0xb2, 0xaf, + 0x50, 0x00, 0xfc, 0x28, 0xe2, 0x3b, 0x83, 0x22, + 0x97, 0xec, 0xd8, 0xbc, 0xe9, 0x9e, 0x8b, 0xe4, + 0xd0, 0x4e, 0x85, 0x30, 0x9e, 0x3d, 0x33, 0x74 }, + { 0x53, 0x16, 0xa2, 0x79, 0x69, 0xd7, 0xfe, 0x04, + 0xff, 0x27, 0xb2, 0x83, 0x96, 0x1b, 0xff, 0xc3, + 0xbf, 0x5d, 0xfb, 0x32, 0xfb, 0x6a, 0x89, 0xd1, + 0x01, 0xc6, 0xc3, 0xb1, 0x93, 0x7c, 0x28, 0x71 }, + { 0x81, 0xd1, 0x66, 0x4f, 0xdf, 0x3c, 0xb3, 0x3c, + 0x24, 0xee, 0xba, 0xc0, 0xbd, 0x64, 0x24, 0x4b, + 0x77, 0xc4, 0xab, 0xea, 0x90, 0xbb, 0xe8, 0xb5, + 0xee, 0x0b, 0x2a, 0xaf, 0xcf, 0x2d, 0x6a, 0x53 }, + { 0x34, 0x57, 0x82, 0xf2, 0x95, 0xb0, 0x88, 0x03, + 0x52, 0xe9, 0x24, 0xa0, 0x46, 0x7b, 0x5f, 0xbc, + 0x3e, 0x8f, 0x3b, 0xfb, 0xc3, 0xc7, 0xe4, 0x8b, + 0x67, 0x09, 0x1f, 0xb5, 0xe8, 0x0a, 0x94, 0x42 }, + { 0x79, 0x41, 0x11, 0xea, 0x6c, 0xd6, 0x5e, 0x31, + 0x1f, 0x74, 0xee, 0x41, 0xd4, 0x76, 0xcb, 0x63, + 0x2c, 0xe1, 0xe4, 0xb0, 0x51, 0xdc, 0x1d, 0x9e, + 0x9d, 0x06, 0x1a, 0x19, 0xe1, 0xd0, 0xbb, 0x49 }, + { 0x2a, 0x85, 0xda, 0xf6, 0x13, 0x88, 0x16, 0xb9, + 0x9b, 0xf8, 0xd0, 0x8b, 0xa2, 0x11, 0x4b, 0x7a, + 0xb0, 0x79, 0x75, 0xa7, 0x84, 0x20, 0xc1, 0xa3, + 0xb0, 0x6a, 0x77, 0x7c, 0x22, 0xdd, 0x8b, 0xcb }, + { 0x89, 0xb0, 0xd5, 0xf2, 0x89, 0xec, 0x16, 0x40, + 0x1a, 0x06, 0x9a, 0x96, 0x0d, 0x0b, 0x09, 0x3e, + 0x62, 0x5d, 0xa3, 0xcf, 0x41, 0xee, 0x29, 0xb5, + 0x9b, 0x93, 0x0c, 0x58, 0x20, 0x14, 0x54, 0x55 }, + { 0xd0, 0xfd, 0xcb, 0x54, 0x39, 0x43, 0xfc, 0x27, + 0xd2, 0x08, 0x64, 0xf5, 0x21, 0x81, 0x47, 0x1b, + 0x94, 0x2c, 0xc7, 0x7c, 0xa6, 0x75, 0xbc, 0xb3, + 0x0d, 0xf3, 0x1d, 0x35, 0x8e, 0xf7, 0xb1, 0xeb }, + { 0xb1, 0x7e, 0xa8, 0xd7, 0x70, 0x63, 0xc7, 0x09, + 0xd4, 0xdc, 0x6b, 0x87, 0x94, 0x13, 0xc3, 0x43, + 0xe3, 0x79, 0x0e, 0x9e, 0x62, 0xca, 0x85, 0xb7, + 0x90, 0x0b, 0x08, 0x6f, 0x6b, 0x75, 0xc6, 0x72 }, + { 0xe7, 0x1a, 0x3e, 0x2c, 0x27, 0x4d, 0xb8, 0x42, + 0xd9, 0x21, 0x14, 0xf2, 0x17, 0xe2, 0xc0, 0xea, + 0xc8, 0xb4, 0x50, 0x93, 0xfd, 0xfd, 0x9d, 0xf4, + 0xca, 0x71, 0x62, 0x39, 0x48, 0x62, 0xd5, 0x01 }, + { 0xc0, 0x47, 0x67, 0x59, 0xab, 0x7a, 0xa3, 0x33, + 0x23, 0x4f, 0x6b, 0x44, 0xf5, 0xfd, 0x85, 0x83, + 0x90, 0xec, 0x23, 0x69, 0x4c, 0x62, 0x2c, 0xb9, + 0x86, 0xe7, 0x69, 0xc7, 0x8e, 0xdd, 0x73, 0x3e }, + { 0x9a, 0xb8, 0xea, 0xbb, 0x14, 0x16, 0x43, 0x4d, + 0x85, 0x39, 0x13, 0x41, 0xd5, 0x69, 0x93, 0xc5, + 0x54, 0x58, 0x16, 0x7d, 0x44, 0x18, 0xb1, 0x9a, + 0x0f, 0x2a, 0xd8, 0xb7, 0x9a, 0x83, 0xa7, 0x5b }, + { 0x79, 0x92, 0xd0, 0xbb, 0xb1, 0x5e, 0x23, 0x82, + 0x6f, 0x44, 0x3e, 0x00, 0x50, 0x5d, 0x68, 0xd3, + 0xed, 0x73, 0x72, 0x99, 0x5a, 0x5c, 0x3e, 0x49, + 0x86, 0x54, 0x10, 0x2f, 0xbc, 0xd0, 0x96, 0x4e }, + { 0xc0, 0x21, 0xb3, 0x00, 0x85, 0x15, 0x14, 0x35, + 0xdf, 0x33, 0xb0, 0x07, 0xcc, 0xec, 0xc6, 0x9d, + 0xf1, 0x26, 0x9f, 0x39, 0xba, 0x25, 0x09, 0x2b, + 0xed, 0x59, 0xd9, 0x32, 0xac, 0x0f, 0xdc, 0x28 }, + { 0x91, 0xa2, 0x5e, 0xc0, 0xec, 0x0d, 0x9a, 0x56, + 0x7f, 0x89, 0xc4, 0xbf, 0xe1, 0xa6, 0x5a, 0x0e, + 0x43, 0x2d, 0x07, 0x06, 0x4b, 0x41, 0x90, 0xe2, + 0x7d, 0xfb, 0x81, 0x90, 0x1f, 0xd3, 0x13, 0x9b }, + { 0x59, 0x50, 0xd3, 0x9a, 0x23, 0xe1, 0x54, 0x5f, + 0x30, 0x12, 0x70, 0xaa, 0x1a, 0x12, 0xf2, 0xe6, + 0xc4, 0x53, 0x77, 0x6e, 0x4d, 0x63, 0x55, 0xde, + 0x42, 0x5c, 0xc1, 0x53, 0xf9, 0x81, 0x88, 0x67 }, + { 0xd7, 0x9f, 0x14, 0x72, 0x0c, 0x61, 0x0a, 0xf1, + 0x79, 0xa3, 0x76, 0x5d, 0x4b, 0x7c, 0x09, 0x68, + 0xf9, 0x77, 0x96, 0x2d, 0xbf, 0x65, 0x5b, 0x52, + 0x12, 0x72, 0xb6, 0xf1, 0xe1, 0x94, 0x48, 0x8e }, + { 0xe9, 0x53, 0x1b, 0xfc, 0x8b, 0x02, 0x99, 0x5a, + 0xea, 0xa7, 0x5b, 0xa2, 0x70, 0x31, 0xfa, 0xdb, + 0xcb, 0xf4, 0xa0, 0xda, 0xb8, 0x96, 0x1d, 0x92, + 0x96, 0xcd, 0x7e, 0x84, 0xd2, 0x5d, 0x60, 0x06 }, + { 0x34, 0xe9, 0xc2, 0x6a, 0x01, 0xd7, 0xf1, 0x61, + 0x81, 0xb4, 0x54, 0xa9, 0xd1, 0x62, 0x3c, 0x23, + 0x3c, 0xb9, 0x9d, 0x31, 0xc6, 0x94, 0x65, 0x6e, + 0x94, 0x13, 0xac, 0xa3, 0xe9, 0x18, 0x69, 0x2f }, + { 0xd9, 0xd7, 0x42, 0x2f, 0x43, 0x7b, 0xd4, 0x39, + 0xdd, 0xd4, 0xd8, 0x83, 0xda, 0xe2, 0xa0, 0x83, + 0x50, 0x17, 0x34, 0x14, 0xbe, 0x78, 0x15, 0x51, + 0x33, 0xff, 0xf1, 0x96, 0x4c, 0x3d, 0x79, 0x72 }, + { 0x4a, 0xee, 0x0c, 0x7a, 0xaf, 0x07, 0x54, 0x14, + 0xff, 0x17, 0x93, 0xea, 0xd7, 0xea, 0xca, 0x60, + 0x17, 0x75, 0xc6, 0x15, 0xdb, 0xd6, 0x0b, 0x64, + 0x0b, 0x0a, 0x9f, 0x0c, 0xe5, 0x05, 0xd4, 0x35 }, + { 0x6b, 0xfd, 0xd1, 0x54, 0x59, 0xc8, 0x3b, 0x99, + 0xf0, 0x96, 0xbf, 0xb4, 0x9e, 0xe8, 0x7b, 0x06, + 0x3d, 0x69, 0xc1, 0x97, 0x4c, 0x69, 0x28, 0xac, + 0xfc, 0xfb, 0x40, 0x99, 0xf8, 0xc4, 0xef, 0x67 }, + { 0x9f, 0xd1, 0xc4, 0x08, 0xfd, 0x75, 0xc3, 0x36, + 0x19, 0x3a, 0x2a, 0x14, 0xd9, 0x4f, 0x6a, 0xf5, + 0xad, 0xf0, 0x50, 0xb8, 0x03, 0x87, 0xb4, 0xb0, + 0x10, 0xfb, 0x29, 0xf4, 0xcc, 0x72, 0x70, 0x7c }, + { 0x13, 0xc8, 0x84, 0x80, 0xa5, 0xd0, 0x0d, 0x6c, + 0x8c, 0x7a, 0xd2, 0x11, 0x0d, 0x76, 0xa8, 0x2d, + 0x9b, 0x70, 0xf4, 0xfa, 0x66, 0x96, 0xd4, 0xe5, + 0xdd, 0x42, 0xa0, 0x66, 0xdc, 0xaf, 0x99, 0x20 }, + { 0x82, 0x0e, 0x72, 0x5e, 0xe2, 0x5f, 0xe8, 0xfd, + 0x3a, 0x8d, 0x5a, 0xbe, 0x4c, 0x46, 0xc3, 0xba, + 0x88, 0x9d, 0xe6, 0xfa, 0x91, 0x91, 0xaa, 0x22, + 0xba, 0x67, 0xd5, 0x70, 0x54, 0x21, 0x54, 0x2b }, + { 0x32, 0xd9, 0x3a, 0x0e, 0xb0, 0x2f, 0x42, 0xfb, + 0xbc, 0xaf, 0x2b, 0xad, 0x00, 0x85, 0xb2, 0x82, + 0xe4, 0x60, 0x46, 0xa4, 0xdf, 0x7a, 0xd1, 0x06, + 0x57, 0xc9, 0xd6, 0x47, 0x63, 0x75, 0xb9, 0x3e }, + { 0xad, 0xc5, 0x18, 0x79, 0x05, 0xb1, 0x66, 0x9c, + 0xd8, 0xec, 0x9c, 0x72, 0x1e, 0x19, 0x53, 0x78, + 0x6b, 0x9d, 0x89, 0xa9, 0xba, 0xe3, 0x07, 0x80, + 0xf1, 0xe1, 0xea, 0xb2, 0x4a, 0x00, 0x52, 0x3c }, + { 0xe9, 0x07, 0x56, 0xff, 0x7f, 0x9a, 0xd8, 0x10, + 0xb2, 0x39, 0xa1, 0x0c, 0xed, 0x2c, 0xf9, 0xb2, + 0x28, 0x43, 0x54, 0xc1, 0xf8, 0xc7, 0xe0, 0xac, + 0xcc, 0x24, 0x61, 0xdc, 0x79, 0x6d, 0x6e, 0x89 }, + { 0x12, 0x51, 0xf7, 0x6e, 0x56, 0x97, 0x84, 0x81, + 0x87, 0x53, 0x59, 0x80, 0x1d, 0xb5, 0x89, 0xa0, + 0xb2, 0x2f, 0x86, 0xd8, 0xd6, 0x34, 0xdc, 0x04, + 0x50, 0x6f, 0x32, 0x2e, 0xd7, 0x8f, 0x17, 0xe8 }, + { 0x3a, 0xfa, 0x89, 0x9f, 0xd9, 0x80, 0xe7, 0x3e, + 0xcb, 0x7f, 0x4d, 0x8b, 0x8f, 0x29, 0x1d, 0xc9, + 0xaf, 0x79, 0x6b, 0xc6, 0x5d, 0x27, 0xf9, 0x74, + 0xc6, 0xf1, 0x93, 0xc9, 0x19, 0x1a, 0x09, 0xfd }, + { 0xaa, 0x30, 0x5b, 0xe2, 0x6e, 0x5d, 0xed, 0xdc, + 0x3c, 0x10, 0x10, 0xcb, 0xc2, 0x13, 0xf9, 0x5f, + 0x05, 0x1c, 0x78, 0x5c, 0x5b, 0x43, 0x1e, 0x6a, + 0x7c, 0xd0, 0x48, 0xf1, 0x61, 0x78, 0x75, 0x28 }, + { 0x8e, 0xa1, 0x88, 0x4f, 0xf3, 0x2e, 0x9d, 0x10, + 0xf0, 0x39, 0xb4, 0x07, 0xd0, 0xd4, 0x4e, 0x7e, + 0x67, 0x0a, 0xbd, 0x88, 0x4a, 0xee, 0xe0, 0xfb, + 0x75, 0x7a, 0xe9, 0x4e, 0xaa, 0x97, 0x37, 0x3d }, + { 0xd4, 0x82, 0xb2, 0x15, 0x5d, 0x4d, 0xec, 0x6b, + 0x47, 0x36, 0xa1, 0xf1, 0x61, 0x7b, 0x53, 0xaa, + 0xa3, 0x73, 0x10, 0x27, 0x7d, 0x3f, 0xef, 0x0c, + 0x37, 0xad, 0x41, 0x76, 0x8f, 0xc2, 0x35, 0xb4 }, + { 0x4d, 0x41, 0x39, 0x71, 0x38, 0x7e, 0x7a, 0x88, + 0x98, 0xa8, 0xdc, 0x2a, 0x27, 0x50, 0x07, 0x78, + 0x53, 0x9e, 0xa2, 0x14, 0xa2, 0xdf, 0xe9, 0xb3, + 0xd7, 0xe8, 0xeb, 0xdc, 0xe5, 0xcf, 0x3d, 0xb3 }, + { 0x69, 0x6e, 0x5d, 0x46, 0xe6, 0xc5, 0x7e, 0x87, + 0x96, 0xe4, 0x73, 0x5d, 0x08, 0x91, 0x6e, 0x0b, + 0x79, 0x29, 0xb3, 0xcf, 0x29, 0x8c, 0x29, 0x6d, + 0x22, 0xe9, 0xd3, 0x01, 0x96, 0x53, 0x37, 0x1c }, + { 0x1f, 0x56, 0x47, 0xc1, 0xd3, 0xb0, 0x88, 0x22, + 0x88, 0x85, 0x86, 0x5c, 0x89, 0x40, 0x90, 0x8b, + 0xf4, 0x0d, 0x1a, 0x82, 0x72, 0x82, 0x19, 0x73, + 0xb1, 0x60, 0x00, 0x8e, 0x7a, 0x3c, 0xe2, 0xeb }, + { 0xb6, 0xe7, 0x6c, 0x33, 0x0f, 0x02, 0x1a, 0x5b, + 0xda, 0x65, 0x87, 0x50, 0x10, 0xb0, 0xed, 0xf0, + 0x91, 0x26, 0xc0, 0xf5, 0x10, 0xea, 0x84, 0x90, + 0x48, 0x19, 0x20, 0x03, 0xae, 0xf4, 0xc6, 0x1c }, + { 0x3c, 0xd9, 0x52, 0xa0, 0xbe, 0xad, 0xa4, 0x1a, + 0xbb, 0x42, 0x4c, 0xe4, 0x7f, 0x94, 0xb4, 0x2b, + 0xe6, 0x4e, 0x1f, 0xfb, 0x0f, 0xd0, 0x78, 0x22, + 0x76, 0x80, 0x79, 0x46, 0xd0, 0xd0, 0xbc, 0x55 }, + { 0x98, 0xd9, 0x26, 0x77, 0x43, 0x9b, 0x41, 0xb7, + 0xbb, 0x51, 0x33, 0x12, 0xaf, 0xb9, 0x2b, 0xcc, + 0x8e, 0xe9, 0x68, 0xb2, 0xe3, 0xb2, 0x38, 0xce, + 0xcb, 0x9b, 0x0f, 0x34, 0xc9, 0xbb, 0x63, 0xd0 }, + { 0xec, 0xbc, 0xa2, 0xcf, 0x08, 0xae, 0x57, 0xd5, + 0x17, 0xad, 0x16, 0x15, 0x8a, 0x32, 0xbf, 0xa7, + 0xdc, 0x03, 0x82, 0xea, 0xed, 0xa1, 0x28, 0xe9, + 0x18, 0x86, 0x73, 0x4c, 0x24, 0xa0, 0xb2, 0x9d }, + { 0x94, 0x2c, 0xc7, 0xc0, 0xb5, 0x2e, 0x2b, 0x16, + 0xa4, 0xb8, 0x9f, 0xa4, 0xfc, 0x7e, 0x0b, 0xf6, + 0x09, 0xe2, 0x9a, 0x08, 0xc1, 0xa8, 0x54, 0x34, + 0x52, 0xb7, 0x7c, 0x7b, 0xfd, 0x11, 0xbb, 0x28 }, + { 0x8a, 0x06, 0x5d, 0x8b, 0x61, 0xa0, 0xdf, 0xfb, + 0x17, 0x0d, 0x56, 0x27, 0x73, 0x5a, 0x76, 0xb0, + 0xe9, 0x50, 0x60, 0x37, 0x80, 0x8c, 0xba, 0x16, + 0xc3, 0x45, 0x00, 0x7c, 0x9f, 0x79, 0xcf, 0x8f }, + { 0x1b, 0x9f, 0xa1, 0x97, 0x14, 0x65, 0x9c, 0x78, + 0xff, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36, + 0x10, 0x29, 0xac, 0x80, 0x2b, 0x1c, 0xbc, 0xd5, + 0x4e, 0x40, 0x8b, 0xd8, 0x72, 0x87, 0xf8, 0x1f }, + { 0x8d, 0xab, 0x07, 0x1b, 0xcd, 0x6c, 0x72, 0x92, + 0xa9, 0xef, 0x72, 0x7b, 0x4a, 0xe0, 0xd8, 0x67, + 0x13, 0x30, 0x1d, 0xa8, 0x61, 0x8d, 0x9a, 0x48, + 0xad, 0xce, 0x55, 0xf3, 0x03, 0xa8, 0x69, 0xa1 }, + { 0x82, 0x53, 0xe3, 0xe7, 0xc7, 0xb6, 0x84, 0xb9, + 0xcb, 0x2b, 0xeb, 0x01, 0x4c, 0xe3, 0x30, 0xff, + 0x3d, 0x99, 0xd1, 0x7a, 0xbb, 0xdb, 0xab, 0xe4, + 0xf4, 0xd6, 0x74, 0xde, 0xd5, 0x3f, 0xfc, 0x6b }, + { 0xf1, 0x95, 0xf3, 0x21, 0xe9, 0xe3, 0xd6, 0xbd, + 0x7d, 0x07, 0x45, 0x04, 0xdd, 0x2a, 0xb0, 0xe6, + 0x24, 0x1f, 0x92, 0xe7, 0x84, 0xb1, 0xaa, 0x27, + 0x1f, 0xf6, 0x48, 0xb1, 0xca, 0xb6, 0xd7, 0xf6 }, + { 0x27, 0xe4, 0xcc, 0x72, 0x09, 0x0f, 0x24, 0x12, + 0x66, 0x47, 0x6a, 0x7c, 0x09, 0x49, 0x5f, 0x2d, + 0xb1, 0x53, 0xd5, 0xbc, 0xbd, 0x76, 0x19, 0x03, + 0xef, 0x79, 0x27, 0x5e, 0xc5, 0x6b, 0x2e, 0xd8 }, + { 0x89, 0x9c, 0x24, 0x05, 0x78, 0x8e, 0x25, 0xb9, + 0x9a, 0x18, 0x46, 0x35, 0x5e, 0x64, 0x6d, 0x77, + 0xcf, 0x40, 0x00, 0x83, 0x41, 0x5f, 0x7d, 0xc5, + 0xaf, 0xe6, 0x9d, 0x6e, 0x17, 0xc0, 0x00, 0x23 }, + { 0xa5, 0x9b, 0x78, 0xc4, 0x90, 0x57, 0x44, 0x07, + 0x6b, 0xfe, 0xe8, 0x94, 0xde, 0x70, 0x7d, 0x4f, + 0x12, 0x0b, 0x5c, 0x68, 0x93, 0xea, 0x04, 0x00, + 0x29, 0x7d, 0x0b, 0xb8, 0x34, 0x72, 0x76, 0x32 }, + { 0x59, 0xdc, 0x78, 0xb1, 0x05, 0x64, 0x97, 0x07, + 0xa2, 0xbb, 0x44, 0x19, 0xc4, 0x8f, 0x00, 0x54, + 0x00, 0xd3, 0x97, 0x3d, 0xe3, 0x73, 0x66, 0x10, + 0x23, 0x04, 0x35, 0xb1, 0x04, 0x24, 0xb2, 0x4f }, + { 0xc0, 0x14, 0x9d, 0x1d, 0x7e, 0x7a, 0x63, 0x53, + 0xa6, 0xd9, 0x06, 0xef, 0xe7, 0x28, 0xf2, 0xf3, + 0x29, 0xfe, 0x14, 0xa4, 0x14, 0x9a, 0x3e, 0xa7, + 0x76, 0x09, 0xbc, 0x42, 0xb9, 0x75, 0xdd, 0xfa }, + { 0xa3, 0x2f, 0x24, 0x14, 0x74, 0xa6, 0xc1, 0x69, + 0x32, 0xe9, 0x24, 0x3b, 0xe0, 0xcf, 0x09, 0xbc, + 0xdc, 0x7e, 0x0c, 0xa0, 0xe7, 0xa6, 0xa1, 0xb9, + 0xb1, 0xa0, 0xf0, 0x1e, 0x41, 0x50, 0x23, 0x77 }, + { 0xb2, 0x39, 0xb2, 0xe4, 0xf8, 0x18, 0x41, 0x36, + 0x1c, 0x13, 0x39, 0xf6, 0x8e, 0x2c, 0x35, 0x9f, + 0x92, 0x9a, 0xf9, 0xad, 0x9f, 0x34, 0xe0, 0x1a, + 0xab, 0x46, 0x31, 0xad, 0x6d, 0x55, 0x00, 0xb0 }, + { 0x85, 0xfb, 0x41, 0x9c, 0x70, 0x02, 0xa3, 0xe0, + 0xb4, 0xb6, 0xea, 0x09, 0x3b, 0x4c, 0x1a, 0xc6, + 0x93, 0x66, 0x45, 0xb6, 0x5d, 0xac, 0x5a, 0xc1, + 0x5a, 0x85, 0x28, 0xb7, 0xb9, 0x4c, 0x17, 0x54 }, + { 0x96, 0x19, 0x72, 0x06, 0x25, 0xf1, 0x90, 0xb9, + 0x3a, 0x3f, 0xad, 0x18, 0x6a, 0xb3, 0x14, 0x18, + 0x96, 0x33, 0xc0, 0xd3, 0xa0, 0x1e, 0x6f, 0x9b, + 0xc8, 0xc4, 0xa8, 0xf8, 0x2f, 0x38, 0x3d, 0xbf }, + { 0x7d, 0x62, 0x0d, 0x90, 0xfe, 0x69, 0xfa, 0x46, + 0x9a, 0x65, 0x38, 0x38, 0x89, 0x70, 0xa1, 0xaa, + 0x09, 0xbb, 0x48, 0xa2, 0xd5, 0x9b, 0x34, 0x7b, + 0x97, 0xe8, 0xce, 0x71, 0xf4, 0x8c, 0x7f, 0x46 }, + { 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xfb, 0x37, + 0xc7, 0x5b, 0xba, 0xcd, 0x97, 0x9c, 0x5f, 0xf6, + 0xf2, 0x0a, 0x55, 0x6b, 0xf8, 0x87, 0x9c, 0xc7, + 0x29, 0x24, 0x85, 0x5d, 0xf9, 0xb8, 0x24, 0x0e }, + { 0x16, 0xb1, 0x8a, 0xb3, 0x14, 0x35, 0x9c, 0x2b, + 0x83, 0x3c, 0x1c, 0x69, 0x86, 0xd4, 0x8c, 0x55, + 0xa9, 0xfc, 0x97, 0xcd, 0xe9, 0xa3, 0xc1, 0xf1, + 0x0a, 0x31, 0x77, 0x14, 0x0f, 0x73, 0xf7, 0x38 }, + { 0x8c, 0xbb, 0xdd, 0x14, 0xbc, 0x33, 0xf0, 0x4c, + 0xf4, 0x58, 0x13, 0xe4, 0xa1, 0x53, 0xa2, 0x73, + 0xd3, 0x6a, 0xda, 0xd5, 0xce, 0x71, 0xf4, 0x99, + 0xee, 0xb8, 0x7f, 0xb8, 0xac, 0x63, 0xb7, 0x29 }, + { 0x69, 0xc9, 0xa4, 0x98, 0xdb, 0x17, 0x4e, 0xca, + 0xef, 0xcc, 0x5a, 0x3a, 0xc9, 0xfd, 0xed, 0xf0, + 0xf8, 0x13, 0xa5, 0xbe, 0xc7, 0x27, 0xf1, 0xe7, + 0x75, 0xba, 0xbd, 0xec, 0x77, 0x18, 0x81, 0x6e }, + { 0xb4, 0x62, 0xc3, 0xbe, 0x40, 0x44, 0x8f, 0x1d, + 0x4f, 0x80, 0x62, 0x62, 0x54, 0xe5, 0x35, 0xb0, + 0x8b, 0xc9, 0xcd, 0xcf, 0xf5, 0x99, 0xa7, 0x68, + 0x57, 0x8d, 0x4b, 0x28, 0x81, 0xa8, 0xe3, 0xf0 }, + { 0x55, 0x3e, 0x9d, 0x9c, 0x5f, 0x36, 0x0a, 0xc0, + 0xb7, 0x4a, 0x7d, 0x44, 0xe5, 0xa3, 0x91, 0xda, + 0xd4, 0xce, 0xd0, 0x3e, 0x0c, 0x24, 0x18, 0x3b, + 0x7e, 0x8e, 0xca, 0xbd, 0xf1, 0x71, 0x5a, 0x64 }, + { 0x7a, 0x7c, 0x55, 0xa5, 0x6f, 0xa9, 0xae, 0x51, + 0xe6, 0x55, 0xe0, 0x19, 0x75, 0xd8, 0xa6, 0xff, + 0x4a, 0xe9, 0xe4, 0xb4, 0x86, 0xfc, 0xbe, 0x4e, + 0xac, 0x04, 0x45, 0x88, 0xf2, 0x45, 0xeb, 0xea }, + { 0x2a, 0xfd, 0xf3, 0xc8, 0x2a, 0xbc, 0x48, 0x67, + 0xf5, 0xde, 0x11, 0x12, 0x86, 0xc2, 0xb3, 0xbe, + 0x7d, 0x6e, 0x48, 0x65, 0x7b, 0xa9, 0x23, 0xcf, + 0xbf, 0x10, 0x1a, 0x6d, 0xfc, 0xf9, 0xdb, 0x9a }, + { 0x41, 0x03, 0x7d, 0x2e, 0xdc, 0xdc, 0xe0, 0xc4, + 0x9b, 0x7f, 0xb4, 0xa6, 0xaa, 0x09, 0x99, 0xca, + 0x66, 0x97, 0x6c, 0x74, 0x83, 0xaf, 0xe6, 0x31, + 0xd4, 0xed, 0xa2, 0x83, 0x14, 0x4f, 0x6d, 0xfc }, + { 0xc4, 0x46, 0x6f, 0x84, 0x97, 0xca, 0x2e, 0xeb, + 0x45, 0x83, 0xa0, 0xb0, 0x8e, 0x9d, 0x9a, 0xc7, + 0x43, 0x95, 0x70, 0x9f, 0xda, 0x10, 0x9d, 0x24, + 0xf2, 0xe4, 0x46, 0x21, 0x96, 0x77, 0x9c, 0x5d }, + { 0x75, 0xf6, 0x09, 0x33, 0x8a, 0xa6, 0x7d, 0x96, + 0x9a, 0x2a, 0xe2, 0xa2, 0x36, 0x2b, 0x2d, 0xa9, + 0xd7, 0x7c, 0x69, 0x5d, 0xfd, 0x1d, 0xf7, 0x22, + 0x4a, 0x69, 0x01, 0xdb, 0x93, 0x2c, 0x33, 0x64 }, + { 0x68, 0x60, 0x6c, 0xeb, 0x98, 0x9d, 0x54, 0x88, + 0xfc, 0x7c, 0xf6, 0x49, 0xf3, 0xd7, 0xc2, 0x72, + 0xef, 0x05, 0x5d, 0xa1, 0xa9, 0x3f, 0xae, 0xcd, + 0x55, 0xfe, 0x06, 0xf6, 0x96, 0x70, 0x98, 0xca }, + { 0x44, 0x34, 0x6b, 0xde, 0xb7, 0xe0, 0x52, 0xf6, + 0x25, 0x50, 0x48, 0xf0, 0xd9, 0xb4, 0x2c, 0x42, + 0x5b, 0xab, 0x9c, 0x3d, 0xd2, 0x41, 0x68, 0x21, + 0x2c, 0x3e, 0xcf, 0x1e, 0xbf, 0x34, 0xe6, 0xae }, + { 0x8e, 0x9c, 0xf6, 0xe1, 0xf3, 0x66, 0x47, 0x1f, + 0x2a, 0xc7, 0xd2, 0xee, 0x9b, 0x5e, 0x62, 0x66, + 0xfd, 0xa7, 0x1f, 0x8f, 0x2e, 0x41, 0x09, 0xf2, + 0x23, 0x7e, 0xd5, 0xf8, 0x81, 0x3f, 0xc7, 0x18 }, + { 0x84, 0xbb, 0xeb, 0x84, 0x06, 0xd2, 0x50, 0x95, + 0x1f, 0x8c, 0x1b, 0x3e, 0x86, 0xa7, 0xc0, 0x10, + 0x08, 0x29, 0x21, 0x83, 0x3d, 0xfd, 0x95, 0x55, + 0xa2, 0xf9, 0x09, 0xb1, 0x08, 0x6e, 0xb4, 0xb8 }, + { 0xee, 0x66, 0x6f, 0x3e, 0xef, 0x0f, 0x7e, 0x2a, + 0x9c, 0x22, 0x29, 0x58, 0xc9, 0x7e, 0xaf, 0x35, + 0xf5, 0x1c, 0xed, 0x39, 0x3d, 0x71, 0x44, 0x85, + 0xab, 0x09, 0xa0, 0x69, 0x34, 0x0f, 0xdf, 0x88 }, + { 0xc1, 0x53, 0xd3, 0x4a, 0x65, 0xc4, 0x7b, 0x4a, + 0x62, 0xc5, 0xca, 0xcf, 0x24, 0x01, 0x09, 0x75, + 0xd0, 0x35, 0x6b, 0x2f, 0x32, 0xc8, 0xf5, 0xda, + 0x53, 0x0d, 0x33, 0x88, 0x16, 0xad, 0x5d, 0xe6 }, + { 0x9f, 0xc5, 0x45, 0x01, 0x09, 0xe1, 0xb7, 0x79, + 0xf6, 0xc7, 0xae, 0x79, 0xd5, 0x6c, 0x27, 0x63, + 0x5c, 0x8d, 0xd4, 0x26, 0xc5, 0xa9, 0xd5, 0x4e, + 0x25, 0x78, 0xdb, 0x98, 0x9b, 0x8c, 0x3b, 0x4e }, + { 0xd1, 0x2b, 0xf3, 0x73, 0x2e, 0xf4, 0xaf, 0x5c, + 0x22, 0xfa, 0x90, 0x35, 0x6a, 0xf8, 0xfc, 0x50, + 0xfc, 0xb4, 0x0f, 0x8f, 0x2e, 0xa5, 0xc8, 0x59, + 0x47, 0x37, 0xa3, 0xb3, 0xd5, 0xab, 0xdb, 0xd7 }, + { 0x11, 0x03, 0x0b, 0x92, 0x89, 0xbb, 0xa5, 0xaf, + 0x65, 0x26, 0x06, 0x72, 0xab, 0x6f, 0xee, 0x88, + 0xb8, 0x74, 0x20, 0xac, 0xef, 0x4a, 0x17, 0x89, + 0xa2, 0x07, 0x3b, 0x7e, 0xc2, 0xf2, 0xa0, 0x9e }, + { 0x69, 0xcb, 0x19, 0x2b, 0x84, 0x44, 0x00, 0x5c, + 0x8c, 0x0c, 0xeb, 0x12, 0xc8, 0x46, 0x86, 0x07, + 0x68, 0x18, 0x8c, 0xda, 0x0a, 0xec, 0x27, 0xa9, + 0xc8, 0xa5, 0x5c, 0xde, 0xe2, 0x12, 0x36, 0x32 }, + { 0xdb, 0x44, 0x4c, 0x15, 0x59, 0x7b, 0x5f, 0x1a, + 0x03, 0xd1, 0xf9, 0xed, 0xd1, 0x6e, 0x4a, 0x9f, + 0x43, 0xa6, 0x67, 0xcc, 0x27, 0x51, 0x75, 0xdf, + 0xa2, 0xb7, 0x04, 0xe3, 0xbb, 0x1a, 0x9b, 0x83 }, + { 0x3f, 0xb7, 0x35, 0x06, 0x1a, 0xbc, 0x51, 0x9d, + 0xfe, 0x97, 0x9e, 0x54, 0xc1, 0xee, 0x5b, 0xfa, + 0xd0, 0xa9, 0xd8, 0x58, 0xb3, 0x31, 0x5b, 0xad, + 0x34, 0xbd, 0xe9, 0x99, 0xef, 0xd7, 0x24, 0xdd } +}; + +static bool __init blake2s_selftest(void) +{ + u8 key[BLAKE2S_KEY_SIZE]; + u8 buf[ARRAY_SIZE(blake2s_testvecs)]; + u8 hash[BLAKE2S_HASH_SIZE]; + size_t i; + bool success = true; + + for (i = 0; i < BLAKE2S_KEY_SIZE; ++i) + key[i] = (u8)i; + + for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) + buf[i] = (u8)i; + + for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) { + blake2s(hash, buf, key, BLAKE2S_HASH_SIZE, i, BLAKE2S_KEY_SIZE); + if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_HASH_SIZE)) { + pr_err("blake2s keyed self-test %zu: FAIL\n", i + 1); + success = false; + } + } + + for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) { + blake2s(hash, buf, NULL, BLAKE2S_HASH_SIZE, i, 0); + if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_HASH_SIZE)) { + pr_err("blake2s unkeyed self-test %zu: FAIL\n", i + i); + success = false; + } + } + return success; +} diff --git a/net/wireguard/crypto/zinc/selftest/chacha20.c b/net/wireguard/crypto/zinc/selftest/chacha20.c new file mode 100644 index 000000000000..1a2390aaf6c2 --- /dev/null +++ b/net/wireguard/crypto/zinc/selftest/chacha20.c @@ -0,0 +1,2698 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +struct chacha20_testvec { + const u8 *input, *output, *key; + u64 nonce; + size_t ilen; +}; + +struct hchacha20_testvec { + u8 key[HCHACHA20_KEY_SIZE]; + u8 nonce[HCHACHA20_NONCE_SIZE]; + u8 output[CHACHA20_KEY_SIZE]; +}; + +/* These test vectors are generated by reference implementations and are + * designed to check chacha20 implementation block handling, as well as from + * the draft-arciszewski-xchacha-01 document. + */ + +static const u8 input01[] __initconst = { }; +static const u8 output01[] __initconst = { }; +static const u8 key01[] __initconst = { + 0x09, 0xf4, 0xe8, 0x57, 0x10, 0xf2, 0x12, 0xc3, + 0xc6, 0x91, 0xc4, 0x09, 0x97, 0x46, 0xef, 0xfe, + 0x02, 0x00, 0xe4, 0x5c, 0x82, 0xed, 0x16, 0xf3, + 0x32, 0xbe, 0xec, 0x7a, 0xe6, 0x68, 0x12, 0x26 +}; +enum { nonce01 = 0x3834e2afca3c66d3ULL }; + +static const u8 input02[] __initconst = { + 0x9d +}; +static const u8 output02[] __initconst = { + 0x94 +}; +static const u8 key02[] __initconst = { + 0x8c, 0x01, 0xac, 0xaf, 0x62, 0x63, 0x56, 0x7a, + 0xad, 0x23, 0x4c, 0x58, 0x29, 0x29, 0xbe, 0xab, + 0xe9, 0xf8, 0xdf, 0x6c, 0x8c, 0x74, 0x4d, 0x7d, + 0x13, 0x94, 0x10, 0x02, 0x3d, 0x8e, 0x9f, 0x94 +}; +enum { nonce02 = 0x5d1b3bfdedd9f73aULL }; + +static const u8 input03[] __initconst = { + 0x04, 0x16 +}; +static const u8 output03[] __initconst = { + 0x92, 0x07 +}; +static const u8 key03[] __initconst = { + 0x22, 0x0c, 0x79, 0x2c, 0x38, 0x51, 0xbe, 0x99, + 0xa9, 0x59, 0x24, 0x50, 0xef, 0x87, 0x38, 0xa6, + 0xa0, 0x97, 0x20, 0xcb, 0xb4, 0x0c, 0x94, 0x67, + 0x1f, 0x98, 0xdc, 0xc4, 0x83, 0xbc, 0x35, 0x4d +}; +enum { nonce03 = 0x7a3353ad720a3e2eULL }; + +static const u8 input04[] __initconst = { + 0xc7, 0xcc, 0xd0 +}; +static const u8 output04[] __initconst = { + 0xd8, 0x41, 0x80 +}; +static const u8 key04[] __initconst = { + 0x81, 0x5e, 0x12, 0x01, 0xc4, 0x36, 0x15, 0x03, + 0x11, 0xa0, 0xe9, 0x86, 0xbb, 0x5a, 0xdc, 0x45, + 0x7d, 0x5e, 0x98, 0xf8, 0x06, 0x76, 0x1c, 0xec, + 0xc0, 0xf7, 0xca, 0x4e, 0x99, 0xd9, 0x42, 0x38 +}; +enum { nonce04 = 0x6816e2fc66176da2ULL }; + +static const u8 input05[] __initconst = { + 0x48, 0xf1, 0x31, 0x5f +}; +static const u8 output05[] __initconst = { + 0x48, 0xf7, 0x13, 0x67 +}; +static const u8 key05[] __initconst = { + 0x3f, 0xd6, 0xb6, 0x5e, 0x2f, 0xda, 0x82, 0x39, + 0x97, 0x06, 0xd3, 0x62, 0x4f, 0xbd, 0xcb, 0x9b, + 0x1d, 0xe6, 0x4a, 0x76, 0xab, 0xdd, 0x14, 0x50, + 0x59, 0x21, 0xe3, 0xb2, 0xc7, 0x95, 0xbc, 0x45 +}; +enum { nonce05 = 0xc41a7490e228cc42ULL }; + +static const u8 input06[] __initconst = { + 0xae, 0xa2, 0x85, 0x1d, 0xc8 +}; +static const u8 output06[] __initconst = { + 0xfa, 0xff, 0x45, 0x6b, 0x6f +}; +static const u8 key06[] __initconst = { + 0x04, 0x8d, 0xea, 0x67, 0x20, 0x78, 0xfb, 0x8f, + 0x49, 0x80, 0x35, 0xb5, 0x7b, 0xe4, 0x31, 0x74, + 0x57, 0x43, 0x3a, 0x64, 0x64, 0xb9, 0xe6, 0x23, + 0x4d, 0xfe, 0xb8, 0x7b, 0x71, 0x4d, 0x9d, 0x21 +}; +enum { nonce06 = 0x251366db50b10903ULL }; + +static const u8 input07[] __initconst = { + 0x1a, 0x32, 0x85, 0xb6, 0xe8, 0x52 +}; +static const u8 output07[] __initconst = { + 0xd3, 0x5f, 0xf0, 0x07, 0x69, 0xec +}; +static const u8 key07[] __initconst = { + 0xbf, 0x2d, 0x42, 0x99, 0x97, 0x76, 0x04, 0xad, + 0xd3, 0x8f, 0x6e, 0x6a, 0x34, 0x85, 0xaf, 0x81, + 0xef, 0x36, 0x33, 0xd5, 0x43, 0xa2, 0xaa, 0x08, + 0x0f, 0x77, 0x42, 0x83, 0x58, 0xc5, 0x42, 0x2a +}; +enum { nonce07 = 0xe0796da17dba9b58ULL }; + +static const u8 input08[] __initconst = { + 0x40, 0xae, 0xcd, 0xe4, 0x3d, 0x22, 0xe0 +}; +static const u8 output08[] __initconst = { + 0xfd, 0x8a, 0x9f, 0x3d, 0x05, 0xc9, 0xd3 +}; +static const u8 key08[] __initconst = { + 0xdc, 0x3f, 0x41, 0xe3, 0x23, 0x2a, 0x8d, 0xf6, + 0x41, 0x2a, 0xa7, 0x66, 0x05, 0x68, 0xe4, 0x7b, + 0xc4, 0x58, 0xd6, 0xcc, 0xdf, 0x0d, 0xc6, 0x25, + 0x1b, 0x61, 0x32, 0x12, 0x4e, 0xf1, 0xe6, 0x29 +}; +enum { nonce08 = 0xb1d2536d9e159832ULL }; + +static const u8 input09[] __initconst = { + 0xba, 0x1d, 0x14, 0x16, 0x9f, 0x83, 0x67, 0x24 +}; +static const u8 output09[] __initconst = { + 0x7c, 0xe3, 0x78, 0x1d, 0xa2, 0xe7, 0xe9, 0x39 +}; +static const u8 key09[] __initconst = { + 0x17, 0x55, 0x90, 0x52, 0xa4, 0xce, 0x12, 0xae, + 0xd4, 0xfd, 0xd4, 0xfb, 0xd5, 0x18, 0x59, 0x50, + 0x4e, 0x51, 0x99, 0x32, 0x09, 0x31, 0xfc, 0xf7, + 0x27, 0x10, 0x8e, 0xa2, 0x4b, 0xa5, 0xf5, 0x62 +}; +enum { nonce09 = 0x495fc269536d003ULL }; + +static const u8 input10[] __initconst = { + 0x09, 0xfd, 0x3c, 0x0b, 0x3d, 0x0e, 0xf3, 0x9d, + 0x27 +}; +static const u8 output10[] __initconst = { + 0xdc, 0xe4, 0x33, 0x60, 0x0c, 0x07, 0xcb, 0x51, + 0x6b +}; +static const u8 key10[] __initconst = { + 0x4e, 0x00, 0x72, 0x37, 0x0f, 0x52, 0x4d, 0x6f, + 0x37, 0x50, 0x3c, 0xb3, 0x51, 0x81, 0x49, 0x16, + 0x7e, 0xfd, 0xb1, 0x51, 0x72, 0x2e, 0xe4, 0x16, + 0x68, 0x5c, 0x5b, 0x8a, 0xc3, 0x90, 0x70, 0x04 +}; +enum { nonce10 = 0x1ad9d1114d88cbbdULL }; + +static const u8 input11[] __initconst = { + 0x70, 0x18, 0x52, 0x85, 0xba, 0x66, 0xff, 0x2c, + 0x9a, 0x46 +}; +static const u8 output11[] __initconst = { + 0xf5, 0x2a, 0x7a, 0xfd, 0x31, 0x7c, 0x91, 0x41, + 0xb1, 0xcf +}; +static const u8 key11[] __initconst = { + 0x48, 0xb4, 0xd0, 0x7c, 0x88, 0xd1, 0x96, 0x0d, + 0x80, 0x33, 0xb4, 0xd5, 0x31, 0x9a, 0x88, 0xca, + 0x14, 0xdc, 0xf0, 0xa8, 0xf3, 0xac, 0xb8, 0x47, + 0x75, 0x86, 0x7c, 0x88, 0x50, 0x11, 0x43, 0x40 +}; +enum { nonce11 = 0x47c35dd1f4f8aa4fULL }; + +static const u8 input12[] __initconst = { + 0x9e, 0x8e, 0x3d, 0x2a, 0x05, 0xfd, 0xe4, 0x90, + 0x24, 0x1c, 0xd3 +}; +static const u8 output12[] __initconst = { + 0x97, 0x72, 0x40, 0x9f, 0xc0, 0x6b, 0x05, 0x33, + 0x42, 0x7e, 0x28 +}; +static const u8 key12[] __initconst = { + 0xee, 0xff, 0x33, 0x33, 0xe0, 0x28, 0xdf, 0xa2, + 0xb6, 0x5e, 0x25, 0x09, 0x52, 0xde, 0xa5, 0x9c, + 0x8f, 0x95, 0xa9, 0x03, 0x77, 0x0f, 0xbe, 0xa1, + 0xd0, 0x7d, 0x73, 0x2f, 0xf8, 0x7e, 0x51, 0x44 +}; +enum { nonce12 = 0xc22d044dc6ea4af3ULL }; + +static const u8 input13[] __initconst = { + 0x9c, 0x16, 0xa2, 0x22, 0x4d, 0xbe, 0x04, 0x9a, + 0xb3, 0xb5, 0xc6, 0x58 +}; +static const u8 output13[] __initconst = { + 0xf0, 0x81, 0xdb, 0x6d, 0xa3, 0xe9, 0xb2, 0xc6, + 0x32, 0x50, 0x16, 0x9f +}; +static const u8 key13[] __initconst = { + 0x96, 0xb3, 0x01, 0xd2, 0x7a, 0x8c, 0x94, 0x09, + 0x4f, 0x58, 0xbe, 0x80, 0xcc, 0xa9, 0x7e, 0x2d, + 0xad, 0x58, 0x3b, 0x63, 0xb8, 0x5c, 0x17, 0xce, + 0xbf, 0x43, 0x33, 0x7a, 0x7b, 0x82, 0x28, 0x2f +}; +enum { nonce13 = 0x2a5d05d88cd7b0daULL }; + +static const u8 input14[] __initconst = { + 0x57, 0x4f, 0xaa, 0x30, 0xe6, 0x23, 0x50, 0x86, + 0x91, 0xa5, 0x60, 0x96, 0x2b +}; +static const u8 output14[] __initconst = { + 0x6c, 0x1f, 0x3b, 0x42, 0xb6, 0x2f, 0xf0, 0xbd, + 0x76, 0x60, 0xc7, 0x7e, 0x8d +}; +static const u8 key14[] __initconst = { + 0x22, 0x85, 0xaf, 0x8f, 0xa3, 0x53, 0xa0, 0xc4, + 0xb5, 0x75, 0xc0, 0xba, 0x30, 0x92, 0xc3, 0x32, + 0x20, 0x5a, 0x8f, 0x7e, 0x93, 0xda, 0x65, 0x18, + 0xd1, 0xf6, 0x9a, 0x9b, 0x8f, 0x85, 0x30, 0xe6 +}; +enum { nonce14 = 0xf9946c166aa4475fULL }; + +static const u8 input15[] __initconst = { + 0x89, 0x81, 0xc7, 0xe2, 0x00, 0xac, 0x52, 0x70, + 0xa4, 0x79, 0xab, 0xeb, 0x74, 0xf7 +}; +static const u8 output15[] __initconst = { + 0xb4, 0xd0, 0xa9, 0x9d, 0x15, 0x5f, 0x48, 0xd6, + 0x00, 0x7e, 0x4c, 0x77, 0x5a, 0x46 +}; +static const u8 key15[] __initconst = { + 0x0a, 0x66, 0x36, 0xca, 0x5d, 0x82, 0x23, 0xb6, + 0xe4, 0x9b, 0xad, 0x5e, 0xd0, 0x7f, 0xf6, 0x7a, + 0x7b, 0x03, 0xa7, 0x4c, 0xfd, 0xec, 0xd5, 0xa1, + 0xfc, 0x25, 0x54, 0xda, 0x5a, 0x5c, 0xf0, 0x2c +}; +enum { nonce15 = 0x9ab2b87a35e772c8ULL }; + +static const u8 input16[] __initconst = { + 0x5f, 0x09, 0xc0, 0x8b, 0x1e, 0xde, 0xca, 0xd9, + 0xb7, 0x5c, 0x23, 0xc9, 0x55, 0x1e, 0xcf +}; +static const u8 output16[] __initconst = { + 0x76, 0x9b, 0x53, 0xf3, 0x66, 0x88, 0x28, 0x60, + 0x98, 0x80, 0x2c, 0xa8, 0x80, 0xa6, 0x48 +}; +static const u8 key16[] __initconst = { + 0x80, 0xb5, 0x51, 0xdf, 0x17, 0x5b, 0xb0, 0xef, + 0x8b, 0x5b, 0x2e, 0x3e, 0xc5, 0xe3, 0xa5, 0x86, + 0xac, 0x0d, 0x8e, 0x32, 0x90, 0x9d, 0x82, 0x27, + 0xf1, 0x23, 0x26, 0xc3, 0xea, 0x55, 0xb6, 0x63 +}; +enum { nonce16 = 0xa82e9d39e4d02ef5ULL }; + +static const u8 input17[] __initconst = { + 0x87, 0x0b, 0x36, 0x71, 0x7c, 0xb9, 0x0b, 0x80, + 0x4d, 0x77, 0x5c, 0x4f, 0xf5, 0x51, 0x0e, 0x1a +}; +static const u8 output17[] __initconst = { + 0xf1, 0x12, 0x4a, 0x8a, 0xd9, 0xd0, 0x08, 0x67, + 0x66, 0xd7, 0x34, 0xea, 0x32, 0x3b, 0x54, 0x0e +}; +static const u8 key17[] __initconst = { + 0xfb, 0x71, 0x5f, 0x3f, 0x7a, 0xc0, 0x9a, 0xc8, + 0xc8, 0xcf, 0xe8, 0xbc, 0xfb, 0x09, 0xbf, 0x89, + 0x6a, 0xef, 0xd5, 0xe5, 0x36, 0x87, 0x14, 0x76, + 0x00, 0xb9, 0x32, 0x28, 0xb2, 0x00, 0x42, 0x53 +}; +enum { nonce17 = 0x229b87e73d557b96ULL }; + +static const u8 input18[] __initconst = { + 0x38, 0x42, 0xb5, 0x37, 0xb4, 0x3d, 0xfe, 0x59, + 0x38, 0x68, 0x88, 0xfa, 0x89, 0x8a, 0x5f, 0x90, + 0x3c +}; +static const u8 output18[] __initconst = { + 0xac, 0xad, 0x14, 0xe8, 0x7e, 0xd7, 0xce, 0x96, + 0x3d, 0xb3, 0x78, 0x85, 0x22, 0x5a, 0xcb, 0x39, + 0xd4 +}; +static const u8 key18[] __initconst = { + 0xe1, 0xc1, 0xa8, 0xe0, 0x91, 0xe7, 0x38, 0x66, + 0x80, 0x17, 0x12, 0x3c, 0x5e, 0x2d, 0xbb, 0xea, + 0xeb, 0x6c, 0x8b, 0xc8, 0x1b, 0x6f, 0x7c, 0xea, + 0x50, 0x57, 0x23, 0x1e, 0x65, 0x6f, 0x6d, 0x81 +}; +enum { nonce18 = 0xfaf5fcf8f30e57a9ULL }; + +static const u8 input19[] __initconst = { + 0x1c, 0x4a, 0x30, 0x26, 0xef, 0x9a, 0x32, 0xa7, + 0x8f, 0xe5, 0xc0, 0x0f, 0x30, 0x3a, 0xbf, 0x38, + 0x54, 0xba +}; +static const u8 output19[] __initconst = { + 0x57, 0x67, 0x54, 0x4f, 0x31, 0xd6, 0xef, 0x35, + 0x0b, 0xd9, 0x52, 0xa7, 0x46, 0x7d, 0x12, 0x17, + 0x1e, 0xe3 +}; +static const u8 key19[] __initconst = { + 0x5a, 0x79, 0xc1, 0xea, 0x33, 0xb3, 0xc7, 0x21, + 0xec, 0xf8, 0xcb, 0xd2, 0x58, 0x96, 0x23, 0xd6, + 0x4d, 0xed, 0x2f, 0xdf, 0x8a, 0x79, 0xe6, 0x8b, + 0x38, 0xa3, 0xc3, 0x7a, 0x33, 0xda, 0x02, 0xc7 +}; +enum { nonce19 = 0x2b23b61840429604ULL }; + +static const u8 input20[] __initconst = { + 0xab, 0xe9, 0x32, 0xbb, 0x35, 0x17, 0xe0, 0x60, + 0x80, 0xb1, 0x27, 0xdc, 0xe6, 0x62, 0x9e, 0x0c, + 0x77, 0xf4, 0x50 +}; +static const u8 output20[] __initconst = { + 0x54, 0x6d, 0xaa, 0xfc, 0x08, 0xfb, 0x71, 0xa8, + 0xd6, 0x1d, 0x7d, 0xf3, 0x45, 0x10, 0xb5, 0x4c, + 0xcc, 0x4b, 0x45 +}; +static const u8 key20[] __initconst = { + 0xa3, 0xfd, 0x3d, 0xa9, 0xeb, 0xea, 0x2c, 0x69, + 0xcf, 0x59, 0x38, 0x13, 0x5b, 0xa7, 0x53, 0x8f, + 0x5e, 0xa2, 0x33, 0x86, 0x4c, 0x75, 0x26, 0xaf, + 0x35, 0x12, 0x09, 0x71, 0x81, 0xea, 0x88, 0x66 +}; +enum { nonce20 = 0x7459667a8fadff58ULL }; + +static const u8 input21[] __initconst = { + 0xa6, 0x82, 0x21, 0x23, 0xad, 0x27, 0x3f, 0xc6, + 0xd7, 0x16, 0x0d, 0x6d, 0x24, 0x15, 0x54, 0xc5, + 0x96, 0x72, 0x59, 0x8a +}; +static const u8 output21[] __initconst = { + 0x5f, 0x34, 0x32, 0xea, 0x06, 0xd4, 0x9e, 0x01, + 0xdc, 0x32, 0x32, 0x40, 0x66, 0x73, 0x6d, 0x4a, + 0x6b, 0x12, 0x20, 0xe8 +}; +static const u8 key21[] __initconst = { + 0x96, 0xfd, 0x13, 0x23, 0xa9, 0x89, 0x04, 0xe6, + 0x31, 0xa5, 0x2c, 0xc1, 0x40, 0xd5, 0x69, 0x5c, + 0x32, 0x79, 0x56, 0xe0, 0x29, 0x93, 0x8f, 0xe8, + 0x5f, 0x65, 0x53, 0x7f, 0xc1, 0xe9, 0xaf, 0xaf +}; +enum { nonce21 = 0xba8defee9d8e13b5ULL }; + +static const u8 input22[] __initconst = { + 0xb8, 0x32, 0x1a, 0x81, 0xd8, 0x38, 0x89, 0x5a, + 0xb0, 0x05, 0xbe, 0xf4, 0xd2, 0x08, 0xc6, 0xee, + 0x79, 0x7b, 0x3a, 0x76, 0x59 +}; +static const u8 output22[] __initconst = { + 0xb7, 0xba, 0xae, 0x80, 0xe4, 0x9f, 0x79, 0x84, + 0x5a, 0x48, 0x50, 0x6d, 0xcb, 0xd0, 0x06, 0x0c, + 0x15, 0x63, 0xa7, 0x5e, 0xbd +}; +static const u8 key22[] __initconst = { + 0x0f, 0x35, 0x3d, 0xeb, 0x5f, 0x0a, 0x82, 0x0d, + 0x24, 0x59, 0x71, 0xd8, 0xe6, 0x2d, 0x5f, 0xe1, + 0x7e, 0x0c, 0xae, 0xf6, 0xdc, 0x2c, 0xc5, 0x4a, + 0x38, 0x88, 0xf2, 0xde, 0xd9, 0x5f, 0x76, 0x7c +}; +enum { nonce22 = 0xe77f1760e9f5e192ULL }; + +static const u8 input23[] __initconst = { + 0x4b, 0x1e, 0x79, 0x99, 0xcf, 0xef, 0x64, 0x4b, + 0xb0, 0x66, 0xae, 0x99, 0x2e, 0x68, 0x97, 0xf5, + 0x5d, 0x9b, 0x3f, 0x7a, 0xa9, 0xd9 +}; +static const u8 output23[] __initconst = { + 0x5f, 0xa4, 0x08, 0x39, 0xca, 0xfa, 0x2b, 0x83, + 0x5d, 0x95, 0x70, 0x7c, 0x2e, 0xd4, 0xae, 0xfa, + 0x45, 0x4a, 0x77, 0x7f, 0xa7, 0x65 +}; +static const u8 key23[] __initconst = { + 0x4a, 0x06, 0x83, 0x64, 0xaa, 0xe3, 0x38, 0x32, + 0x28, 0x5d, 0xa4, 0xb2, 0x5a, 0xee, 0xcf, 0x8e, + 0x19, 0x67, 0xf1, 0x09, 0xe8, 0xc9, 0xf6, 0x40, + 0x02, 0x6d, 0x0b, 0xde, 0xfa, 0x81, 0x03, 0xb1 +}; +enum { nonce23 = 0x9b3f349158709849ULL }; + +static const u8 input24[] __initconst = { + 0xc6, 0xfc, 0x47, 0x5e, 0xd8, 0xed, 0xa9, 0xe5, + 0x4f, 0x82, 0x79, 0x35, 0xee, 0x3e, 0x7e, 0x3e, + 0x35, 0x70, 0x6e, 0xfa, 0x6d, 0x08, 0xe8 +}; +static const u8 output24[] __initconst = { + 0x3b, 0xc5, 0xf8, 0xc2, 0xbf, 0x2b, 0x90, 0x33, + 0xa6, 0xae, 0xf5, 0x5a, 0x65, 0xb3, 0x3d, 0xe1, + 0xcd, 0x5f, 0x55, 0xfa, 0xe7, 0xa5, 0x4a +}; +static const u8 key24[] __initconst = { + 0x00, 0x24, 0xc3, 0x65, 0x5f, 0xe6, 0x31, 0xbb, + 0x6d, 0xfc, 0x20, 0x7b, 0x1b, 0xa8, 0x96, 0x26, + 0x55, 0x21, 0x62, 0x25, 0x7e, 0xba, 0x23, 0x97, + 0xc9, 0xb8, 0x53, 0xa8, 0xef, 0xab, 0xad, 0x61 +}; +enum { nonce24 = 0x13ee0b8f526177c3ULL }; + +static const u8 input25[] __initconst = { + 0x33, 0x07, 0x16, 0xb1, 0x34, 0x33, 0x67, 0x04, + 0x9b, 0x0a, 0xce, 0x1b, 0xe9, 0xde, 0x1a, 0xec, + 0xd0, 0x55, 0xfb, 0xc6, 0x33, 0xaf, 0x2d, 0xe3 +}; +static const u8 output25[] __initconst = { + 0x05, 0x93, 0x10, 0xd1, 0x58, 0x6f, 0x68, 0x62, + 0x45, 0xdb, 0x91, 0xae, 0x70, 0xcf, 0xd4, 0x5f, + 0xee, 0xdf, 0xd5, 0xba, 0x9e, 0xde, 0x68, 0xe6 +}; +static const u8 key25[] __initconst = { + 0x83, 0xa9, 0x4f, 0x5d, 0x74, 0xd5, 0x91, 0xb3, + 0xc9, 0x97, 0x19, 0x15, 0xdb, 0x0d, 0x0b, 0x4a, + 0x3d, 0x55, 0xcf, 0xab, 0xb2, 0x05, 0x21, 0x35, + 0x45, 0x50, 0xeb, 0xf8, 0xf5, 0xbf, 0x36, 0x35 +}; +enum { nonce25 = 0x7c6f459e49ebfebcULL }; + +static const u8 input26[] __initconst = { + 0xc2, 0xd4, 0x7a, 0xa3, 0x92, 0xe1, 0xac, 0x46, + 0x1a, 0x15, 0x38, 0xc9, 0xb5, 0xfd, 0xdf, 0x84, + 0x38, 0xbc, 0x6b, 0x1d, 0xb0, 0x83, 0x43, 0x04, + 0x39 +}; +static const u8 output26[] __initconst = { + 0x7f, 0xde, 0xd6, 0x87, 0xcc, 0x34, 0xf4, 0x12, + 0xae, 0x55, 0xa5, 0x89, 0x95, 0x29, 0xfc, 0x18, + 0xd8, 0xc7, 0x7c, 0xd3, 0xcb, 0x85, 0x95, 0x21, + 0xd2 +}; +static const u8 key26[] __initconst = { + 0xe4, 0xd0, 0x54, 0x1d, 0x7d, 0x47, 0xa8, 0xc1, + 0x08, 0xca, 0xe2, 0x42, 0x52, 0x95, 0x16, 0x43, + 0xa3, 0x01, 0x23, 0x03, 0xcc, 0x3b, 0x81, 0x78, + 0x23, 0xcc, 0xa7, 0x36, 0xd7, 0xa0, 0x97, 0x8d +}; +enum { nonce26 = 0x524401012231683ULL }; + +static const u8 input27[] __initconst = { + 0x0d, 0xb0, 0xcf, 0xec, 0xfc, 0x38, 0x9d, 0x9d, + 0x89, 0x00, 0x96, 0xf2, 0x79, 0x8a, 0xa1, 0x8d, + 0x32, 0x5e, 0xc6, 0x12, 0x22, 0xec, 0xf6, 0x52, + 0xc1, 0x0b +}; +static const u8 output27[] __initconst = { + 0xef, 0xe1, 0xf2, 0x67, 0x8e, 0x2c, 0x00, 0x9f, + 0x1d, 0x4c, 0x66, 0x1f, 0x94, 0x58, 0xdc, 0xbb, + 0xb9, 0x11, 0x8f, 0x74, 0xfd, 0x0e, 0x14, 0x01, + 0xa8, 0x21 +}; +static const u8 key27[] __initconst = { + 0x78, 0x71, 0xa4, 0xe6, 0xb2, 0x95, 0x44, 0x12, + 0x81, 0xaa, 0x7e, 0x94, 0xa7, 0x8d, 0x44, 0xea, + 0xc4, 0xbc, 0x01, 0xb7, 0x9e, 0xf7, 0x82, 0x9e, + 0x3b, 0x23, 0x9f, 0x31, 0xdd, 0xb8, 0x0d, 0x18 +}; +enum { nonce27 = 0xd58fe0e58fb254d6ULL }; + +static const u8 input28[] __initconst = { + 0xaa, 0xb7, 0xaa, 0xd9, 0xa8, 0x91, 0xd7, 0x8a, + 0x97, 0x9b, 0xdb, 0x7c, 0x47, 0x2b, 0xdb, 0xd2, + 0xda, 0x77, 0xb1, 0xfa, 0x2d, 0x12, 0xe3, 0xe9, + 0xc4, 0x7f, 0x54 +}; +static const u8 output28[] __initconst = { + 0x87, 0x84, 0xa9, 0xa6, 0xad, 0x8f, 0xe6, 0x0f, + 0x69, 0xf8, 0x21, 0xc3, 0x54, 0x95, 0x0f, 0xb0, + 0x4e, 0xc7, 0x02, 0xe4, 0x04, 0xb0, 0x6c, 0x42, + 0x8c, 0x63, 0xe3 +}; +static const u8 key28[] __initconst = { + 0x12, 0x23, 0x37, 0x95, 0x04, 0xb4, 0x21, 0xe8, + 0xbc, 0x65, 0x46, 0x7a, 0xf4, 0x01, 0x05, 0x3f, + 0xb1, 0x34, 0x73, 0xd2, 0x49, 0xbf, 0x6f, 0x20, + 0xbd, 0x23, 0x58, 0x5f, 0xd1, 0x73, 0x57, 0xa6 +}; +enum { nonce28 = 0x3a04d51491eb4e07ULL }; + +static const u8 input29[] __initconst = { + 0x55, 0xd0, 0xd4, 0x4b, 0x17, 0xc8, 0xc4, 0x2b, + 0xc0, 0x28, 0xbd, 0x9d, 0x65, 0x4d, 0xaf, 0x77, + 0x72, 0x7c, 0x36, 0x68, 0xa7, 0xb6, 0x87, 0x4d, + 0xb9, 0x27, 0x25, 0x6c +}; +static const u8 output29[] __initconst = { + 0x0e, 0xac, 0x4c, 0xf5, 0x12, 0xb5, 0x56, 0xa5, + 0x00, 0x9a, 0xd6, 0xe5, 0x1a, 0x59, 0x2c, 0xf6, + 0x42, 0x22, 0xcf, 0x23, 0x98, 0x34, 0x29, 0xac, + 0x6e, 0xe3, 0x37, 0x6d +}; +static const u8 key29[] __initconst = { + 0xda, 0x9d, 0x05, 0x0c, 0x0c, 0xba, 0x75, 0xb9, + 0x9e, 0xb1, 0x8d, 0xd9, 0x73, 0x26, 0x2c, 0xa9, + 0x3a, 0xb5, 0xcb, 0x19, 0x49, 0xa7, 0x4f, 0xf7, + 0x64, 0x35, 0x23, 0x20, 0x2a, 0x45, 0x78, 0xc7 +}; +enum { nonce29 = 0xc25ac9982431cbfULL }; + +static const u8 input30[] __initconst = { + 0x4e, 0xd6, 0x85, 0xbb, 0xe7, 0x99, 0xfa, 0x04, + 0x33, 0x24, 0xfd, 0x75, 0x18, 0xe3, 0xd3, 0x25, + 0xcd, 0xca, 0xae, 0x00, 0xbe, 0x52, 0x56, 0x4a, + 0x31, 0xe9, 0x4f, 0xae, 0x8a +}; +static const u8 output30[] __initconst = { + 0x30, 0x36, 0x32, 0xa2, 0x3c, 0xb6, 0xf9, 0xf9, + 0x76, 0x70, 0xad, 0xa6, 0x10, 0x41, 0x00, 0x4a, + 0xfa, 0xce, 0x1b, 0x86, 0x05, 0xdb, 0x77, 0x96, + 0xb3, 0xb7, 0x8f, 0x61, 0x24 +}; +static const u8 key30[] __initconst = { + 0x49, 0x35, 0x4c, 0x15, 0x98, 0xfb, 0xc6, 0x57, + 0x62, 0x6d, 0x06, 0xc3, 0xd4, 0x79, 0x20, 0x96, + 0x05, 0x2a, 0x31, 0x63, 0xc0, 0x44, 0x42, 0x09, + 0x13, 0x13, 0xff, 0x1b, 0xc8, 0x63, 0x1f, 0x0b +}; +enum { nonce30 = 0x4967f9c08e41568bULL }; + +static const u8 input31[] __initconst = { + 0x91, 0x04, 0x20, 0x47, 0x59, 0xee, 0xa6, 0x0f, + 0x04, 0x75, 0xc8, 0x18, 0x95, 0x44, 0x01, 0x28, + 0x20, 0x6f, 0x73, 0x68, 0x66, 0xb5, 0x03, 0xb3, + 0x58, 0x27, 0x6e, 0x7a, 0x76, 0xb8 +}; +static const u8 output31[] __initconst = { + 0xe8, 0x03, 0x78, 0x9d, 0x13, 0x15, 0x98, 0xef, + 0x64, 0x68, 0x12, 0x41, 0xb0, 0x29, 0x94, 0x0c, + 0x83, 0x35, 0x46, 0xa9, 0x74, 0xe1, 0x75, 0xf0, + 0xb6, 0x96, 0xc3, 0x6f, 0xd7, 0x70 +}; +static const u8 key31[] __initconst = { + 0xef, 0xcd, 0x5a, 0x4a, 0xf4, 0x7e, 0x6a, 0x3a, + 0x11, 0x88, 0x72, 0x94, 0xb8, 0xae, 0x84, 0xc3, + 0x66, 0xe0, 0xde, 0x4b, 0x00, 0xa5, 0xd6, 0x2d, + 0x50, 0xb7, 0x28, 0xff, 0x76, 0x57, 0x18, 0x1f +}; +enum { nonce31 = 0xcb6f428fa4192e19ULL }; + +static const u8 input32[] __initconst = { + 0x90, 0x06, 0x50, 0x4b, 0x98, 0x14, 0x30, 0xf1, + 0xb8, 0xd7, 0xf0, 0xa4, 0x3e, 0x4e, 0xd8, 0x00, + 0xea, 0xdb, 0x4f, 0x93, 0x05, 0xef, 0x02, 0x71, + 0x1a, 0xcd, 0xa3, 0xb1, 0xae, 0xd3, 0x18 +}; +static const u8 output32[] __initconst = { + 0xcb, 0x4a, 0x37, 0x3f, 0xea, 0x40, 0xab, 0x86, + 0xfe, 0xcc, 0x07, 0xd5, 0xdc, 0xb2, 0x25, 0xb6, + 0xfd, 0x2a, 0x72, 0xbc, 0x5e, 0xd4, 0x75, 0xff, + 0x71, 0xfc, 0xce, 0x1e, 0x6f, 0x22, 0xc1 +}; +static const u8 key32[] __initconst = { + 0xfc, 0x6d, 0xc3, 0x80, 0xce, 0xa4, 0x31, 0xa1, + 0xcc, 0xfa, 0x9d, 0x10, 0x0b, 0xc9, 0x11, 0x77, + 0x34, 0xdb, 0xad, 0x1b, 0xc4, 0xfc, 0xeb, 0x79, + 0x91, 0xda, 0x59, 0x3b, 0x0d, 0xb1, 0x19, 0x3b +}; +enum { nonce32 = 0x88551bf050059467ULL }; + +static const u8 input33[] __initconst = { + 0x88, 0x94, 0x71, 0x92, 0xe8, 0xd7, 0xf9, 0xbd, + 0x55, 0xe3, 0x22, 0xdb, 0x99, 0x51, 0xfb, 0x50, + 0xbf, 0x82, 0xb5, 0x70, 0x8b, 0x2b, 0x6a, 0x03, + 0x37, 0xa0, 0xc6, 0x19, 0x5d, 0xc9, 0xbc, 0xcc +}; +static const u8 output33[] __initconst = { + 0xb6, 0x17, 0x51, 0xc8, 0xea, 0x8a, 0x14, 0xdc, + 0x23, 0x1b, 0xd4, 0xed, 0xbf, 0x50, 0xb9, 0x38, + 0x00, 0xc2, 0x3f, 0x78, 0x3d, 0xbf, 0xa0, 0x84, + 0xef, 0x45, 0xb2, 0x7d, 0x48, 0x7b, 0x62, 0xa7 +}; +static const u8 key33[] __initconst = { + 0xb9, 0x8f, 0x6a, 0xad, 0xb4, 0x6f, 0xb5, 0xdc, + 0x48, 0xfa, 0x43, 0x57, 0x62, 0x97, 0xef, 0x89, + 0x4c, 0x5a, 0x7b, 0x67, 0xb8, 0x9d, 0xf0, 0x42, + 0x2b, 0x8f, 0xf3, 0x18, 0x05, 0x2e, 0x48, 0xd0 +}; +enum { nonce33 = 0x31f16488fe8447f5ULL }; + +static const u8 input34[] __initconst = { + 0xda, 0x2b, 0x3d, 0x63, 0x9e, 0x4f, 0xc2, 0xb8, + 0x7f, 0xc2, 0x1a, 0x8b, 0x0d, 0x95, 0x65, 0x55, + 0x52, 0xba, 0x51, 0x51, 0xc0, 0x61, 0x9f, 0x0a, + 0x5d, 0xb0, 0x59, 0x8c, 0x64, 0x6a, 0xab, 0xf5, + 0x57 +}; +static const u8 output34[] __initconst = { + 0x5c, 0xf6, 0x62, 0x24, 0x8c, 0x45, 0xa3, 0x26, + 0xd0, 0xe4, 0x88, 0x1c, 0xed, 0xc4, 0x26, 0x58, + 0xb5, 0x5d, 0x92, 0xc4, 0x17, 0x44, 0x1c, 0xb8, + 0x2c, 0xf3, 0x55, 0x7e, 0xd6, 0xe5, 0xb3, 0x65, + 0xa8 +}; +static const u8 key34[] __initconst = { + 0xde, 0xd1, 0x27, 0xb7, 0x7c, 0xfa, 0xa6, 0x78, + 0x39, 0x80, 0xdf, 0xb7, 0x46, 0xac, 0x71, 0x26, + 0xd0, 0x2a, 0x56, 0x79, 0x12, 0xeb, 0x26, 0x37, + 0x01, 0x0d, 0x30, 0xe0, 0xe3, 0x66, 0xb2, 0xf4 +}; +enum { nonce34 = 0x92d0d9b252c24149ULL }; + +static const u8 input35[] __initconst = { + 0x3a, 0x15, 0x5b, 0x75, 0x6e, 0xd0, 0x52, 0x20, + 0x6c, 0x82, 0xfa, 0xce, 0x5b, 0xea, 0xf5, 0x43, + 0xc1, 0x81, 0x7c, 0xb2, 0xac, 0x16, 0x3f, 0xd3, + 0x5a, 0xaf, 0x55, 0x98, 0xf4, 0xc6, 0xba, 0x71, + 0x25, 0x8b +}; +static const u8 output35[] __initconst = { + 0xb3, 0xaf, 0xac, 0x6d, 0x4d, 0xc7, 0x68, 0x56, + 0x50, 0x5b, 0x69, 0x2a, 0xe5, 0x90, 0xf9, 0x5f, + 0x99, 0x88, 0xff, 0x0c, 0xa6, 0xb1, 0x83, 0xd6, + 0x80, 0xa6, 0x1b, 0xde, 0x94, 0xa4, 0x2c, 0xc3, + 0x74, 0xfa +}; +static const u8 key35[] __initconst = { + 0xd8, 0x24, 0xe2, 0x06, 0xd7, 0x7a, 0xce, 0x81, + 0x52, 0x72, 0x02, 0x69, 0x89, 0xc4, 0xe9, 0x53, + 0x3b, 0x08, 0x5f, 0x98, 0x1e, 0x1b, 0x99, 0x6e, + 0x28, 0x17, 0x6d, 0xba, 0xc0, 0x96, 0xf9, 0x3c +}; +enum { nonce35 = 0x7baf968c4c8e3a37ULL }; + +static const u8 input36[] __initconst = { + 0x31, 0x5d, 0x4f, 0xe3, 0xac, 0xad, 0x17, 0xa6, + 0xb5, 0x01, 0xe2, 0xc6, 0xd4, 0x7e, 0xc4, 0x80, + 0xc0, 0x59, 0x72, 0xbb, 0x4b, 0x74, 0x6a, 0x41, + 0x0f, 0x9c, 0xf6, 0xca, 0x20, 0xb3, 0x73, 0x07, + 0x6b, 0x02, 0x2a +}; +static const u8 output36[] __initconst = { + 0xf9, 0x09, 0x92, 0x94, 0x7e, 0x31, 0xf7, 0x53, + 0xe8, 0x8a, 0x5b, 0x20, 0xef, 0x9b, 0x45, 0x81, + 0xba, 0x5e, 0x45, 0x63, 0xc1, 0xc7, 0x9e, 0x06, + 0x0e, 0xd9, 0x62, 0x8e, 0x96, 0xf9, 0xfa, 0x43, + 0x4d, 0xd4, 0x28 +}; +static const u8 key36[] __initconst = { + 0x13, 0x30, 0x4c, 0x06, 0xae, 0x18, 0xde, 0x03, + 0x1d, 0x02, 0x40, 0xf5, 0xbb, 0x19, 0xe3, 0x88, + 0x41, 0xb1, 0x29, 0x15, 0x97, 0xc2, 0x69, 0x3f, + 0x32, 0x2a, 0x0c, 0x8b, 0xcf, 0x83, 0x8b, 0x6c +}; +enum { nonce36 = 0x226d251d475075a0ULL }; + +static const u8 input37[] __initconst = { + 0x10, 0x18, 0xbe, 0xfd, 0x66, 0xc9, 0x77, 0xcc, + 0x43, 0xe5, 0x46, 0x0b, 0x08, 0x8b, 0xae, 0x11, + 0x86, 0x15, 0xc2, 0xf6, 0x45, 0xd4, 0x5f, 0xd6, + 0xb6, 0x5f, 0x9f, 0x3e, 0x97, 0xb7, 0xd4, 0xad, + 0x0b, 0xe8, 0x31, 0x94 +}; +static const u8 output37[] __initconst = { + 0x03, 0x2c, 0x1c, 0xee, 0xc6, 0xdd, 0xed, 0x38, + 0x80, 0x6d, 0x84, 0x16, 0xc3, 0xc2, 0x04, 0x63, + 0xcd, 0xa7, 0x6e, 0x36, 0x8b, 0xed, 0x78, 0x63, + 0x95, 0xfc, 0x69, 0x7a, 0x3f, 0x8d, 0x75, 0x6b, + 0x6c, 0x26, 0x56, 0x4d +}; +static const u8 key37[] __initconst = { + 0xac, 0x84, 0x4d, 0xa9, 0x29, 0x49, 0x3c, 0x39, + 0x7f, 0xd9, 0xa6, 0x01, 0xf3, 0x7e, 0xfa, 0x4a, + 0x14, 0x80, 0x22, 0x74, 0xf0, 0x29, 0x30, 0x2d, + 0x07, 0x21, 0xda, 0xc0, 0x4d, 0x70, 0x56, 0xa2 +}; +enum { nonce37 = 0x167823ce3b64925aULL }; + +static const u8 input38[] __initconst = { + 0x30, 0x8f, 0xfa, 0x24, 0x29, 0xb1, 0xfb, 0xce, + 0x31, 0x62, 0xdc, 0xd0, 0x46, 0xab, 0xe1, 0x31, + 0xd9, 0xae, 0x60, 0x0d, 0xca, 0x0a, 0x49, 0x12, + 0x3d, 0x92, 0xe9, 0x91, 0x67, 0x12, 0x62, 0x18, + 0x89, 0xe2, 0xf9, 0x1c, 0xcc +}; +static const u8 output38[] __initconst = { + 0x56, 0x9c, 0xc8, 0x7a, 0xc5, 0x98, 0xa3, 0x0f, + 0xba, 0xd5, 0x3e, 0xe1, 0xc9, 0x33, 0x64, 0x33, + 0xf0, 0xd5, 0xf7, 0x43, 0x66, 0x0e, 0x08, 0x9a, + 0x6e, 0x09, 0xe4, 0x01, 0x0d, 0x1e, 0x2f, 0x4b, + 0xed, 0x9c, 0x08, 0x8c, 0x03 +}; +static const u8 key38[] __initconst = { + 0x77, 0x52, 0x2a, 0x23, 0xf1, 0xc5, 0x96, 0x2b, + 0x89, 0x4f, 0x3e, 0xf3, 0xff, 0x0e, 0x94, 0xce, + 0xf1, 0xbd, 0x53, 0xf5, 0x77, 0xd6, 0x9e, 0x47, + 0x49, 0x3d, 0x16, 0x64, 0xff, 0x95, 0x42, 0x42 +}; +enum { nonce38 = 0xff629d7b82cef357ULL }; + +static const u8 input39[] __initconst = { + 0x38, 0x26, 0x27, 0xd0, 0xc2, 0xf5, 0x34, 0xba, + 0xda, 0x0f, 0x1c, 0x1c, 0x9a, 0x70, 0xe5, 0x8a, + 0x78, 0x2d, 0x8f, 0x9a, 0xbf, 0x89, 0x6a, 0xfd, + 0xd4, 0x9c, 0x33, 0xf1, 0xb6, 0x89, 0x16, 0xe3, + 0x6a, 0x00, 0xfa, 0x3a, 0x0f, 0x26 +}; +static const u8 output39[] __initconst = { + 0x0f, 0xaf, 0x91, 0x6d, 0x9c, 0x99, 0xa4, 0xf7, + 0x3b, 0x9d, 0x9a, 0x98, 0xca, 0xbb, 0x50, 0x48, + 0xee, 0xcb, 0x5d, 0xa1, 0x37, 0x2d, 0x36, 0x09, + 0x2a, 0xe2, 0x1c, 0x3d, 0x98, 0x40, 0x1c, 0x16, + 0x56, 0xa7, 0x98, 0xe9, 0x7d, 0x2b +}; +static const u8 key39[] __initconst = { + 0x6e, 0x83, 0x15, 0x4d, 0xf8, 0x78, 0xa8, 0x0e, + 0x71, 0x37, 0xd4, 0x6e, 0x28, 0x5c, 0x06, 0xa1, + 0x2d, 0x6c, 0x72, 0x7a, 0xfd, 0xf8, 0x65, 0x1a, + 0xb8, 0xe6, 0x29, 0x7b, 0xe5, 0xb3, 0x23, 0x79 +}; +enum { nonce39 = 0xa4d8c491cf093e9dULL }; + +static const u8 input40[] __initconst = { + 0x8f, 0x32, 0x7c, 0x40, 0x37, 0x95, 0x08, 0x00, + 0x00, 0xfe, 0x2f, 0x95, 0x20, 0x12, 0x40, 0x18, + 0x5e, 0x7e, 0x5e, 0x99, 0xee, 0x8d, 0x91, 0x7d, + 0x50, 0x7d, 0x21, 0x45, 0x27, 0xe1, 0x7f, 0xd4, + 0x73, 0x10, 0xe1, 0x33, 0xbc, 0xf8, 0xdd +}; +static const u8 output40[] __initconst = { + 0x78, 0x7c, 0xdc, 0x55, 0x2b, 0xd9, 0x2b, 0x3a, + 0xdd, 0x56, 0x11, 0x52, 0xd3, 0x2e, 0xe0, 0x0d, + 0x23, 0x20, 0x8a, 0xf1, 0x4f, 0xee, 0xf1, 0x68, + 0xf6, 0xdc, 0x53, 0xcf, 0x17, 0xd4, 0xf0, 0x6c, + 0xdc, 0x80, 0x5f, 0x1c, 0xa4, 0x91, 0x05 +}; +static const u8 key40[] __initconst = { + 0x0d, 0x86, 0xbf, 0x8a, 0xba, 0x9e, 0x39, 0x91, + 0xa8, 0xe7, 0x22, 0xf0, 0x0c, 0x43, 0x18, 0xe4, + 0x1f, 0xb0, 0xaf, 0x8a, 0x34, 0x31, 0xf4, 0x41, + 0xf0, 0x89, 0x85, 0xca, 0x5d, 0x05, 0x3b, 0x94 +}; +enum { nonce40 = 0xae7acc4f5986439eULL }; + +static const u8 input41[] __initconst = { + 0x20, 0x5f, 0xc1, 0x83, 0x36, 0x02, 0x76, 0x96, + 0xf0, 0xbf, 0x8e, 0x0e, 0x1a, 0xd1, 0xc7, 0x88, + 0x18, 0xc7, 0x09, 0xc4, 0x15, 0xd9, 0x4f, 0x5e, + 0x1f, 0xb3, 0xb4, 0x6d, 0xcb, 0xa0, 0xd6, 0x8a, + 0x3b, 0x40, 0x8e, 0x80, 0xf1, 0xe8, 0x8f, 0x5f +}; +static const u8 output41[] __initconst = { + 0x0b, 0xd1, 0x49, 0x9a, 0x9d, 0xe8, 0x97, 0xb8, + 0xd1, 0xeb, 0x90, 0x62, 0x37, 0xd2, 0x99, 0x15, + 0x67, 0x6d, 0x27, 0x93, 0xce, 0x37, 0x65, 0xa2, + 0x94, 0x88, 0xd6, 0x17, 0xbc, 0x1c, 0x6e, 0xa2, + 0xcc, 0xfb, 0x81, 0x0e, 0x30, 0x60, 0x5a, 0x6f +}; +static const u8 key41[] __initconst = { + 0x36, 0x27, 0x57, 0x01, 0x21, 0x68, 0x97, 0xc7, + 0x00, 0x67, 0x7b, 0xe9, 0x0f, 0x55, 0x49, 0xbb, + 0x92, 0x18, 0x98, 0xf5, 0x5e, 0xbc, 0xe7, 0x5a, + 0x9d, 0x3d, 0xc7, 0xbd, 0x59, 0xec, 0x82, 0x8e +}; +enum { nonce41 = 0x5da05e4c8dfab464ULL }; + +static const u8 input42[] __initconst = { + 0xca, 0x30, 0xcd, 0x63, 0xf0, 0x2d, 0xf1, 0x03, + 0x4d, 0x0d, 0xf2, 0xf7, 0x6f, 0xae, 0xd6, 0x34, + 0xea, 0xf6, 0x13, 0xcf, 0x1c, 0xa0, 0xd0, 0xe8, + 0xa4, 0x78, 0x80, 0x3b, 0x1e, 0xa5, 0x32, 0x4c, + 0x73, 0x12, 0xd4, 0x6a, 0x94, 0xbc, 0xba, 0x80, + 0x5e +}; +static const u8 output42[] __initconst = { + 0xec, 0x3f, 0x18, 0x31, 0xc0, 0x7b, 0xb5, 0xe2, + 0xad, 0xf3, 0xec, 0xa0, 0x16, 0x9d, 0xef, 0xce, + 0x05, 0x65, 0x59, 0x9d, 0x5a, 0xca, 0x3e, 0x13, + 0xb9, 0x5d, 0x5d, 0xb5, 0xeb, 0xae, 0xc0, 0x87, + 0xbb, 0xfd, 0xe7, 0xe4, 0x89, 0x5b, 0xd2, 0x6c, + 0x56 +}; +static const u8 key42[] __initconst = { + 0x7c, 0x6b, 0x7e, 0x77, 0xcc, 0x8c, 0x1b, 0x03, + 0x8b, 0x2a, 0xb3, 0x7c, 0x5a, 0x73, 0xcc, 0xac, + 0xdd, 0x53, 0x54, 0x0c, 0x85, 0xed, 0xcd, 0x47, + 0x24, 0xc1, 0xb8, 0x9b, 0x2e, 0x41, 0x92, 0x36 +}; +enum { nonce42 = 0xe4d7348b09682c9cULL }; + +static const u8 input43[] __initconst = { + 0x52, 0xf2, 0x4b, 0x7c, 0xe5, 0x58, 0xe8, 0xd2, + 0xb7, 0xf3, 0xa1, 0x29, 0x68, 0xa2, 0x50, 0x50, + 0xae, 0x9c, 0x1b, 0xe2, 0x67, 0x77, 0xe2, 0xdb, + 0x85, 0x55, 0x7e, 0x84, 0x8a, 0x12, 0x3c, 0xb6, + 0x2e, 0xed, 0xd3, 0xec, 0x47, 0x68, 0xfa, 0x52, + 0x46, 0x9d +}; +static const u8 output43[] __initconst = { + 0x1b, 0xf0, 0x05, 0xe4, 0x1c, 0xd8, 0x74, 0x9a, + 0xf0, 0xee, 0x00, 0x54, 0xce, 0x02, 0x83, 0x15, + 0xfb, 0x23, 0x35, 0x78, 0xc3, 0xda, 0x98, 0xd8, + 0x9d, 0x1b, 0xb2, 0x51, 0x82, 0xb0, 0xff, 0xbe, + 0x05, 0xa9, 0xa4, 0x04, 0xba, 0xea, 0x4b, 0x73, + 0x47, 0x6e +}; +static const u8 key43[] __initconst = { + 0xeb, 0xec, 0x0e, 0xa1, 0x65, 0xe2, 0x99, 0x46, + 0xd8, 0x54, 0x8c, 0x4a, 0x93, 0xdf, 0x6d, 0xbf, + 0x93, 0x34, 0x94, 0x57, 0xc9, 0x12, 0x9d, 0x68, + 0x05, 0xc5, 0x05, 0xad, 0x5a, 0xc9, 0x2a, 0x3b +}; +enum { nonce43 = 0xe14f6a902b7827fULL }; + +static const u8 input44[] __initconst = { + 0x3e, 0x22, 0x3e, 0x8e, 0xcd, 0x18, 0xe2, 0xa3, + 0x8d, 0x8b, 0x38, 0xc3, 0x02, 0xa3, 0x31, 0x48, + 0xc6, 0x0e, 0xec, 0x99, 0x51, 0x11, 0x6d, 0x8b, + 0x32, 0x35, 0x3b, 0x08, 0x58, 0x76, 0x25, 0x30, + 0xe2, 0xfc, 0xa2, 0x46, 0x7d, 0x6e, 0x34, 0x87, + 0xac, 0x42, 0xbf +}; +static const u8 output44[] __initconst = { + 0x08, 0x92, 0x58, 0x02, 0x1a, 0xf4, 0x1f, 0x3d, + 0x38, 0x7b, 0x6b, 0xf6, 0x84, 0x07, 0xa3, 0x19, + 0x17, 0x2a, 0xed, 0x57, 0x1c, 0xf9, 0x55, 0x37, + 0x4e, 0xf4, 0x68, 0x68, 0x82, 0x02, 0x4f, 0xca, + 0x21, 0x00, 0xc6, 0x66, 0x79, 0x53, 0x19, 0xef, + 0x7f, 0xdd, 0x74 +}; +static const u8 key44[] __initconst = { + 0x73, 0xb6, 0x3e, 0xf4, 0x57, 0x52, 0xa6, 0x43, + 0x51, 0xd8, 0x25, 0x00, 0xdb, 0xb4, 0x52, 0x69, + 0xd6, 0x27, 0x49, 0xeb, 0x9b, 0xf1, 0x7b, 0xa0, + 0xd6, 0x7c, 0x9c, 0xd8, 0x95, 0x03, 0x69, 0x26 +}; +enum { nonce44 = 0xf5e6dc4f35ce24e5ULL }; + +static const u8 input45[] __initconst = { + 0x55, 0x76, 0xc0, 0xf1, 0x74, 0x03, 0x7a, 0x6d, + 0x14, 0xd8, 0x36, 0x2c, 0x9f, 0x9a, 0x59, 0x7a, + 0x2a, 0xf5, 0x77, 0x84, 0x70, 0x7c, 0x1d, 0x04, + 0x90, 0x45, 0xa4, 0xc1, 0x5e, 0xdd, 0x2e, 0x07, + 0x18, 0x34, 0xa6, 0x85, 0x56, 0x4f, 0x09, 0xaf, + 0x2f, 0x83, 0xe1, 0xc6 +}; +static const u8 output45[] __initconst = { + 0x22, 0x46, 0xe4, 0x0b, 0x3a, 0x55, 0xcc, 0x9b, + 0xf0, 0xc0, 0x53, 0xcd, 0x95, 0xc7, 0x57, 0x6c, + 0x77, 0x46, 0x41, 0x72, 0x07, 0xbf, 0xa8, 0xe5, + 0x68, 0x69, 0xd8, 0x1e, 0x45, 0xc1, 0xa2, 0x50, + 0xa5, 0xd1, 0x62, 0xc9, 0x5a, 0x7d, 0x08, 0x14, + 0xae, 0x44, 0x16, 0xb9 +}; +static const u8 key45[] __initconst = { + 0x41, 0xf3, 0x88, 0xb2, 0x51, 0x25, 0x47, 0x02, + 0x39, 0xe8, 0x15, 0x3a, 0x22, 0x78, 0x86, 0x0b, + 0xf9, 0x1e, 0x8d, 0x98, 0xb2, 0x22, 0x82, 0xac, + 0x42, 0x94, 0xde, 0x64, 0xf0, 0xfd, 0xb3, 0x6c +}; +enum { nonce45 = 0xf51a582daf4aa01aULL }; + +static const u8 input46[] __initconst = { + 0xf6, 0xff, 0x20, 0xf9, 0x26, 0x7e, 0x0f, 0xa8, + 0x6a, 0x45, 0x5a, 0x91, 0x73, 0xc4, 0x4c, 0x63, + 0xe5, 0x61, 0x59, 0xca, 0xec, 0xc0, 0x20, 0x35, + 0xbc, 0x9f, 0x58, 0x9c, 0x5e, 0xa1, 0x17, 0x46, + 0xcc, 0xab, 0x6e, 0xd0, 0x4f, 0x24, 0xeb, 0x05, + 0x4d, 0x40, 0x41, 0xe0, 0x9d +}; +static const u8 output46[] __initconst = { + 0x31, 0x6e, 0x63, 0x3f, 0x9c, 0xe6, 0xb1, 0xb7, + 0xef, 0x47, 0x46, 0xd7, 0xb1, 0x53, 0x42, 0x2f, + 0x2c, 0xc8, 0x01, 0xae, 0x8b, 0xec, 0x42, 0x2c, + 0x6b, 0x2c, 0x9c, 0xb2, 0xf0, 0x29, 0x06, 0xa5, + 0xcd, 0x7e, 0xc7, 0x3a, 0x38, 0x98, 0x8a, 0xde, + 0x03, 0x29, 0x14, 0x8f, 0xf9 +}; +static const u8 key46[] __initconst = { + 0xac, 0xa6, 0x44, 0x4a, 0x0d, 0x42, 0x10, 0xbc, + 0xd3, 0xc9, 0x8e, 0x9e, 0x71, 0xa3, 0x1c, 0x14, + 0x9d, 0x65, 0x0d, 0x49, 0x4d, 0x8c, 0xec, 0x46, + 0xe1, 0x41, 0xcd, 0xf5, 0xfc, 0x82, 0x75, 0x34 +}; +enum { nonce46 = 0x25f85182df84dec5ULL }; + +static const u8 input47[] __initconst = { + 0xa1, 0xd2, 0xf2, 0x52, 0x2f, 0x79, 0x50, 0xb2, + 0x42, 0x29, 0x5b, 0x44, 0x20, 0xf9, 0xbd, 0x85, + 0xb7, 0x65, 0x77, 0x86, 0xce, 0x3e, 0x1c, 0xe4, + 0x70, 0x80, 0xdd, 0x72, 0x07, 0x48, 0x0f, 0x84, + 0x0d, 0xfd, 0x97, 0xc0, 0xb7, 0x48, 0x9b, 0xb4, + 0xec, 0xff, 0x73, 0x14, 0x99, 0xe4 +}; +static const u8 output47[] __initconst = { + 0xe5, 0x3c, 0x78, 0x66, 0x31, 0x1e, 0xd6, 0xc4, + 0x9e, 0x71, 0xb3, 0xd7, 0xd5, 0xad, 0x84, 0xf2, + 0x78, 0x61, 0x77, 0xf8, 0x31, 0xf0, 0x13, 0xad, + 0x66, 0xf5, 0x31, 0x7d, 0xeb, 0xdf, 0xaf, 0xcb, + 0xac, 0x28, 0x6c, 0xc2, 0x9e, 0xe7, 0x78, 0xa2, + 0xa2, 0x58, 0xce, 0x84, 0x76, 0x70 +}; +static const u8 key47[] __initconst = { + 0x05, 0x7f, 0xc0, 0x7f, 0x37, 0x20, 0x71, 0x02, + 0x3a, 0xe7, 0x20, 0x5a, 0x0a, 0x8f, 0x79, 0x5a, + 0xfe, 0xbb, 0x43, 0x4d, 0x2f, 0xcb, 0xf6, 0x9e, + 0xa2, 0x97, 0x00, 0xad, 0x0d, 0x51, 0x7e, 0x17 +}; +enum { nonce47 = 0xae707c60f54de32bULL }; + +static const u8 input48[] __initconst = { + 0x80, 0x93, 0x77, 0x2e, 0x8d, 0xe8, 0xe6, 0xc1, + 0x27, 0xe6, 0xf2, 0x89, 0x5b, 0x33, 0x62, 0x18, + 0x80, 0x6e, 0x17, 0x22, 0x8e, 0x83, 0x31, 0x40, + 0x8f, 0xc9, 0x5c, 0x52, 0x6c, 0x0e, 0xa5, 0xe9, + 0x6c, 0x7f, 0xd4, 0x6a, 0x27, 0x56, 0x99, 0xce, + 0x8d, 0x37, 0x59, 0xaf, 0xc0, 0x0e, 0xe1 +}; +static const u8 output48[] __initconst = { + 0x02, 0xa4, 0x2e, 0x33, 0xb7, 0x7c, 0x2b, 0x9a, + 0x18, 0x5a, 0xba, 0x53, 0x38, 0xaf, 0x00, 0xeb, + 0xd8, 0x3d, 0x02, 0x77, 0x43, 0x45, 0x03, 0x91, + 0xe2, 0x5e, 0x4e, 0xeb, 0x50, 0xd5, 0x5b, 0xe0, + 0xf3, 0x33, 0xa7, 0xa2, 0xac, 0x07, 0x6f, 0xeb, + 0x3f, 0x6c, 0xcd, 0xf2, 0x6c, 0x61, 0x64 +}; +static const u8 key48[] __initconst = { + 0xf3, 0x79, 0xe7, 0xf8, 0x0e, 0x02, 0x05, 0x6b, + 0x83, 0x1a, 0xe7, 0x86, 0x6b, 0xe6, 0x8f, 0x3f, + 0xd3, 0xa3, 0xe4, 0x6e, 0x29, 0x06, 0xad, 0xbc, + 0xe8, 0x33, 0x56, 0x39, 0xdf, 0xb0, 0xe2, 0xfe +}; +enum { nonce48 = 0xd849b938c6569da0ULL }; + +static const u8 input49[] __initconst = { + 0x89, 0x3b, 0x88, 0x9e, 0x7b, 0x38, 0x16, 0x9f, + 0xa1, 0x28, 0xf6, 0xf5, 0x23, 0x74, 0x28, 0xb0, + 0xdf, 0x6c, 0x9e, 0x8a, 0x71, 0xaf, 0xed, 0x7a, + 0x39, 0x21, 0x57, 0x7d, 0x31, 0x6c, 0xee, 0x0d, + 0x11, 0x8d, 0x41, 0x9a, 0x5f, 0xb7, 0x27, 0x40, + 0x08, 0xad, 0xc6, 0xe0, 0x00, 0x43, 0x9e, 0xae +}; +static const u8 output49[] __initconst = { + 0x4d, 0xfd, 0xdb, 0x4c, 0x77, 0xc1, 0x05, 0x07, + 0x4d, 0x6d, 0x32, 0xcb, 0x2e, 0x0e, 0xff, 0x65, + 0xc9, 0x27, 0xeb, 0xa9, 0x46, 0x5b, 0xab, 0x06, + 0xe6, 0xb6, 0x5a, 0x1e, 0x00, 0xfb, 0xcf, 0xe4, + 0xb9, 0x71, 0x40, 0x10, 0xef, 0x12, 0x39, 0xf0, + 0xea, 0x40, 0xb8, 0x9a, 0xa2, 0x85, 0x38, 0x48 +}; +static const u8 key49[] __initconst = { + 0xe7, 0x10, 0x40, 0xd9, 0x66, 0xc0, 0xa8, 0x6d, + 0xa3, 0xcc, 0x8b, 0xdd, 0x93, 0xf2, 0x6e, 0xe0, + 0x90, 0x7f, 0xd0, 0xf4, 0x37, 0x0c, 0x8b, 0x9b, + 0x4c, 0x4d, 0xe6, 0xf2, 0x1f, 0xe9, 0x95, 0x24 +}; +enum { nonce49 = 0xf269817bdae01bc0ULL }; + +static const u8 input50[] __initconst = { + 0xda, 0x5b, 0x60, 0xcd, 0xed, 0x58, 0x8e, 0x7f, + 0xae, 0xdd, 0xc8, 0x2e, 0x16, 0x90, 0xea, 0x4b, + 0x0c, 0x74, 0x14, 0x35, 0xeb, 0xee, 0x2c, 0xff, + 0x46, 0x99, 0x97, 0x6e, 0xae, 0xa7, 0x8e, 0x6e, + 0x38, 0xfe, 0x63, 0xe7, 0x51, 0xd9, 0xaa, 0xce, + 0x7b, 0x1e, 0x7e, 0x5d, 0xc0, 0xe8, 0x10, 0x06, + 0x14 +}; +static const u8 output50[] __initconst = { + 0xe4, 0xe5, 0x86, 0x1b, 0x66, 0x19, 0xac, 0x49, + 0x1c, 0xbd, 0xee, 0x03, 0xaf, 0x11, 0xfc, 0x1f, + 0x6a, 0xd2, 0x50, 0x5c, 0xea, 0x2c, 0xa5, 0x75, + 0xfd, 0xb7, 0x0e, 0x80, 0x8f, 0xed, 0x3f, 0x31, + 0x47, 0xac, 0x67, 0x43, 0xb8, 0x2e, 0xb4, 0x81, + 0x6d, 0xe4, 0x1e, 0xb7, 0x8b, 0x0c, 0x53, 0xa9, + 0x26 +}; +static const u8 key50[] __initconst = { + 0xd7, 0xb2, 0x04, 0x76, 0x30, 0xcc, 0x38, 0x45, + 0xef, 0xdb, 0xc5, 0x86, 0x08, 0x61, 0xf0, 0xee, + 0x6d, 0xd8, 0x22, 0x04, 0x8c, 0xfb, 0xcb, 0x37, + 0xa6, 0xfb, 0x95, 0x22, 0xe1, 0x87, 0xb7, 0x6f +}; +enum { nonce50 = 0x3b44d09c45607d38ULL }; + +static const u8 input51[] __initconst = { + 0xa9, 0x41, 0x02, 0x4b, 0xd7, 0xd5, 0xd1, 0xf1, + 0x21, 0x55, 0xb2, 0x75, 0x6d, 0x77, 0x1b, 0x86, + 0xa9, 0xc8, 0x90, 0xfd, 0xed, 0x4a, 0x7b, 0x6c, + 0xb2, 0x5f, 0x9b, 0x5f, 0x16, 0xa1, 0x54, 0xdb, + 0xd6, 0x3f, 0x6a, 0x7f, 0x2e, 0x51, 0x9d, 0x49, + 0x5b, 0xa5, 0x0e, 0xf9, 0xfb, 0x2a, 0x38, 0xff, + 0x20, 0x8c +}; +static const u8 output51[] __initconst = { + 0x18, 0xf7, 0x88, 0xc1, 0x72, 0xfd, 0x90, 0x4b, + 0xa9, 0x2d, 0xdb, 0x47, 0xb0, 0xa5, 0xc4, 0x37, + 0x01, 0x95, 0xc4, 0xb1, 0xab, 0xc5, 0x5b, 0xcd, + 0xe1, 0x97, 0x78, 0x13, 0xde, 0x6a, 0xff, 0x36, + 0xce, 0xa4, 0x67, 0xc5, 0x4a, 0x45, 0x2b, 0xd9, + 0xff, 0x8f, 0x06, 0x7c, 0x63, 0xbb, 0x83, 0x17, + 0xb4, 0x6b +}; +static const u8 key51[] __initconst = { + 0x82, 0x1a, 0x79, 0xab, 0x9a, 0xb5, 0x49, 0x6a, + 0x30, 0x6b, 0x99, 0x19, 0x11, 0xc7, 0xa2, 0xf4, + 0xca, 0x55, 0xb9, 0xdd, 0xe7, 0x2f, 0xe7, 0xc1, + 0xdd, 0x27, 0xad, 0x80, 0xf2, 0x56, 0xad, 0xf3 +}; +enum { nonce51 = 0xe93aff94ca71a4a6ULL }; + +static const u8 input52[] __initconst = { + 0x89, 0xdd, 0xf3, 0xfa, 0xb6, 0xc1, 0xaa, 0x9a, + 0xc8, 0xad, 0x6b, 0x00, 0xa1, 0x65, 0xea, 0x14, + 0x55, 0x54, 0x31, 0x8f, 0xf0, 0x03, 0x84, 0x51, + 0x17, 0x1e, 0x0a, 0x93, 0x6e, 0x79, 0x96, 0xa3, + 0x2a, 0x85, 0x9c, 0x89, 0xf8, 0xd1, 0xe2, 0x15, + 0x95, 0x05, 0xf4, 0x43, 0x4d, 0x6b, 0xf0, 0x71, + 0x3b, 0x3e, 0xba +}; +static const u8 output52[] __initconst = { + 0x0c, 0x42, 0x6a, 0xb3, 0x66, 0x63, 0x5d, 0x2c, + 0x9f, 0x3d, 0xa6, 0x6e, 0xc7, 0x5f, 0x79, 0x2f, + 0x50, 0xe3, 0xd6, 0x07, 0x56, 0xa4, 0x2b, 0x2d, + 0x8d, 0x10, 0xc0, 0x6c, 0xa2, 0xfc, 0x97, 0xec, + 0x3f, 0x5c, 0x8d, 0x59, 0xbe, 0x84, 0xf1, 0x3e, + 0x38, 0x47, 0x4f, 0x75, 0x25, 0x66, 0x88, 0x14, + 0x03, 0xdd, 0xde +}; +static const u8 key52[] __initconst = { + 0x4f, 0xb0, 0x27, 0xb6, 0xdd, 0x24, 0x0c, 0xdb, + 0x6b, 0x71, 0x2e, 0xac, 0xfc, 0x3f, 0xa6, 0x48, + 0x5d, 0xd5, 0xff, 0x53, 0xb5, 0x62, 0xf1, 0xe0, + 0x93, 0xfe, 0x39, 0x4c, 0x9f, 0x03, 0x11, 0xa7 +}; +enum { nonce52 = 0xed8becec3bdf6f25ULL }; + +static const u8 input53[] __initconst = { + 0x68, 0xd1, 0xc7, 0x74, 0x44, 0x1c, 0x84, 0xde, + 0x27, 0x27, 0x35, 0xf0, 0x18, 0x0b, 0x57, 0xaa, + 0xd0, 0x1a, 0xd3, 0x3b, 0x5e, 0x5c, 0x62, 0x93, + 0xd7, 0x6b, 0x84, 0x3b, 0x71, 0x83, 0x77, 0x01, + 0x3e, 0x59, 0x45, 0xf4, 0x77, 0x6c, 0x6b, 0xcb, + 0x88, 0x45, 0x09, 0x1d, 0xc6, 0x45, 0x6e, 0xdc, + 0x6e, 0x51, 0xb8, 0x28 +}; +static const u8 output53[] __initconst = { + 0xc5, 0x90, 0x96, 0x78, 0x02, 0xf5, 0xc4, 0x3c, + 0xde, 0xd4, 0xd4, 0xc6, 0xa7, 0xad, 0x12, 0x47, + 0x45, 0xce, 0xcd, 0x8c, 0x35, 0xcc, 0xa6, 0x9e, + 0x5a, 0xc6, 0x60, 0xbb, 0xe3, 0xed, 0xec, 0x68, + 0x3f, 0x64, 0xf7, 0x06, 0x63, 0x9c, 0x8c, 0xc8, + 0x05, 0x3a, 0xad, 0x32, 0x79, 0x8b, 0x45, 0x96, + 0x93, 0x73, 0x4c, 0xe0 +}; +static const u8 key53[] __initconst = { + 0x42, 0x4b, 0x20, 0x81, 0x49, 0x50, 0xe9, 0xc2, + 0x43, 0x69, 0x36, 0xe7, 0x68, 0xae, 0xd5, 0x7e, + 0x42, 0x1a, 0x1b, 0xb4, 0x06, 0x4d, 0xa7, 0x17, + 0xb5, 0x31, 0xd6, 0x0c, 0xb0, 0x5c, 0x41, 0x0b +}; +enum { nonce53 = 0xf44ce1931fbda3d7ULL }; + +static const u8 input54[] __initconst = { + 0x7b, 0xf6, 0x8b, 0xae, 0xc0, 0xcb, 0x10, 0x8e, + 0xe8, 0xd8, 0x2e, 0x3b, 0x14, 0xba, 0xb4, 0xd2, + 0x58, 0x6b, 0x2c, 0xec, 0xc1, 0x81, 0x71, 0xb4, + 0xc6, 0xea, 0x08, 0xc5, 0xc9, 0x78, 0xdb, 0xa2, + 0xfa, 0x44, 0x50, 0x9b, 0xc8, 0x53, 0x8d, 0x45, + 0x42, 0xe7, 0x09, 0xc4, 0x29, 0xd8, 0x75, 0x02, + 0xbb, 0xb2, 0x78, 0xcf, 0xe7 +}; +static const u8 output54[] __initconst = { + 0xaf, 0x2c, 0x83, 0x26, 0x6e, 0x7f, 0xa6, 0xe9, + 0x03, 0x75, 0xfe, 0xfe, 0x87, 0x58, 0xcf, 0xb5, + 0xbc, 0x3c, 0x9d, 0xa1, 0x6e, 0x13, 0xf1, 0x0f, + 0x9e, 0xbc, 0xe0, 0x54, 0x24, 0x32, 0xce, 0x95, + 0xe6, 0xa5, 0x59, 0x3d, 0x24, 0x1d, 0x8f, 0xb1, + 0x74, 0x6c, 0x56, 0xe7, 0x96, 0xc1, 0x91, 0xc8, + 0x2d, 0x0e, 0xb7, 0x51, 0x10 +}; +static const u8 key54[] __initconst = { + 0x00, 0x68, 0x74, 0xdc, 0x30, 0x9e, 0xe3, 0x52, + 0xa9, 0xae, 0xb6, 0x7c, 0xa1, 0xdc, 0x12, 0x2d, + 0x98, 0x32, 0x7a, 0x77, 0xe1, 0xdd, 0xa3, 0x76, + 0x72, 0x34, 0x83, 0xd8, 0xb7, 0x69, 0xba, 0x77 +}; +enum { nonce54 = 0xbea57d79b798b63aULL }; + +static const u8 input55[] __initconst = { + 0xb5, 0xf4, 0x2f, 0xc1, 0x5e, 0x10, 0xa7, 0x4e, + 0x74, 0x3d, 0xa3, 0x96, 0xc0, 0x4d, 0x7b, 0x92, + 0x8f, 0xdb, 0x2d, 0x15, 0x52, 0x6a, 0x95, 0x5e, + 0x40, 0x81, 0x4f, 0x70, 0x73, 0xea, 0x84, 0x65, + 0x3d, 0x9a, 0x4e, 0x03, 0x95, 0xf8, 0x5d, 0x2f, + 0x07, 0x02, 0x13, 0x13, 0xdd, 0x82, 0xe6, 0x3b, + 0xe1, 0x5f, 0xb3, 0x37, 0x9b, 0x88 +}; +static const u8 output55[] __initconst = { + 0xc1, 0x88, 0xbd, 0x92, 0x77, 0xad, 0x7c, 0x5f, + 0xaf, 0xa8, 0x57, 0x0e, 0x40, 0x0a, 0xdc, 0x70, + 0xfb, 0xc6, 0x71, 0xfd, 0xc4, 0x74, 0x60, 0xcc, + 0xa0, 0x89, 0x8e, 0x99, 0xf0, 0x06, 0xa6, 0x7c, + 0x97, 0x42, 0x21, 0x81, 0x6a, 0x07, 0xe7, 0xb3, + 0xf7, 0xa5, 0x03, 0x71, 0x50, 0x05, 0x63, 0x17, + 0xa9, 0x46, 0x0b, 0xff, 0x30, 0x78 +}; +static const u8 key55[] __initconst = { + 0x19, 0x8f, 0xe7, 0xd7, 0x6b, 0x7f, 0x6f, 0x69, + 0x86, 0x91, 0x0f, 0xa7, 0x4a, 0x69, 0x8e, 0x34, + 0xf3, 0xdb, 0xde, 0xaf, 0xf2, 0x66, 0x1d, 0x64, + 0x97, 0x0c, 0xcf, 0xfa, 0x33, 0x84, 0xfd, 0x0c +}; +enum { nonce55 = 0x80aa3d3e2c51ef06ULL }; + +static const u8 input56[] __initconst = { + 0x6b, 0xe9, 0x73, 0x42, 0x27, 0x5e, 0x12, 0xcd, + 0xaa, 0x45, 0x12, 0x8b, 0xb3, 0xe6, 0x54, 0x33, + 0x31, 0x7d, 0xe2, 0x25, 0xc6, 0x86, 0x47, 0x67, + 0x86, 0x83, 0xe4, 0x46, 0xb5, 0x8f, 0x2c, 0xbb, + 0xe4, 0xb8, 0x9f, 0xa2, 0xa4, 0xe8, 0x75, 0x96, + 0x92, 0x51, 0x51, 0xac, 0x8e, 0x2e, 0x6f, 0xfc, + 0xbd, 0x0d, 0xa3, 0x9f, 0x16, 0x55, 0x3e +}; +static const u8 output56[] __initconst = { + 0x42, 0x99, 0x73, 0x6c, 0xd9, 0x4b, 0x16, 0xe5, + 0x18, 0x63, 0x1a, 0xd9, 0x0e, 0xf1, 0x15, 0x2e, + 0x0f, 0x4b, 0xe4, 0x5f, 0xa0, 0x4d, 0xde, 0x9f, + 0xa7, 0x18, 0xc1, 0x0c, 0x0b, 0xae, 0x55, 0xe4, + 0x89, 0x18, 0xa4, 0x78, 0x9d, 0x25, 0x0d, 0xd5, + 0x94, 0x0f, 0xf9, 0x78, 0xa3, 0xa6, 0xe9, 0x9e, + 0x2c, 0x73, 0xf0, 0xf7, 0x35, 0xf3, 0x2b +}; +static const u8 key56[] __initconst = { + 0x7d, 0x12, 0xad, 0x51, 0xd5, 0x6f, 0x8f, 0x96, + 0xc0, 0x5d, 0x9a, 0xd1, 0x7e, 0x20, 0x98, 0x0e, + 0x3c, 0x0a, 0x67, 0x6b, 0x1b, 0x88, 0x69, 0xd4, + 0x07, 0x8c, 0xaf, 0x0f, 0x3a, 0x28, 0xe4, 0x5d +}; +enum { nonce56 = 0x70f4c372fb8b5984ULL }; + +static const u8 input57[] __initconst = { + 0x28, 0xa3, 0x06, 0xe8, 0xe7, 0x08, 0xb9, 0xef, + 0x0d, 0x63, 0x15, 0x99, 0xb2, 0x78, 0x7e, 0xaf, + 0x30, 0x50, 0xcf, 0xea, 0xc9, 0x91, 0x41, 0x2f, + 0x3b, 0x38, 0x70, 0xc4, 0x87, 0xb0, 0x3a, 0xee, + 0x4a, 0xea, 0xe3, 0x83, 0x68, 0x8b, 0xcf, 0xda, + 0x04, 0xa5, 0xbd, 0xb2, 0xde, 0x3c, 0x55, 0x13, + 0xfe, 0x96, 0xad, 0xc1, 0x61, 0x1b, 0x98, 0xde +}; +static const u8 output57[] __initconst = { + 0xf4, 0x44, 0xe9, 0xd2, 0x6d, 0xc2, 0x5a, 0xe9, + 0xfd, 0x7e, 0x41, 0x54, 0x3f, 0xf4, 0x12, 0xd8, + 0x55, 0x0d, 0x12, 0x9b, 0xd5, 0x2e, 0x95, 0xe5, + 0x77, 0x42, 0x3f, 0x2c, 0xfb, 0x28, 0x9d, 0x72, + 0x6d, 0x89, 0x82, 0x27, 0x64, 0x6f, 0x0d, 0x57, + 0xa1, 0x25, 0xa3, 0x6b, 0x88, 0x9a, 0xac, 0x0c, + 0x76, 0x19, 0x90, 0xe2, 0x50, 0x5a, 0xf8, 0x12 +}; +static const u8 key57[] __initconst = { + 0x08, 0x26, 0xb8, 0xac, 0xf3, 0xa5, 0xc6, 0xa3, + 0x7f, 0x09, 0x87, 0xf5, 0x6c, 0x5a, 0x85, 0x6c, + 0x3d, 0xbd, 0xde, 0xd5, 0x87, 0xa3, 0x98, 0x7a, + 0xaa, 0x40, 0x3e, 0xf7, 0xff, 0x44, 0x5d, 0xee +}; +enum { nonce57 = 0xc03a6130bf06b089ULL }; + +static const u8 input58[] __initconst = { + 0x82, 0xa5, 0x38, 0x6f, 0xaa, 0xb4, 0xaf, 0xb2, + 0x42, 0x01, 0xa8, 0x39, 0x3f, 0x15, 0x51, 0xa8, + 0x11, 0x1b, 0x93, 0xca, 0x9c, 0xa0, 0x57, 0x68, + 0x8f, 0xdb, 0x68, 0x53, 0x51, 0x6d, 0x13, 0x22, + 0x12, 0x9b, 0xbd, 0x33, 0xa8, 0x52, 0x40, 0x57, + 0x80, 0x9b, 0x98, 0xef, 0x56, 0x70, 0x11, 0xfa, + 0x36, 0x69, 0x7d, 0x15, 0x48, 0xf9, 0x3b, 0xeb, + 0x42 +}; +static const u8 output58[] __initconst = { + 0xff, 0x3a, 0x74, 0xc3, 0x3e, 0x44, 0x64, 0x4d, + 0x0e, 0x5f, 0x9d, 0xa8, 0xdb, 0xbe, 0x12, 0xef, + 0xba, 0x56, 0x65, 0x50, 0x76, 0xaf, 0xa4, 0x4e, + 0x01, 0xc1, 0xd3, 0x31, 0x14, 0xe2, 0xbe, 0x7b, + 0xa5, 0x67, 0xb4, 0xe3, 0x68, 0x40, 0x9c, 0xb0, + 0xb1, 0x78, 0xef, 0x49, 0x03, 0x0f, 0x2d, 0x56, + 0xb4, 0x37, 0xdb, 0xbc, 0x2d, 0x68, 0x1c, 0x3c, + 0xf1 +}; +static const u8 key58[] __initconst = { + 0x7e, 0xf1, 0x7c, 0x20, 0x65, 0xed, 0xcd, 0xd7, + 0x57, 0xe8, 0xdb, 0x90, 0x87, 0xdb, 0x5f, 0x63, + 0x3d, 0xdd, 0xb8, 0x2b, 0x75, 0x8e, 0x04, 0xb5, + 0xf4, 0x12, 0x79, 0xa9, 0x4d, 0x42, 0x16, 0x7f +}; +enum { nonce58 = 0x92838183f80d2f7fULL }; + +static const u8 input59[] __initconst = { + 0x37, 0xf1, 0x9d, 0xdd, 0xd7, 0x08, 0x9f, 0x13, + 0xc5, 0x21, 0x82, 0x75, 0x08, 0x9e, 0x25, 0x16, + 0xb1, 0xd1, 0x71, 0x42, 0x28, 0x63, 0xac, 0x47, + 0x71, 0x54, 0xb1, 0xfc, 0x39, 0xf0, 0x61, 0x4f, + 0x7c, 0x6d, 0x4f, 0xc8, 0x33, 0xef, 0x7e, 0xc8, + 0xc0, 0x97, 0xfc, 0x1a, 0x61, 0xb4, 0x87, 0x6f, + 0xdd, 0x5a, 0x15, 0x7b, 0x1b, 0x95, 0x50, 0x94, + 0x1d, 0xba +}; +static const u8 output59[] __initconst = { + 0x73, 0x67, 0xc5, 0x07, 0xbb, 0x57, 0x79, 0xd5, + 0xc9, 0x04, 0xdd, 0x88, 0xf3, 0x86, 0xe5, 0x70, + 0x49, 0x31, 0xe0, 0xcc, 0x3b, 0x1d, 0xdf, 0xb0, + 0xaf, 0xf4, 0x2d, 0xe0, 0x06, 0x10, 0x91, 0x8d, + 0x1c, 0xcf, 0x31, 0x0b, 0xf6, 0x73, 0xda, 0x1c, + 0xf0, 0x17, 0x52, 0x9e, 0x20, 0x2e, 0x9f, 0x8c, + 0xb3, 0x59, 0xce, 0xd4, 0xd3, 0xc1, 0x81, 0xe9, + 0x11, 0x36 +}; +static const u8 key59[] __initconst = { + 0xbd, 0x07, 0xd0, 0x53, 0x2c, 0xb3, 0xcc, 0x3f, + 0xc4, 0x95, 0xfd, 0xe7, 0x81, 0xb3, 0x29, 0x99, + 0x05, 0x45, 0xd6, 0x95, 0x25, 0x0b, 0x72, 0xd3, + 0xcd, 0xbb, 0x73, 0xf8, 0xfa, 0xc0, 0x9b, 0x7a +}; +enum { nonce59 = 0x4a0db819b0d519e2ULL }; + +static const u8 input60[] __initconst = { + 0x58, 0x4e, 0xdf, 0x94, 0x3c, 0x76, 0x0a, 0x79, + 0x47, 0xf1, 0xbe, 0x88, 0xd3, 0xba, 0x94, 0xd8, + 0xe2, 0x8f, 0xe3, 0x2f, 0x2f, 0x74, 0x82, 0x55, + 0xc3, 0xda, 0xe2, 0x4e, 0x2c, 0x8c, 0x45, 0x1d, + 0x72, 0x8f, 0x54, 0x41, 0xb5, 0xb7, 0x69, 0xe4, + 0xdc, 0xd2, 0x36, 0x21, 0x5c, 0x28, 0x52, 0xf7, + 0x98, 0x8e, 0x72, 0xa7, 0x6d, 0x57, 0xed, 0xdc, + 0x3c, 0xe6, 0x6a +}; +static const u8 output60[] __initconst = { + 0xda, 0xaf, 0xb5, 0xe3, 0x30, 0x65, 0x5c, 0xb1, + 0x48, 0x08, 0x43, 0x7b, 0x9e, 0xd2, 0x6a, 0x62, + 0x56, 0x7c, 0xad, 0xd9, 0xe5, 0xf6, 0x09, 0x71, + 0xcd, 0xe6, 0x05, 0x6b, 0x3f, 0x44, 0x3a, 0x5c, + 0xf6, 0xf8, 0xd7, 0xce, 0x7d, 0xd1, 0xe0, 0x4f, + 0x88, 0x15, 0x04, 0xd8, 0x20, 0xf0, 0x3e, 0xef, + 0xae, 0xa6, 0x27, 0xa3, 0x0e, 0xfc, 0x18, 0x90, + 0x33, 0xcd, 0xd3 +}; +static const u8 key60[] __initconst = { + 0xbf, 0xfd, 0x25, 0xb5, 0xb2, 0xfc, 0x78, 0x0c, + 0x8e, 0xb9, 0x57, 0x2f, 0x26, 0x4a, 0x7e, 0x71, + 0xcc, 0xf2, 0xe0, 0xfd, 0x24, 0x11, 0x20, 0x23, + 0x57, 0x00, 0xff, 0x80, 0x11, 0x0c, 0x1e, 0xff +}; +enum { nonce60 = 0xf18df56fdb7954adULL }; + +static const u8 input61[] __initconst = { + 0xb0, 0xf3, 0x06, 0xbc, 0x22, 0xae, 0x49, 0x40, + 0xae, 0xff, 0x1b, 0x31, 0xa7, 0x98, 0xab, 0x1d, + 0xe7, 0x40, 0x23, 0x18, 0x4f, 0xab, 0x8e, 0x93, + 0x82, 0xf4, 0x56, 0x61, 0xfd, 0x2b, 0xcf, 0xa7, + 0xc4, 0xb4, 0x0a, 0xf4, 0xcb, 0xc7, 0x8c, 0x40, + 0x57, 0xac, 0x0b, 0x3e, 0x2a, 0x0a, 0x67, 0x83, + 0x50, 0xbf, 0xec, 0xb0, 0xc7, 0xf1, 0x32, 0x26, + 0x98, 0x80, 0x33, 0xb4 +}; +static const u8 output61[] __initconst = { + 0x9d, 0x23, 0x0e, 0xff, 0xcc, 0x7c, 0xd5, 0xcf, + 0x1a, 0xb8, 0x59, 0x1e, 0x92, 0xfd, 0x7f, 0xca, + 0xca, 0x3c, 0x18, 0x81, 0xde, 0xfa, 0x59, 0xc8, + 0x6f, 0x9c, 0x24, 0x3f, 0x3a, 0xe6, 0x0b, 0xb4, + 0x34, 0x48, 0x69, 0xfc, 0xb6, 0xea, 0xb2, 0xde, + 0x9f, 0xfd, 0x92, 0x36, 0x18, 0x98, 0x99, 0xaa, + 0x65, 0xe2, 0xea, 0xf4, 0xb1, 0x47, 0x8e, 0xb0, + 0xe7, 0xd4, 0x7a, 0x2c +}; +static const u8 key61[] __initconst = { + 0xd7, 0xfd, 0x9b, 0xbd, 0x8f, 0x65, 0x0d, 0x00, + 0xca, 0xa1, 0x6c, 0x85, 0x85, 0xa4, 0x6d, 0xf1, + 0xb1, 0x68, 0x0c, 0x8b, 0x5d, 0x37, 0x72, 0xd0, + 0xd8, 0xd2, 0x25, 0xab, 0x9f, 0x7b, 0x7d, 0x95 +}; +enum { nonce61 = 0xd82caf72a9c4864fULL }; + +static const u8 input62[] __initconst = { + 0x10, 0x77, 0xf3, 0x2f, 0xc2, 0x50, 0xd6, 0x0c, + 0xba, 0xa8, 0x8d, 0xce, 0x0d, 0x58, 0x9e, 0x87, + 0xb1, 0x59, 0x66, 0x0a, 0x4a, 0xb3, 0xd8, 0xca, + 0x0a, 0x6b, 0xf8, 0xc6, 0x2b, 0x3f, 0x8e, 0x09, + 0xe0, 0x0a, 0x15, 0x85, 0xfe, 0xaa, 0xc6, 0xbd, + 0x30, 0xef, 0xe4, 0x10, 0x78, 0x03, 0xc1, 0xc7, + 0x8a, 0xd9, 0xde, 0x0b, 0x51, 0x07, 0xc4, 0x7b, + 0xe2, 0x2e, 0x36, 0x3a, 0xc2 +}; +static const u8 output62[] __initconst = { + 0xa0, 0x0c, 0xfc, 0xc1, 0xf6, 0xaf, 0xc2, 0xb8, + 0x5c, 0xef, 0x6e, 0xf3, 0xce, 0x15, 0x48, 0x05, + 0xb5, 0x78, 0x49, 0x51, 0x1f, 0x9d, 0xf4, 0xbf, + 0x2f, 0x53, 0xa2, 0xd1, 0x15, 0x20, 0x82, 0x6b, + 0xd2, 0x22, 0x6c, 0x4e, 0x14, 0x87, 0xe3, 0xd7, + 0x49, 0x45, 0x84, 0xdb, 0x5f, 0x68, 0x60, 0xc4, + 0xb3, 0xe6, 0x3f, 0xd1, 0xfc, 0xa5, 0x73, 0xf3, + 0xfc, 0xbb, 0xbe, 0xc8, 0x9d +}; +static const u8 key62[] __initconst = { + 0x6e, 0xc9, 0xaf, 0xce, 0x35, 0xb9, 0x86, 0xd1, + 0xce, 0x5f, 0xd9, 0xbb, 0xd5, 0x1f, 0x7c, 0xcd, + 0xfe, 0x19, 0xaa, 0x3d, 0xea, 0x64, 0xc1, 0x28, + 0x40, 0xba, 0xa1, 0x28, 0xcd, 0x40, 0xb6, 0xf2 +}; +enum { nonce62 = 0xa1c0c265f900cde8ULL }; + +static const u8 input63[] __initconst = { + 0x7a, 0x70, 0x21, 0x2c, 0xef, 0xa6, 0x36, 0xd4, + 0xe0, 0xab, 0x8c, 0x25, 0x73, 0x34, 0xc8, 0x94, + 0x6c, 0x81, 0xcb, 0x19, 0x8d, 0x5a, 0x49, 0xaa, + 0x6f, 0xba, 0x83, 0x72, 0x02, 0x5e, 0xf5, 0x89, + 0xce, 0x79, 0x7e, 0x13, 0x3d, 0x5b, 0x98, 0x60, + 0x5d, 0xd9, 0xfb, 0x15, 0x93, 0x4c, 0xf3, 0x51, + 0x49, 0x55, 0xd1, 0x58, 0xdd, 0x7e, 0x6d, 0xfe, + 0xdd, 0x84, 0x23, 0x05, 0xba, 0xe9 +}; +static const u8 output63[] __initconst = { + 0x20, 0xb3, 0x5c, 0x03, 0x03, 0x78, 0x17, 0xfc, + 0x3b, 0x35, 0x30, 0x9a, 0x00, 0x18, 0xf5, 0xc5, + 0x06, 0x53, 0xf5, 0x04, 0x24, 0x9d, 0xd1, 0xb2, + 0xac, 0x5a, 0xb6, 0x2a, 0xa5, 0xda, 0x50, 0x00, + 0xec, 0xff, 0xa0, 0x7a, 0x14, 0x7b, 0xe4, 0x6b, + 0x63, 0xe8, 0x66, 0x86, 0x34, 0xfd, 0x74, 0x44, + 0xa2, 0x50, 0x97, 0x0d, 0xdc, 0xc3, 0x84, 0xf8, + 0x71, 0x02, 0x31, 0x95, 0xed, 0x54 +}; +static const u8 key63[] __initconst = { + 0x7d, 0x64, 0xb4, 0x12, 0x81, 0xe4, 0xe6, 0x8f, + 0xcc, 0xe7, 0xd1, 0x1f, 0x70, 0x20, 0xfd, 0xb8, + 0x3a, 0x7d, 0xa6, 0x53, 0x65, 0x30, 0x5d, 0xe3, + 0x1a, 0x44, 0xbe, 0x62, 0xed, 0x90, 0xc4, 0xd1 +}; +enum { nonce63 = 0xe8e849596c942276ULL }; + +static const u8 input64[] __initconst = { + 0x84, 0xf8, 0xda, 0x87, 0x23, 0x39, 0x60, 0xcf, + 0xc5, 0x50, 0x7e, 0xc5, 0x47, 0x29, 0x7c, 0x05, + 0xc2, 0xb4, 0xf4, 0xb2, 0xec, 0x5d, 0x48, 0x36, + 0xbf, 0xfc, 0x06, 0x8c, 0xf2, 0x0e, 0x88, 0xe7, + 0xc9, 0xc5, 0xa4, 0xa2, 0x83, 0x20, 0xa1, 0x6f, + 0x37, 0xe5, 0x2d, 0xa1, 0x72, 0xa1, 0x19, 0xef, + 0x05, 0x42, 0x08, 0xf2, 0x57, 0x47, 0x31, 0x1e, + 0x17, 0x76, 0x13, 0xd3, 0xcc, 0x75, 0x2c +}; +static const u8 output64[] __initconst = { + 0xcb, 0xec, 0x90, 0x88, 0xeb, 0x31, 0x69, 0x20, + 0xa6, 0xdc, 0xff, 0x76, 0x98, 0xb0, 0x24, 0x49, + 0x7b, 0x20, 0xd9, 0xd1, 0x1b, 0xe3, 0x61, 0xdc, + 0xcf, 0x51, 0xf6, 0x70, 0x72, 0x33, 0x28, 0x94, + 0xac, 0x73, 0x18, 0xcf, 0x93, 0xfd, 0xca, 0x08, + 0x0d, 0xa2, 0xb9, 0x57, 0x1e, 0x51, 0xb6, 0x07, + 0x5c, 0xc1, 0x13, 0x64, 0x1d, 0x18, 0x6f, 0xe6, + 0x0b, 0xb7, 0x14, 0x03, 0x43, 0xb6, 0xaf +}; +static const u8 key64[] __initconst = { + 0xbf, 0x82, 0x65, 0xe4, 0x50, 0xf9, 0x5e, 0xea, + 0x28, 0x91, 0xd1, 0xd2, 0x17, 0x7c, 0x13, 0x7e, + 0xf5, 0xd5, 0x6b, 0x06, 0x1c, 0x20, 0xc2, 0x82, + 0xa1, 0x7a, 0xa2, 0x14, 0xa1, 0xb0, 0x54, 0x58 +}; +enum { nonce64 = 0xe57c5095aa5723c9ULL }; + +static const u8 input65[] __initconst = { + 0x1c, 0xfb, 0xd3, 0x3f, 0x85, 0xd7, 0xba, 0x7b, + 0xae, 0xb1, 0xa5, 0xd2, 0xe5, 0x40, 0xce, 0x4d, + 0x3e, 0xab, 0x17, 0x9d, 0x7d, 0x9f, 0x03, 0x98, + 0x3f, 0x9f, 0xc8, 0xdd, 0x36, 0x17, 0x43, 0x5c, + 0x34, 0xd1, 0x23, 0xe0, 0x77, 0xbf, 0x35, 0x5d, + 0x8f, 0xb1, 0xcb, 0x82, 0xbb, 0x39, 0x69, 0xd8, + 0x90, 0x45, 0x37, 0xfd, 0x98, 0x25, 0xf7, 0x5b, + 0xce, 0x06, 0x43, 0xba, 0x61, 0xa8, 0x47, 0xb9 +}; +static const u8 output65[] __initconst = { + 0x73, 0xa5, 0x68, 0xab, 0x8b, 0xa5, 0xc3, 0x7e, + 0x74, 0xf8, 0x9d, 0xf5, 0x93, 0x6e, 0xf2, 0x71, + 0x6d, 0xde, 0x82, 0xc5, 0x40, 0xa0, 0x46, 0xb3, + 0x9a, 0x78, 0xa8, 0xf7, 0xdf, 0xb1, 0xc3, 0xdd, + 0x8d, 0x90, 0x00, 0x68, 0x21, 0x48, 0xe8, 0xba, + 0x56, 0x9f, 0x8f, 0xe7, 0xa4, 0x4d, 0x36, 0x55, + 0xd0, 0x34, 0x99, 0xa6, 0x1c, 0x4c, 0xc1, 0xe2, + 0x65, 0x98, 0x14, 0x8e, 0x6a, 0x05, 0xb1, 0x2b +}; +static const u8 key65[] __initconst = { + 0xbd, 0x5c, 0x8a, 0xb0, 0x11, 0x29, 0xf3, 0x00, + 0x7a, 0x78, 0x32, 0x63, 0x34, 0x00, 0xe6, 0x7d, + 0x30, 0x54, 0xde, 0x37, 0xda, 0xc2, 0xc4, 0x3d, + 0x92, 0x6b, 0x4c, 0xc2, 0x92, 0xe9, 0x9e, 0x2a +}; +enum { nonce65 = 0xf654a3031de746f2ULL }; + +static const u8 input66[] __initconst = { + 0x4b, 0x27, 0x30, 0x8f, 0x28, 0xd8, 0x60, 0x46, + 0x39, 0x06, 0x49, 0xea, 0x1b, 0x71, 0x26, 0xe0, + 0x99, 0x2b, 0xd4, 0x8f, 0x64, 0x64, 0xcd, 0xac, + 0x1d, 0x78, 0x88, 0x90, 0xe1, 0x5c, 0x24, 0x4b, + 0xdc, 0x2d, 0xb7, 0xee, 0x3a, 0xe6, 0x86, 0x2c, + 0x21, 0xe4, 0x2b, 0xfc, 0xe8, 0x19, 0xca, 0x65, + 0xe7, 0xdd, 0x6f, 0x52, 0xb3, 0x11, 0xe1, 0xe2, + 0xbf, 0xe8, 0x70, 0xe3, 0x0d, 0x45, 0xb8, 0xa5, + 0x20, 0xb7, 0xb5, 0xaf, 0xff, 0x08, 0xcf, 0x23, + 0x65, 0xdf, 0x8d, 0xc3, 0x31, 0xf3, 0x1e, 0x6a, + 0x58, 0x8d, 0xcc, 0x45, 0x16, 0x86, 0x1f, 0x31, + 0x5c, 0x27, 0xcd, 0xc8, 0x6b, 0x19, 0x1e, 0xec, + 0x44, 0x75, 0x63, 0x97, 0xfd, 0x79, 0xf6, 0x62, + 0xc5, 0xba, 0x17, 0xc7, 0xab, 0x8f, 0xbb, 0xed, + 0x85, 0x2a, 0x98, 0x79, 0x21, 0xec, 0x6e, 0x4d, + 0xdc, 0xfa, 0x72, 0x52, 0xba, 0xc8, 0x4c +}; +static const u8 output66[] __initconst = { + 0x76, 0x5b, 0x2c, 0xa7, 0x62, 0xb9, 0x08, 0x4a, + 0xc6, 0x4a, 0x92, 0xc3, 0xbb, 0x10, 0xb3, 0xee, + 0xff, 0xb9, 0x07, 0xc7, 0x27, 0xcb, 0x1e, 0xcf, + 0x58, 0x6f, 0xa1, 0x64, 0xe8, 0xf1, 0x4e, 0xe1, + 0xef, 0x18, 0x96, 0xab, 0x97, 0x28, 0xd1, 0x7c, + 0x71, 0x6c, 0xd1, 0xe2, 0xfa, 0xd9, 0x75, 0xcb, + 0xeb, 0xea, 0x0c, 0x86, 0x82, 0xd8, 0xf4, 0xcc, + 0xea, 0xa3, 0x00, 0xfa, 0x82, 0xd2, 0xcd, 0xcb, + 0xdb, 0x63, 0x28, 0xe2, 0x82, 0xe9, 0x01, 0xed, + 0x31, 0xe6, 0x71, 0x45, 0x08, 0x89, 0x8a, 0x23, + 0xa8, 0xb5, 0xc2, 0xe2, 0x9f, 0xe9, 0xb8, 0x9a, + 0xc4, 0x79, 0x6d, 0x71, 0x52, 0x61, 0x74, 0x6c, + 0x1b, 0xd7, 0x65, 0x6d, 0x03, 0xc4, 0x1a, 0xc0, + 0x50, 0xba, 0xd6, 0xc9, 0x43, 0x50, 0xbe, 0x09, + 0x09, 0x8a, 0xdb, 0xaa, 0x76, 0x4e, 0x3b, 0x61, + 0x3c, 0x7c, 0x44, 0xe7, 0xdb, 0x10, 0xa7 +}; +static const u8 key66[] __initconst = { + 0x88, 0xdf, 0xca, 0x68, 0xaf, 0x4f, 0xb3, 0xfd, + 0x6e, 0xa7, 0x95, 0x35, 0x8a, 0xe8, 0x37, 0xe8, + 0xc8, 0x55, 0xa2, 0x2a, 0x6d, 0x77, 0xf8, 0x93, + 0x7a, 0x41, 0xf3, 0x7b, 0x95, 0xdf, 0x89, 0xf5 +}; +enum { nonce66 = 0x1024b4fdd415cf82ULL }; + +static const u8 input67[] __initconst = { + 0xd4, 0x2e, 0xfa, 0x92, 0xe9, 0x29, 0x68, 0xb7, + 0x54, 0x2c, 0xf7, 0xa4, 0x2d, 0xb7, 0x50, 0xb5, + 0xc5, 0xb2, 0x9d, 0x17, 0x5e, 0x0a, 0xca, 0x37, + 0xbf, 0x60, 0xae, 0xd2, 0x98, 0xe9, 0xfa, 0x59, + 0x67, 0x62, 0xe6, 0x43, 0x0c, 0x77, 0x80, 0x82, + 0x33, 0x61, 0xa3, 0xff, 0xc1, 0xa0, 0x8f, 0x56, + 0xbc, 0xec, 0x65, 0x43, 0x88, 0xa5, 0xff, 0x51, + 0x64, 0x30, 0xee, 0x34, 0xb7, 0x5c, 0x28, 0x68, + 0xc3, 0x52, 0xd2, 0xac, 0x78, 0x2a, 0xa6, 0x10, + 0xb8, 0xb2, 0x4c, 0x80, 0x4f, 0x99, 0xb2, 0x36, + 0x94, 0x8f, 0x66, 0xcb, 0xa1, 0x91, 0xed, 0x06, + 0x42, 0x6d, 0xc1, 0xae, 0x55, 0x93, 0xdd, 0x93, + 0x9e, 0x88, 0x34, 0x7f, 0x98, 0xeb, 0xbe, 0x61, + 0xf9, 0xa9, 0x0f, 0xd9, 0xc4, 0x87, 0xd5, 0xef, + 0xcc, 0x71, 0x8c, 0x0e, 0xce, 0xad, 0x02, 0xcf, + 0xa2, 0x61, 0xdf, 0xb1, 0xfe, 0x3b, 0xdc, 0xc0, + 0x58, 0xb5, 0x71, 0xa1, 0x83, 0xc9, 0xb4, 0xaf, + 0x9d, 0x54, 0x12, 0xcd, 0xea, 0x06, 0xd6, 0x4e, + 0xe5, 0x27, 0x0c, 0xc3, 0xbb, 0xa8, 0x0a, 0x81, + 0x75, 0xc3, 0xc9, 0xd4, 0x35, 0x3e, 0x53, 0x9f, + 0xaa, 0x20, 0xc0, 0x68, 0x39, 0x2c, 0x96, 0x39, + 0x53, 0x81, 0xda, 0x07, 0x0f, 0x44, 0xa5, 0x47, + 0x0e, 0xb3, 0x87, 0x0d, 0x1b, 0xc1, 0xe5, 0x41, + 0x35, 0x12, 0x58, 0x96, 0x69, 0x8a, 0x1a, 0xa3, + 0x9d, 0x3d, 0xd4, 0xb1, 0x8e, 0x1f, 0x96, 0x87, + 0xda, 0xd3, 0x19, 0xe2, 0xb1, 0x3a, 0x19, 0x74, + 0xa0, 0x00, 0x9f, 0x4d, 0xbc, 0xcb, 0x0c, 0xe9, + 0xec, 0x10, 0xdf, 0x2a, 0x88, 0xdc, 0x30, 0x51, + 0x46, 0x56, 0x53, 0x98, 0x6a, 0x26, 0x14, 0x05, + 0x54, 0x81, 0x55, 0x0b, 0x3c, 0x85, 0xdd, 0x33, + 0x81, 0x11, 0x29, 0x82, 0x46, 0x35, 0xe1, 0xdb, + 0x59, 0x7b +}; +static const u8 output67[] __initconst = { + 0x64, 0x6c, 0xda, 0x7f, 0xd4, 0xa9, 0x2a, 0x5e, + 0x22, 0xae, 0x8d, 0x67, 0xdb, 0xee, 0xfd, 0xd0, + 0x44, 0x80, 0x17, 0xb2, 0xe3, 0x87, 0xad, 0x57, + 0x15, 0xcb, 0x88, 0x64, 0xc0, 0xf1, 0x49, 0x3d, + 0xfa, 0xbe, 0xa8, 0x9f, 0x12, 0xc3, 0x57, 0x56, + 0x70, 0xa5, 0xc5, 0x6b, 0xf1, 0xab, 0xd5, 0xde, + 0x77, 0x92, 0x6a, 0x56, 0x03, 0xf5, 0x21, 0x0d, + 0xb6, 0xc4, 0xcc, 0x62, 0x44, 0x3f, 0xb1, 0xc1, + 0x61, 0x41, 0x90, 0xb2, 0xd5, 0xb8, 0xf3, 0x57, + 0xfb, 0xc2, 0x6b, 0x25, 0x58, 0xc8, 0x45, 0x20, + 0x72, 0x29, 0x6f, 0x9d, 0xb5, 0x81, 0x4d, 0x2b, + 0xb2, 0x89, 0x9e, 0x91, 0x53, 0x97, 0x1c, 0xd9, + 0x3d, 0x79, 0xdc, 0x14, 0xae, 0x01, 0x73, 0x75, + 0xf0, 0xca, 0xd5, 0xab, 0x62, 0x5c, 0x7a, 0x7d, + 0x3f, 0xfe, 0x22, 0x7d, 0xee, 0xe2, 0xcb, 0x76, + 0x55, 0xec, 0x06, 0xdd, 0x41, 0x47, 0x18, 0x62, + 0x1d, 0x57, 0xd0, 0xd6, 0xb6, 0x0f, 0x4b, 0xfc, + 0x79, 0x19, 0xf4, 0xd6, 0x37, 0x86, 0x18, 0x1f, + 0x98, 0x0d, 0x9e, 0x15, 0x2d, 0xb6, 0x9a, 0x8a, + 0x8c, 0x80, 0x22, 0x2f, 0x82, 0xc4, 0xc7, 0x36, + 0xfa, 0xfa, 0x07, 0xbd, 0xc2, 0x2a, 0xe2, 0xea, + 0x93, 0xc8, 0xb2, 0x90, 0x33, 0xf2, 0xee, 0x4b, + 0x1b, 0xf4, 0x37, 0x92, 0x13, 0xbb, 0xe2, 0xce, + 0xe3, 0x03, 0xcf, 0x07, 0x94, 0xab, 0x9a, 0xc9, + 0xff, 0x83, 0x69, 0x3a, 0xda, 0x2c, 0xd0, 0x47, + 0x3d, 0x6c, 0x1a, 0x60, 0x68, 0x47, 0xb9, 0x36, + 0x52, 0xdd, 0x16, 0xef, 0x6c, 0xbf, 0x54, 0x11, + 0x72, 0x62, 0xce, 0x8c, 0x9d, 0x90, 0xa0, 0x25, + 0x06, 0x92, 0x3e, 0x12, 0x7e, 0x1a, 0x1d, 0xe5, + 0xa2, 0x71, 0xce, 0x1c, 0x4c, 0x6a, 0x7c, 0xdc, + 0x3d, 0xe3, 0x6e, 0x48, 0x9d, 0xb3, 0x64, 0x7d, + 0x78, 0x40 +}; +static const u8 key67[] __initconst = { + 0xa9, 0x20, 0x75, 0x89, 0x7e, 0x37, 0x85, 0x48, + 0xa3, 0xfb, 0x7b, 0xe8, 0x30, 0xa7, 0xe3, 0x6e, + 0xa6, 0xc1, 0x71, 0x17, 0xc1, 0x6c, 0x9b, 0xc2, + 0xde, 0xf0, 0xa7, 0x19, 0xec, 0xce, 0xc6, 0x53 +}; +enum { nonce67 = 0x4adc4d1f968c8a10ULL }; + +static const u8 input68[] __initconst = { + 0x99, 0xae, 0x72, 0xfb, 0x16, 0xe1, 0xf1, 0x59, + 0x43, 0x15, 0x4e, 0x33, 0xa0, 0x95, 0xe7, 0x6c, + 0x74, 0x24, 0x31, 0xca, 0x3b, 0x2e, 0xeb, 0xd7, + 0x11, 0xd8, 0xe0, 0x56, 0x92, 0x91, 0x61, 0x57, + 0xe2, 0x82, 0x9f, 0x8f, 0x37, 0xf5, 0x3d, 0x24, + 0x92, 0x9d, 0x87, 0x00, 0x8d, 0x89, 0xe0, 0x25, + 0x8b, 0xe4, 0x20, 0x5b, 0x8a, 0x26, 0x2c, 0x61, + 0x78, 0xb0, 0xa6, 0x3e, 0x82, 0x18, 0xcf, 0xdc, + 0x2d, 0x24, 0xdd, 0x81, 0x42, 0xc4, 0x95, 0xf0, + 0x48, 0x60, 0x71, 0xe3, 0xe3, 0xac, 0xec, 0xbe, + 0x98, 0x6b, 0x0c, 0xb5, 0x6a, 0xa9, 0xc8, 0x79, + 0x23, 0x2e, 0x38, 0x0b, 0x72, 0x88, 0x8c, 0xe7, + 0x71, 0x8b, 0x36, 0xe3, 0x58, 0x3d, 0x9c, 0xa0, + 0xa2, 0xea, 0xcf, 0x0c, 0x6a, 0x6c, 0x64, 0xdf, + 0x97, 0x21, 0x8f, 0x93, 0xfb, 0xba, 0xf3, 0x5a, + 0xd7, 0x8f, 0xa6, 0x37, 0x15, 0x50, 0x43, 0x02, + 0x46, 0x7f, 0x93, 0x46, 0x86, 0x31, 0xe2, 0xaa, + 0x24, 0xa8, 0x26, 0xae, 0xe6, 0xc0, 0x05, 0x73, + 0x0b, 0x4f, 0x7e, 0xed, 0x65, 0xeb, 0x56, 0x1e, + 0xb6, 0xb3, 0x0b, 0xc3, 0x0e, 0x31, 0x95, 0xa9, + 0x18, 0x4d, 0xaf, 0x38, 0xd7, 0xec, 0xc6, 0x44, + 0x72, 0x77, 0x4e, 0x25, 0x4b, 0x25, 0xdd, 0x1e, + 0x8c, 0xa2, 0xdf, 0xf6, 0x2a, 0x97, 0x1a, 0x88, + 0x2c, 0x8a, 0x5d, 0xfe, 0xe8, 0xfb, 0x35, 0xe8, + 0x0f, 0x2b, 0x7a, 0x18, 0x69, 0x43, 0x31, 0x1d, + 0x38, 0x6a, 0x62, 0x95, 0x0f, 0x20, 0x4b, 0xbb, + 0x97, 0x3c, 0xe0, 0x64, 0x2f, 0x52, 0xc9, 0x2d, + 0x4d, 0x9d, 0x54, 0x04, 0x3d, 0xc9, 0xea, 0xeb, + 0xd0, 0x86, 0x52, 0xff, 0x42, 0xe1, 0x0d, 0x7a, + 0xad, 0x88, 0xf9, 0x9b, 0x1e, 0x5e, 0x12, 0x27, + 0x95, 0x3e, 0x0c, 0x2c, 0x13, 0x00, 0x6f, 0x8e, + 0x93, 0x69, 0x0e, 0x01, 0x8c, 0xc1, 0xfd, 0xb3 +}; +static const u8 output68[] __initconst = { + 0x26, 0x3e, 0xf2, 0xb1, 0xf5, 0xef, 0x81, 0xa4, + 0xb7, 0x42, 0xd4, 0x26, 0x18, 0x4b, 0xdd, 0x6a, + 0x47, 0x15, 0xcb, 0x0e, 0x57, 0xdb, 0xa7, 0x29, + 0x7e, 0x7b, 0x3f, 0x47, 0x89, 0x57, 0xab, 0xea, + 0x14, 0x7b, 0xcf, 0x37, 0xdb, 0x1c, 0xe1, 0x11, + 0x77, 0xae, 0x2e, 0x4c, 0xd2, 0x08, 0x3f, 0xa6, + 0x62, 0x86, 0xa6, 0xb2, 0x07, 0xd5, 0x3f, 0x9b, + 0xdc, 0xc8, 0x50, 0x4b, 0x7b, 0xb9, 0x06, 0xe6, + 0xeb, 0xac, 0x98, 0x8c, 0x36, 0x0c, 0x1e, 0xb2, + 0xc8, 0xfb, 0x24, 0x60, 0x2c, 0x08, 0x17, 0x26, + 0x5b, 0xc8, 0xc2, 0xdf, 0x9c, 0x73, 0x67, 0x4a, + 0xdb, 0xcf, 0xd5, 0x2c, 0x2b, 0xca, 0x24, 0xcc, + 0xdb, 0xc9, 0xa8, 0xf2, 0x5d, 0x67, 0xdf, 0x5c, + 0x62, 0x0b, 0x58, 0xc0, 0x83, 0xde, 0x8b, 0xf6, + 0x15, 0x0a, 0xd6, 0x32, 0xd8, 0xf5, 0xf2, 0x5f, + 0x33, 0xce, 0x7e, 0xab, 0x76, 0xcd, 0x14, 0x91, + 0xd8, 0x41, 0x90, 0x93, 0xa1, 0xaf, 0xf3, 0x45, + 0x6c, 0x1b, 0x25, 0xbd, 0x48, 0x51, 0x6d, 0x15, + 0x47, 0xe6, 0x23, 0x50, 0x32, 0x69, 0x1e, 0xb5, + 0x94, 0xd3, 0x97, 0xba, 0xd7, 0x37, 0x4a, 0xba, + 0xb9, 0xcd, 0xfb, 0x96, 0x9a, 0x90, 0xe0, 0x37, + 0xf8, 0xdf, 0x91, 0x6c, 0x62, 0x13, 0x19, 0x21, + 0x4b, 0xa9, 0xf1, 0x12, 0x66, 0xe2, 0x74, 0xd7, + 0x81, 0xa0, 0x74, 0x8d, 0x7e, 0x7e, 0xc9, 0xb1, + 0x69, 0x8f, 0xed, 0xb3, 0xf6, 0x97, 0xcd, 0x72, + 0x78, 0x93, 0xd3, 0x54, 0x6b, 0x43, 0xac, 0x29, + 0xb4, 0xbc, 0x7d, 0xa4, 0x26, 0x4b, 0x7b, 0xab, + 0xd6, 0x67, 0x22, 0xff, 0x03, 0x92, 0xb6, 0xd4, + 0x96, 0x94, 0x5a, 0xe5, 0x02, 0x35, 0x77, 0xfa, + 0x3f, 0x54, 0x1d, 0xdd, 0x35, 0x39, 0xfe, 0x03, + 0xdd, 0x8e, 0x3c, 0x8c, 0xc2, 0x69, 0x2a, 0xb1, + 0xb7, 0xb3, 0xa1, 0x89, 0x84, 0xea, 0x16, 0xe2 +}; +static const u8 key68[] __initconst = { + 0xd2, 0x49, 0x7f, 0xd7, 0x49, 0x66, 0x0d, 0xb3, + 0x5a, 0x7e, 0x3c, 0xfc, 0x37, 0x83, 0x0e, 0xf7, + 0x96, 0xd8, 0xd6, 0x33, 0x79, 0x2b, 0x84, 0x53, + 0x06, 0xbc, 0x6c, 0x0a, 0x55, 0x84, 0xfe, 0xab +}; +enum { nonce68 = 0x6a6df7ff0a20de06ULL }; + +static const u8 input69[] __initconst = { + 0xf9, 0x18, 0x4c, 0xd2, 0x3f, 0xf7, 0x22, 0xd9, + 0x58, 0xb6, 0x3b, 0x38, 0x69, 0x79, 0xf4, 0x71, + 0x5f, 0x38, 0x52, 0x1f, 0x17, 0x6f, 0x6f, 0xd9, + 0x09, 0x2b, 0xfb, 0x67, 0xdc, 0xc9, 0xe8, 0x4a, + 0x70, 0x9f, 0x2e, 0x3c, 0x06, 0xe5, 0x12, 0x20, + 0x25, 0x29, 0xd0, 0xdc, 0x81, 0xc5, 0xc6, 0x0f, + 0xd2, 0xa8, 0x81, 0x15, 0x98, 0xb2, 0x71, 0x5a, + 0x9a, 0xe9, 0xfb, 0xaf, 0x0e, 0x5f, 0x8a, 0xf3, + 0x16, 0x4a, 0x47, 0xf2, 0x5c, 0xbf, 0xda, 0x52, + 0x9a, 0xa6, 0x36, 0xfd, 0xc6, 0xf7, 0x66, 0x00, + 0xcc, 0x6c, 0xd4, 0xb3, 0x07, 0x6d, 0xeb, 0xfe, + 0x92, 0x71, 0x25, 0xd0, 0xcf, 0x9c, 0xe8, 0x65, + 0x45, 0x10, 0xcf, 0x62, 0x74, 0x7d, 0xf2, 0x1b, + 0x57, 0xa0, 0xf1, 0x6b, 0xa4, 0xd5, 0xfa, 0x12, + 0x27, 0x5a, 0xf7, 0x99, 0xfc, 0xca, 0xf3, 0xb8, + 0x2c, 0x8b, 0xba, 0x28, 0x74, 0xde, 0x8f, 0x78, + 0xa2, 0x8c, 0xaf, 0x89, 0x4b, 0x05, 0xe2, 0xf3, + 0xf8, 0xd2, 0xef, 0xac, 0xa4, 0xc4, 0xe2, 0xe2, + 0x36, 0xbb, 0x5e, 0xae, 0xe6, 0x87, 0x3d, 0x88, + 0x9f, 0xb8, 0x11, 0xbb, 0xcf, 0x57, 0xce, 0xd0, + 0xba, 0x62, 0xf4, 0xf8, 0x9b, 0x95, 0x04, 0xc9, + 0xcf, 0x01, 0xe9, 0xf1, 0xc8, 0xc6, 0x22, 0xa4, + 0xf2, 0x8b, 0x2f, 0x24, 0x0a, 0xf5, 0x6e, 0xb7, + 0xd4, 0x2c, 0xb6, 0xf7, 0x5c, 0x97, 0x61, 0x0b, + 0xd9, 0xb5, 0x06, 0xcd, 0xed, 0x3e, 0x1f, 0xc5, + 0xb2, 0x6c, 0xa3, 0xea, 0xb8, 0xad, 0xa6, 0x42, + 0x88, 0x7a, 0x52, 0xd5, 0x64, 0xba, 0xb5, 0x20, + 0x10, 0xa0, 0x0f, 0x0d, 0xea, 0xef, 0x5a, 0x9b, + 0x27, 0xb8, 0xca, 0x20, 0x19, 0x6d, 0xa8, 0xc4, + 0x46, 0x04, 0xb3, 0xe8, 0xf8, 0x66, 0x1b, 0x0a, + 0xce, 0x76, 0x5d, 0x59, 0x58, 0x05, 0xee, 0x3e, + 0x3c, 0x86, 0x5b, 0x49, 0x1c, 0x72, 0x18, 0x01, + 0x62, 0x92, 0x0f, 0x3e, 0xd1, 0x57, 0x5e, 0x20, + 0x7b, 0xfb, 0x4d, 0x3c, 0xc5, 0x35, 0x43, 0x2f, + 0xb0, 0xc5, 0x7c, 0xe4, 0xa2, 0x84, 0x13, 0x77 +}; +static const u8 output69[] __initconst = { + 0xbb, 0x4a, 0x7f, 0x7c, 0xd5, 0x2f, 0x89, 0x06, + 0xec, 0x20, 0xf1, 0x9a, 0x11, 0x09, 0x14, 0x2e, + 0x17, 0x50, 0xf9, 0xd5, 0xf5, 0x48, 0x7c, 0x7a, + 0x55, 0xc0, 0x57, 0x03, 0xe3, 0xc4, 0xb2, 0xb7, + 0x18, 0x47, 0x95, 0xde, 0xaf, 0x80, 0x06, 0x3c, + 0x5a, 0xf2, 0xc3, 0x53, 0xe3, 0x29, 0x92, 0xf8, + 0xff, 0x64, 0x85, 0xb9, 0xf7, 0xd3, 0x80, 0xd2, + 0x0c, 0x5d, 0x7b, 0x57, 0x0c, 0x51, 0x79, 0x86, + 0xf3, 0x20, 0xd2, 0xb8, 0x6e, 0x0c, 0x5a, 0xce, + 0xeb, 0x88, 0x02, 0x8b, 0x82, 0x1b, 0x7f, 0xf5, + 0xde, 0x7f, 0x48, 0x48, 0xdf, 0xa0, 0x55, 0xc6, + 0x0c, 0x22, 0xa1, 0x80, 0x8d, 0x3b, 0xcb, 0x40, + 0x2d, 0x3d, 0x0b, 0xf2, 0xe0, 0x22, 0x13, 0x99, + 0xe1, 0xa7, 0x27, 0x68, 0x31, 0xe1, 0x24, 0x5d, + 0xd2, 0xee, 0x16, 0xc1, 0xd7, 0xa8, 0x14, 0x19, + 0x23, 0x72, 0x67, 0x27, 0xdc, 0x5e, 0xb9, 0xc7, + 0xd8, 0xe3, 0x55, 0x50, 0x40, 0x98, 0x7b, 0xe7, + 0x34, 0x1c, 0x3b, 0x18, 0x14, 0xd8, 0x62, 0xc1, + 0x93, 0x84, 0xf3, 0x5b, 0xdd, 0x9e, 0x1f, 0x3b, + 0x0b, 0xbc, 0x4e, 0x5b, 0x79, 0xa3, 0xca, 0x74, + 0x2a, 0x98, 0xe8, 0x04, 0x39, 0xef, 0xc6, 0x76, + 0x6d, 0xee, 0x9f, 0x67, 0x5b, 0x59, 0x3a, 0xe5, + 0xf2, 0x3b, 0xca, 0x89, 0xe8, 0x9b, 0x03, 0x3d, + 0x11, 0xd2, 0x4a, 0x70, 0xaf, 0x88, 0xb0, 0x94, + 0x96, 0x26, 0xab, 0x3c, 0xc1, 0xb8, 0xe4, 0xe7, + 0x14, 0x61, 0x64, 0x3a, 0x61, 0x08, 0x0f, 0xa9, + 0xce, 0x64, 0xb2, 0x40, 0xf8, 0x20, 0x3a, 0xa9, + 0x31, 0xbd, 0x7e, 0x16, 0xca, 0xf5, 0x62, 0x0f, + 0x91, 0x9f, 0x8e, 0x1d, 0xa4, 0x77, 0xf3, 0x87, + 0x61, 0xe8, 0x14, 0xde, 0x18, 0x68, 0x4e, 0x9d, + 0x73, 0xcd, 0x8a, 0xe4, 0x80, 0x84, 0x23, 0xaa, + 0x9d, 0x64, 0x1c, 0x80, 0x41, 0xca, 0x82, 0x40, + 0x94, 0x55, 0xe3, 0x28, 0xa1, 0x97, 0x71, 0xba, + 0xf2, 0x2c, 0x39, 0x62, 0x29, 0x56, 0xd0, 0xff, + 0xb2, 0x82, 0x20, 0x59, 0x1f, 0xc3, 0x64, 0x57 +}; +static const u8 key69[] __initconst = { + 0x19, 0x09, 0xe9, 0x7c, 0xd9, 0x02, 0x4a, 0x0c, + 0x52, 0x25, 0xad, 0x5c, 0x2e, 0x8d, 0x86, 0x10, + 0x85, 0x2b, 0xba, 0xa4, 0x44, 0x5b, 0x39, 0x3e, + 0x18, 0xaa, 0xce, 0x0e, 0xe2, 0x69, 0x3c, 0xcf +}; +enum { nonce69 = 0xdb925a1948f0f060ULL }; + +static const u8 input70[] __initconst = { + 0x10, 0xe7, 0x83, 0xcf, 0x42, 0x9f, 0xf2, 0x41, + 0xc7, 0xe4, 0xdb, 0xf9, 0xa3, 0x02, 0x1d, 0x8d, + 0x50, 0x81, 0x2c, 0x6b, 0x92, 0xe0, 0x4e, 0xea, + 0x26, 0x83, 0x2a, 0xd0, 0x31, 0xf1, 0x23, 0xf3, + 0x0e, 0x88, 0x14, 0x31, 0xf9, 0x01, 0x63, 0x59, + 0x21, 0xd1, 0x8b, 0xdd, 0x06, 0xd0, 0xc6, 0xab, + 0x91, 0x71, 0x82, 0x4d, 0xd4, 0x62, 0x37, 0x17, + 0xf9, 0x50, 0xf9, 0xb5, 0x74, 0xce, 0x39, 0x80, + 0x80, 0x78, 0xf8, 0xdc, 0x1c, 0xdb, 0x7c, 0x3d, + 0xd4, 0x86, 0x31, 0x00, 0x75, 0x7b, 0xd1, 0x42, + 0x9f, 0x1b, 0x97, 0x88, 0x0e, 0x14, 0x0e, 0x1e, + 0x7d, 0x7b, 0xc4, 0xd2, 0xf3, 0xc1, 0x6d, 0x17, + 0x5d, 0xc4, 0x75, 0x54, 0x0f, 0x38, 0x65, 0x89, + 0xd8, 0x7d, 0xab, 0xc9, 0xa7, 0x0a, 0x21, 0x0b, + 0x37, 0x12, 0x05, 0x07, 0xb5, 0x68, 0x32, 0x32, + 0xb9, 0xf8, 0x97, 0x17, 0x03, 0xed, 0x51, 0x8f, + 0x3d, 0x5a, 0xd0, 0x12, 0x01, 0x6e, 0x2e, 0x91, + 0x1c, 0xbe, 0x6b, 0xa3, 0xcc, 0x75, 0x62, 0x06, + 0x8e, 0x65, 0xbb, 0xe2, 0x29, 0x71, 0x4b, 0x89, + 0x6a, 0x9d, 0x85, 0x8c, 0x8c, 0xdf, 0x94, 0x95, + 0x23, 0x66, 0xf8, 0x92, 0xee, 0x56, 0xeb, 0xb3, + 0xeb, 0xd2, 0x4a, 0x3b, 0x77, 0x8a, 0x6e, 0xf6, + 0xca, 0xd2, 0x34, 0x00, 0xde, 0xbe, 0x1d, 0x7a, + 0x73, 0xef, 0x2b, 0x80, 0x56, 0x16, 0x29, 0xbf, + 0x6e, 0x33, 0xed, 0x0d, 0xe2, 0x02, 0x60, 0x74, + 0xe9, 0x0a, 0xbc, 0xd1, 0xc5, 0xe8, 0x53, 0x02, + 0x79, 0x0f, 0x25, 0x0c, 0xef, 0xab, 0xd3, 0xbc, + 0xb7, 0xfc, 0xf3, 0xb0, 0x34, 0xd1, 0x07, 0xd2, + 0x5a, 0x31, 0x1f, 0xec, 0x1f, 0x87, 0xed, 0xdd, + 0x6a, 0xc1, 0xe8, 0xb3, 0x25, 0x4c, 0xc6, 0x9b, + 0x91, 0x73, 0xec, 0x06, 0x73, 0x9e, 0x57, 0x65, + 0x32, 0x75, 0x11, 0x74, 0x6e, 0xa4, 0x7d, 0x0d, + 0x74, 0x9f, 0x51, 0x10, 0x10, 0x47, 0xc9, 0x71, + 0x6e, 0x97, 0xae, 0x44, 0x41, 0xef, 0x98, 0x78, + 0xf4, 0xc5, 0xbd, 0x5e, 0x00, 0xe5, 0xfd, 0xe2, + 0xbe, 0x8c, 0xc2, 0xae, 0xc2, 0xee, 0x59, 0xf6, + 0xcb, 0x20, 0x54, 0x84, 0xc3, 0x31, 0x7e, 0x67, + 0x71, 0xb6, 0x76, 0xbe, 0x81, 0x8f, 0x82, 0xad, + 0x01, 0x8f, 0xc4, 0x00, 0x04, 0x3d, 0x8d, 0x34, + 0xaa, 0xea, 0xc0, 0xea, 0x91, 0x42, 0xb6, 0xb8, + 0x43, 0xf3, 0x17, 0xb2, 0x73, 0x64, 0x82, 0x97, + 0xd5, 0xc9, 0x07, 0x77, 0xb1, 0x26, 0xe2, 0x00, + 0x6a, 0xae, 0x70, 0x0b, 0xbe, 0xe6, 0xb8, 0x42, + 0x81, 0x55, 0xf7, 0xb8, 0x96, 0x41, 0x9d, 0xd4, + 0x2c, 0x27, 0x00, 0xcc, 0x91, 0x28, 0x22, 0xa4, + 0x7b, 0x42, 0x51, 0x9e, 0xd6, 0xec, 0xf3, 0x6b, + 0x00, 0xff, 0x5c, 0xa2, 0xac, 0x47, 0x33, 0x2d, + 0xf8, 0x11, 0x65, 0x5f, 0x4d, 0x79, 0x8b, 0x4f, + 0xad, 0xf0, 0x9d, 0xcd, 0xb9, 0x7b, 0x08, 0xf7, + 0x32, 0x51, 0xfa, 0x39, 0xaa, 0x78, 0x05, 0xb1, + 0xf3, 0x5d, 0xe8, 0x7c, 0x8e, 0x4f, 0xa2, 0xe0, + 0x98, 0x0c, 0xb2, 0xa7, 0xf0, 0x35, 0x8e, 0x70, + 0x7c, 0x82, 0xf3, 0x1b, 0x26, 0x28, 0x12, 0xe5, + 0x23, 0x57, 0xe4, 0xb4, 0x9b, 0x00, 0x39, 0x97, + 0xef, 0x7c, 0x46, 0x9b, 0x34, 0x6b, 0xe7, 0x0e, + 0xa3, 0x2a, 0x18, 0x11, 0x64, 0xc6, 0x7c, 0x8b, + 0x06, 0x02, 0xf5, 0x69, 0x76, 0xf9, 0xaa, 0x09, + 0x5f, 0x68, 0xf8, 0x4a, 0x79, 0x58, 0xec, 0x37, + 0xcf, 0x3a, 0xcc, 0x97, 0x70, 0x1d, 0x3e, 0x52, + 0x18, 0x0a, 0xad, 0x28, 0x5b, 0x3b, 0xe9, 0x03, + 0x84, 0xe9, 0x68, 0x50, 0xce, 0xc4, 0xbc, 0x3e, + 0x21, 0xad, 0x63, 0xfe, 0xc6, 0xfd, 0x6e, 0x69, + 0x84, 0xa9, 0x30, 0xb1, 0x7a, 0xc4, 0x31, 0x10, + 0xc1, 0x1f, 0x6e, 0xeb, 0xa5, 0xa6, 0x01 +}; +static const u8 output70[] __initconst = { + 0x0f, 0x93, 0x2a, 0x20, 0xb3, 0x87, 0x2d, 0xce, + 0xd1, 0x3b, 0x30, 0xfd, 0x06, 0x6d, 0x0a, 0xaa, + 0x3e, 0xc4, 0x29, 0x02, 0x8a, 0xde, 0xa6, 0x4b, + 0x45, 0x1b, 0x4f, 0x25, 0x59, 0xd5, 0x56, 0x6a, + 0x3b, 0x37, 0xbd, 0x3e, 0x47, 0x12, 0x2c, 0x4e, + 0x60, 0x5f, 0x05, 0x75, 0x61, 0x23, 0x05, 0x74, + 0xcb, 0xfc, 0x5a, 0xb3, 0xac, 0x5c, 0x3d, 0xab, + 0x52, 0x5f, 0x05, 0xbc, 0x57, 0xc0, 0x7e, 0xcf, + 0x34, 0x5d, 0x7f, 0x41, 0xa3, 0x17, 0x78, 0xd5, + 0x9f, 0xec, 0x0f, 0x1e, 0xf9, 0xfe, 0xa3, 0xbd, + 0x28, 0xb0, 0xba, 0x4d, 0x84, 0xdb, 0xae, 0x8f, + 0x1d, 0x98, 0xb7, 0xdc, 0xf9, 0xad, 0x55, 0x9c, + 0x89, 0xfe, 0x9b, 0x9c, 0xa9, 0x89, 0xf6, 0x97, + 0x9c, 0x3f, 0x09, 0x3e, 0xc6, 0x02, 0xc2, 0x55, + 0x58, 0x09, 0x54, 0x66, 0xe4, 0x36, 0x81, 0x35, + 0xca, 0x88, 0x17, 0x89, 0x80, 0x24, 0x2b, 0x21, + 0x89, 0xee, 0x45, 0x5a, 0xe7, 0x1f, 0xd5, 0xa5, + 0x16, 0xa4, 0xda, 0x70, 0x7e, 0xe9, 0x4f, 0x24, + 0x61, 0x97, 0xab, 0xa0, 0xe0, 0xe7, 0xb8, 0x5c, + 0x0f, 0x25, 0x17, 0x37, 0x75, 0x12, 0xb5, 0x40, + 0xde, 0x1c, 0x0d, 0x8a, 0x77, 0x62, 0x3c, 0x86, + 0xd9, 0x70, 0x2e, 0x96, 0x30, 0xd2, 0x55, 0xb3, + 0x6b, 0xc3, 0xf2, 0x9c, 0x47, 0xf3, 0x3a, 0x24, + 0x52, 0xc6, 0x38, 0xd8, 0x22, 0xb3, 0x0c, 0xfd, + 0x2f, 0xa3, 0x3c, 0xb5, 0xe8, 0x26, 0xe1, 0xa3, + 0xad, 0xb0, 0x82, 0x17, 0xc1, 0x53, 0xb8, 0x34, + 0x48, 0xee, 0x39, 0xae, 0x51, 0x43, 0xec, 0x82, + 0xce, 0x87, 0xc6, 0x76, 0xb9, 0x76, 0xd3, 0x53, + 0xfe, 0x49, 0x24, 0x7d, 0x02, 0x42, 0x2b, 0x72, + 0xfb, 0xcb, 0xd8, 0x96, 0x02, 0xc6, 0x9a, 0x20, + 0xf3, 0x5a, 0x67, 0xe8, 0x13, 0xf8, 0xb2, 0xcb, + 0xa2, 0xec, 0x18, 0x20, 0x4a, 0xb0, 0x73, 0x53, + 0x21, 0xb0, 0x77, 0x53, 0xd8, 0x76, 0xa1, 0x30, + 0x17, 0x72, 0x2e, 0x33, 0x5f, 0x33, 0x6b, 0x28, + 0xfb, 0xb0, 0xf4, 0xec, 0x8e, 0xed, 0x20, 0x7d, + 0x57, 0x8c, 0x74, 0x28, 0x64, 0x8b, 0xeb, 0x59, + 0x38, 0x3f, 0xe7, 0x83, 0x2e, 0xe5, 0x64, 0x4d, + 0x5c, 0x1f, 0xe1, 0x3b, 0xd9, 0x84, 0xdb, 0xc9, + 0xec, 0xd8, 0xc1, 0x7c, 0x1f, 0x1b, 0x68, 0x35, + 0xc6, 0x34, 0x10, 0xef, 0x19, 0xc9, 0x0a, 0xd6, + 0x43, 0x7f, 0xa6, 0xcb, 0x9d, 0xf4, 0xf0, 0x16, + 0xb1, 0xb1, 0x96, 0x64, 0xec, 0x8d, 0x22, 0x4c, + 0x4b, 0xe8, 0x1a, 0xba, 0x6f, 0xb7, 0xfc, 0xa5, + 0x69, 0x3e, 0xad, 0x78, 0x79, 0x19, 0xb5, 0x04, + 0x69, 0xe5, 0x3f, 0xff, 0x60, 0x8c, 0xda, 0x0b, + 0x7b, 0xf7, 0xe7, 0xe6, 0x29, 0x3a, 0x85, 0xba, + 0xb5, 0xb0, 0x35, 0xbd, 0x38, 0xce, 0x34, 0x5e, + 0xf2, 0xdc, 0xd1, 0x8f, 0xc3, 0x03, 0x24, 0xa2, + 0x03, 0xf7, 0x4e, 0x49, 0x5b, 0xcf, 0x6d, 0xb0, + 0xeb, 0xe3, 0x30, 0x28, 0xd5, 0x5b, 0x82, 0x5f, + 0xe4, 0x7c, 0x1e, 0xec, 0xd2, 0x39, 0xf9, 0x6f, + 0x2e, 0xb3, 0xcd, 0x01, 0xb1, 0x67, 0xaa, 0xea, + 0xaa, 0xb3, 0x63, 0xaf, 0xd9, 0xb2, 0x1f, 0xba, + 0x05, 0x20, 0xeb, 0x19, 0x32, 0xf0, 0x6c, 0x3f, + 0x40, 0xcc, 0x93, 0xb3, 0xd8, 0x25, 0xa6, 0xe4, + 0xce, 0xd7, 0x7e, 0x48, 0x99, 0x65, 0x7f, 0x86, + 0xc5, 0xd4, 0x79, 0x6b, 0xab, 0x43, 0xb8, 0x6b, + 0xf1, 0x2f, 0xea, 0x4c, 0x5e, 0xf0, 0x3b, 0xb4, + 0xb8, 0xb0, 0x94, 0x0c, 0x6b, 0xe7, 0x22, 0x93, + 0xaa, 0x01, 0xcb, 0xf1, 0x11, 0x60, 0xf6, 0x69, + 0xcf, 0x14, 0xde, 0xfb, 0x90, 0x05, 0x27, 0x0c, + 0x1a, 0x9e, 0xf0, 0xb4, 0xc6, 0xa1, 0xe8, 0xdd, + 0xd0, 0x4c, 0x25, 0x4f, 0x9c, 0xb7, 0xb1, 0xb0, + 0x21, 0xdb, 0x87, 0x09, 0x03, 0xf2, 0xb3 +}; +static const u8 key70[] __initconst = { + 0x3b, 0x5b, 0x59, 0x36, 0x44, 0xd1, 0xba, 0x71, + 0x55, 0x87, 0x4d, 0x62, 0x3d, 0xc2, 0xfc, 0xaa, + 0x3f, 0x4e, 0x1a, 0xe4, 0xca, 0x09, 0xfc, 0x6a, + 0xb2, 0xd6, 0x5d, 0x79, 0xf9, 0x1a, 0x91, 0xa7 +}; +enum { nonce70 = 0x3fd6786dd147a85ULL }; + +static const u8 input71[] __initconst = { + 0x18, 0x78, 0xd6, 0x79, 0xe4, 0x9a, 0x6c, 0x73, + 0x17, 0xd4, 0x05, 0x0f, 0x1e, 0x9f, 0xd9, 0x2b, + 0x86, 0x48, 0x7d, 0xf4, 0xd9, 0x1c, 0x76, 0xfc, + 0x8e, 0x22, 0x34, 0xe1, 0x48, 0x4a, 0x8d, 0x79, + 0xb7, 0xbb, 0x88, 0xab, 0x90, 0xde, 0xc5, 0xb4, + 0xb4, 0xe7, 0x85, 0x49, 0xda, 0x57, 0xeb, 0xc9, + 0xcd, 0x21, 0xfc, 0x45, 0x6e, 0x32, 0x67, 0xf2, + 0x4f, 0xa6, 0x54, 0xe5, 0x20, 0xed, 0xcf, 0xc6, + 0x62, 0x25, 0x8e, 0x00, 0xf8, 0x6b, 0xa2, 0x80, + 0xac, 0x88, 0xa6, 0x59, 0x27, 0x83, 0x95, 0x11, + 0x3f, 0x70, 0x5e, 0x3f, 0x11, 0xfb, 0x26, 0xbf, + 0xe1, 0x48, 0x75, 0xf9, 0x86, 0xbf, 0xa6, 0x5d, + 0x15, 0x61, 0x66, 0xbf, 0x78, 0x8f, 0x6b, 0x9b, + 0xda, 0x98, 0xb7, 0x19, 0xe2, 0xf2, 0xa3, 0x9c, + 0x7c, 0x6a, 0x9a, 0xd8, 0x3d, 0x4c, 0x2c, 0xe1, + 0x09, 0xb4, 0x28, 0x82, 0x4e, 0xab, 0x0c, 0x75, + 0x63, 0xeb, 0xbc, 0xd0, 0x71, 0xa2, 0x73, 0x85, + 0xed, 0x53, 0x7a, 0x3f, 0x68, 0x9f, 0xd0, 0xa9, + 0x00, 0x5a, 0x9e, 0x80, 0x55, 0x00, 0xe6, 0xae, + 0x0c, 0x03, 0x40, 0xed, 0xfc, 0x68, 0x4a, 0xb7, + 0x1e, 0x09, 0x65, 0x30, 0x5a, 0x3d, 0x97, 0x4d, + 0x5e, 0x51, 0x8e, 0xda, 0xc3, 0x55, 0x8c, 0xfb, + 0xcf, 0x83, 0x05, 0x35, 0x0d, 0x08, 0x1b, 0xf3, + 0x3a, 0x57, 0x96, 0xac, 0x58, 0x8b, 0xfa, 0x00, + 0x49, 0x15, 0x78, 0xd2, 0x4b, 0xed, 0xb8, 0x59, + 0x78, 0x9b, 0x7f, 0xaa, 0xfc, 0xe7, 0x46, 0xdc, + 0x7b, 0x34, 0xd0, 0x34, 0xe5, 0x10, 0xff, 0x4d, + 0x5a, 0x4d, 0x60, 0xa7, 0x16, 0x54, 0xc4, 0xfd, + 0xca, 0x5d, 0x68, 0xc7, 0x4a, 0x01, 0x8d, 0x7f, + 0x74, 0x5d, 0xff, 0xb8, 0x37, 0x15, 0x62, 0xfa, + 0x44, 0x45, 0xcf, 0x77, 0x3b, 0x1d, 0xb2, 0xd2, + 0x0d, 0x42, 0x00, 0x39, 0x68, 0x1f, 0xcc, 0x89, + 0x73, 0x5d, 0xa9, 0x2e, 0xfd, 0x58, 0x62, 0xca, + 0x35, 0x8e, 0x70, 0x70, 0xaa, 0x6e, 0x14, 0xe9, + 0xa4, 0xe2, 0x10, 0x66, 0x71, 0xdc, 0x4c, 0xfc, + 0xa9, 0xdc, 0x8f, 0x57, 0x4d, 0xc5, 0xac, 0xd7, + 0xa9, 0xf3, 0xf3, 0xa1, 0xff, 0x62, 0xa0, 0x8f, + 0xe4, 0x96, 0x3e, 0xcb, 0x9f, 0x76, 0x42, 0x39, + 0x1f, 0x24, 0xfd, 0xfd, 0x79, 0xe8, 0x27, 0xdf, + 0xa8, 0xf6, 0x33, 0x8b, 0x31, 0x59, 0x69, 0xcf, + 0x6a, 0xef, 0x89, 0x4d, 0xa7, 0xf6, 0x7e, 0x97, + 0x14, 0xbd, 0xda, 0xdd, 0xb4, 0x84, 0x04, 0x24, + 0xe0, 0x17, 0xe1, 0x0f, 0x1f, 0x8a, 0x6a, 0x71, + 0x74, 0x41, 0xdc, 0x59, 0x5c, 0x8f, 0x01, 0x25, + 0x92, 0xf0, 0x2e, 0x15, 0x62, 0x71, 0x9a, 0x9f, + 0x87, 0xdf, 0x62, 0x49, 0x7f, 0x86, 0x62, 0xfc, + 0x20, 0x84, 0xd7, 0xe3, 0x3a, 0xd9, 0x37, 0x85, + 0xb7, 0x84, 0x5a, 0xf9, 0xed, 0x21, 0x32, 0x94, + 0x3e, 0x04, 0xe7, 0x8c, 0x46, 0x76, 0x21, 0x67, + 0xf6, 0x95, 0x64, 0x92, 0xb7, 0x15, 0xf6, 0xe3, + 0x41, 0x27, 0x9d, 0xd7, 0xe3, 0x79, 0x75, 0x92, + 0xd0, 0xc1, 0xf3, 0x40, 0x92, 0x08, 0xde, 0x90, + 0x22, 0x82, 0xb2, 0x69, 0xae, 0x1a, 0x35, 0x11, + 0x89, 0xc8, 0x06, 0x82, 0x95, 0x23, 0x44, 0x08, + 0x22, 0xf2, 0x71, 0x73, 0x1b, 0x88, 0x11, 0xcf, + 0x1c, 0x7e, 0x8a, 0x2e, 0xdc, 0x79, 0x57, 0xce, + 0x1f, 0xe7, 0x6c, 0x07, 0xd8, 0x06, 0xbe, 0xec, + 0xa3, 0xcf, 0xf9, 0x68, 0xa5, 0xb8, 0xf0, 0xe3, + 0x3f, 0x01, 0x92, 0xda, 0xf1, 0xa0, 0x2d, 0x7b, + 0xab, 0x57, 0x58, 0x2a, 0xaf, 0xab, 0xbd, 0xf2, + 0xe5, 0xaf, 0x7e, 0x1f, 0x46, 0x24, 0x9e, 0x20, + 0x22, 0x0f, 0x84, 0x4c, 0xb7, 0xd8, 0x03, 0xe8, + 0x09, 0x73, 0x6c, 0xc6, 0x9b, 0x90, 0xe0, 0xdb, + 0xf2, 0x71, 0xba, 0xad, 0xb3, 0xec, 0xda, 0x7a +}; +static const u8 output71[] __initconst = { + 0x28, 0xc5, 0x9b, 0x92, 0xf9, 0x21, 0x4f, 0xbb, + 0xef, 0x3b, 0xf0, 0xf5, 0x3a, 0x6d, 0x7f, 0xd6, + 0x6a, 0x8d, 0xa1, 0x01, 0x5c, 0x62, 0x20, 0x8b, + 0x5b, 0x39, 0xd5, 0xd3, 0xc2, 0xf6, 0x9d, 0x5e, + 0xcc, 0xe1, 0xa2, 0x61, 0x16, 0xe2, 0xce, 0xe9, + 0x86, 0xd0, 0xfc, 0xce, 0x9a, 0x28, 0x27, 0xc4, + 0x0c, 0xb9, 0xaa, 0x8d, 0x48, 0xdb, 0xbf, 0x82, + 0x7d, 0xd0, 0x35, 0xc4, 0x06, 0x34, 0xb4, 0x19, + 0x51, 0x73, 0xf4, 0x7a, 0xf4, 0xfd, 0xe9, 0x1d, + 0xdc, 0x0f, 0x7e, 0xf7, 0x96, 0x03, 0xe3, 0xb1, + 0x2e, 0x22, 0x59, 0xb7, 0x6d, 0x1c, 0x97, 0x8c, + 0xd7, 0x31, 0x08, 0x26, 0x4c, 0x6d, 0xc6, 0x14, + 0xa5, 0xeb, 0x45, 0x6a, 0x88, 0xa3, 0xa2, 0x36, + 0xc4, 0x35, 0xb1, 0x5a, 0xa0, 0xad, 0xf7, 0x06, + 0x9b, 0x5d, 0xc1, 0x15, 0xc1, 0xce, 0x0a, 0xb0, + 0x57, 0x2e, 0x3f, 0x6f, 0x0d, 0x10, 0xd9, 0x11, + 0x2c, 0x9c, 0xad, 0x2d, 0xa5, 0x81, 0xfb, 0x4e, + 0x8f, 0xd5, 0x32, 0x4e, 0xaf, 0x5c, 0xc1, 0x86, + 0xde, 0x56, 0x5a, 0x33, 0x29, 0xf7, 0x67, 0xc6, + 0x37, 0x6f, 0xb2, 0x37, 0x4e, 0xd4, 0x69, 0x79, + 0xaf, 0xd5, 0x17, 0x79, 0xe0, 0xba, 0x62, 0xa3, + 0x68, 0xa4, 0x87, 0x93, 0x8d, 0x7e, 0x8f, 0xa3, + 0x9c, 0xef, 0xda, 0xe3, 0xa5, 0x1f, 0xcd, 0x30, + 0xa6, 0x55, 0xac, 0x4c, 0x69, 0x74, 0x02, 0xc7, + 0x5d, 0x95, 0x81, 0x4a, 0x68, 0x11, 0xd3, 0xa9, + 0x98, 0xb1, 0x0b, 0x0d, 0xae, 0x40, 0x86, 0x65, + 0xbf, 0xcc, 0x2d, 0xef, 0x57, 0xca, 0x1f, 0xe4, + 0x34, 0x4e, 0xa6, 0x5e, 0x82, 0x6e, 0x61, 0xad, + 0x0b, 0x3c, 0xf8, 0xeb, 0x01, 0x43, 0x7f, 0x87, + 0xa2, 0xa7, 0x6a, 0xe9, 0x62, 0x23, 0x24, 0x61, + 0xf1, 0xf7, 0x36, 0xdb, 0x10, 0xe5, 0x57, 0x72, + 0x3a, 0xc2, 0xae, 0xcc, 0x75, 0xc7, 0x80, 0x05, + 0x0a, 0x5c, 0x4c, 0x95, 0xda, 0x02, 0x01, 0x14, + 0x06, 0x6b, 0x5c, 0x65, 0xc2, 0xb8, 0x4a, 0xd6, + 0xd3, 0xb4, 0xd8, 0x12, 0x52, 0xb5, 0x60, 0xd3, + 0x8e, 0x5f, 0x5c, 0x76, 0x33, 0x7a, 0x05, 0xe5, + 0xcb, 0xef, 0x4f, 0x89, 0xf1, 0xba, 0x32, 0x6f, + 0x33, 0xcd, 0x15, 0x8d, 0xa3, 0x0c, 0x3f, 0x63, + 0x11, 0xe7, 0x0e, 0xe0, 0x00, 0x01, 0xe9, 0xe8, + 0x8e, 0x36, 0x34, 0x8d, 0x96, 0xb5, 0x03, 0xcf, + 0x55, 0x62, 0x49, 0x7a, 0x34, 0x44, 0xa5, 0xee, + 0x8c, 0x46, 0x06, 0x22, 0xab, 0x1d, 0x53, 0x9c, + 0xa1, 0xf9, 0x67, 0x18, 0x57, 0x89, 0xf9, 0xc2, + 0xd1, 0x7e, 0xbe, 0x36, 0x40, 0xcb, 0xe9, 0x04, + 0xde, 0xb1, 0x3b, 0x29, 0x52, 0xc5, 0x9a, 0xb5, + 0xa2, 0x7c, 0x7b, 0xfe, 0xe5, 0x92, 0x73, 0xea, + 0xea, 0x7b, 0xba, 0x0a, 0x8c, 0x88, 0x15, 0xe6, + 0x53, 0xbf, 0x1c, 0x33, 0xf4, 0x9b, 0x9a, 0x5e, + 0x8d, 0xae, 0x60, 0xdc, 0xcb, 0x5d, 0xfa, 0xbe, + 0x06, 0xc3, 0x3f, 0x06, 0xe7, 0x00, 0x40, 0x7b, + 0xaa, 0x94, 0xfa, 0x6d, 0x1f, 0xe4, 0xc5, 0xa9, + 0x1b, 0x5f, 0x36, 0xea, 0x5a, 0xdd, 0xa5, 0x48, + 0x6a, 0x55, 0xd2, 0x47, 0x28, 0xbf, 0x96, 0xf1, + 0x9f, 0xb6, 0x11, 0x4b, 0xd3, 0x44, 0x7d, 0x48, + 0x41, 0x61, 0xdb, 0x12, 0xd4, 0xc2, 0x59, 0x82, + 0x4c, 0x47, 0x5c, 0x04, 0xf6, 0x7b, 0xd3, 0x92, + 0x2e, 0xe8, 0x40, 0xef, 0x15, 0x32, 0x97, 0xdc, + 0x35, 0x4c, 0x6e, 0xa4, 0x97, 0xe9, 0x24, 0xde, + 0x63, 0x8b, 0xb1, 0x6b, 0x48, 0xbb, 0x46, 0x1f, + 0x84, 0xd6, 0x17, 0xb0, 0x5a, 0x4a, 0x4e, 0xd5, + 0x31, 0xd7, 0xcf, 0xa0, 0x39, 0xc6, 0x2e, 0xfc, + 0xa6, 0xa3, 0xd3, 0x0f, 0xa4, 0x28, 0xac, 0xb2, + 0xf4, 0x48, 0x8d, 0x50, 0xa5, 0x1c, 0x44, 0x5d, + 0x6e, 0x38, 0xb7, 0x2b, 0x8a, 0x45, 0xa7, 0x3d +}; +static const u8 key71[] __initconst = { + 0x8b, 0x68, 0xc4, 0xb7, 0x0d, 0x81, 0xef, 0x52, + 0x1e, 0x05, 0x96, 0x72, 0x62, 0x89, 0x27, 0x83, + 0xd0, 0xc7, 0x33, 0x6d, 0xf2, 0xcc, 0x69, 0xf9, + 0x23, 0xae, 0x99, 0xb1, 0xd1, 0x05, 0x4e, 0x54 +}; +enum { nonce71 = 0x983f03656d64b5f6ULL }; + +static const u8 input72[] __initconst = { + 0x6b, 0x09, 0xc9, 0x57, 0x3d, 0x79, 0x04, 0x8c, + 0x65, 0xad, 0x4a, 0x0f, 0xa1, 0x31, 0x3a, 0xdd, + 0x14, 0x8e, 0xe8, 0xfe, 0xbf, 0x42, 0x87, 0x98, + 0x2e, 0x8d, 0x83, 0xa3, 0xf8, 0x55, 0x3d, 0x84, + 0x1e, 0x0e, 0x05, 0x4a, 0x38, 0x9e, 0xe7, 0xfe, + 0xd0, 0x4d, 0x79, 0x74, 0x3a, 0x0b, 0x9b, 0xe1, + 0xfd, 0x51, 0x84, 0x4e, 0xb2, 0x25, 0xe4, 0x64, + 0x4c, 0xda, 0xcf, 0x46, 0xec, 0xba, 0x12, 0xeb, + 0x5a, 0x33, 0x09, 0x6e, 0x78, 0x77, 0x8f, 0x30, + 0xb1, 0x7d, 0x3f, 0x60, 0x8c, 0xf2, 0x1d, 0x8e, + 0xb4, 0x70, 0xa2, 0x90, 0x7c, 0x79, 0x1a, 0x2c, + 0xf6, 0x28, 0x79, 0x7c, 0x53, 0xc5, 0xfa, 0xcc, + 0x65, 0x9b, 0xe1, 0x51, 0xd1, 0x7f, 0x1d, 0xc4, + 0xdb, 0xd4, 0xd9, 0x04, 0x61, 0x7d, 0xbe, 0x12, + 0xfc, 0xcd, 0xaf, 0xe4, 0x0f, 0x9c, 0x20, 0xb5, + 0x22, 0x40, 0x18, 0xda, 0xe4, 0xda, 0x8c, 0x2d, + 0x84, 0xe3, 0x5f, 0x53, 0x17, 0xed, 0x78, 0xdc, + 0x2f, 0xe8, 0x31, 0xc7, 0xe6, 0x39, 0x71, 0x40, + 0xb4, 0x0f, 0xc9, 0xa9, 0x7e, 0x78, 0x87, 0xc1, + 0x05, 0x78, 0xbb, 0x01, 0xf2, 0x8f, 0x33, 0xb0, + 0x6e, 0x84, 0xcd, 0x36, 0x33, 0x5c, 0x5b, 0x8e, + 0xf1, 0xac, 0x30, 0xfe, 0x33, 0xec, 0x08, 0xf3, + 0x7e, 0xf2, 0xf0, 0x4c, 0xf2, 0xad, 0xd8, 0xc1, + 0xd4, 0x4e, 0x87, 0x06, 0xd4, 0x75, 0xe7, 0xe3, + 0x09, 0xd3, 0x4d, 0xe3, 0x21, 0x32, 0xba, 0xb4, + 0x68, 0x68, 0xcb, 0x4c, 0xa3, 0x1e, 0xb3, 0x87, + 0x7b, 0xd3, 0x0c, 0x63, 0x37, 0x71, 0x79, 0xfb, + 0x58, 0x36, 0x57, 0x0f, 0x34, 0x1d, 0xc1, 0x42, + 0x02, 0x17, 0xe7, 0xed, 0xe8, 0xe7, 0x76, 0xcb, + 0x42, 0xc4, 0x4b, 0xe2, 0xb2, 0x5e, 0x42, 0xd5, + 0xec, 0x9d, 0xc1, 0x32, 0x71, 0xe4, 0xeb, 0x10, + 0x68, 0x1a, 0x6e, 0x99, 0x8e, 0x73, 0x12, 0x1f, + 0x97, 0x0c, 0x9e, 0xcd, 0x02, 0x3e, 0x4c, 0xa0, + 0xf2, 0x8d, 0xe5, 0x44, 0xca, 0x6d, 0xfe, 0x07, + 0xe3, 0xe8, 0x9b, 0x76, 0xc1, 0x6d, 0xb7, 0x6e, + 0x0d, 0x14, 0x00, 0x6f, 0x8a, 0xfd, 0x43, 0xc6, + 0x43, 0xa5, 0x9c, 0x02, 0x47, 0x10, 0xd4, 0xb4, + 0x9b, 0x55, 0x67, 0xc8, 0x7f, 0xc1, 0x8a, 0x1f, + 0x1e, 0xd1, 0xbc, 0x99, 0x5d, 0x50, 0x4f, 0x89, + 0xf1, 0xe6, 0x5d, 0x91, 0x40, 0xdc, 0x20, 0x67, + 0x56, 0xc2, 0xef, 0xbd, 0x2c, 0xa2, 0x99, 0x38, + 0xe0, 0x45, 0xec, 0x44, 0x05, 0x52, 0x65, 0x11, + 0xfc, 0x3b, 0x19, 0xcb, 0x71, 0xc2, 0x8e, 0x0e, + 0x03, 0x2a, 0x03, 0x3b, 0x63, 0x06, 0x31, 0x9a, + 0xac, 0x53, 0x04, 0x14, 0xd4, 0x80, 0x9d, 0x6b, + 0x42, 0x7e, 0x7e, 0x4e, 0xdc, 0xc7, 0x01, 0x49, + 0x9f, 0xf5, 0x19, 0x86, 0x13, 0x28, 0x2b, 0xa6, + 0xa6, 0xbe, 0xa1, 0x7e, 0x71, 0x05, 0x00, 0xff, + 0x59, 0x2d, 0xb6, 0x63, 0xf0, 0x1e, 0x2e, 0x69, + 0x9b, 0x85, 0xf1, 0x1e, 0x8a, 0x64, 0x39, 0xab, + 0x00, 0x12, 0xe4, 0x33, 0x4b, 0xb5, 0xd8, 0xb3, + 0x6b, 0x5b, 0x8b, 0x5c, 0xd7, 0x6f, 0x23, 0xcf, + 0x3f, 0x2e, 0x5e, 0x47, 0xb9, 0xb8, 0x1f, 0xf0, + 0x1d, 0xda, 0xe7, 0x4f, 0x6e, 0xab, 0xc3, 0x36, + 0xb4, 0x74, 0x6b, 0xeb, 0xc7, 0x5d, 0x91, 0xe5, + 0xda, 0xf2, 0xc2, 0x11, 0x17, 0x48, 0xf8, 0x9c, + 0xc9, 0x8b, 0xc1, 0xa2, 0xf4, 0xcd, 0x16, 0xf8, + 0x27, 0xd9, 0x6c, 0x6f, 0xb5, 0x8f, 0x77, 0xca, + 0x1b, 0xd8, 0xef, 0x84, 0x68, 0x71, 0x53, 0xc1, + 0x43, 0x0f, 0x9f, 0x98, 0xae, 0x7e, 0x31, 0xd2, + 0x98, 0xfb, 0x20, 0xa2, 0xad, 0x00, 0x10, 0x83, + 0x00, 0x8b, 0xeb, 0x56, 0xd2, 0xc4, 0xcc, 0x7f, + 0x2f, 0x4e, 0xfa, 0x88, 0x13, 0xa4, 0x2c, 0xde, + 0x6b, 0x77, 0x86, 0x10, 0x6a, 0xab, 0x43, 0x0a, + 0x02 +}; +static const u8 output72[] __initconst = { + 0x42, 0x89, 0xa4, 0x80, 0xd2, 0xcb, 0x5f, 0x7f, + 0x2a, 0x1a, 0x23, 0x00, 0xa5, 0x6a, 0x95, 0xa3, + 0x9a, 0x41, 0xa1, 0xd0, 0x2d, 0x1e, 0xd6, 0x13, + 0x34, 0x40, 0x4e, 0x7f, 0x1a, 0xbe, 0xa0, 0x3d, + 0x33, 0x9c, 0x56, 0x2e, 0x89, 0x25, 0x45, 0xf9, + 0xf0, 0xba, 0x9c, 0x6d, 0xd1, 0xd1, 0xde, 0x51, + 0x47, 0x63, 0xc9, 0xbd, 0xfa, 0xa2, 0x9e, 0xad, + 0x6a, 0x7b, 0x21, 0x1a, 0x6c, 0x3e, 0xff, 0x46, + 0xbe, 0xf3, 0x35, 0x7a, 0x6e, 0xb3, 0xb9, 0xf7, + 0xda, 0x5e, 0xf0, 0x14, 0xb5, 0x70, 0xa4, 0x2b, + 0xdb, 0xbb, 0xc7, 0x31, 0x4b, 0x69, 0x5a, 0x83, + 0x70, 0xd9, 0x58, 0xd4, 0x33, 0x84, 0x23, 0xf0, + 0xae, 0xbb, 0x6d, 0x26, 0x7c, 0xc8, 0x30, 0xf7, + 0x24, 0xad, 0xbd, 0xe4, 0x2c, 0x38, 0x38, 0xac, + 0xe1, 0x4a, 0x9b, 0xac, 0x33, 0x0e, 0x4a, 0xf4, + 0x93, 0xed, 0x07, 0x82, 0x81, 0x4f, 0x8f, 0xb1, + 0xdd, 0x73, 0xd5, 0x50, 0x6d, 0x44, 0x1e, 0xbe, + 0xa7, 0xcd, 0x17, 0x57, 0xd5, 0x3b, 0x62, 0x36, + 0xcf, 0x7d, 0xc8, 0xd8, 0xd1, 0x78, 0xd7, 0x85, + 0x46, 0x76, 0x5d, 0xcc, 0xfe, 0xe8, 0x94, 0xc5, + 0xad, 0xbc, 0x5e, 0xbc, 0x8d, 0x1d, 0xdf, 0x03, + 0xc9, 0x6b, 0x1b, 0x81, 0xd1, 0xb6, 0x5a, 0x24, + 0xe3, 0xdc, 0x3f, 0x20, 0xc9, 0x07, 0x73, 0x4c, + 0x43, 0x13, 0x87, 0x58, 0x34, 0x0d, 0x14, 0x63, + 0x0f, 0x6f, 0xad, 0x8d, 0xac, 0x7c, 0x67, 0x68, + 0xa3, 0x9d, 0x7f, 0x00, 0xdf, 0x28, 0xee, 0x67, + 0xf4, 0x5c, 0x26, 0xcb, 0xef, 0x56, 0x71, 0xc8, + 0xc6, 0x67, 0x5f, 0x38, 0xbb, 0xa0, 0xb1, 0x5c, + 0x1f, 0xb3, 0x08, 0xd9, 0x38, 0xcf, 0x74, 0x54, + 0xc6, 0xa4, 0xc4, 0xc0, 0x9f, 0xb3, 0xd0, 0xda, + 0x62, 0x67, 0x8b, 0x81, 0x33, 0xf0, 0xa9, 0x73, + 0xa4, 0xd1, 0x46, 0x88, 0x8d, 0x85, 0x12, 0x40, + 0xba, 0x1a, 0xcd, 0x82, 0xd8, 0x8d, 0xc4, 0x52, + 0xe7, 0x01, 0x94, 0x2e, 0x0e, 0xd0, 0xaf, 0xe7, + 0x2d, 0x3f, 0x3c, 0xaa, 0xf4, 0xf5, 0xa7, 0x01, + 0x4c, 0x14, 0xe2, 0xc2, 0x96, 0x76, 0xbe, 0x05, + 0xaa, 0x19, 0xb1, 0xbd, 0x95, 0xbb, 0x5a, 0xf9, + 0xa5, 0xa7, 0xe6, 0x16, 0x38, 0x34, 0xf7, 0x9d, + 0x19, 0x66, 0x16, 0x8e, 0x7f, 0x2b, 0x5a, 0xfb, + 0xb5, 0x29, 0x79, 0xbf, 0x52, 0xae, 0x30, 0x95, + 0x3f, 0x31, 0x33, 0x28, 0xde, 0xc5, 0x0d, 0x55, + 0x89, 0xec, 0x21, 0x11, 0x0f, 0x8b, 0xfe, 0x63, + 0x3a, 0xf1, 0x95, 0x5c, 0xcd, 0x50, 0xe4, 0x5d, + 0x8f, 0xa7, 0xc8, 0xca, 0x93, 0xa0, 0x67, 0x82, + 0x63, 0x5c, 0xd0, 0xed, 0xe7, 0x08, 0xc5, 0x60, + 0xf8, 0xb4, 0x47, 0xf0, 0x1a, 0x65, 0x4e, 0xa3, + 0x51, 0x68, 0xc7, 0x14, 0xa1, 0xd9, 0x39, 0x72, + 0xa8, 0x6f, 0x7c, 0x7e, 0xf6, 0x03, 0x0b, 0x25, + 0x9b, 0xf2, 0xca, 0x49, 0xae, 0x5b, 0xf8, 0x0f, + 0x71, 0x51, 0x01, 0xa6, 0x23, 0xa9, 0xdf, 0xd0, + 0x7a, 0x39, 0x19, 0xf5, 0xc5, 0x26, 0x44, 0x7b, + 0x0a, 0x4a, 0x41, 0xbf, 0xf2, 0x8e, 0x83, 0x50, + 0x91, 0x96, 0x72, 0x02, 0xf6, 0x80, 0xbf, 0x95, + 0x41, 0xac, 0xda, 0xb0, 0xba, 0xe3, 0x76, 0xb1, + 0x9d, 0xff, 0x1f, 0x33, 0x02, 0x85, 0xfc, 0x2a, + 0x29, 0xe6, 0xe3, 0x9d, 0xd0, 0xef, 0xc2, 0xd6, + 0x9c, 0x4a, 0x62, 0xac, 0xcb, 0xea, 0x8b, 0xc3, + 0x08, 0x6e, 0x49, 0x09, 0x26, 0x19, 0xc1, 0x30, + 0xcc, 0x27, 0xaa, 0xc6, 0x45, 0x88, 0xbd, 0xae, + 0xd6, 0x79, 0xff, 0x4e, 0xfc, 0x66, 0x4d, 0x02, + 0xa5, 0xee, 0x8e, 0xa5, 0xb6, 0x15, 0x72, 0x24, + 0xb1, 0xbf, 0xbf, 0x64, 0xcf, 0xcc, 0x93, 0xe9, + 0xb6, 0xfd, 0xb4, 0xb6, 0x21, 0xb5, 0x48, 0x08, + 0x0f, 0x11, 0x65, 0xe1, 0x47, 0xee, 0x93, 0x29, + 0xad +}; +static const u8 key72[] __initconst = { + 0xb9, 0xa2, 0xfc, 0x59, 0x06, 0x3f, 0x77, 0xa5, + 0x66, 0xd0, 0x2b, 0x22, 0x74, 0x22, 0x4c, 0x1e, + 0x6a, 0x39, 0xdf, 0xe1, 0x0d, 0x4c, 0x64, 0x99, + 0x54, 0x8a, 0xba, 0x1d, 0x2c, 0x21, 0x5f, 0xc3 +}; +enum { nonce72 = 0x3d069308fa3db04bULL }; + +static const u8 input73[] __initconst = { + 0xe4, 0xdd, 0x36, 0xd4, 0xf5, 0x70, 0x51, 0x73, + 0x97, 0x1d, 0x45, 0x05, 0x92, 0xe7, 0xeb, 0xb7, + 0x09, 0x82, 0x6e, 0x25, 0x6c, 0x50, 0xf5, 0x40, + 0x19, 0xba, 0xbc, 0xf4, 0x39, 0x14, 0xc5, 0x15, + 0x83, 0x40, 0xbd, 0x26, 0xe0, 0xff, 0x3b, 0x22, + 0x7c, 0x7c, 0xd7, 0x0b, 0xe9, 0x25, 0x0c, 0x3d, + 0x92, 0x38, 0xbe, 0xe4, 0x22, 0x75, 0x65, 0xf1, + 0x03, 0x85, 0x34, 0x09, 0xb8, 0x77, 0xfb, 0x48, + 0xb1, 0x2e, 0x21, 0x67, 0x9b, 0x9d, 0xad, 0x18, + 0x82, 0x0d, 0x6b, 0xc3, 0xcf, 0x00, 0x61, 0x6e, + 0xda, 0xdc, 0xa7, 0x0b, 0x5c, 0x02, 0x1d, 0xa6, + 0x4e, 0x0d, 0x7f, 0x37, 0x01, 0x5a, 0x37, 0xf3, + 0x2b, 0xbf, 0xba, 0xe2, 0x1c, 0xb3, 0xa3, 0xbc, + 0x1c, 0x93, 0x1a, 0xb1, 0x71, 0xaf, 0xe2, 0xdd, + 0x17, 0xee, 0x53, 0xfa, 0xfb, 0x02, 0x40, 0x3e, + 0x03, 0xca, 0xe7, 0xc3, 0x51, 0x81, 0xcc, 0x8c, + 0xca, 0xcf, 0x4e, 0xc5, 0x78, 0x99, 0xfd, 0xbf, + 0xea, 0xab, 0x38, 0x81, 0xfc, 0xd1, 0x9e, 0x41, + 0x0b, 0x84, 0x25, 0xf1, 0x6b, 0x3c, 0xf5, 0x40, + 0x0d, 0xc4, 0x3e, 0xb3, 0x6a, 0xec, 0x6e, 0x75, + 0xdc, 0x9b, 0xdf, 0x08, 0x21, 0x16, 0xfb, 0x7a, + 0x8e, 0x19, 0x13, 0x02, 0xa7, 0xfc, 0x58, 0x21, + 0xc3, 0xb3, 0x59, 0x5a, 0x9c, 0xef, 0x38, 0xbd, + 0x87, 0x55, 0xd7, 0x0d, 0x1f, 0x84, 0xdc, 0x98, + 0x22, 0xca, 0x87, 0x96, 0x71, 0x6d, 0x68, 0x00, + 0xcb, 0x4f, 0x2f, 0xc4, 0x64, 0x0c, 0xc1, 0x53, + 0x0c, 0x90, 0xe7, 0x3c, 0x88, 0xca, 0xc5, 0x85, + 0xa3, 0x2a, 0x96, 0x7c, 0x82, 0x6d, 0x45, 0xf5, + 0xb7, 0x8d, 0x17, 0x69, 0xd6, 0xcd, 0x3c, 0xd3, + 0xe7, 0x1c, 0xce, 0x93, 0x50, 0xd4, 0x59, 0xa2, + 0xd8, 0x8b, 0x72, 0x60, 0x5b, 0x25, 0x14, 0xcd, + 0x5a, 0xe8, 0x8c, 0xdb, 0x23, 0x8d, 0x2b, 0x59, + 0x12, 0x13, 0x10, 0x47, 0xa4, 0xc8, 0x3c, 0xc1, + 0x81, 0x89, 0x6c, 0x98, 0xec, 0x8f, 0x7b, 0x32, + 0xf2, 0x87, 0xd9, 0xa2, 0x0d, 0xc2, 0x08, 0xf9, + 0xd5, 0xf3, 0x91, 0xe7, 0xb3, 0x87, 0xa7, 0x0b, + 0x64, 0x8f, 0xb9, 0x55, 0x1c, 0x81, 0x96, 0x6c, + 0xa1, 0xc9, 0x6e, 0x3b, 0xcd, 0x17, 0x1b, 0xfc, + 0xa6, 0x05, 0xba, 0x4a, 0x7d, 0x03, 0x3c, 0x59, + 0xc8, 0xee, 0x50, 0xb2, 0x5b, 0xe1, 0x4d, 0x6a, + 0x1f, 0x09, 0xdc, 0xa2, 0x51, 0xd1, 0x93, 0x3a, + 0x5f, 0x72, 0x1d, 0x26, 0x14, 0x62, 0xa2, 0x41, + 0x3d, 0x08, 0x70, 0x7b, 0x27, 0x3d, 0xbc, 0xdf, + 0x15, 0xfa, 0xb9, 0x5f, 0xb5, 0x38, 0x84, 0x0b, + 0x58, 0x3d, 0xee, 0x3f, 0x32, 0x65, 0x6d, 0xd7, + 0xce, 0x97, 0x3c, 0x8d, 0xfb, 0x63, 0xb9, 0xb0, + 0xa8, 0x4a, 0x72, 0x99, 0x97, 0x58, 0xc8, 0xa7, + 0xf9, 0x4c, 0xae, 0xc1, 0x63, 0xb9, 0x57, 0x18, + 0x8a, 0xfa, 0xab, 0xe9, 0xf3, 0x67, 0xe6, 0xfd, + 0xd2, 0x9d, 0x5c, 0xa9, 0x8e, 0x11, 0x0a, 0xf4, + 0x4b, 0xf1, 0xec, 0x1a, 0xaf, 0x50, 0x5d, 0x16, + 0x13, 0x69, 0x2e, 0xbd, 0x0d, 0xe6, 0xf0, 0xb2, + 0xed, 0xb4, 0x4c, 0x59, 0x77, 0x37, 0x00, 0x0b, + 0xc7, 0xa7, 0x9e, 0x37, 0xf3, 0x60, 0x70, 0xef, + 0xf3, 0xc1, 0x74, 0x52, 0x87, 0xc6, 0xa1, 0x81, + 0xbd, 0x0a, 0x2c, 0x5d, 0x2c, 0x0c, 0x6a, 0x81, + 0xa1, 0xfe, 0x26, 0x78, 0x6c, 0x03, 0x06, 0x07, + 0x34, 0xaa, 0xd1, 0x1b, 0x40, 0x03, 0x39, 0x56, + 0xcf, 0x2a, 0x92, 0xc1, 0x4e, 0xdf, 0x29, 0x24, + 0x83, 0x22, 0x7a, 0xea, 0x67, 0x1e, 0xe7, 0x54, + 0x64, 0xd3, 0xbd, 0x3a, 0x5d, 0xae, 0xca, 0xf0, + 0x9c, 0xd6, 0x5a, 0x9a, 0x62, 0xc8, 0xc7, 0x83, + 0xf9, 0x89, 0xde, 0x2d, 0x53, 0x64, 0x61, 0xf7, + 0xa3, 0xa7, 0x31, 0x38, 0xc6, 0x22, 0x9c, 0xb4, + 0x87, 0xe0 +}; +static const u8 output73[] __initconst = { + 0x34, 0xed, 0x05, 0xb0, 0x14, 0xbc, 0x8c, 0xcc, + 0x95, 0xbd, 0x99, 0x0f, 0xb1, 0x98, 0x17, 0x10, + 0xae, 0xe0, 0x08, 0x53, 0xa3, 0x69, 0xd2, 0xed, + 0x66, 0xdb, 0x2a, 0x34, 0x8d, 0x0c, 0x6e, 0xce, + 0x63, 0x69, 0xc9, 0xe4, 0x57, 0xc3, 0x0c, 0x8b, + 0xa6, 0x2c, 0xa7, 0xd2, 0x08, 0xff, 0x4f, 0xec, + 0x61, 0x8c, 0xee, 0x0d, 0xfa, 0x6b, 0xe0, 0xe8, + 0x71, 0xbc, 0x41, 0x46, 0xd7, 0x33, 0x1d, 0xc0, + 0xfd, 0xad, 0xca, 0x8b, 0x34, 0x56, 0xa4, 0x86, + 0x71, 0x62, 0xae, 0x5e, 0x3d, 0x2b, 0x66, 0x3e, + 0xae, 0xd8, 0xc0, 0xe1, 0x21, 0x3b, 0xca, 0xd2, + 0x6b, 0xa2, 0xb8, 0xc7, 0x98, 0x4a, 0xf3, 0xcf, + 0xb8, 0x62, 0xd8, 0x33, 0xe6, 0x80, 0xdb, 0x2f, + 0x0a, 0xaf, 0x90, 0x3c, 0xe1, 0xec, 0xe9, 0x21, + 0x29, 0x42, 0x9e, 0xa5, 0x50, 0xe9, 0x93, 0xd3, + 0x53, 0x1f, 0xac, 0x2a, 0x24, 0x07, 0xb8, 0xed, + 0xed, 0x38, 0x2c, 0xc4, 0xa1, 0x2b, 0x31, 0x5d, + 0x9c, 0x24, 0x7b, 0xbf, 0xd9, 0xbb, 0x4e, 0x87, + 0x8f, 0x32, 0x30, 0xf1, 0x11, 0x29, 0x54, 0x94, + 0x00, 0x95, 0x1d, 0x1d, 0x24, 0xc0, 0xd4, 0x34, + 0x49, 0x1d, 0xd5, 0xe3, 0xa6, 0xde, 0x8b, 0xbf, + 0x5a, 0x9f, 0x58, 0x5a, 0x9b, 0x70, 0xe5, 0x9b, + 0xb3, 0xdb, 0xe8, 0xb8, 0xca, 0x1b, 0x43, 0xe3, + 0xc6, 0x6f, 0x0a, 0xd6, 0x32, 0x11, 0xd4, 0x04, + 0xef, 0xa3, 0xe4, 0x3f, 0x12, 0xd8, 0xc1, 0x73, + 0x51, 0x87, 0x03, 0xbd, 0xba, 0x60, 0x79, 0xee, + 0x08, 0xcc, 0xf7, 0xc0, 0xaa, 0x4c, 0x33, 0xc4, + 0xc7, 0x09, 0xf5, 0x91, 0xcb, 0x74, 0x57, 0x08, + 0x1b, 0x90, 0xa9, 0x1b, 0x60, 0x02, 0xd2, 0x3f, + 0x7a, 0xbb, 0xfd, 0x78, 0xf0, 0x15, 0xf9, 0x29, + 0x82, 0x8f, 0xc4, 0xb2, 0x88, 0x1f, 0xbc, 0xcc, + 0x53, 0x27, 0x8b, 0x07, 0x5f, 0xfc, 0x91, 0x29, + 0x82, 0x80, 0x59, 0x0a, 0x3c, 0xea, 0xc4, 0x7e, + 0xad, 0xd2, 0x70, 0x46, 0xbd, 0x9e, 0x3b, 0x1c, + 0x8a, 0x62, 0xea, 0x69, 0xbd, 0xf6, 0x96, 0x15, + 0xb5, 0x57, 0xe8, 0x63, 0x5f, 0x65, 0x46, 0x84, + 0x58, 0x50, 0x87, 0x4b, 0x0e, 0x5b, 0x52, 0x90, + 0xb0, 0xae, 0x37, 0x0f, 0xdd, 0x7e, 0xa2, 0xa0, + 0x8b, 0x78, 0xc8, 0x5a, 0x1f, 0x53, 0xdb, 0xc5, + 0xbf, 0x73, 0x20, 0xa9, 0x44, 0xfb, 0x1e, 0xc7, + 0x97, 0xb2, 0x3a, 0x5a, 0x17, 0xe6, 0x8b, 0x9b, + 0xe8, 0xf8, 0x2a, 0x01, 0x27, 0xa3, 0x71, 0x28, + 0xe3, 0x19, 0xc6, 0xaf, 0xf5, 0x3a, 0x26, 0xc0, + 0x5c, 0x69, 0x30, 0x78, 0x75, 0x27, 0xf2, 0x0c, + 0x22, 0x71, 0x65, 0xc6, 0x8e, 0x7b, 0x47, 0xe3, + 0x31, 0xaf, 0x7b, 0xc6, 0xc2, 0x55, 0x68, 0x81, + 0xaa, 0x1b, 0x21, 0x65, 0xfb, 0x18, 0x35, 0x45, + 0x36, 0x9a, 0x44, 0xba, 0x5c, 0xff, 0x06, 0xde, + 0x3a, 0xc8, 0x44, 0x0b, 0xaa, 0x8e, 0x34, 0xe2, + 0x84, 0xac, 0x18, 0xfe, 0x9b, 0xe1, 0x4f, 0xaa, + 0xb6, 0x90, 0x0b, 0x1c, 0x2c, 0xd9, 0x9a, 0x10, + 0x18, 0xf9, 0x49, 0x41, 0x42, 0x1b, 0xb5, 0xe1, + 0x26, 0xac, 0x2d, 0x38, 0x00, 0x00, 0xe4, 0xb4, + 0x50, 0x6f, 0x14, 0x18, 0xd6, 0x3d, 0x00, 0x59, + 0x3c, 0x45, 0xf3, 0x42, 0x13, 0x44, 0xb8, 0x57, + 0xd4, 0x43, 0x5c, 0x8a, 0x2a, 0xb4, 0xfc, 0x0a, + 0x25, 0x5a, 0xdc, 0x8f, 0x11, 0x0b, 0x11, 0x44, + 0xc7, 0x0e, 0x54, 0x8b, 0x22, 0x01, 0x7e, 0x67, + 0x2e, 0x15, 0x3a, 0xb9, 0xee, 0x84, 0x10, 0xd4, + 0x80, 0x57, 0xd7, 0x75, 0xcf, 0x8b, 0xcb, 0x03, + 0xc9, 0x92, 0x2b, 0x69, 0xd8, 0x5a, 0x9b, 0x06, + 0x85, 0x47, 0xaa, 0x4c, 0x28, 0xde, 0x49, 0x58, + 0xe6, 0x11, 0x1e, 0x5e, 0x64, 0x8e, 0x3b, 0xe0, + 0x40, 0x2e, 0xac, 0x96, 0x97, 0x15, 0x37, 0x1e, + 0x30, 0xdd +}; +static const u8 key73[] __initconst = { + 0x96, 0x06, 0x1e, 0xc1, 0x6d, 0xba, 0x49, 0x5b, + 0x65, 0x80, 0x79, 0xdd, 0xf3, 0x67, 0xa8, 0x6e, + 0x2d, 0x9c, 0x54, 0x46, 0xd8, 0x4a, 0xeb, 0x7e, + 0x23, 0x86, 0x51, 0xd8, 0x49, 0x49, 0x56, 0xe0 +}; +enum { nonce73 = 0xbefb83cb67e11ffdULL }; + +static const u8 input74[] __initconst = { + 0x47, 0x22, 0x70, 0xe5, 0x2f, 0x41, 0x18, 0x45, + 0x07, 0xd3, 0x6d, 0x32, 0x0d, 0x43, 0x92, 0x2b, + 0x9b, 0x65, 0x73, 0x13, 0x1a, 0x4f, 0x49, 0x8f, + 0xff, 0xf8, 0xcc, 0xae, 0x15, 0xab, 0x9d, 0x7d, + 0xee, 0x22, 0x5d, 0x8b, 0xde, 0x81, 0x5b, 0x81, + 0x83, 0x49, 0x35, 0x9b, 0xb4, 0xbc, 0x4e, 0x01, + 0xc2, 0x29, 0xa7, 0xf1, 0xca, 0x3a, 0xce, 0x3f, + 0xf5, 0x31, 0x93, 0xa8, 0xe2, 0xc9, 0x7d, 0x03, + 0x26, 0xa4, 0xbc, 0xa8, 0x9c, 0xb9, 0x68, 0xf3, + 0xb3, 0x91, 0xe8, 0xe6, 0xc7, 0x2b, 0x1a, 0xce, + 0xd2, 0x41, 0x53, 0xbd, 0xa3, 0x2c, 0x54, 0x94, + 0x21, 0xa1, 0x40, 0xae, 0xc9, 0x0c, 0x11, 0x92, + 0xfd, 0x91, 0xa9, 0x40, 0xca, 0xde, 0x21, 0x4e, + 0x1e, 0x3d, 0xcc, 0x2c, 0x87, 0x11, 0xef, 0x46, + 0xed, 0x52, 0x03, 0x11, 0x19, 0x43, 0x25, 0xc7, + 0x0d, 0xc3, 0x37, 0x5f, 0xd3, 0x6f, 0x0c, 0x6a, + 0x45, 0x30, 0x88, 0xec, 0xf0, 0x21, 0xef, 0x1d, + 0x7b, 0x38, 0x63, 0x4b, 0x49, 0x0c, 0x72, 0xf6, + 0x4c, 0x40, 0xc3, 0xcc, 0x03, 0xa7, 0xae, 0xa8, + 0x8c, 0x37, 0x03, 0x1c, 0x11, 0xae, 0x0d, 0x1b, + 0x62, 0x97, 0x27, 0xfc, 0x56, 0x4b, 0xb7, 0xfd, + 0xbc, 0xfb, 0x0e, 0xfc, 0x61, 0xad, 0xc6, 0xb5, + 0x9c, 0x8c, 0xc6, 0x38, 0x27, 0x91, 0x29, 0x3d, + 0x29, 0xc8, 0x37, 0xc9, 0x96, 0x69, 0xe3, 0xdc, + 0x3e, 0x61, 0x35, 0x9b, 0x99, 0x4f, 0xb9, 0x4e, + 0x5a, 0x29, 0x1c, 0x2e, 0xcf, 0x16, 0xcb, 0x69, + 0x87, 0xe4, 0x1a, 0xc4, 0x6e, 0x78, 0x43, 0x00, + 0x03, 0xb2, 0x8b, 0x03, 0xd0, 0xb4, 0xf1, 0xd2, + 0x7d, 0x2d, 0x7e, 0xfc, 0x19, 0x66, 0x5b, 0xa3, + 0x60, 0x3f, 0x9d, 0xbd, 0xfa, 0x3e, 0xca, 0x7b, + 0x26, 0x08, 0x19, 0x16, 0x93, 0x5d, 0x83, 0xfd, + 0xf9, 0x21, 0xc6, 0x31, 0x34, 0x6f, 0x0c, 0xaa, + 0x28, 0xf9, 0x18, 0xa2, 0xc4, 0x78, 0x3b, 0x56, + 0xc0, 0x88, 0x16, 0xba, 0x22, 0x2c, 0x07, 0x2f, + 0x70, 0xd0, 0xb0, 0x46, 0x35, 0xc7, 0x14, 0xdc, + 0xbb, 0x56, 0x23, 0x1e, 0x36, 0x36, 0x2d, 0x73, + 0x78, 0xc7, 0xce, 0xf3, 0x58, 0xf7, 0x58, 0xb5, + 0x51, 0xff, 0x33, 0x86, 0x0e, 0x3b, 0x39, 0xfb, + 0x1a, 0xfd, 0xf8, 0x8b, 0x09, 0x33, 0x1b, 0x83, + 0xf2, 0xe6, 0x38, 0x37, 0xef, 0x47, 0x84, 0xd9, + 0x82, 0x77, 0x2b, 0x82, 0xcc, 0xf9, 0xee, 0x94, + 0x71, 0x78, 0x81, 0xc8, 0x4d, 0x91, 0xd7, 0x35, + 0x29, 0x31, 0x30, 0x5c, 0x4a, 0x23, 0x23, 0xb1, + 0x38, 0x6b, 0xac, 0x22, 0x3f, 0x80, 0xc7, 0xe0, + 0x7d, 0xfa, 0x76, 0x47, 0xd4, 0x6f, 0x93, 0xa0, + 0xa0, 0x93, 0x5d, 0x68, 0xf7, 0x43, 0x25, 0x8f, + 0x1b, 0xc7, 0x87, 0xea, 0x59, 0x0c, 0xa2, 0xfa, + 0xdb, 0x2f, 0x72, 0x43, 0xcf, 0x90, 0xf1, 0xd6, + 0x58, 0xf3, 0x17, 0x6a, 0xdf, 0xb3, 0x4e, 0x0e, + 0x38, 0x24, 0x48, 0x1f, 0xb7, 0x01, 0xec, 0x81, + 0xb1, 0x87, 0x5b, 0xec, 0x9c, 0x11, 0x1a, 0xff, + 0xa5, 0xca, 0x5a, 0x63, 0x31, 0xb2, 0xe4, 0xc6, + 0x3c, 0x1d, 0xaf, 0x27, 0xb2, 0xd4, 0x19, 0xa2, + 0xcc, 0x04, 0x92, 0x42, 0xd2, 0xc1, 0x8c, 0x3b, + 0xce, 0xf5, 0x74, 0xc1, 0x81, 0xf8, 0x20, 0x23, + 0x6f, 0x20, 0x6d, 0x78, 0x36, 0x72, 0x2c, 0x52, + 0xdf, 0x5e, 0xe8, 0x75, 0xce, 0x1c, 0x49, 0x9d, + 0x93, 0x6f, 0x65, 0xeb, 0xb1, 0xbd, 0x8e, 0x5e, + 0xe5, 0x89, 0xc4, 0x8a, 0x81, 0x3d, 0x9a, 0xa7, + 0x11, 0x82, 0x8e, 0x38, 0x5b, 0x5b, 0xca, 0x7d, + 0x4b, 0x72, 0xc2, 0x9c, 0x30, 0x5e, 0x7f, 0xc0, + 0x6f, 0x91, 0xd5, 0x67, 0x8c, 0x3e, 0xae, 0xda, + 0x2b, 0x3c, 0x53, 0xcc, 0x50, 0x97, 0x36, 0x0b, + 0x79, 0xd6, 0x73, 0x6e, 0x7d, 0x42, 0x56, 0xe1, + 0xaa, 0xfc, 0xb3, 0xa7, 0xc8, 0x01, 0xaa, 0xc1, + 0xfc, 0x5c, 0x72, 0x8e, 0x63, 0xa8, 0x46, 0x18, + 0xee, 0x11, 0xe7, 0x30, 0x09, 0x83, 0x6c, 0xd9, + 0xf4, 0x7a, 0x7b, 0xb5, 0x1f, 0x6d, 0xc7, 0xbc, + 0xcb, 0x55, 0xea, 0x40, 0x58, 0x7a, 0x00, 0x00, + 0x90, 0x60, 0xc5, 0x64, 0x69, 0x05, 0x99, 0xd2, + 0x49, 0x62, 0x4f, 0xcb, 0x97, 0xdf, 0xdd, 0x6b, + 0x60, 0x75, 0xe2, 0xe0, 0x6f, 0x76, 0xd0, 0x37, + 0x67, 0x0a, 0xcf, 0xff, 0xc8, 0x61, 0x84, 0x14, + 0x80, 0x7c, 0x1d, 0x31, 0x8d, 0x90, 0xde, 0x0b, + 0x1c, 0x74, 0x9f, 0x82, 0x96, 0x80, 0xda, 0xaf, + 0x8d, 0x99, 0x86, 0x9f, 0x24, 0x99, 0x28, 0x3e, + 0xe0, 0xa3, 0xc3, 0x90, 0x2d, 0x14, 0x65, 0x1e, + 0x3b, 0xb9, 0xba, 0x13, 0xa5, 0x77, 0x73, 0x63, + 0x9a, 0x06, 0x3d, 0xa9, 0x28, 0x9b, 0xba, 0x25, + 0x61, 0xc9, 0xcd, 0xcf, 0x7a, 0x4d, 0x96, 0x09, + 0xcb, 0xca, 0x03, 0x9c, 0x54, 0x34, 0x31, 0x85, + 0xa0, 0x3d, 0xe5, 0xbc, 0xa5, 0x5f, 0x1b, 0xd3, + 0x10, 0x63, 0x74, 0x9d, 0x01, 0x92, 0x88, 0xf0, + 0x27, 0x9c, 0x28, 0xd9, 0xfd, 0xe2, 0x4e, 0x01, + 0x8d, 0x61, 0x79, 0x60, 0x61, 0x5b, 0x76, 0xab, + 0x06, 0xd3, 0x44, 0x87, 0x43, 0x52, 0xcd, 0x06, + 0x68, 0x1e, 0x2d, 0xc5, 0xb0, 0x07, 0x25, 0xdf, + 0x0a, 0x50, 0xd7, 0xd9, 0x08, 0x53, 0x65, 0xf1, + 0x0c, 0x2c, 0xde, 0x3f, 0x9d, 0x03, 0x1f, 0xe1, + 0x49, 0x43, 0x3c, 0x83, 0x81, 0x37, 0xf8, 0xa2, + 0x0b, 0xf9, 0x61, 0x1c, 0xc1, 0xdb, 0x79, 0xbc, + 0x64, 0xce, 0x06, 0x4e, 0x87, 0x89, 0x62, 0x73, + 0x51, 0xbc, 0xa4, 0x32, 0xd4, 0x18, 0x62, 0xab, + 0x65, 0x7e, 0xad, 0x1e, 0x91, 0xa3, 0xfa, 0x2d, + 0x58, 0x9e, 0x2a, 0xe9, 0x74, 0x44, 0x64, 0x11, + 0xe6, 0xb6, 0xb3, 0x00, 0x7e, 0xa3, 0x16, 0xef, + 0x72 +}; +static const u8 output74[] __initconst = { + 0xf5, 0xca, 0x45, 0x65, 0x50, 0x35, 0x47, 0x67, + 0x6f, 0x4f, 0x67, 0xff, 0x34, 0xd9, 0xc3, 0x37, + 0x2a, 0x26, 0xb0, 0x4f, 0x08, 0x1e, 0x45, 0x13, + 0xc7, 0x2c, 0x14, 0x75, 0x33, 0xd8, 0x8e, 0x1e, + 0x1b, 0x11, 0x0d, 0x97, 0x04, 0x33, 0x8a, 0xe4, + 0xd8, 0x8d, 0x0e, 0x12, 0x8d, 0xdb, 0x6e, 0x02, + 0xfa, 0xe5, 0xbd, 0x3a, 0xb5, 0x28, 0x07, 0x7d, + 0x20, 0xf0, 0x12, 0x64, 0x83, 0x2f, 0x59, 0x79, + 0x17, 0x88, 0x3c, 0x2d, 0x08, 0x2f, 0x55, 0xda, + 0xcc, 0x02, 0x3a, 0x82, 0xcd, 0x03, 0x94, 0xdf, + 0xdf, 0xab, 0x8a, 0x13, 0xf5, 0xe6, 0x74, 0xdf, + 0x7b, 0xe2, 0xab, 0x34, 0xbc, 0x00, 0x85, 0xbf, + 0x5a, 0x48, 0xc8, 0xff, 0x8d, 0x6c, 0x27, 0x48, + 0x19, 0x2d, 0x08, 0xfa, 0x82, 0x62, 0x39, 0x55, + 0x32, 0x11, 0xa8, 0xd7, 0xb9, 0x08, 0x2c, 0xd6, + 0x7a, 0xd9, 0x83, 0x9f, 0x9b, 0xfb, 0xec, 0x3a, + 0xd1, 0x08, 0xc7, 0xad, 0xdc, 0x98, 0x4c, 0xbc, + 0x98, 0xeb, 0x36, 0xb0, 0x39, 0xf4, 0x3a, 0xd6, + 0x53, 0x02, 0xa0, 0xa9, 0x73, 0xa1, 0xca, 0xef, + 0xd8, 0xd2, 0xec, 0x0e, 0xf8, 0xf5, 0xac, 0x8d, + 0x34, 0x41, 0x06, 0xa8, 0xc6, 0xc3, 0x31, 0xbc, + 0xe5, 0xcc, 0x7e, 0x72, 0x63, 0x59, 0x3e, 0x63, + 0xc2, 0x8d, 0x2b, 0xd5, 0xb9, 0xfd, 0x1e, 0x31, + 0x69, 0x32, 0x05, 0xd6, 0xde, 0xc9, 0xe6, 0x4c, + 0xac, 0x68, 0xf7, 0x1f, 0x9d, 0xcd, 0x0e, 0xa2, + 0x15, 0x3d, 0xd6, 0x47, 0x99, 0xab, 0x08, 0x5f, + 0x28, 0xc3, 0x4c, 0xc2, 0xd5, 0xdd, 0x10, 0xb7, + 0xbd, 0xdb, 0x9b, 0xcf, 0x85, 0x27, 0x29, 0x76, + 0x98, 0xeb, 0xad, 0x31, 0x64, 0xe7, 0xfb, 0x61, + 0xe0, 0xd8, 0x1a, 0xa6, 0xe2, 0xe7, 0x43, 0x42, + 0x77, 0xc9, 0x82, 0x00, 0xac, 0x85, 0xe0, 0xa2, + 0xd4, 0x62, 0xe3, 0xb7, 0x17, 0x6e, 0xb2, 0x9e, + 0x21, 0x58, 0x73, 0xa9, 0x53, 0x2d, 0x3c, 0xe1, + 0xdd, 0xd6, 0x6e, 0x92, 0xf2, 0x1d, 0xc2, 0x22, + 0x5f, 0x9a, 0x7e, 0xd0, 0x52, 0xbf, 0x54, 0x19, + 0xd7, 0x80, 0x63, 0x3e, 0xd0, 0x08, 0x2d, 0x37, + 0x0c, 0x15, 0xf7, 0xde, 0xab, 0x2b, 0xe3, 0x16, + 0x21, 0x3a, 0xee, 0xa5, 0xdc, 0xdf, 0xde, 0xa3, + 0x69, 0xcb, 0xfd, 0x92, 0x89, 0x75, 0xcf, 0xc9, + 0x8a, 0xa4, 0xc8, 0xdd, 0xcc, 0x21, 0xe6, 0xfe, + 0x9e, 0x43, 0x76, 0xb2, 0x45, 0x22, 0xb9, 0xb5, + 0xac, 0x7e, 0x3d, 0x26, 0xb0, 0x53, 0xc8, 0xab, + 0xfd, 0xea, 0x2c, 0xd1, 0x44, 0xc5, 0x60, 0x1b, + 0x8a, 0x99, 0x0d, 0xa5, 0x0e, 0x67, 0x6e, 0x3a, + 0x96, 0x55, 0xec, 0xe8, 0xcc, 0xbe, 0x49, 0xd9, + 0xf2, 0x72, 0x9f, 0x30, 0x21, 0x97, 0x57, 0x19, + 0xbe, 0x5e, 0x33, 0x0c, 0xee, 0xc0, 0x72, 0x0d, + 0x2e, 0xd1, 0xe1, 0x52, 0xc2, 0xea, 0x41, 0xbb, + 0xe1, 0x6d, 0xd4, 0x17, 0xa9, 0x8d, 0x89, 0xa9, + 0xd6, 0x4b, 0xc6, 0x4c, 0xf2, 0x88, 0x97, 0x54, + 0x3f, 0x4f, 0x57, 0xb7, 0x37, 0xf0, 0x2c, 0x11, + 0x15, 0x56, 0xdb, 0x28, 0xb5, 0x16, 0x84, 0x66, + 0xce, 0x45, 0x3f, 0x61, 0x75, 0xb6, 0xbe, 0x00, + 0xd1, 0xe4, 0xf5, 0x27, 0x54, 0x7f, 0xc2, 0xf1, + 0xb3, 0x32, 0x9a, 0xe8, 0x07, 0x02, 0xf3, 0xdb, + 0xa9, 0xd1, 0xc2, 0xdf, 0xee, 0xad, 0xe5, 0x8a, + 0x3c, 0xfa, 0x67, 0xec, 0x6b, 0xa4, 0x08, 0xfe, + 0xba, 0x5a, 0x58, 0x0b, 0x78, 0x11, 0x91, 0x76, + 0xe3, 0x1a, 0x28, 0x54, 0x5e, 0xbd, 0x71, 0x1b, + 0x8b, 0xdc, 0x6c, 0xf4, 0x6f, 0xd7, 0xf4, 0xf3, + 0xe1, 0x03, 0xa4, 0x3c, 0x8d, 0x91, 0x2e, 0xba, + 0x5f, 0x7f, 0x8c, 0xaf, 0x69, 0x89, 0x29, 0x0a, + 0x5b, 0x25, 0x13, 0xc4, 0x2e, 0x16, 0xc2, 0x15, + 0x07, 0x5d, 0x58, 0x33, 0x7c, 0xe0, 0xf0, 0x55, + 0x5f, 0xbf, 0x5e, 0xf0, 0x71, 0x48, 0x8f, 0xf7, + 0x48, 0xb3, 0xf7, 0x0d, 0xa1, 0xd0, 0x63, 0xb1, + 0xad, 0xae, 0xb5, 0xb0, 0x5f, 0x71, 0xaf, 0x24, + 0x8b, 0xb9, 0x1c, 0x44, 0xd2, 0x1a, 0x53, 0xd1, + 0xd5, 0xb4, 0xa9, 0xff, 0x88, 0x73, 0xb5, 0xaa, + 0x15, 0x32, 0x5f, 0x59, 0x9d, 0x2e, 0xb5, 0xcb, + 0xde, 0x21, 0x2e, 0xe9, 0x35, 0xed, 0xfd, 0x0f, + 0xb6, 0xbb, 0xe6, 0x4b, 0x16, 0xf1, 0x45, 0x1e, + 0xb4, 0x84, 0xe9, 0x58, 0x1c, 0x0c, 0x95, 0xc0, + 0xcf, 0x49, 0x8b, 0x59, 0xa1, 0x78, 0xe6, 0x80, + 0x12, 0x49, 0x7a, 0xd4, 0x66, 0x62, 0xdf, 0x9c, + 0x18, 0xc8, 0x8c, 0xda, 0xc1, 0xa6, 0xbc, 0x65, + 0x28, 0xd2, 0xa4, 0xe8, 0xf1, 0x35, 0xdb, 0x5a, + 0x75, 0x1f, 0x73, 0x60, 0xec, 0xa8, 0xda, 0x5a, + 0x43, 0x15, 0x83, 0x9b, 0xe7, 0xb1, 0xa6, 0x81, + 0xbb, 0xef, 0xf3, 0x8f, 0x0f, 0xd3, 0x79, 0xa2, + 0xe5, 0xaa, 0x42, 0xef, 0xa0, 0x13, 0x4e, 0x91, + 0x2d, 0xcb, 0x61, 0x7a, 0x9a, 0x33, 0x14, 0x50, + 0x77, 0x4a, 0xd0, 0x91, 0x48, 0xe0, 0x0c, 0xe0, + 0x11, 0xcb, 0xdf, 0xb0, 0xce, 0x06, 0xd2, 0x79, + 0x4d, 0x69, 0xb9, 0xc9, 0x36, 0x74, 0x8f, 0x81, + 0x72, 0x73, 0xf3, 0x17, 0xb7, 0x13, 0xcb, 0x5b, + 0xd2, 0x5c, 0x33, 0x61, 0xb7, 0x61, 0x79, 0xb0, + 0xc0, 0x4d, 0xa1, 0xc7, 0x5d, 0x98, 0xc9, 0xe1, + 0x98, 0xbd, 0x78, 0x5a, 0x2c, 0x64, 0x53, 0xaf, + 0xaf, 0x66, 0x51, 0x47, 0xe4, 0x48, 0x66, 0x8b, + 0x07, 0x52, 0xa3, 0x03, 0x93, 0x28, 0xad, 0xcc, + 0xa3, 0x86, 0xad, 0x63, 0x04, 0x35, 0x6c, 0x49, + 0xd5, 0x28, 0x0e, 0x00, 0x47, 0xf4, 0xd4, 0x32, + 0x27, 0x19, 0xb3, 0x29, 0xe7, 0xbc, 0xbb, 0xce, + 0x3e, 0x3e, 0xd5, 0x67, 0x20, 0xe4, 0x0b, 0x75, + 0x95, 0x24, 0xe0, 0x6c, 0xb6, 0x29, 0x0c, 0x14, + 0xfd +}; +static const u8 key74[] __initconst = { + 0xf0, 0x41, 0x5b, 0x00, 0x56, 0xc4, 0xac, 0xf6, + 0xa2, 0x4c, 0x33, 0x41, 0x16, 0x09, 0x1b, 0x8e, + 0x4d, 0xe8, 0x8c, 0xd9, 0x48, 0xab, 0x3e, 0x60, + 0xcb, 0x49, 0x3e, 0xaf, 0x2b, 0x8b, 0xc8, 0xf0 +}; +enum { nonce74 = 0xcbdb0ffd0e923384ULL }; + +static const struct chacha20_testvec chacha20_testvecs[] __initconst = { + { input01, output01, key01, nonce01, sizeof(input01) }, + { input02, output02, key02, nonce02, sizeof(input02) }, + { input03, output03, key03, nonce03, sizeof(input03) }, + { input04, output04, key04, nonce04, sizeof(input04) }, + { input05, output05, key05, nonce05, sizeof(input05) }, + { input06, output06, key06, nonce06, sizeof(input06) }, + { input07, output07, key07, nonce07, sizeof(input07) }, + { input08, output08, key08, nonce08, sizeof(input08) }, + { input09, output09, key09, nonce09, sizeof(input09) }, + { input10, output10, key10, nonce10, sizeof(input10) }, + { input11, output11, key11, nonce11, sizeof(input11) }, + { input12, output12, key12, nonce12, sizeof(input12) }, + { input13, output13, key13, nonce13, sizeof(input13) }, + { input14, output14, key14, nonce14, sizeof(input14) }, + { input15, output15, key15, nonce15, sizeof(input15) }, + { input16, output16, key16, nonce16, sizeof(input16) }, + { input17, output17, key17, nonce17, sizeof(input17) }, + { input18, output18, key18, nonce18, sizeof(input18) }, + { input19, output19, key19, nonce19, sizeof(input19) }, + { input20, output20, key20, nonce20, sizeof(input20) }, + { input21, output21, key21, nonce21, sizeof(input21) }, + { input22, output22, key22, nonce22, sizeof(input22) }, + { input23, output23, key23, nonce23, sizeof(input23) }, + { input24, output24, key24, nonce24, sizeof(input24) }, + { input25, output25, key25, nonce25, sizeof(input25) }, + { input26, output26, key26, nonce26, sizeof(input26) }, + { input27, output27, key27, nonce27, sizeof(input27) }, + { input28, output28, key28, nonce28, sizeof(input28) }, + { input29, output29, key29, nonce29, sizeof(input29) }, + { input30, output30, key30, nonce30, sizeof(input30) }, + { input31, output31, key31, nonce31, sizeof(input31) }, + { input32, output32, key32, nonce32, sizeof(input32) }, + { input33, output33, key33, nonce33, sizeof(input33) }, + { input34, output34, key34, nonce34, sizeof(input34) }, + { input35, output35, key35, nonce35, sizeof(input35) }, + { input36, output36, key36, nonce36, sizeof(input36) }, + { input37, output37, key37, nonce37, sizeof(input37) }, + { input38, output38, key38, nonce38, sizeof(input38) }, + { input39, output39, key39, nonce39, sizeof(input39) }, + { input40, output40, key40, nonce40, sizeof(input40) }, + { input41, output41, key41, nonce41, sizeof(input41) }, + { input42, output42, key42, nonce42, sizeof(input42) }, + { input43, output43, key43, nonce43, sizeof(input43) }, + { input44, output44, key44, nonce44, sizeof(input44) }, + { input45, output45, key45, nonce45, sizeof(input45) }, + { input46, output46, key46, nonce46, sizeof(input46) }, + { input47, output47, key47, nonce47, sizeof(input47) }, + { input48, output48, key48, nonce48, sizeof(input48) }, + { input49, output49, key49, nonce49, sizeof(input49) }, + { input50, output50, key50, nonce50, sizeof(input50) }, + { input51, output51, key51, nonce51, sizeof(input51) }, + { input52, output52, key52, nonce52, sizeof(input52) }, + { input53, output53, key53, nonce53, sizeof(input53) }, + { input54, output54, key54, nonce54, sizeof(input54) }, + { input55, output55, key55, nonce55, sizeof(input55) }, + { input56, output56, key56, nonce56, sizeof(input56) }, + { input57, output57, key57, nonce57, sizeof(input57) }, + { input58, output58, key58, nonce58, sizeof(input58) }, + { input59, output59, key59, nonce59, sizeof(input59) }, + { input60, output60, key60, nonce60, sizeof(input60) }, + { input61, output61, key61, nonce61, sizeof(input61) }, + { input62, output62, key62, nonce62, sizeof(input62) }, + { input63, output63, key63, nonce63, sizeof(input63) }, + { input64, output64, key64, nonce64, sizeof(input64) }, + { input65, output65, key65, nonce65, sizeof(input65) }, + { input66, output66, key66, nonce66, sizeof(input66) }, + { input67, output67, key67, nonce67, sizeof(input67) }, + { input68, output68, key68, nonce68, sizeof(input68) }, + { input69, output69, key69, nonce69, sizeof(input69) }, + { input70, output70, key70, nonce70, sizeof(input70) }, + { input71, output71, key71, nonce71, sizeof(input71) }, + { input72, output72, key72, nonce72, sizeof(input72) }, + { input73, output73, key73, nonce73, sizeof(input73) }, + { input74, output74, key74, nonce74, sizeof(input74) } +}; + +static const struct hchacha20_testvec hchacha20_testvecs[] __initconst = {{ + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, + .nonce = { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, + 0x00, 0x00, 0x00, 0x00, 0x31, 0x41, 0x59, 0x27 }, + .output = { 0x82, 0x41, 0x3b, 0x42, 0x27, 0xb2, 0x7b, 0xfe, + 0xd3, 0x0e, 0x42, 0x50, 0x8a, 0x87, 0x7d, 0x73, + 0xa0, 0xf9, 0xe4, 0xd5, 0x8a, 0x74, 0xa8, 0x53, + 0xc1, 0x2e, 0xc4, 0x13, 0x26, 0xd3, 0xec, 0xdc } +}}; + +static bool __init chacha20_selftest(void) +{ + enum { + MAXIMUM_TEST_BUFFER_LEN = 1UL << 10, + OUTRAGEOUSLY_HUGE_BUFFER_LEN = PAGE_SIZE * 35 + 17 /* 143k */ + }; + size_t i, j, k; + u32 derived_key[CHACHA20_KEY_WORDS]; + u8 *offset_input = NULL, *computed_output = NULL, *massive_input = NULL; + u8 offset_key[CHACHA20_KEY_SIZE + 1] + __aligned(__alignof__(unsigned long)); + struct chacha20_ctx state; + bool success = true; + simd_context_t simd_context; + + offset_input = kmalloc(MAXIMUM_TEST_BUFFER_LEN + 1, GFP_KERNEL); + computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN + 1, GFP_KERNEL); + massive_input = vzalloc(OUTRAGEOUSLY_HUGE_BUFFER_LEN); + if (!computed_output || !offset_input || !massive_input) { + pr_err("chacha20 self-test malloc: FAIL\n"); + success = false; + goto out; + } + + simd_get(&simd_context); + for (i = 0; i < ARRAY_SIZE(chacha20_testvecs); ++i) { + /* Boring case */ + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1); + memset(&state, 0, sizeof(state)); + chacha20_init(&state, chacha20_testvecs[i].key, + chacha20_testvecs[i].nonce); + chacha20(&state, computed_output, chacha20_testvecs[i].input, + chacha20_testvecs[i].ilen, &simd_context); + if (memcmp(computed_output, chacha20_testvecs[i].output, + chacha20_testvecs[i].ilen)) { + pr_err("chacha20 self-test %zu: FAIL\n", i + 1); + success = false; + } + for (k = chacha20_testvecs[i].ilen; + k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) { + if (computed_output[k]) { + pr_err("chacha20 self-test %zu (zero check): FAIL\n", + i + 1); + success = false; + break; + } + } + + /* Unaligned case */ + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1); + memset(&state, 0, sizeof(state)); + memcpy(offset_input + 1, chacha20_testvecs[i].input, + chacha20_testvecs[i].ilen); + memcpy(offset_key + 1, chacha20_testvecs[i].key, + CHACHA20_KEY_SIZE); + chacha20_init(&state, offset_key + 1, chacha20_testvecs[i].nonce); + chacha20(&state, computed_output + 1, offset_input + 1, + chacha20_testvecs[i].ilen, &simd_context); + if (memcmp(computed_output + 1, chacha20_testvecs[i].output, + chacha20_testvecs[i].ilen)) { + pr_err("chacha20 self-test %zu (unaligned): FAIL\n", + i + 1); + success = false; + } + if (computed_output[0]) { + pr_err("chacha20 self-test %zu (unaligned, zero check): FAIL\n", + i + 1); + success = false; + } + for (k = chacha20_testvecs[i].ilen + 1; + k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) { + if (computed_output[k]) { + pr_err("chacha20 self-test %zu (unaligned, zero check): FAIL\n", + i + 1); + success = false; + break; + } + } + + /* Chunked case */ + if (chacha20_testvecs[i].ilen <= CHACHA20_BLOCK_SIZE) + goto next_test; + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1); + memset(&state, 0, sizeof(state)); + chacha20_init(&state, chacha20_testvecs[i].key, + chacha20_testvecs[i].nonce); + chacha20(&state, computed_output, chacha20_testvecs[i].input, + CHACHA20_BLOCK_SIZE, &simd_context); + chacha20(&state, computed_output + CHACHA20_BLOCK_SIZE, + chacha20_testvecs[i].input + CHACHA20_BLOCK_SIZE, + chacha20_testvecs[i].ilen - CHACHA20_BLOCK_SIZE, + &simd_context); + if (memcmp(computed_output, chacha20_testvecs[i].output, + chacha20_testvecs[i].ilen)) { + pr_err("chacha20 self-test %zu (chunked): FAIL\n", + i + 1); + success = false; + } + for (k = chacha20_testvecs[i].ilen; + k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) { + if (computed_output[k]) { + pr_err("chacha20 self-test %zu (chunked, zero check): FAIL\n", + i + 1); + success = false; + break; + } + } + +next_test: + /* Sliding unaligned case */ + if (chacha20_testvecs[i].ilen > CHACHA20_BLOCK_SIZE + 1 || + !chacha20_testvecs[i].ilen) + continue; + for (j = 1; j < CHACHA20_BLOCK_SIZE; ++j) { + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1); + memset(&state, 0, sizeof(state)); + memcpy(offset_input + j, chacha20_testvecs[i].input, + chacha20_testvecs[i].ilen); + chacha20_init(&state, chacha20_testvecs[i].key, + chacha20_testvecs[i].nonce); + chacha20(&state, computed_output + j, offset_input + j, + chacha20_testvecs[i].ilen, &simd_context); + if (memcmp(computed_output + j, + chacha20_testvecs[i].output, + chacha20_testvecs[i].ilen)) { + pr_err("chacha20 self-test %zu (unaligned, slide %zu): FAIL\n", + i + 1, j); + success = false; + } + for (k = j; k < j; ++k) { + if (computed_output[k]) { + pr_err("chacha20 self-test %zu (unaligned, slide %zu, zero check): FAIL\n", + i + 1, j); + success = false; + break; + } + } + for (k = chacha20_testvecs[i].ilen + j; + k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) { + if (computed_output[k]) { + pr_err("chacha20 self-test %zu (unaligned, slide %zu, zero check): FAIL\n", + i + 1, j); + success = false; + break; + } + } + } + } + for (i = 0; i < ARRAY_SIZE(hchacha20_testvecs); ++i) { + memset(&derived_key, 0, sizeof(derived_key)); + hchacha20(derived_key, hchacha20_testvecs[i].nonce, + hchacha20_testvecs[i].key, &simd_context); + cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key)); + if (memcmp(derived_key, hchacha20_testvecs[i].output, + CHACHA20_KEY_SIZE)) { + pr_err("hchacha20 self-test %zu: FAIL\n", i + 1); + success = false; + } + } + memset(&state, 0, sizeof(state)); + chacha20_init(&state, chacha20_testvecs[0].key, + chacha20_testvecs[0].nonce); + chacha20(&state, massive_input, massive_input, + OUTRAGEOUSLY_HUGE_BUFFER_LEN, &simd_context); + chacha20_init(&state, chacha20_testvecs[0].key, + chacha20_testvecs[0].nonce); + chacha20(&state, massive_input, massive_input, + OUTRAGEOUSLY_HUGE_BUFFER_LEN, DONT_USE_SIMD); + for (k = 0; k < OUTRAGEOUSLY_HUGE_BUFFER_LEN; ++k) { + if (massive_input[k]) { + pr_err("chacha20 self-test massive: FAIL\n"); + success = false; + break; + } + } + + simd_put(&simd_context); + +out: + kfree(offset_input); + kfree(computed_output); + vfree(massive_input); + return success; +} diff --git a/net/wireguard/crypto/zinc/selftest/chacha20poly1305.c b/net/wireguard/crypto/zinc/selftest/chacha20poly1305.c new file mode 100644 index 000000000000..c58ac6e69d54 --- /dev/null +++ b/net/wireguard/crypto/zinc/selftest/chacha20poly1305.c @@ -0,0 +1,9076 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +struct chacha20poly1305_testvec { + const u8 *input, *output, *assoc, *nonce, *key; + size_t ilen, alen, nlen; + bool failure; +}; + +/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539 + * 2.8.2. After they are generated by reference implementations. And the final + * marked ones are taken from wycheproof, but we only do these for the encrypt + * side, because mostly we're stressing the primitives rather than the actual + * chapoly construction. This also requires adding a 96-bit nonce construction, + * just for the purpose of the tests. + */ + +static const u8 enc_input001[] __initconst = { + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, + 0x9d +}; +static const u8 enc_output001[] __initconst = { + 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, + 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, + 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, + 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2, + 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, + 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, + 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, + 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf, + 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, + 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, + 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, + 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, + 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, + 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, + 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, + 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, + 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, + 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, + 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, + 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, + 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, + 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, + 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, + 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, + 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, + 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, + 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, + 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, + 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, + 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, + 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, + 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, + 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, + 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, + 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, + 0x38 +}; +static const u8 enc_assoc001[] __initconst = { + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x4e, 0x91 +}; +static const u8 enc_nonce001[] __initconst = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 +}; +static const u8 enc_key001[] __initconst = { + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 +}; + +static const u8 enc_input002[] __initconst = { }; +static const u8 enc_output002[] __initconst = { + 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1, + 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92 +}; +static const u8 enc_assoc002[] __initconst = { }; +static const u8 enc_nonce002[] __initconst = { + 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e +}; +static const u8 enc_key002[] __initconst = { + 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f, + 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86, + 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef, + 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68 +}; + +static const u8 enc_input003[] __initconst = { }; +static const u8 enc_output003[] __initconst = { + 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6, + 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77 +}; +static const u8 enc_assoc003[] __initconst = { + 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b +}; +static const u8 enc_nonce003[] __initconst = { + 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d +}; +static const u8 enc_key003[] __initconst = { + 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88, + 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a, + 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08, + 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d +}; + +static const u8 enc_input004[] __initconst = { + 0xa4 +}; +static const u8 enc_output004[] __initconst = { + 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2, + 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac, + 0x89 +}; +static const u8 enc_assoc004[] __initconst = { + 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40 +}; +static const u8 enc_nonce004[] __initconst = { + 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4 +}; +static const u8 enc_key004[] __initconst = { + 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8, + 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1, + 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d, + 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e +}; + +static const u8 enc_input005[] __initconst = { + 0x2d +}; +static const u8 enc_output005[] __initconst = { + 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e, + 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c, + 0xac +}; +static const u8 enc_assoc005[] __initconst = { }; +static const u8 enc_nonce005[] __initconst = { + 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30 +}; +static const u8 enc_key005[] __initconst = { + 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31, + 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87, + 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01, + 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87 +}; + +static const u8 enc_input006[] __initconst = { + 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a, + 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92, + 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37, + 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50, + 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec, + 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb, + 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66, + 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb, + 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b, + 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e, + 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3, + 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0, + 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb, + 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41, + 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc, + 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde, + 0x8f +}; +static const u8 enc_output006[] __initconst = { + 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1, + 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15, + 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c, + 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda, + 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11, + 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8, + 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc, + 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3, + 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5, + 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02, + 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93, + 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78, + 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1, + 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66, + 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc, + 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0, + 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d, + 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a, + 0xeb +}; +static const u8 enc_assoc006[] __initconst = { + 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b +}; +static const u8 enc_nonce006[] __initconst = { + 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c +}; +static const u8 enc_key006[] __initconst = { + 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae, + 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78, + 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9, + 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01 +}; + +static const u8 enc_input007[] __initconst = { + 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5, + 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a, + 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1, + 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17, + 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c, + 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1, + 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51, + 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1, + 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86, + 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a, + 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a, + 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98, + 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36, + 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34, + 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57, + 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84, + 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4, + 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80, + 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82, + 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5, + 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d, + 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c, + 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf, + 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc, + 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3, + 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14, + 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81, + 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77, + 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3, + 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2, + 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b, + 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3 +}; +static const u8 enc_output007[] __initconst = { + 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c, + 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8, + 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c, + 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb, + 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0, + 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21, + 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70, + 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac, + 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99, + 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9, + 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f, + 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7, + 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53, + 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12, + 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6, + 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0, + 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54, + 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6, + 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e, + 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb, + 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30, + 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f, + 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2, + 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e, + 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34, + 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39, + 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7, + 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9, + 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82, + 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04, + 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34, + 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef, + 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42, + 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53 +}; +static const u8 enc_assoc007[] __initconst = { }; +static const u8 enc_nonce007[] __initconst = { + 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0 +}; +static const u8 enc_key007[] __initconst = { + 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd, + 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c, + 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80, + 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01 +}; + +static const u8 enc_input008[] __initconst = { + 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10, + 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2, + 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c, + 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb, + 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12, + 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa, + 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6, + 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4, + 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91, + 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb, + 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47, + 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15, + 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f, + 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a, + 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3, + 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97, + 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80, + 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e, + 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f, + 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10, + 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a, + 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0, + 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35, + 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d, + 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d, + 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57, + 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4, + 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f, + 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39, + 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda, + 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17, + 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43, + 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19, + 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09, + 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21, + 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07, + 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f, + 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b, + 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a, + 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed, + 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2, + 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca, + 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff, + 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b, + 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b, + 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b, + 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6, + 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04, + 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48, + 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b, + 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13, + 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8, + 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f, + 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0, + 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92, + 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a, + 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41, + 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17, + 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30, + 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20, + 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49, + 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a, + 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b, + 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3 +}; +static const u8 enc_output008[] __initconst = { + 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd, + 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1, + 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93, + 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d, + 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c, + 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6, + 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4, + 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5, + 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84, + 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd, + 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed, + 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab, + 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13, + 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49, + 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6, + 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8, + 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2, + 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94, + 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18, + 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60, + 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8, + 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b, + 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f, + 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c, + 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20, + 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff, + 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9, + 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c, + 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9, + 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6, + 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea, + 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e, + 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82, + 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1, + 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70, + 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1, + 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c, + 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7, + 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc, + 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc, + 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3, + 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb, + 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97, + 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f, + 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39, + 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f, + 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d, + 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2, + 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d, + 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96, + 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b, + 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20, + 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95, + 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb, + 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35, + 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62, + 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9, + 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6, + 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8, + 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a, + 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93, + 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14, + 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99, + 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86, + 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f, + 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54 +}; +static const u8 enc_assoc008[] __initconst = { }; +static const u8 enc_nonce008[] __initconst = { + 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02 +}; +static const u8 enc_key008[] __initconst = { + 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53, + 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0, + 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86, + 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba +}; + +static const u8 enc_input009[] __initconst = { + 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b, + 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8, + 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca, + 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09, + 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5, + 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85, + 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44, + 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97, + 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77, + 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41, + 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c, + 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00, + 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82, + 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f, + 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e, + 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55, + 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab, + 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17, + 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e, + 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f, + 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82, + 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3, + 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f, + 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0, + 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08, + 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b, + 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85, + 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28, + 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c, + 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62, + 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2, + 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3, + 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62, + 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40, + 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f, + 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b, + 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91, + 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5, + 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c, + 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4, + 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49, + 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04, + 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03, + 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa, + 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec, + 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6, + 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69, + 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36, + 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8, + 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf, + 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe, + 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82, + 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab, + 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d, + 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3, + 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5, + 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34, + 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49, + 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f, + 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d, + 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42, + 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef, + 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27, + 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52, + 0x65 +}; +static const u8 enc_output009[] __initconst = { + 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf, + 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66, + 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72, + 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd, + 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28, + 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe, + 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06, + 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5, + 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7, + 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09, + 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a, + 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00, + 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62, + 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb, + 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2, + 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28, + 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e, + 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a, + 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6, + 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83, + 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9, + 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a, + 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79, + 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a, + 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea, + 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b, + 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52, + 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb, + 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89, + 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad, + 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19, + 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71, + 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d, + 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54, + 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a, + 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d, + 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95, + 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42, + 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16, + 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6, + 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf, + 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d, + 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f, + 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b, + 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e, + 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4, + 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c, + 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4, + 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1, + 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb, + 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff, + 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2, + 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06, + 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66, + 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90, + 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55, + 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc, + 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8, + 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62, + 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba, + 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2, + 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89, + 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06, + 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90, + 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf, + 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8, + 0xae +}; +static const u8 enc_assoc009[] __initconst = { + 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e, + 0xef +}; +static const u8 enc_nonce009[] __initconst = { + 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78 +}; +static const u8 enc_key009[] __initconst = { + 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5, + 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86, + 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2, + 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b +}; + +static const u8 enc_input010[] __initconst = { + 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf, + 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c, + 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22, + 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc, + 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16, + 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7, + 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4, + 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d, + 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5, + 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46, + 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82, + 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b, + 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a, + 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf, + 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca, + 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95, + 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09, + 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3, + 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3, + 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f, + 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58, + 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad, + 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde, + 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44, + 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a, + 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9, + 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26, + 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc, + 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74, + 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b, + 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93, + 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37, + 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f, + 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d, + 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca, + 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73, + 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f, + 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1, + 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9, + 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76, + 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac, + 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7, + 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce, + 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30, + 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb, + 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa, + 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd, + 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f, + 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb, + 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34, + 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e, + 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f, + 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53, + 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41, + 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e, + 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d, + 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27, + 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e, + 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8, + 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a, + 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12, + 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3, + 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66, + 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0, + 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c, + 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4, + 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49, + 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90, + 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11, + 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c, + 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b, + 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74, + 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c, + 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27, + 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1, + 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27, + 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88, + 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27, + 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b, + 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39, + 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7, + 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc, + 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe, + 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5, + 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf, + 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05, + 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73, + 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda, + 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe, + 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71, + 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed, + 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d, + 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33, + 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f, + 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a, + 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa, + 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e, + 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e, + 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87, + 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5, + 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4, + 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38, + 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34, + 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f, + 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36, + 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69, + 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44, + 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5, + 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce, + 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd, + 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27, + 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f, + 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8, + 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a, + 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5, + 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca, + 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e, + 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92, + 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13, + 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf, + 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6, + 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3, + 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b, + 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d, + 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f, + 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40, + 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c, + 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f +}; +static const u8 enc_output010[] __initconst = { + 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b, + 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74, + 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1, + 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd, + 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6, + 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5, + 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96, + 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02, + 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30, + 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57, + 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53, + 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65, + 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71, + 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9, + 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18, + 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce, + 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a, + 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69, + 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2, + 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95, + 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49, + 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e, + 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a, + 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a, + 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e, + 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19, + 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b, + 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75, + 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d, + 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d, + 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f, + 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a, + 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d, + 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5, + 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c, + 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77, + 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46, + 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43, + 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe, + 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8, + 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76, + 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47, + 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8, + 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32, + 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59, + 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae, + 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a, + 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3, + 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74, + 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75, + 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2, + 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e, + 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2, + 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9, + 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1, + 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07, + 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79, + 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71, + 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad, + 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a, + 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c, + 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9, + 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79, + 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27, + 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90, + 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe, + 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99, + 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1, + 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9, + 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0, + 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28, + 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e, + 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20, + 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60, + 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47, + 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68, + 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe, + 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33, + 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8, + 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38, + 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7, + 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04, + 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c, + 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f, + 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c, + 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77, + 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54, + 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5, + 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4, + 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2, + 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e, + 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27, + 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f, + 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92, + 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55, + 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe, + 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04, + 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4, + 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56, + 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02, + 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2, + 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8, + 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27, + 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47, + 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10, + 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43, + 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0, + 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee, + 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47, + 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6, + 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d, + 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c, + 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3, + 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b, + 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09, + 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d, + 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1, + 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd, + 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4, + 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63, + 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87, + 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd, + 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e, + 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a, + 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c, + 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38, + 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a, + 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5, + 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9, + 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0 +}; +static const u8 enc_assoc010[] __initconst = { + 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27, + 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2 +}; +static const u8 enc_nonce010[] __initconst = { + 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30 +}; +static const u8 enc_key010[] __initconst = { + 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44, + 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf, + 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74, + 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7 +}; + +static const u8 enc_input011[] __initconst = { + 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b, + 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b, + 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d, + 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee, + 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30, + 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20, + 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f, + 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e, + 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66, + 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46, + 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35, + 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6, + 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0, + 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15, + 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13, + 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7, + 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3, + 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37, + 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc, + 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95, + 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8, + 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac, + 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45, + 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf, + 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d, + 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc, + 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45, + 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a, + 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec, + 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e, + 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10, + 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8, + 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66, + 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0, + 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62, + 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b, + 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4, + 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96, + 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7, + 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74, + 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8, + 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b, + 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70, + 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95, + 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3, + 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9, + 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d, + 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e, + 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32, + 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5, + 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80, + 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3, + 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad, + 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d, + 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20, + 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17, + 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6, + 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d, + 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82, + 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c, + 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9, + 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb, + 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96, + 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9, + 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f, + 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40, + 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc, + 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce, + 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71, + 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f, + 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35, + 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90, + 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8, + 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01, + 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1, + 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe, + 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4, + 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf, + 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9, + 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f, + 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04, + 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7, + 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15, + 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc, + 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0, + 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae, + 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb, + 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed, + 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51, + 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52, + 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84, + 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5, + 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4, + 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e, + 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74, + 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f, + 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13, + 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea, + 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b, + 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef, + 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09, + 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe, + 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1, + 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9, + 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15, + 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a, + 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab, + 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36, + 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd, + 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde, + 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd, + 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47, + 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5, + 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69, + 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21, + 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98, + 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07, + 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57, + 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd, + 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03, + 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11, + 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96, + 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91, + 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d, + 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0, + 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9, + 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42, + 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a, + 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18, + 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc, + 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce, + 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc, + 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0, + 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf, + 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7, + 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80, + 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c, + 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82, + 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9, + 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20, + 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58, + 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6, + 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc, + 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50, + 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86, + 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a, + 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80, + 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec, + 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08, + 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c, + 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde, + 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d, + 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17, + 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f, + 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26, + 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96, + 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97, + 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6, + 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55, + 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e, + 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88, + 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5, + 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b, + 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15, + 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1, + 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4, + 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3, + 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf, + 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e, + 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb, + 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76, + 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5, + 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c, + 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde, + 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f, + 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51, + 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9, + 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99, + 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6, + 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04, + 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31, + 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a, + 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56, + 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e, + 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78, + 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a, + 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7, + 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb, + 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6, + 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8, + 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc, + 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84, + 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86, + 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76, + 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a, + 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73, + 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8, + 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6, + 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2, + 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56, + 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb, + 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab, + 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76, + 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69, + 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d, + 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc, + 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22, + 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39, + 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6, + 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9, + 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f, + 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1, + 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83, + 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc, + 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4, + 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59, + 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68, + 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef, + 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1, + 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3, + 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44, + 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09, + 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8, + 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a, + 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d, + 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae, + 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2, + 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10, + 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a, + 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34, + 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f, + 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9, + 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b, + 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d, + 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57, + 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03, + 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87, + 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca, + 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53, + 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f, + 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61, + 0x10, 0x1e, 0xbf, 0xec, 0xa8 +}; +static const u8 enc_output011[] __initconst = { + 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8, + 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc, + 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74, + 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73, + 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e, + 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9, + 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e, + 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd, + 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57, + 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19, + 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f, + 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45, + 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e, + 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39, + 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03, + 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f, + 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0, + 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce, + 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb, + 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52, + 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21, + 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a, + 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35, + 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91, + 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b, + 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e, + 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19, + 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07, + 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18, + 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96, + 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68, + 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4, + 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57, + 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c, + 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23, + 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8, + 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6, + 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40, + 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab, + 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb, + 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea, + 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8, + 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31, + 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0, + 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc, + 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94, + 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1, + 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46, + 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6, + 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7, + 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71, + 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a, + 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33, + 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38, + 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23, + 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb, + 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65, + 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73, + 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8, + 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb, + 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a, + 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca, + 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5, + 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71, + 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8, + 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d, + 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6, + 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d, + 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7, + 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5, + 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8, + 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd, + 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29, + 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22, + 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5, + 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67, + 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11, + 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e, + 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09, + 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4, + 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f, + 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa, + 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec, + 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b, + 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d, + 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b, + 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48, + 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3, + 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63, + 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd, + 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78, + 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed, + 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82, + 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f, + 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3, + 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9, + 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72, + 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74, + 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40, + 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b, + 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a, + 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5, + 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98, + 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71, + 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e, + 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4, + 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46, + 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e, + 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f, + 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93, + 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0, + 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5, + 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61, + 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64, + 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85, + 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20, + 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6, + 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc, + 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8, + 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50, + 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4, + 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80, + 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0, + 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a, + 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35, + 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43, + 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12, + 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7, + 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34, + 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42, + 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0, + 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95, + 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74, + 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5, + 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12, + 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6, + 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86, + 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97, + 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45, + 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19, + 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86, + 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c, + 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba, + 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29, + 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6, + 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6, + 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09, + 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31, + 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99, + 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b, + 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca, + 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00, + 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93, + 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3, + 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07, + 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda, + 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90, + 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b, + 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a, + 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6, + 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c, + 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57, + 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15, + 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e, + 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51, + 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75, + 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19, + 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08, + 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14, + 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba, + 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff, + 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90, + 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e, + 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93, + 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad, + 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2, + 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac, + 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d, + 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06, + 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c, + 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91, + 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17, + 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20, + 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7, + 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf, + 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c, + 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2, + 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e, + 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a, + 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05, + 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58, + 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8, + 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d, + 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71, + 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3, + 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe, + 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62, + 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16, + 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66, + 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4, + 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2, + 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35, + 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3, + 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4, + 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f, + 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe, + 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56, + 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b, + 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37, + 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3, + 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f, + 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f, + 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0, + 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70, + 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd, + 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f, + 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e, + 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67, + 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51, + 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23, + 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3, + 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5, + 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09, + 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7, + 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed, + 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb, + 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6, + 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5, + 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96, + 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe, + 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44, + 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6, + 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e, + 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0, + 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79, + 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f, + 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d, + 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82, + 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47, + 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93, + 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6, + 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69, + 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e, + 0x2b, 0xdf, 0xcd, 0xf9, 0x3c +}; +static const u8 enc_assoc011[] __initconst = { + 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7 +}; +static const u8 enc_nonce011[] __initconst = { + 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa +}; +static const u8 enc_key011[] __initconst = { + 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85, + 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca, + 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52, + 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38 +}; + +static const u8 enc_input012[] __initconst = { + 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, + 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, + 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, + 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, + 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, + 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, + 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, + 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, + 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, + 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, + 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, + 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, + 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, + 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, + 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, + 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, + 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, + 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, + 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, + 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, + 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, + 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, + 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, + 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, + 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, + 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, + 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, + 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, + 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, + 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, + 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, + 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, + 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, + 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, + 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, + 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, + 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, + 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, + 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, + 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, + 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, + 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, + 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, + 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, + 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, + 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, + 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, + 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, + 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, + 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, + 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, + 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, + 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, + 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, + 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, + 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, + 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, + 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, + 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, + 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, + 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, + 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, + 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, + 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, + 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, + 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, + 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, + 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, + 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, + 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, + 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, + 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, + 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, + 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, + 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, + 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, + 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, + 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, + 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, + 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, + 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, + 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, + 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, + 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, + 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, + 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, + 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, + 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, + 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, + 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, + 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, + 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, + 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, + 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, + 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, + 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, + 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, + 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, + 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, + 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, + 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, + 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, + 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, + 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, + 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, + 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, + 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, + 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, + 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, + 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, + 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, + 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, + 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, + 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, + 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, + 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, + 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, + 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, + 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, + 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, + 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, + 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, + 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, + 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, + 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, + 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, + 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, + 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, + 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, + 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, + 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, + 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, + 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, + 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, + 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, + 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, + 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, + 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, + 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, + 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, + 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, + 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, + 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, + 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, + 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, + 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, + 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, + 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, + 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, + 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, + 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, + 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, + 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, + 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, + 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, + 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, + 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, + 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, + 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, + 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, + 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, + 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, + 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, + 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, + 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, + 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, + 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, + 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, + 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, + 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, + 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, + 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, + 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, + 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, + 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, + 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, + 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, + 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, + 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, + 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, + 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, + 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, + 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, + 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, + 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, + 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, + 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, + 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, + 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, + 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, + 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, + 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, + 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, + 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, + 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, + 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, + 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, + 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, + 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, + 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, + 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, + 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, + 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, + 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, + 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, + 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, + 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, + 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, + 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, + 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, + 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, + 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, + 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, + 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, + 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, + 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, + 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, + 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, + 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, + 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, + 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, + 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, + 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, + 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, + 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, + 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, + 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, + 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, + 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, + 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, + 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, + 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, + 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, + 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, + 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, + 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, + 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, + 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, + 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, + 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, + 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, + 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, + 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, + 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, + 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, + 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, + 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, + 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, + 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, + 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, + 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, + 0x78, 0xec, 0x00 +}; +static const u8 enc_output012[] __initconst = { + 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, + 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, + 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, + 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, + 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, + 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, + 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, + 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, + 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, + 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, + 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, + 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, + 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, + 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, + 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, + 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, + 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, + 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, + 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, + 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, + 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, + 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, + 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, + 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, + 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, + 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, + 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, + 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, + 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, + 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, + 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, + 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, + 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, + 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, + 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, + 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, + 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, + 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, + 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, + 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, + 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, + 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, + 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, + 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, + 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, + 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, + 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, + 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, + 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, + 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, + 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, + 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, + 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, + 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, + 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, + 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, + 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, + 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, + 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, + 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, + 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, + 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, + 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, + 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, + 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, + 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, + 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, + 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, + 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, + 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, + 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, + 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, + 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, + 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, + 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, + 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, + 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, + 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, + 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, + 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, + 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, + 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, + 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, + 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, + 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, + 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, + 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, + 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, + 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, + 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, + 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, + 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, + 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, + 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, + 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, + 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, + 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, + 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, + 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, + 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, + 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, + 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, + 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, + 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, + 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, + 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, + 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, + 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, + 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, + 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, + 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, + 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, + 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, + 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, + 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, + 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, + 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, + 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, + 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, + 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, + 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, + 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, + 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, + 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, + 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, + 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, + 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, + 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, + 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, + 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, + 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, + 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, + 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, + 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, + 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, + 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, + 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, + 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, + 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, + 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, + 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, + 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, + 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, + 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, + 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, + 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, + 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, + 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, + 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, + 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, + 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, + 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, + 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, + 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, + 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, + 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, + 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, + 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, + 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, + 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, + 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, + 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, + 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, + 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, + 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, + 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, + 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, + 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, + 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, + 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, + 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, + 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, + 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, + 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, + 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, + 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, + 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, + 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, + 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, + 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, + 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, + 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, + 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, + 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, + 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, + 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, + 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, + 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, + 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, + 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, + 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, + 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, + 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, + 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, + 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, + 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, + 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, + 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, + 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, + 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, + 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, + 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, + 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, + 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, + 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, + 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, + 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, + 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, + 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, + 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, + 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, + 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, + 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, + 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, + 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, + 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, + 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, + 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, + 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, + 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, + 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, + 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, + 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, + 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, + 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, + 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, + 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, + 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, + 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, + 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, + 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, + 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, + 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, + 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, + 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, + 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, + 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, + 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, + 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, + 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, + 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, + 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, + 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, + 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, + 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, + 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, + 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, + 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, + 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, + 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, + 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, + 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, + 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, + 0x70, 0xcf, 0xd6 +}; +static const u8 enc_assoc012[] __initconst = { + 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, + 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, + 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, + 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, + 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, + 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, + 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, + 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 +}; +static const u8 enc_nonce012[] __initconst = { + 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 +}; +static const u8 enc_key012[] __initconst = { + 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, + 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, + 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 +}; + +/* wycheproof - rfc7539 */ +static const u8 enc_input013[] __initconst = { + 0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, + 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c, + 0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, + 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73, + 0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, + 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63, + 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66, + 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f, + 0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20, + 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75, + 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73, + 0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, + 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69, + 0x74, 0x2e +}; +static const u8 enc_output013[] __initconst = { + 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb, + 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2, + 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe, + 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6, + 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12, + 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b, + 0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29, + 0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36, + 0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c, + 0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58, + 0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94, + 0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc, + 0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d, + 0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b, + 0x61, 0x16, 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, + 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, + 0x06, 0x91 +}; +static const u8 enc_assoc013[] __initconst = { + 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7 +}; +static const u8 enc_nonce013[] __initconst = { + 0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43, + 0x44, 0x45, 0x46, 0x47 +}; +static const u8 enc_key013[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input014[] __initconst = { }; +static const u8 enc_output014[] __initconst = { + 0x76, 0xac, 0xb3, 0x42, 0xcf, 0x31, 0x66, 0xa5, + 0xb6, 0x3c, 0x0c, 0x0e, 0xa1, 0x38, 0x3c, 0x8d +}; +static const u8 enc_assoc014[] __initconst = { }; +static const u8 enc_nonce014[] __initconst = { + 0x4d, 0xa5, 0xbf, 0x8d, 0xfd, 0x58, 0x52, 0xc1, + 0xea, 0x12, 0x37, 0x9d +}; +static const u8 enc_key014[] __initconst = { + 0x80, 0xba, 0x31, 0x92, 0xc8, 0x03, 0xce, 0x96, + 0x5e, 0xa3, 0x71, 0xd5, 0xff, 0x07, 0x3c, 0xf0, + 0xf4, 0x3b, 0x6a, 0x2a, 0xb5, 0x76, 0xb2, 0x08, + 0x42, 0x6e, 0x11, 0x40, 0x9c, 0x09, 0xb9, 0xb0 +}; + +/* wycheproof - misc */ +static const u8 enc_input015[] __initconst = { }; +static const u8 enc_output015[] __initconst = { + 0x90, 0x6f, 0xa6, 0x28, 0x4b, 0x52, 0xf8, 0x7b, + 0x73, 0x59, 0xcb, 0xaa, 0x75, 0x63, 0xc7, 0x09 +}; +static const u8 enc_assoc015[] __initconst = { + 0xbd, 0x50, 0x67, 0x64, 0xf2, 0xd2, 0xc4, 0x10 +}; +static const u8 enc_nonce015[] __initconst = { + 0xa9, 0x2e, 0xf0, 0xac, 0x99, 0x1d, 0xd5, 0x16, + 0xa3, 0xc6, 0xf6, 0x89 +}; +static const u8 enc_key015[] __initconst = { + 0x7a, 0x4c, 0xd7, 0x59, 0x17, 0x2e, 0x02, 0xeb, + 0x20, 0x4d, 0xb2, 0xc3, 0xf5, 0xc7, 0x46, 0x22, + 0x7d, 0xf5, 0x84, 0xfc, 0x13, 0x45, 0x19, 0x63, + 0x91, 0xdb, 0xb9, 0x57, 0x7a, 0x25, 0x07, 0x42 +}; + +/* wycheproof - misc */ +static const u8 enc_input016[] __initconst = { + 0x2a +}; +static const u8 enc_output016[] __initconst = { + 0x3a, 0xca, 0xc2, 0x7d, 0xec, 0x09, 0x68, 0x80, + 0x1e, 0x9f, 0x6e, 0xde, 0xd6, 0x9d, 0x80, 0x75, + 0x22 +}; +static const u8 enc_assoc016[] __initconst = { }; +static const u8 enc_nonce016[] __initconst = { + 0x99, 0xe2, 0x3e, 0xc4, 0x89, 0x85, 0xbc, 0xcd, + 0xee, 0xab, 0x60, 0xf1 +}; +static const u8 enc_key016[] __initconst = { + 0xcc, 0x56, 0xb6, 0x80, 0x55, 0x2e, 0xb7, 0x50, + 0x08, 0xf5, 0x48, 0x4b, 0x4c, 0xb8, 0x03, 0xfa, + 0x50, 0x63, 0xeb, 0xd6, 0xea, 0xb9, 0x1f, 0x6a, + 0xb6, 0xae, 0xf4, 0x91, 0x6a, 0x76, 0x62, 0x73 +}; + +/* wycheproof - misc */ +static const u8 enc_input017[] __initconst = { + 0x51 +}; +static const u8 enc_output017[] __initconst = { + 0xc4, 0x16, 0x83, 0x10, 0xca, 0x45, 0xb1, 0xf7, + 0xc6, 0x6c, 0xad, 0x4e, 0x99, 0xe4, 0x3f, 0x72, + 0xb9 +}; +static const u8 enc_assoc017[] __initconst = { + 0x91, 0xca, 0x6c, 0x59, 0x2c, 0xbc, 0xca, 0x53 +}; +static const u8 enc_nonce017[] __initconst = { + 0xab, 0x0d, 0xca, 0x71, 0x6e, 0xe0, 0x51, 0xd2, + 0x78, 0x2f, 0x44, 0x03 +}; +static const u8 enc_key017[] __initconst = { + 0x46, 0xf0, 0x25, 0x49, 0x65, 0xf7, 0x69, 0xd5, + 0x2b, 0xdb, 0x4a, 0x70, 0xb4, 0x43, 0x19, 0x9f, + 0x8e, 0xf2, 0x07, 0x52, 0x0d, 0x12, 0x20, 0xc5, + 0x5e, 0x4b, 0x70, 0xf0, 0xfd, 0xa6, 0x20, 0xee +}; + +/* wycheproof - misc */ +static const u8 enc_input018[] __initconst = { + 0x5c, 0x60 +}; +static const u8 enc_output018[] __initconst = { + 0x4d, 0x13, 0x91, 0xe8, 0xb6, 0x1e, 0xfb, 0x39, + 0xc1, 0x22, 0x19, 0x54, 0x53, 0x07, 0x7b, 0x22, + 0xe5, 0xe2 +}; +static const u8 enc_assoc018[] __initconst = { }; +static const u8 enc_nonce018[] __initconst = { + 0x46, 0x1a, 0xf1, 0x22, 0xe9, 0xf2, 0xe0, 0x34, + 0x7e, 0x03, 0xf2, 0xdb +}; +static const u8 enc_key018[] __initconst = { + 0x2f, 0x7f, 0x7e, 0x4f, 0x59, 0x2b, 0xb3, 0x89, + 0x19, 0x49, 0x89, 0x74, 0x35, 0x07, 0xbf, 0x3e, + 0xe9, 0xcb, 0xde, 0x17, 0x86, 0xb6, 0x69, 0x5f, + 0xe6, 0xc0, 0x25, 0xfd, 0x9b, 0xa4, 0xc1, 0x00 +}; + +/* wycheproof - misc */ +static const u8 enc_input019[] __initconst = { + 0xdd, 0xf2 +}; +static const u8 enc_output019[] __initconst = { + 0xb6, 0x0d, 0xea, 0xd0, 0xfd, 0x46, 0x97, 0xec, + 0x2e, 0x55, 0x58, 0x23, 0x77, 0x19, 0xd0, 0x24, + 0x37, 0xa2 +}; +static const u8 enc_assoc019[] __initconst = { + 0x88, 0x36, 0x4f, 0xc8, 0x06, 0x05, 0x18, 0xbf +}; +static const u8 enc_nonce019[] __initconst = { + 0x61, 0x54, 0x6b, 0xa5, 0xf1, 0x72, 0x05, 0x90, + 0xb6, 0x04, 0x0a, 0xc6 +}; +static const u8 enc_key019[] __initconst = { + 0xc8, 0x83, 0x3d, 0xce, 0x5e, 0xa9, 0xf2, 0x48, + 0xaa, 0x20, 0x30, 0xea, 0xcf, 0xe7, 0x2b, 0xff, + 0xe6, 0x9a, 0x62, 0x0c, 0xaf, 0x79, 0x33, 0x44, + 0xe5, 0x71, 0x8f, 0xe0, 0xd7, 0xab, 0x1a, 0x58 +}; + +/* wycheproof - misc */ +static const u8 enc_input020[] __initconst = { + 0xab, 0x85, 0xe9, 0xc1, 0x57, 0x17, 0x31 +}; +static const u8 enc_output020[] __initconst = { + 0x5d, 0xfe, 0x34, 0x40, 0xdb, 0xb3, 0xc3, 0xed, + 0x7a, 0x43, 0x4e, 0x26, 0x02, 0xd3, 0x94, 0x28, + 0x1e, 0x0a, 0xfa, 0x9f, 0xb7, 0xaa, 0x42 +}; +static const u8 enc_assoc020[] __initconst = { }; +static const u8 enc_nonce020[] __initconst = { + 0x3c, 0x4e, 0x65, 0x4d, 0x66, 0x3f, 0xa4, 0x59, + 0x6d, 0xc5, 0x5b, 0xb7 +}; +static const u8 enc_key020[] __initconst = { + 0x55, 0x56, 0x81, 0x58, 0xd3, 0xa6, 0x48, 0x3f, + 0x1f, 0x70, 0x21, 0xea, 0xb6, 0x9b, 0x70, 0x3f, + 0x61, 0x42, 0x51, 0xca, 0xdc, 0x1a, 0xf5, 0xd3, + 0x4a, 0x37, 0x4f, 0xdb, 0xfc, 0x5a, 0xda, 0xc7 +}; + +/* wycheproof - misc */ +static const u8 enc_input021[] __initconst = { + 0x4e, 0xe5, 0xcd, 0xa2, 0x0d, 0x42, 0x90 +}; +static const u8 enc_output021[] __initconst = { + 0x4b, 0xd4, 0x72, 0x12, 0x94, 0x1c, 0xe3, 0x18, + 0x5f, 0x14, 0x08, 0xee, 0x7f, 0xbf, 0x18, 0xf5, + 0xab, 0xad, 0x6e, 0x22, 0x53, 0xa1, 0xba +}; +static const u8 enc_assoc021[] __initconst = { + 0x84, 0xe4, 0x6b, 0xe8, 0xc0, 0x91, 0x90, 0x53 +}; +static const u8 enc_nonce021[] __initconst = { + 0x58, 0x38, 0x93, 0x75, 0xc6, 0x9e, 0xe3, 0x98, + 0xde, 0x94, 0x83, 0x96 +}; +static const u8 enc_key021[] __initconst = { + 0xe3, 0xc0, 0x9e, 0x7f, 0xab, 0x1a, 0xef, 0xb5, + 0x16, 0xda, 0x6a, 0x33, 0x02, 0x2a, 0x1d, 0xd4, + 0xeb, 0x27, 0x2c, 0x80, 0xd5, 0x40, 0xc5, 0xda, + 0x52, 0xa7, 0x30, 0xf3, 0x4d, 0x84, 0x0d, 0x7f +}; + +/* wycheproof - misc */ +static const u8 enc_input022[] __initconst = { + 0xbe, 0x33, 0x08, 0xf7, 0x2a, 0x2c, 0x6a, 0xed +}; +static const u8 enc_output022[] __initconst = { + 0x8e, 0x94, 0x39, 0xa5, 0x6e, 0xee, 0xc8, 0x17, + 0xfb, 0xe8, 0xa6, 0xed, 0x8f, 0xab, 0xb1, 0x93, + 0x75, 0x39, 0xdd, 0x6c, 0x00, 0xe9, 0x00, 0x21 +}; +static const u8 enc_assoc022[] __initconst = { }; +static const u8 enc_nonce022[] __initconst = { + 0x4f, 0x07, 0xaf, 0xed, 0xfd, 0xc3, 0xb6, 0xc2, + 0x36, 0x18, 0x23, 0xd3 +}; +static const u8 enc_key022[] __initconst = { + 0x51, 0xe4, 0xbf, 0x2b, 0xad, 0x92, 0xb7, 0xaf, + 0xf1, 0xa4, 0xbc, 0x05, 0x55, 0x0b, 0xa8, 0x1d, + 0xf4, 0xb9, 0x6f, 0xab, 0xf4, 0x1c, 0x12, 0xc7, + 0xb0, 0x0e, 0x60, 0xe4, 0x8d, 0xb7, 0xe1, 0x52 +}; + +/* wycheproof - misc */ +static const u8 enc_input023[] __initconst = { + 0xa4, 0xc9, 0xc2, 0x80, 0x1b, 0x71, 0xf7, 0xdf +}; +static const u8 enc_output023[] __initconst = { + 0xb9, 0xb9, 0x10, 0x43, 0x3a, 0xf0, 0x52, 0xb0, + 0x45, 0x30, 0xf5, 0x1a, 0xee, 0xe0, 0x24, 0xe0, + 0xa4, 0x45, 0xa6, 0x32, 0x8f, 0xa6, 0x7a, 0x18 +}; +static const u8 enc_assoc023[] __initconst = { + 0x66, 0xc0, 0xae, 0x70, 0x07, 0x6c, 0xb1, 0x4d +}; +static const u8 enc_nonce023[] __initconst = { + 0xb4, 0xea, 0x66, 0x6e, 0xe1, 0x19, 0x56, 0x33, + 0x66, 0x48, 0x4a, 0x78 +}; +static const u8 enc_key023[] __initconst = { + 0x11, 0x31, 0xc1, 0x41, 0x85, 0x77, 0xa0, 0x54, + 0xde, 0x7a, 0x4a, 0xc5, 0x51, 0x95, 0x0f, 0x1a, + 0x05, 0x3f, 0x9a, 0xe4, 0x6e, 0x5b, 0x75, 0xfe, + 0x4a, 0xbd, 0x56, 0x08, 0xd7, 0xcd, 0xda, 0xdd +}; + +/* wycheproof - misc */ +static const u8 enc_input024[] __initconst = { + 0x42, 0xba, 0xae, 0x59, 0x78, 0xfe, 0xaf, 0x5c, + 0x36, 0x8d, 0x14, 0xe0 +}; +static const u8 enc_output024[] __initconst = { + 0xff, 0x7d, 0xc2, 0x03, 0xb2, 0x6c, 0x46, 0x7a, + 0x6b, 0x50, 0xdb, 0x33, 0x57, 0x8c, 0x0f, 0x27, + 0x58, 0xc2, 0xe1, 0x4e, 0x36, 0xd4, 0xfc, 0x10, + 0x6d, 0xcb, 0x29, 0xb4 +}; +static const u8 enc_assoc024[] __initconst = { }; +static const u8 enc_nonce024[] __initconst = { + 0x9a, 0x59, 0xfc, 0xe2, 0x6d, 0xf0, 0x00, 0x5e, + 0x07, 0x53, 0x86, 0x56 +}; +static const u8 enc_key024[] __initconst = { + 0x99, 0xb6, 0x2b, 0xd5, 0xaf, 0xbe, 0x3f, 0xb0, + 0x15, 0xbd, 0xe9, 0x3f, 0x0a, 0xbf, 0x48, 0x39, + 0x57, 0xa1, 0xc3, 0xeb, 0x3c, 0xa5, 0x9c, 0xb5, + 0x0b, 0x39, 0xf7, 0xf8, 0xa9, 0xcc, 0x51, 0xbe +}; + +/* wycheproof - misc */ +static const u8 enc_input025[] __initconst = { + 0xfd, 0xc8, 0x5b, 0x94, 0xa4, 0xb2, 0xa6, 0xb7, + 0x59, 0xb1, 0xa0, 0xda +}; +static const u8 enc_output025[] __initconst = { + 0x9f, 0x88, 0x16, 0xde, 0x09, 0x94, 0xe9, 0x38, + 0xd9, 0xe5, 0x3f, 0x95, 0xd0, 0x86, 0xfc, 0x6c, + 0x9d, 0x8f, 0xa9, 0x15, 0xfd, 0x84, 0x23, 0xa7, + 0xcf, 0x05, 0x07, 0x2f +}; +static const u8 enc_assoc025[] __initconst = { + 0xa5, 0x06, 0xe1, 0xa5, 0xc6, 0x90, 0x93, 0xf9 +}; +static const u8 enc_nonce025[] __initconst = { + 0x58, 0xdb, 0xd4, 0xad, 0x2c, 0x4a, 0xd3, 0x5d, + 0xd9, 0x06, 0xe9, 0xce +}; +static const u8 enc_key025[] __initconst = { + 0x85, 0xf3, 0x5b, 0x62, 0x82, 0xcf, 0xf4, 0x40, + 0xbc, 0x10, 0x20, 0xc8, 0x13, 0x6f, 0xf2, 0x70, + 0x31, 0x11, 0x0f, 0xa6, 0x3e, 0xc1, 0x6f, 0x1e, + 0x82, 0x51, 0x18, 0xb0, 0x06, 0xb9, 0x12, 0x57 +}; + +/* wycheproof - misc */ +static const u8 enc_input026[] __initconst = { + 0x51, 0xf8, 0xc1, 0xf7, 0x31, 0xea, 0x14, 0xac, + 0xdb, 0x21, 0x0a, 0x6d, 0x97, 0x3e, 0x07 +}; +static const u8 enc_output026[] __initconst = { + 0x0b, 0x29, 0x63, 0x8e, 0x1f, 0xbd, 0xd6, 0xdf, + 0x53, 0x97, 0x0b, 0xe2, 0x21, 0x00, 0x42, 0x2a, + 0x91, 0x34, 0x08, 0x7d, 0x67, 0xa4, 0x6e, 0x79, + 0x17, 0x8d, 0x0a, 0x93, 0xf5, 0xe1, 0xd2 +}; +static const u8 enc_assoc026[] __initconst = { }; +static const u8 enc_nonce026[] __initconst = { + 0x68, 0xab, 0x7f, 0xdb, 0xf6, 0x19, 0x01, 0xda, + 0xd4, 0x61, 0xd2, 0x3c +}; +static const u8 enc_key026[] __initconst = { + 0x67, 0x11, 0x96, 0x27, 0xbd, 0x98, 0x8e, 0xda, + 0x90, 0x62, 0x19, 0xe0, 0x8c, 0x0d, 0x0d, 0x77, + 0x9a, 0x07, 0xd2, 0x08, 0xce, 0x8a, 0x4f, 0xe0, + 0x70, 0x9a, 0xf7, 0x55, 0xee, 0xec, 0x6d, 0xcb +}; + +/* wycheproof - misc */ +static const u8 enc_input027[] __initconst = { + 0x97, 0x46, 0x9d, 0xa6, 0x67, 0xd6, 0x11, 0x0f, + 0x9c, 0xbd, 0xa1, 0xd1, 0xa2, 0x06, 0x73 +}; +static const u8 enc_output027[] __initconst = { + 0x32, 0xdb, 0x66, 0xc4, 0xa3, 0x81, 0x9d, 0x81, + 0x55, 0x74, 0x55, 0xe5, 0x98, 0x0f, 0xed, 0xfe, + 0xae, 0x30, 0xde, 0xc9, 0x4e, 0x6a, 0xd3, 0xa9, + 0xee, 0xa0, 0x6a, 0x0d, 0x70, 0x39, 0x17 +}; +static const u8 enc_assoc027[] __initconst = { + 0x64, 0x53, 0xa5, 0x33, 0x84, 0x63, 0x22, 0x12 +}; +static const u8 enc_nonce027[] __initconst = { + 0xd9, 0x5b, 0x32, 0x43, 0xaf, 0xae, 0xf7, 0x14, + 0xc5, 0x03, 0x5b, 0x6a +}; +static const u8 enc_key027[] __initconst = { + 0xe6, 0xf1, 0x11, 0x8d, 0x41, 0xe4, 0xb4, 0x3f, + 0xb5, 0x82, 0x21, 0xb7, 0xed, 0x79, 0x67, 0x38, + 0x34, 0xe0, 0xd8, 0xac, 0x5c, 0x4f, 0xa6, 0x0b, + 0xbc, 0x8b, 0xc4, 0x89, 0x3a, 0x58, 0x89, 0x4d +}; + +/* wycheproof - misc */ +static const u8 enc_input028[] __initconst = { + 0x54, 0x9b, 0x36, 0x5a, 0xf9, 0x13, 0xf3, 0xb0, + 0x81, 0x13, 0x1c, 0xcb, 0x6b, 0x82, 0x55, 0x88 +}; +static const u8 enc_output028[] __initconst = { + 0xe9, 0x11, 0x0e, 0x9f, 0x56, 0xab, 0x3c, 0xa4, + 0x83, 0x50, 0x0c, 0xea, 0xba, 0xb6, 0x7a, 0x13, + 0x83, 0x6c, 0xca, 0xbf, 0x15, 0xa6, 0xa2, 0x2a, + 0x51, 0xc1, 0x07, 0x1c, 0xfa, 0x68, 0xfa, 0x0c +}; +static const u8 enc_assoc028[] __initconst = { }; +static const u8 enc_nonce028[] __initconst = { + 0x2f, 0xcb, 0x1b, 0x38, 0xa9, 0x9e, 0x71, 0xb8, + 0x47, 0x40, 0xad, 0x9b +}; +static const u8 enc_key028[] __initconst = { + 0x59, 0xd4, 0xea, 0xfb, 0x4d, 0xe0, 0xcf, 0xc7, + 0xd3, 0xdb, 0x99, 0xa8, 0xf5, 0x4b, 0x15, 0xd7, + 0xb3, 0x9f, 0x0a, 0xcc, 0x8d, 0xa6, 0x97, 0x63, + 0xb0, 0x19, 0xc1, 0x69, 0x9f, 0x87, 0x67, 0x4a +}; + +/* wycheproof - misc */ +static const u8 enc_input029[] __initconst = { + 0x55, 0xa4, 0x65, 0x64, 0x4f, 0x5b, 0x65, 0x09, + 0x28, 0xcb, 0xee, 0x7c, 0x06, 0x32, 0x14, 0xd6 +}; +static const u8 enc_output029[] __initconst = { + 0xe4, 0xb1, 0x13, 0xcb, 0x77, 0x59, 0x45, 0xf3, + 0xd3, 0xa8, 0xae, 0x9e, 0xc1, 0x41, 0xc0, 0x0c, + 0x7c, 0x43, 0xf1, 0x6c, 0xe0, 0x96, 0xd0, 0xdc, + 0x27, 0xc9, 0x58, 0x49, 0xdc, 0x38, 0x3b, 0x7d +}; +static const u8 enc_assoc029[] __initconst = { + 0x03, 0x45, 0x85, 0x62, 0x1a, 0xf8, 0xd7, 0xff +}; +static const u8 enc_nonce029[] __initconst = { + 0x11, 0x8a, 0x69, 0x64, 0xc2, 0xd3, 0xe3, 0x80, + 0x07, 0x1f, 0x52, 0x66 +}; +static const u8 enc_key029[] __initconst = { + 0xb9, 0x07, 0xa4, 0x50, 0x75, 0x51, 0x3f, 0xe8, + 0xa8, 0x01, 0x9e, 0xde, 0xe3, 0xf2, 0x59, 0x14, + 0x87, 0xb2, 0xa0, 0x30, 0xb0, 0x3c, 0x6e, 0x1d, + 0x77, 0x1c, 0x86, 0x25, 0x71, 0xd2, 0xea, 0x1e +}; + +/* wycheproof - misc */ +static const u8 enc_input030[] __initconst = { + 0x3f, 0xf1, 0x51, 0x4b, 0x1c, 0x50, 0x39, 0x15, + 0x91, 0x8f, 0x0c, 0x0c, 0x31, 0x09, 0x4a, 0x6e, + 0x1f +}; +static const u8 enc_output030[] __initconst = { + 0x02, 0xcc, 0x3a, 0xcb, 0x5e, 0xe1, 0xfc, 0xdd, + 0x12, 0xa0, 0x3b, 0xb8, 0x57, 0x97, 0x64, 0x74, + 0xd3, 0xd8, 0x3b, 0x74, 0x63, 0xa2, 0xc3, 0x80, + 0x0f, 0xe9, 0x58, 0xc2, 0x8e, 0xaa, 0x29, 0x08, + 0x13 +}; +static const u8 enc_assoc030[] __initconst = { }; +static const u8 enc_nonce030[] __initconst = { + 0x45, 0xaa, 0xa3, 0xe5, 0xd1, 0x6d, 0x2d, 0x42, + 0xdc, 0x03, 0x44, 0x5d +}; +static const u8 enc_key030[] __initconst = { + 0x3b, 0x24, 0x58, 0xd8, 0x17, 0x6e, 0x16, 0x21, + 0xc0, 0xcc, 0x24, 0xc0, 0xc0, 0xe2, 0x4c, 0x1e, + 0x80, 0xd7, 0x2f, 0x7e, 0xe9, 0x14, 0x9a, 0x4b, + 0x16, 0x61, 0x76, 0x62, 0x96, 0x16, 0xd0, 0x11 +}; + +/* wycheproof - misc */ +static const u8 enc_input031[] __initconst = { + 0x63, 0x85, 0x8c, 0xa3, 0xe2, 0xce, 0x69, 0x88, + 0x7b, 0x57, 0x8a, 0x3c, 0x16, 0x7b, 0x42, 0x1c, + 0x9c +}; +static const u8 enc_output031[] __initconst = { + 0x35, 0x76, 0x64, 0x88, 0xd2, 0xbc, 0x7c, 0x2b, + 0x8d, 0x17, 0xcb, 0xbb, 0x9a, 0xbf, 0xad, 0x9e, + 0x6d, 0x1f, 0x39, 0x1e, 0x65, 0x7b, 0x27, 0x38, + 0xdd, 0xa0, 0x84, 0x48, 0xcb, 0xa2, 0x81, 0x1c, + 0xeb +}; +static const u8 enc_assoc031[] __initconst = { + 0x9a, 0xaf, 0x29, 0x9e, 0xee, 0xa7, 0x8f, 0x79 +}; +static const u8 enc_nonce031[] __initconst = { + 0xf0, 0x38, 0x4f, 0xb8, 0x76, 0x12, 0x14, 0x10, + 0x63, 0x3d, 0x99, 0x3d +}; +static const u8 enc_key031[] __initconst = { + 0xf6, 0x0c, 0x6a, 0x1b, 0x62, 0x57, 0x25, 0xf7, + 0x6c, 0x70, 0x37, 0xb4, 0x8f, 0xe3, 0x57, 0x7f, + 0xa7, 0xf7, 0xb8, 0x7b, 0x1b, 0xd5, 0xa9, 0x82, + 0x17, 0x6d, 0x18, 0x23, 0x06, 0xff, 0xb8, 0x70 +}; + +/* wycheproof - misc */ +static const u8 enc_input032[] __initconst = { + 0x10, 0xf1, 0xec, 0xf9, 0xc6, 0x05, 0x84, 0x66, + 0x5d, 0x9a, 0xe5, 0xef, 0xe2, 0x79, 0xe7, 0xf7, + 0x37, 0x7e, 0xea, 0x69, 0x16, 0xd2, 0xb1, 0x11 +}; +static const u8 enc_output032[] __initconst = { + 0x42, 0xf2, 0x6c, 0x56, 0xcb, 0x4b, 0xe2, 0x1d, + 0x9d, 0x8d, 0x0c, 0x80, 0xfc, 0x99, 0xdd, 0xe0, + 0x0d, 0x75, 0xf3, 0x80, 0x74, 0xbf, 0xe7, 0x64, + 0x54, 0xaa, 0x7e, 0x13, 0xd4, 0x8f, 0xff, 0x7d, + 0x75, 0x57, 0x03, 0x94, 0x57, 0x04, 0x0a, 0x3a +}; +static const u8 enc_assoc032[] __initconst = { }; +static const u8 enc_nonce032[] __initconst = { + 0xe6, 0xb1, 0xad, 0xf2, 0xfd, 0x58, 0xa8, 0x76, + 0x2c, 0x65, 0xf3, 0x1b +}; +static const u8 enc_key032[] __initconst = { + 0x02, 0x12, 0xa8, 0xde, 0x50, 0x07, 0xed, 0x87, + 0xb3, 0x3f, 0x1a, 0x70, 0x90, 0xb6, 0x11, 0x4f, + 0x9e, 0x08, 0xce, 0xfd, 0x96, 0x07, 0xf2, 0xc2, + 0x76, 0xbd, 0xcf, 0xdb, 0xc5, 0xce, 0x9c, 0xd7 +}; + +/* wycheproof - misc */ +static const u8 enc_input033[] __initconst = { + 0x92, 0x22, 0xf9, 0x01, 0x8e, 0x54, 0xfd, 0x6d, + 0xe1, 0x20, 0x08, 0x06, 0xa9, 0xee, 0x8e, 0x4c, + 0xc9, 0x04, 0xd2, 0x9f, 0x25, 0xcb, 0xa1, 0x93 +}; +static const u8 enc_output033[] __initconst = { + 0x12, 0x30, 0x32, 0x43, 0x7b, 0x4b, 0xfd, 0x69, + 0x20, 0xe8, 0xf7, 0xe7, 0xe0, 0x08, 0x7a, 0xe4, + 0x88, 0x9e, 0xbe, 0x7a, 0x0a, 0xd0, 0xe9, 0x00, + 0x3c, 0xf6, 0x8f, 0x17, 0x95, 0x50, 0xda, 0x63, + 0xd3, 0xb9, 0x6c, 0x2d, 0x55, 0x41, 0x18, 0x65 +}; +static const u8 enc_assoc033[] __initconst = { + 0x3e, 0x8b, 0xc5, 0xad, 0xe1, 0x82, 0xff, 0x08 +}; +static const u8 enc_nonce033[] __initconst = { + 0x6b, 0x28, 0x2e, 0xbe, 0xcc, 0x54, 0x1b, 0xcd, + 0x78, 0x34, 0xed, 0x55 +}; +static const u8 enc_key033[] __initconst = { + 0xc5, 0xbc, 0x09, 0x56, 0x56, 0x46, 0xe7, 0xed, + 0xda, 0x95, 0x4f, 0x1f, 0x73, 0x92, 0x23, 0xda, + 0xda, 0x20, 0xb9, 0x5c, 0x44, 0xab, 0x03, 0x3d, + 0x0f, 0xae, 0x4b, 0x02, 0x83, 0xd1, 0x8b, 0xe3 +}; + +/* wycheproof - misc */ +static const u8 enc_input034[] __initconst = { + 0xb0, 0x53, 0x99, 0x92, 0x86, 0xa2, 0x82, 0x4f, + 0x42, 0xcc, 0x8c, 0x20, 0x3a, 0xb2, 0x4e, 0x2c, + 0x97, 0xa6, 0x85, 0xad, 0xcc, 0x2a, 0xd3, 0x26, + 0x62, 0x55, 0x8e, 0x55, 0xa5, 0xc7, 0x29 +}; +static const u8 enc_output034[] __initconst = { + 0x45, 0xc7, 0xd6, 0xb5, 0x3a, 0xca, 0xd4, 0xab, + 0xb6, 0x88, 0x76, 0xa6, 0xe9, 0x6a, 0x48, 0xfb, + 0x59, 0x52, 0x4d, 0x2c, 0x92, 0xc9, 0xd8, 0xa1, + 0x89, 0xc9, 0xfd, 0x2d, 0xb9, 0x17, 0x46, 0x56, + 0x6d, 0x3c, 0xa1, 0x0e, 0x31, 0x1b, 0x69, 0x5f, + 0x3e, 0xae, 0x15, 0x51, 0x65, 0x24, 0x93 +}; +static const u8 enc_assoc034[] __initconst = { }; +static const u8 enc_nonce034[] __initconst = { + 0x04, 0xa9, 0xbe, 0x03, 0x50, 0x8a, 0x5f, 0x31, + 0x37, 0x1a, 0x6f, 0xd2 +}; +static const u8 enc_key034[] __initconst = { + 0x2e, 0xb5, 0x1c, 0x46, 0x9a, 0xa8, 0xeb, 0x9e, + 0x6c, 0x54, 0xa8, 0x34, 0x9b, 0xae, 0x50, 0xa2, + 0x0f, 0x0e, 0x38, 0x27, 0x11, 0xbb, 0xa1, 0x15, + 0x2c, 0x42, 0x4f, 0x03, 0xb6, 0x67, 0x1d, 0x71 +}; + +/* wycheproof - misc */ +static const u8 enc_input035[] __initconst = { + 0xf4, 0x52, 0x06, 0xab, 0xc2, 0x55, 0x52, 0xb2, + 0xab, 0xc9, 0xab, 0x7f, 0xa2, 0x43, 0x03, 0x5f, + 0xed, 0xaa, 0xdd, 0xc3, 0xb2, 0x29, 0x39, 0x56, + 0xf1, 0xea, 0x6e, 0x71, 0x56, 0xe7, 0xeb +}; +static const u8 enc_output035[] __initconst = { + 0x46, 0xa8, 0x0c, 0x41, 0x87, 0x02, 0x47, 0x20, + 0x08, 0x46, 0x27, 0x58, 0x00, 0x80, 0xdd, 0xe5, + 0xa3, 0xf4, 0xa1, 0x10, 0x93, 0xa7, 0x07, 0x6e, + 0xd6, 0xf3, 0xd3, 0x26, 0xbc, 0x7b, 0x70, 0x53, + 0x4d, 0x4a, 0xa2, 0x83, 0x5a, 0x52, 0xe7, 0x2d, + 0x14, 0xdf, 0x0e, 0x4f, 0x47, 0xf2, 0x5f +}; +static const u8 enc_assoc035[] __initconst = { + 0x37, 0x46, 0x18, 0xa0, 0x6e, 0xa9, 0x8a, 0x48 +}; +static const u8 enc_nonce035[] __initconst = { + 0x47, 0x0a, 0x33, 0x9e, 0xcb, 0x32, 0x19, 0xb8, + 0xb8, 0x1a, 0x1f, 0x8b +}; +static const u8 enc_key035[] __initconst = { + 0x7f, 0x5b, 0x74, 0xc0, 0x7e, 0xd1, 0xb4, 0x0f, + 0xd1, 0x43, 0x58, 0xfe, 0x2f, 0xf2, 0xa7, 0x40, + 0xc1, 0x16, 0xc7, 0x70, 0x65, 0x10, 0xe6, 0xa4, + 0x37, 0xf1, 0x9e, 0xa4, 0x99, 0x11, 0xce, 0xc4 +}; + +/* wycheproof - misc */ +static const u8 enc_input036[] __initconst = { + 0xb9, 0xc5, 0x54, 0xcb, 0xc3, 0x6a, 0xc1, 0x8a, + 0xe8, 0x97, 0xdf, 0x7b, 0xee, 0xca, 0xc1, 0xdb, + 0xeb, 0x4e, 0xaf, 0xa1, 0x56, 0xbb, 0x60, 0xce, + 0x2e, 0x5d, 0x48, 0xf0, 0x57, 0x15, 0xe6, 0x78 +}; +static const u8 enc_output036[] __initconst = { + 0xea, 0x29, 0xaf, 0xa4, 0x9d, 0x36, 0xe8, 0x76, + 0x0f, 0x5f, 0xe1, 0x97, 0x23, 0xb9, 0x81, 0x1e, + 0xd5, 0xd5, 0x19, 0x93, 0x4a, 0x44, 0x0f, 0x50, + 0x81, 0xac, 0x43, 0x0b, 0x95, 0x3b, 0x0e, 0x21, + 0x22, 0x25, 0x41, 0xaf, 0x46, 0xb8, 0x65, 0x33, + 0xc6, 0xb6, 0x8d, 0x2f, 0xf1, 0x08, 0xa7, 0xea +}; +static const u8 enc_assoc036[] __initconst = { }; +static const u8 enc_nonce036[] __initconst = { + 0x72, 0xcf, 0xd9, 0x0e, 0xf3, 0x02, 0x6c, 0xa2, + 0x2b, 0x7e, 0x6e, 0x6a +}; +static const u8 enc_key036[] __initconst = { + 0xe1, 0x73, 0x1d, 0x58, 0x54, 0xe1, 0xb7, 0x0c, + 0xb3, 0xff, 0xe8, 0xb7, 0x86, 0xa2, 0xb3, 0xeb, + 0xf0, 0x99, 0x43, 0x70, 0x95, 0x47, 0x57, 0xb9, + 0xdc, 0x8c, 0x7b, 0xc5, 0x35, 0x46, 0x34, 0xa3 +}; + +/* wycheproof - misc */ +static const u8 enc_input037[] __initconst = { + 0x6b, 0x26, 0x04, 0x99, 0x6c, 0xd3, 0x0c, 0x14, + 0xa1, 0x3a, 0x52, 0x57, 0xed, 0x6c, 0xff, 0xd3, + 0xbc, 0x5e, 0x29, 0xd6, 0xb9, 0x7e, 0xb1, 0x79, + 0x9e, 0xb3, 0x35, 0xe2, 0x81, 0xea, 0x45, 0x1e +}; +static const u8 enc_output037[] __initconst = { + 0x6d, 0xad, 0x63, 0x78, 0x97, 0x54, 0x4d, 0x8b, + 0xf6, 0xbe, 0x95, 0x07, 0xed, 0x4d, 0x1b, 0xb2, + 0xe9, 0x54, 0xbc, 0x42, 0x7e, 0x5d, 0xe7, 0x29, + 0xda, 0xf5, 0x07, 0x62, 0x84, 0x6f, 0xf2, 0xf4, + 0x7b, 0x99, 0x7d, 0x93, 0xc9, 0x82, 0x18, 0x9d, + 0x70, 0x95, 0xdc, 0x79, 0x4c, 0x74, 0x62, 0x32 +}; +static const u8 enc_assoc037[] __initconst = { + 0x23, 0x33, 0xe5, 0xce, 0x0f, 0x93, 0xb0, 0x59 +}; +static const u8 enc_nonce037[] __initconst = { + 0x26, 0x28, 0x80, 0xd4, 0x75, 0xf3, 0xda, 0xc5, + 0x34, 0x0d, 0xd1, 0xb8 +}; +static const u8 enc_key037[] __initconst = { + 0x27, 0xd8, 0x60, 0x63, 0x1b, 0x04, 0x85, 0xa4, + 0x10, 0x70, 0x2f, 0xea, 0x61, 0xbc, 0x87, 0x3f, + 0x34, 0x42, 0x26, 0x0c, 0xad, 0xed, 0x4a, 0xbd, + 0xe2, 0x5b, 0x78, 0x6a, 0x2d, 0x97, 0xf1, 0x45 +}; + +/* wycheproof - misc */ +static const u8 enc_input038[] __initconst = { + 0x97, 0x3d, 0x0c, 0x75, 0x38, 0x26, 0xba, 0xe4, + 0x66, 0xcf, 0x9a, 0xbb, 0x34, 0x93, 0x15, 0x2e, + 0x9d, 0xe7, 0x81, 0x9e, 0x2b, 0xd0, 0xc7, 0x11, + 0x71, 0x34, 0x6b, 0x4d, 0x2c, 0xeb, 0xf8, 0x04, + 0x1a, 0xa3, 0xce, 0xdc, 0x0d, 0xfd, 0x7b, 0x46, + 0x7e, 0x26, 0x22, 0x8b, 0xc8, 0x6c, 0x9a +}; +static const u8 enc_output038[] __initconst = { + 0xfb, 0xa7, 0x8a, 0xe4, 0xf9, 0xd8, 0x08, 0xa6, + 0x2e, 0x3d, 0xa4, 0x0b, 0xe2, 0xcb, 0x77, 0x00, + 0xc3, 0x61, 0x3d, 0x9e, 0xb2, 0xc5, 0x29, 0xc6, + 0x52, 0xe7, 0x6a, 0x43, 0x2c, 0x65, 0x8d, 0x27, + 0x09, 0x5f, 0x0e, 0xb8, 0xf9, 0x40, 0xc3, 0x24, + 0x98, 0x1e, 0xa9, 0x35, 0xe5, 0x07, 0xf9, 0x8f, + 0x04, 0x69, 0x56, 0xdb, 0x3a, 0x51, 0x29, 0x08, + 0xbd, 0x7a, 0xfc, 0x8f, 0x2a, 0xb0, 0xa9 +}; +static const u8 enc_assoc038[] __initconst = { }; +static const u8 enc_nonce038[] __initconst = { + 0xe7, 0x4a, 0x51, 0x5e, 0x7e, 0x21, 0x02, 0xb9, + 0x0b, 0xef, 0x55, 0xd2 +}; +static const u8 enc_key038[] __initconst = { + 0xcf, 0x0d, 0x40, 0xa4, 0x64, 0x4e, 0x5f, 0x51, + 0x81, 0x51, 0x65, 0xd5, 0x30, 0x1b, 0x22, 0x63, + 0x1f, 0x45, 0x44, 0xc4, 0x9a, 0x18, 0x78, 0xe3, + 0xa0, 0xa5, 0xe8, 0xe1, 0xaa, 0xe0, 0xf2, 0x64 +}; + +/* wycheproof - misc */ +static const u8 enc_input039[] __initconst = { + 0xa9, 0x89, 0x95, 0x50, 0x4d, 0xf1, 0x6f, 0x74, + 0x8b, 0xfb, 0x77, 0x85, 0xff, 0x91, 0xee, 0xb3, + 0xb6, 0x60, 0xea, 0x9e, 0xd3, 0x45, 0x0c, 0x3d, + 0x5e, 0x7b, 0x0e, 0x79, 0xef, 0x65, 0x36, 0x59, + 0xa9, 0x97, 0x8d, 0x75, 0x54, 0x2e, 0xf9, 0x1c, + 0x45, 0x67, 0x62, 0x21, 0x56, 0x40, 0xb9 +}; +static const u8 enc_output039[] __initconst = { + 0xa1, 0xff, 0xed, 0x80, 0x76, 0x18, 0x29, 0xec, + 0xce, 0x24, 0x2e, 0x0e, 0x88, 0xb1, 0x38, 0x04, + 0x90, 0x16, 0xbc, 0xa0, 0x18, 0xda, 0x2b, 0x6e, + 0x19, 0x98, 0x6b, 0x3e, 0x31, 0x8c, 0xae, 0x8d, + 0x80, 0x61, 0x98, 0xfb, 0x4c, 0x52, 0x7c, 0xc3, + 0x93, 0x50, 0xeb, 0xdd, 0xea, 0xc5, 0x73, 0xc4, + 0xcb, 0xf0, 0xbe, 0xfd, 0xa0, 0xb7, 0x02, 0x42, + 0xc6, 0x40, 0xd7, 0xcd, 0x02, 0xd7, 0xa3 +}; +static const u8 enc_assoc039[] __initconst = { + 0xb3, 0xe4, 0x06, 0x46, 0x83, 0xb0, 0x2d, 0x84 +}; +static const u8 enc_nonce039[] __initconst = { + 0xd4, 0xd8, 0x07, 0x34, 0x16, 0x83, 0x82, 0x5b, + 0x31, 0xcd, 0x4d, 0x95 +}; +static const u8 enc_key039[] __initconst = { + 0x6c, 0xbf, 0xd7, 0x1c, 0x64, 0x5d, 0x18, 0x4c, + 0xf5, 0xd2, 0x3c, 0x40, 0x2b, 0xdb, 0x0d, 0x25, + 0xec, 0x54, 0x89, 0x8c, 0x8a, 0x02, 0x73, 0xd4, + 0x2e, 0xb5, 0xbe, 0x10, 0x9f, 0xdc, 0xb2, 0xac +}; + +/* wycheproof - misc */ +static const u8 enc_input040[] __initconst = { + 0xd0, 0x96, 0x80, 0x31, 0x81, 0xbe, 0xef, 0x9e, + 0x00, 0x8f, 0xf8, 0x5d, 0x5d, 0xdc, 0x38, 0xdd, + 0xac, 0xf0, 0xf0, 0x9e, 0xe5, 0xf7, 0xe0, 0x7f, + 0x1e, 0x40, 0x79, 0xcb, 0x64, 0xd0, 0xdc, 0x8f, + 0x5e, 0x67, 0x11, 0xcd, 0x49, 0x21, 0xa7, 0x88, + 0x7d, 0xe7, 0x6e, 0x26, 0x78, 0xfd, 0xc6, 0x76, + 0x18, 0xf1, 0x18, 0x55, 0x86, 0xbf, 0xea, 0x9d, + 0x4c, 0x68, 0x5d, 0x50, 0xe4, 0xbb, 0x9a, 0x82 +}; +static const u8 enc_output040[] __initconst = { + 0x9a, 0x4e, 0xf2, 0x2b, 0x18, 0x16, 0x77, 0xb5, + 0x75, 0x5c, 0x08, 0xf7, 0x47, 0xc0, 0xf8, 0xd8, + 0xe8, 0xd4, 0xc1, 0x8a, 0x9c, 0xc2, 0x40, 0x5c, + 0x12, 0xbb, 0x51, 0xbb, 0x18, 0x72, 0xc8, 0xe8, + 0xb8, 0x77, 0x67, 0x8b, 0xec, 0x44, 0x2c, 0xfc, + 0xbb, 0x0f, 0xf4, 0x64, 0xa6, 0x4b, 0x74, 0x33, + 0x2c, 0xf0, 0x72, 0x89, 0x8c, 0x7e, 0x0e, 0xdd, + 0xf6, 0x23, 0x2e, 0xa6, 0xe2, 0x7e, 0xfe, 0x50, + 0x9f, 0xf3, 0x42, 0x7a, 0x0f, 0x32, 0xfa, 0x56, + 0x6d, 0x9c, 0xa0, 0xa7, 0x8a, 0xef, 0xc0, 0x13 +}; +static const u8 enc_assoc040[] __initconst = { }; +static const u8 enc_nonce040[] __initconst = { + 0xd6, 0x10, 0x40, 0xa3, 0x13, 0xed, 0x49, 0x28, + 0x23, 0xcc, 0x06, 0x5b +}; +static const u8 enc_key040[] __initconst = { + 0x5b, 0x1d, 0x10, 0x35, 0xc0, 0xb1, 0x7e, 0xe0, + 0xb0, 0x44, 0x47, 0x67, 0xf8, 0x0a, 0x25, 0xb8, + 0xc1, 0xb7, 0x41, 0xf4, 0xb5, 0x0a, 0x4d, 0x30, + 0x52, 0x22, 0x6b, 0xaa, 0x1c, 0x6f, 0xb7, 0x01 +}; + +/* wycheproof - misc */ +static const u8 enc_input041[] __initconst = { + 0x94, 0xee, 0x16, 0x6d, 0x6d, 0x6e, 0xcf, 0x88, + 0x32, 0x43, 0x71, 0x36, 0xb4, 0xae, 0x80, 0x5d, + 0x42, 0x88, 0x64, 0x35, 0x95, 0x86, 0xd9, 0x19, + 0x3a, 0x25, 0x01, 0x62, 0x93, 0xed, 0xba, 0x44, + 0x3c, 0x58, 0xe0, 0x7e, 0x7b, 0x71, 0x95, 0xec, + 0x5b, 0xd8, 0x45, 0x82, 0xa9, 0xd5, 0x6c, 0x8d, + 0x4a, 0x10, 0x8c, 0x7d, 0x7c, 0xe3, 0x4e, 0x6c, + 0x6f, 0x8e, 0xa1, 0xbe, 0xc0, 0x56, 0x73, 0x17 +}; +static const u8 enc_output041[] __initconst = { + 0x5f, 0xbb, 0xde, 0xcc, 0x34, 0xbe, 0x20, 0x16, + 0x14, 0xf6, 0x36, 0x03, 0x1e, 0xeb, 0x42, 0xf1, + 0xca, 0xce, 0x3c, 0x79, 0xa1, 0x2c, 0xff, 0xd8, + 0x71, 0xee, 0x8e, 0x73, 0x82, 0x0c, 0x82, 0x97, + 0x49, 0xf1, 0xab, 0xb4, 0x29, 0x43, 0x67, 0x84, + 0x9f, 0xb6, 0xc2, 0xaa, 0x56, 0xbd, 0xa8, 0xa3, + 0x07, 0x8f, 0x72, 0x3d, 0x7c, 0x1c, 0x85, 0x20, + 0x24, 0xb0, 0x17, 0xb5, 0x89, 0x73, 0xfb, 0x1e, + 0x09, 0x26, 0x3d, 0xa7, 0xb4, 0xcb, 0x92, 0x14, + 0x52, 0xf9, 0x7d, 0xca, 0x40, 0xf5, 0x80, 0xec +}; +static const u8 enc_assoc041[] __initconst = { + 0x71, 0x93, 0xf6, 0x23, 0x66, 0x33, 0x21, 0xa2 +}; +static const u8 enc_nonce041[] __initconst = { + 0xd3, 0x1c, 0x21, 0xab, 0xa1, 0x75, 0xb7, 0x0d, + 0xe4, 0xeb, 0xb1, 0x9c +}; +static const u8 enc_key041[] __initconst = { + 0x97, 0xd6, 0x35, 0xc4, 0xf4, 0x75, 0x74, 0xd9, + 0x99, 0x8a, 0x90, 0x87, 0x5d, 0xa1, 0xd3, 0xa2, + 0x84, 0xb7, 0x55, 0xb2, 0xd3, 0x92, 0x97, 0xa5, + 0x72, 0x52, 0x35, 0x19, 0x0e, 0x10, 0xa9, 0x7e +}; + +/* wycheproof - misc */ +static const u8 enc_input042[] __initconst = { + 0xb4, 0x29, 0xeb, 0x80, 0xfb, 0x8f, 0xe8, 0xba, + 0xed, 0xa0, 0xc8, 0x5b, 0x9c, 0x33, 0x34, 0x58, + 0xe7, 0xc2, 0x99, 0x2e, 0x55, 0x84, 0x75, 0x06, + 0x9d, 0x12, 0xd4, 0x5c, 0x22, 0x21, 0x75, 0x64, + 0x12, 0x15, 0x88, 0x03, 0x22, 0x97, 0xef, 0xf5, + 0x67, 0x83, 0x74, 0x2a, 0x5f, 0xc2, 0x2d, 0x74, + 0x10, 0xff, 0xb2, 0x9d, 0x66, 0x09, 0x86, 0x61, + 0xd7, 0x6f, 0x12, 0x6c, 0x3c, 0x27, 0x68, 0x9e, + 0x43, 0xb3, 0x72, 0x67, 0xca, 0xc5, 0xa3, 0xa6, + 0xd3, 0xab, 0x49, 0xe3, 0x91, 0xda, 0x29, 0xcd, + 0x30, 0x54, 0xa5, 0x69, 0x2e, 0x28, 0x07, 0xe4, + 0xc3, 0xea, 0x46, 0xc8, 0x76, 0x1d, 0x50, 0xf5, + 0x92 +}; +static const u8 enc_output042[] __initconst = { + 0xd0, 0x10, 0x2f, 0x6c, 0x25, 0x8b, 0xf4, 0x97, + 0x42, 0xce, 0xc3, 0x4c, 0xf2, 0xd0, 0xfe, 0xdf, + 0x23, 0xd1, 0x05, 0xfb, 0x4c, 0x84, 0xcf, 0x98, + 0x51, 0x5e, 0x1b, 0xc9, 0xa6, 0x4f, 0x8a, 0xd5, + 0xbe, 0x8f, 0x07, 0x21, 0xbd, 0xe5, 0x06, 0x45, + 0xd0, 0x00, 0x83, 0xc3, 0xa2, 0x63, 0xa3, 0x10, + 0x53, 0xb7, 0x60, 0x24, 0x5f, 0x52, 0xae, 0x28, + 0x66, 0xa5, 0xec, 0x83, 0xb1, 0x9f, 0x61, 0xbe, + 0x1d, 0x30, 0xd5, 0xc5, 0xd9, 0xfe, 0xcc, 0x4c, + 0xbb, 0xe0, 0x8f, 0xd3, 0x85, 0x81, 0x3a, 0x2a, + 0xa3, 0x9a, 0x00, 0xff, 0x9c, 0x10, 0xf7, 0xf2, + 0x37, 0x02, 0xad, 0xd1, 0xe4, 0xb2, 0xff, 0xa3, + 0x1c, 0x41, 0x86, 0x5f, 0xc7, 0x1d, 0xe1, 0x2b, + 0x19, 0x61, 0x21, 0x27, 0xce, 0x49, 0x99, 0x3b, + 0xb0 +}; +static const u8 enc_assoc042[] __initconst = { }; +static const u8 enc_nonce042[] __initconst = { + 0x17, 0xc8, 0x6a, 0x8a, 0xbb, 0xb7, 0xe0, 0x03, + 0xac, 0xde, 0x27, 0x99 +}; +static const u8 enc_key042[] __initconst = { + 0xfe, 0x6e, 0x55, 0xbd, 0xae, 0xd1, 0xf7, 0x28, + 0x4c, 0xa5, 0xfc, 0x0f, 0x8c, 0x5f, 0x2b, 0x8d, + 0xf5, 0x6d, 0xc0, 0xf4, 0x9e, 0x8c, 0xa6, 0x6a, + 0x41, 0x99, 0x5e, 0x78, 0x33, 0x51, 0xf9, 0x01 +}; + +/* wycheproof - misc */ +static const u8 enc_input043[] __initconst = { + 0xce, 0xb5, 0x34, 0xce, 0x50, 0xdc, 0x23, 0xff, + 0x63, 0x8a, 0xce, 0x3e, 0xf6, 0x3a, 0xb2, 0xcc, + 0x29, 0x73, 0xee, 0xad, 0xa8, 0x07, 0x85, 0xfc, + 0x16, 0x5d, 0x06, 0xc2, 0xf5, 0x10, 0x0f, 0xf5, + 0xe8, 0xab, 0x28, 0x82, 0xc4, 0x75, 0xaf, 0xcd, + 0x05, 0xcc, 0xd4, 0x9f, 0x2e, 0x7d, 0x8f, 0x55, + 0xef, 0x3a, 0x72, 0xe3, 0xdc, 0x51, 0xd6, 0x85, + 0x2b, 0x8e, 0x6b, 0x9e, 0x7a, 0xec, 0xe5, 0x7b, + 0xe6, 0x55, 0x6b, 0x0b, 0x6d, 0x94, 0x13, 0xe3, + 0x3f, 0xc5, 0xfc, 0x24, 0xa9, 0xa2, 0x05, 0xad, + 0x59, 0x57, 0x4b, 0xb3, 0x9d, 0x94, 0x4a, 0x92, + 0xdc, 0x47, 0x97, 0x0d, 0x84, 0xa6, 0xad, 0x31, + 0x76 +}; +static const u8 enc_output043[] __initconst = { + 0x75, 0x45, 0x39, 0x1b, 0x51, 0xde, 0x01, 0xd5, + 0xc5, 0x3d, 0xfa, 0xca, 0x77, 0x79, 0x09, 0x06, + 0x3e, 0x58, 0xed, 0xee, 0x4b, 0xb1, 0x22, 0x7e, + 0x71, 0x10, 0xac, 0x4d, 0x26, 0x20, 0xc2, 0xae, + 0xc2, 0xf8, 0x48, 0xf5, 0x6d, 0xee, 0xb0, 0x37, + 0xa8, 0xdc, 0xed, 0x75, 0xaf, 0xa8, 0xa6, 0xc8, + 0x90, 0xe2, 0xde, 0xe4, 0x2f, 0x95, 0x0b, 0xb3, + 0x3d, 0x9e, 0x24, 0x24, 0xd0, 0x8a, 0x50, 0x5d, + 0x89, 0x95, 0x63, 0x97, 0x3e, 0xd3, 0x88, 0x70, + 0xf3, 0xde, 0x6e, 0xe2, 0xad, 0xc7, 0xfe, 0x07, + 0x2c, 0x36, 0x6c, 0x14, 0xe2, 0xcf, 0x7c, 0xa6, + 0x2f, 0xb3, 0xd3, 0x6b, 0xee, 0x11, 0x68, 0x54, + 0x61, 0xb7, 0x0d, 0x44, 0xef, 0x8c, 0x66, 0xc5, + 0xc7, 0xbb, 0xf1, 0x0d, 0xca, 0xdd, 0x7f, 0xac, + 0xf6 +}; +static const u8 enc_assoc043[] __initconst = { + 0xa1, 0x1c, 0x40, 0xb6, 0x03, 0x76, 0x73, 0x30 +}; +static const u8 enc_nonce043[] __initconst = { + 0x46, 0x36, 0x2f, 0x45, 0xd6, 0x37, 0x9e, 0x63, + 0xe5, 0x22, 0x94, 0x60 +}; +static const u8 enc_key043[] __initconst = { + 0xaa, 0xbc, 0x06, 0x34, 0x74, 0xe6, 0x5c, 0x4c, + 0x3e, 0x9b, 0xdc, 0x48, 0x0d, 0xea, 0x97, 0xb4, + 0x51, 0x10, 0xc8, 0x61, 0x88, 0x46, 0xff, 0x6b, + 0x15, 0xbd, 0xd2, 0xa4, 0xa5, 0x68, 0x2c, 0x4e +}; + +/* wycheproof - misc */ +static const u8 enc_input044[] __initconst = { + 0xe5, 0xcc, 0xaa, 0x44, 0x1b, 0xc8, 0x14, 0x68, + 0x8f, 0x8f, 0x6e, 0x8f, 0x28, 0xb5, 0x00, 0xb2 +}; +static const u8 enc_output044[] __initconst = { + 0x7e, 0x72, 0xf5, 0xa1, 0x85, 0xaf, 0x16, 0xa6, + 0x11, 0x92, 0x1b, 0x43, 0x8f, 0x74, 0x9f, 0x0b, + 0x12, 0x42, 0xc6, 0x70, 0x73, 0x23, 0x34, 0x02, + 0x9a, 0xdf, 0xe1, 0xc5, 0x00, 0x16, 0x51, 0xe4 +}; +static const u8 enc_assoc044[] __initconst = { + 0x02 +}; +static const u8 enc_nonce044[] __initconst = { + 0x87, 0x34, 0x5f, 0x10, 0x55, 0xfd, 0x9e, 0x21, + 0x02, 0xd5, 0x06, 0x56 +}; +static const u8 enc_key044[] __initconst = { + 0x7d, 0x00, 0xb4, 0x80, 0x95, 0xad, 0xfa, 0x32, + 0x72, 0x05, 0x06, 0x07, 0xb2, 0x64, 0x18, 0x50, + 0x02, 0xba, 0x99, 0x95, 0x7c, 0x49, 0x8b, 0xe0, + 0x22, 0x77, 0x0f, 0x2c, 0xe2, 0xf3, 0x14, 0x3c +}; + +/* wycheproof - misc */ +static const u8 enc_input045[] __initconst = { + 0x02, 0xcd, 0xe1, 0x68, 0xfb, 0xa3, 0xf5, 0x44, + 0xbb, 0xd0, 0x33, 0x2f, 0x7a, 0xde, 0xad, 0xa8 +}; +static const u8 enc_output045[] __initconst = { + 0x85, 0xf2, 0x9a, 0x71, 0x95, 0x57, 0xcd, 0xd1, + 0x4d, 0x1f, 0x8f, 0xff, 0xab, 0x6d, 0x9e, 0x60, + 0x73, 0x2c, 0xa3, 0x2b, 0xec, 0xd5, 0x15, 0xa1, + 0xed, 0x35, 0x3f, 0x54, 0x2e, 0x99, 0x98, 0x58 +}; +static const u8 enc_assoc045[] __initconst = { + 0xb6, 0x48 +}; +static const u8 enc_nonce045[] __initconst = { + 0x87, 0xa3, 0x16, 0x3e, 0xc0, 0x59, 0x8a, 0xd9, + 0x5b, 0x3a, 0xa7, 0x13 +}; +static const u8 enc_key045[] __initconst = { + 0x64, 0x32, 0x71, 0x7f, 0x1d, 0xb8, 0x5e, 0x41, + 0xac, 0x78, 0x36, 0xbc, 0xe2, 0x51, 0x85, 0xa0, + 0x80, 0xd5, 0x76, 0x2b, 0x9e, 0x2b, 0x18, 0x44, + 0x4b, 0x6e, 0xc7, 0x2c, 0x3b, 0xd8, 0xe4, 0xdc +}; + +/* wycheproof - misc */ +static const u8 enc_input046[] __initconst = { + 0x16, 0xdd, 0xd2, 0x3f, 0xf5, 0x3f, 0x3d, 0x23, + 0xc0, 0x63, 0x34, 0x48, 0x70, 0x40, 0xeb, 0x47 +}; +static const u8 enc_output046[] __initconst = { + 0xc1, 0xb2, 0x95, 0x93, 0x6d, 0x56, 0xfa, 0xda, + 0xc0, 0x3e, 0x5f, 0x74, 0x2b, 0xff, 0x73, 0xa1, + 0x39, 0xc4, 0x57, 0xdb, 0xab, 0x66, 0x38, 0x2b, + 0xab, 0xb3, 0xb5, 0x58, 0x00, 0xcd, 0xa5, 0xb8 +}; +static const u8 enc_assoc046[] __initconst = { + 0xbd, 0x4c, 0xd0, 0x2f, 0xc7, 0x50, 0x2b, 0xbd, + 0xbd, 0xf6, 0xc9, 0xa3, 0xcb, 0xe8, 0xf0 +}; +static const u8 enc_nonce046[] __initconst = { + 0x6f, 0x57, 0x3a, 0xa8, 0x6b, 0xaa, 0x49, 0x2b, + 0xa4, 0x65, 0x96, 0xdf +}; +static const u8 enc_key046[] __initconst = { + 0x8e, 0x34, 0xcf, 0x73, 0xd2, 0x45, 0xa1, 0x08, + 0x2a, 0x92, 0x0b, 0x86, 0x36, 0x4e, 0xb8, 0x96, + 0xc4, 0x94, 0x64, 0x67, 0xbc, 0xb3, 0xd5, 0x89, + 0x29, 0xfc, 0xb3, 0x66, 0x90, 0xe6, 0x39, 0x4f +}; + +/* wycheproof - misc */ +static const u8 enc_input047[] __initconst = { + 0x62, 0x3b, 0x78, 0x50, 0xc3, 0x21, 0xe2, 0xcf, + 0x0c, 0x6f, 0xbc, 0xc8, 0xdf, 0xd1, 0xaf, 0xf2 +}; +static const u8 enc_output047[] __initconst = { + 0xc8, 0x4c, 0x9b, 0xb7, 0xc6, 0x1c, 0x1b, 0xcb, + 0x17, 0x77, 0x2a, 0x1c, 0x50, 0x0c, 0x50, 0x95, + 0xdb, 0xad, 0xf7, 0xa5, 0x13, 0x8c, 0xa0, 0x34, + 0x59, 0xa2, 0xcd, 0x65, 0x83, 0x1e, 0x09, 0x2f +}; +static const u8 enc_assoc047[] __initconst = { + 0x89, 0xcc, 0xe9, 0xfb, 0x47, 0x44, 0x1d, 0x07, + 0xe0, 0x24, 0x5a, 0x66, 0xfe, 0x8b, 0x77, 0x8b +}; +static const u8 enc_nonce047[] __initconst = { + 0x1a, 0x65, 0x18, 0xf0, 0x2e, 0xde, 0x1d, 0xa6, + 0x80, 0x92, 0x66, 0xd9 +}; +static const u8 enc_key047[] __initconst = { + 0xcb, 0x55, 0x75, 0xf5, 0xc7, 0xc4, 0x5c, 0x91, + 0xcf, 0x32, 0x0b, 0x13, 0x9f, 0xb5, 0x94, 0x23, + 0x75, 0x60, 0xd0, 0xa3, 0xe6, 0xf8, 0x65, 0xa6, + 0x7d, 0x4f, 0x63, 0x3f, 0x2c, 0x08, 0xf0, 0x16 +}; + +/* wycheproof - misc */ +static const u8 enc_input048[] __initconst = { + 0x87, 0xb3, 0xa4, 0xd7, 0xb2, 0x6d, 0x8d, 0x32, + 0x03, 0xa0, 0xde, 0x1d, 0x64, 0xef, 0x82, 0xe3 +}; +static const u8 enc_output048[] __initconst = { + 0x94, 0xbc, 0x80, 0x62, 0x1e, 0xd1, 0xe7, 0x1b, + 0x1f, 0xd2, 0xb5, 0xc3, 0xa1, 0x5e, 0x35, 0x68, + 0x33, 0x35, 0x11, 0x86, 0x17, 0x96, 0x97, 0x84, + 0x01, 0x59, 0x8b, 0x96, 0x37, 0x22, 0xf5, 0xb3 +}; +static const u8 enc_assoc048[] __initconst = { + 0xd1, 0x9f, 0x2d, 0x98, 0x90, 0x95, 0xf7, 0xab, + 0x03, 0xa5, 0xfd, 0xe8, 0x44, 0x16, 0xe0, 0x0c, + 0x0e +}; +static const u8 enc_nonce048[] __initconst = { + 0x56, 0x4d, 0xee, 0x49, 0xab, 0x00, 0xd2, 0x40, + 0xfc, 0x10, 0x68, 0xc3 +}; +static const u8 enc_key048[] __initconst = { + 0xa5, 0x56, 0x9e, 0x72, 0x9a, 0x69, 0xb2, 0x4b, + 0xa6, 0xe0, 0xff, 0x15, 0xc4, 0x62, 0x78, 0x97, + 0x43, 0x68, 0x24, 0xc9, 0x41, 0xe9, 0xd0, 0x0b, + 0x2e, 0x93, 0xfd, 0xdc, 0x4b, 0xa7, 0x76, 0x57 +}; + +/* wycheproof - misc */ +static const u8 enc_input049[] __initconst = { + 0xe6, 0x01, 0xb3, 0x85, 0x57, 0x79, 0x7d, 0xa2, + 0xf8, 0xa4, 0x10, 0x6a, 0x08, 0x9d, 0x1d, 0xa6 +}; +static const u8 enc_output049[] __initconst = { + 0x29, 0x9b, 0x5d, 0x3f, 0x3d, 0x03, 0xc0, 0x87, + 0x20, 0x9a, 0x16, 0xe2, 0x85, 0x14, 0x31, 0x11, + 0x4b, 0x45, 0x4e, 0xd1, 0x98, 0xde, 0x11, 0x7e, + 0x83, 0xec, 0x49, 0xfa, 0x8d, 0x85, 0x08, 0xd6 +}; +static const u8 enc_assoc049[] __initconst = { + 0x5e, 0x64, 0x70, 0xfa, 0xcd, 0x99, 0xc1, 0xd8, + 0x1e, 0x37, 0xcd, 0x44, 0x01, 0x5f, 0xe1, 0x94, + 0x80, 0xa2, 0xa4, 0xd3, 0x35, 0x2a, 0x4f, 0xf5, + 0x60, 0xc0, 0x64, 0x0f, 0xdb, 0xda +}; +static const u8 enc_nonce049[] __initconst = { + 0xdf, 0x87, 0x13, 0xe8, 0x7e, 0xc3, 0xdb, 0xcf, + 0xad, 0x14, 0xd5, 0x3e +}; +static const u8 enc_key049[] __initconst = { + 0x56, 0x20, 0x74, 0x65, 0xb4, 0xe4, 0x8e, 0x6d, + 0x04, 0x63, 0x0f, 0x4a, 0x42, 0xf3, 0x5c, 0xfc, + 0x16, 0x3a, 0xb2, 0x89, 0xc2, 0x2a, 0x2b, 0x47, + 0x84, 0xf6, 0xf9, 0x29, 0x03, 0x30, 0xbe, 0xe0 +}; + +/* wycheproof - misc */ +static const u8 enc_input050[] __initconst = { + 0xdc, 0x9e, 0x9e, 0xaf, 0x11, 0xe3, 0x14, 0x18, + 0x2d, 0xf6, 0xa4, 0xeb, 0xa1, 0x7a, 0xec, 0x9c +}; +static const u8 enc_output050[] __initconst = { + 0x60, 0x5b, 0xbf, 0x90, 0xae, 0xb9, 0x74, 0xf6, + 0x60, 0x2b, 0xc7, 0x78, 0x05, 0x6f, 0x0d, 0xca, + 0x38, 0xea, 0x23, 0xd9, 0x90, 0x54, 0xb4, 0x6b, + 0x42, 0xff, 0xe0, 0x04, 0x12, 0x9d, 0x22, 0x04 +}; +static const u8 enc_assoc050[] __initconst = { + 0xba, 0x44, 0x6f, 0x6f, 0x9a, 0x0c, 0xed, 0x22, + 0x45, 0x0f, 0xeb, 0x10, 0x73, 0x7d, 0x90, 0x07, + 0xfd, 0x69, 0xab, 0xc1, 0x9b, 0x1d, 0x4d, 0x90, + 0x49, 0xa5, 0x55, 0x1e, 0x86, 0xec, 0x2b, 0x37 +}; +static const u8 enc_nonce050[] __initconst = { + 0x8d, 0xf4, 0xb1, 0x5a, 0x88, 0x8c, 0x33, 0x28, + 0x6a, 0x7b, 0x76, 0x51 +}; +static const u8 enc_key050[] __initconst = { + 0x39, 0x37, 0x98, 0x6a, 0xf8, 0x6d, 0xaf, 0xc1, + 0xba, 0x0c, 0x46, 0x72, 0xd8, 0xab, 0xc4, 0x6c, + 0x20, 0x70, 0x62, 0x68, 0x2d, 0x9c, 0x26, 0x4a, + 0xb0, 0x6d, 0x6c, 0x58, 0x07, 0x20, 0x51, 0x30 +}; + +/* wycheproof - misc */ +static const u8 enc_input051[] __initconst = { + 0x81, 0xce, 0x84, 0xed, 0xe9, 0xb3, 0x58, 0x59, + 0xcc, 0x8c, 0x49, 0xa8, 0xf6, 0xbe, 0x7d, 0xc6 +}; +static const u8 enc_output051[] __initconst = { + 0x7b, 0x7c, 0xe0, 0xd8, 0x24, 0x80, 0x9a, 0x70, + 0xde, 0x32, 0x56, 0x2c, 0xcf, 0x2c, 0x2b, 0xbd, + 0x15, 0xd4, 0x4a, 0x00, 0xce, 0x0d, 0x19, 0xb4, + 0x23, 0x1f, 0x92, 0x1e, 0x22, 0xbc, 0x0a, 0x43 +}; +static const u8 enc_assoc051[] __initconst = { + 0xd4, 0x1a, 0x82, 0x8d, 0x5e, 0x71, 0x82, 0x92, + 0x47, 0x02, 0x19, 0x05, 0x40, 0x2e, 0xa2, 0x57, + 0xdc, 0xcb, 0xc3, 0xb8, 0x0f, 0xcd, 0x56, 0x75, + 0x05, 0x6b, 0x68, 0xbb, 0x59, 0xe6, 0x2e, 0x88, + 0x73 +}; +static const u8 enc_nonce051[] __initconst = { + 0xbe, 0x40, 0xe5, 0xf1, 0xa1, 0x18, 0x17, 0xa0, + 0xa8, 0xfa, 0x89, 0x49 +}; +static const u8 enc_key051[] __initconst = { + 0x36, 0x37, 0x2a, 0xbc, 0xdb, 0x78, 0xe0, 0x27, + 0x96, 0x46, 0xac, 0x3d, 0x17, 0x6b, 0x96, 0x74, + 0xe9, 0x15, 0x4e, 0xec, 0xf0, 0xd5, 0x46, 0x9c, + 0x65, 0x1e, 0xc7, 0xe1, 0x6b, 0x4c, 0x11, 0x99 +}; + +/* wycheproof - misc */ +static const u8 enc_input052[] __initconst = { + 0xa6, 0x67, 0x47, 0xc8, 0x9e, 0x85, 0x7a, 0xf3, + 0xa1, 0x8e, 0x2c, 0x79, 0x50, 0x00, 0x87, 0xed +}; +static const u8 enc_output052[] __initconst = { + 0xca, 0x82, 0xbf, 0xf3, 0xe2, 0xf3, 0x10, 0xcc, + 0xc9, 0x76, 0x67, 0x2c, 0x44, 0x15, 0xe6, 0x9b, + 0x57, 0x63, 0x8c, 0x62, 0xa5, 0xd8, 0x5d, 0xed, + 0x77, 0x4f, 0x91, 0x3c, 0x81, 0x3e, 0xa0, 0x32 +}; +static const u8 enc_assoc052[] __initconst = { + 0x3f, 0x2d, 0xd4, 0x9b, 0xbf, 0x09, 0xd6, 0x9a, + 0x78, 0xa3, 0xd8, 0x0e, 0xa2, 0x56, 0x66, 0x14, + 0xfc, 0x37, 0x94, 0x74, 0x19, 0x6c, 0x1a, 0xae, + 0x84, 0x58, 0x3d, 0xa7, 0x3d, 0x7f, 0xf8, 0x5c, + 0x6f, 0x42, 0xca, 0x42, 0x05, 0x6a, 0x97, 0x92, + 0xcc, 0x1b, 0x9f, 0xb3, 0xc7, 0xd2, 0x61 +}; +static const u8 enc_nonce052[] __initconst = { + 0x84, 0xc8, 0x7d, 0xae, 0x4e, 0xee, 0x27, 0x73, + 0x0e, 0xc3, 0x5d, 0x12 +}; +static const u8 enc_key052[] __initconst = { + 0x9f, 0x14, 0x79, 0xed, 0x09, 0x7d, 0x7f, 0xe5, + 0x29, 0xc1, 0x1f, 0x2f, 0x5a, 0xdd, 0x9a, 0xaf, + 0xf4, 0xa1, 0xca, 0x0b, 0x68, 0x99, 0x7a, 0x2c, + 0xb7, 0xf7, 0x97, 0x49, 0xbd, 0x90, 0xaa, 0xf4 +}; + +/* wycheproof - misc */ +static const u8 enc_input053[] __initconst = { + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, + 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, + 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, + 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe +}; +static const u8 enc_output053[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xe6, 0xd3, 0xd7, 0x32, 0x4a, 0x1c, 0xbb, 0xa7, + 0x77, 0xbb, 0xb0, 0xec, 0xdd, 0xa3, 0x78, 0x07 +}; +static const u8 enc_assoc053[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_nonce053[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key053[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input054[] __initconst = { + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, + 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, + 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, + 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe, + 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe, + 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b, + 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5, + 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd +}; +static const u8 enc_output054[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x2d, 0xe6, 0x79, 0x5f, 0x27, 0x4f, 0xd2, + 0xa3, 0x05, 0xd7, 0x69, 0x80, 0xbc, 0x9c, 0xce +}; +static const u8 enc_assoc054[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_nonce054[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key054[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input055[] __initconst = { + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, + 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, + 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, + 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe, + 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe, + 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b, + 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5, + 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd, + 0x7a, 0xda, 0x44, 0x42, 0x42, 0x69, 0xbf, 0xfa, + 0x55, 0x27, 0xf2, 0x70, 0xac, 0xf6, 0x85, 0x02, + 0xb7, 0x4c, 0x5a, 0xe2, 0xe6, 0x0c, 0x05, 0x80, + 0x98, 0x1a, 0x49, 0x38, 0x45, 0x93, 0x92, 0xc4, + 0x9b, 0xb2, 0xf2, 0x84, 0xb6, 0x46, 0xef, 0xc7, + 0xf3, 0xf0, 0xb1, 0x36, 0x1d, 0xc3, 0x48, 0xed, + 0x77, 0xd3, 0x0b, 0xc5, 0x76, 0x92, 0xed, 0x38, + 0xfb, 0xac, 0x01, 0x88, 0x38, 0x04, 0x88, 0xc7 +}; +static const u8 enc_output055[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd8, 0xb4, 0x79, 0x02, 0xba, 0xae, 0xaf, 0xb3, + 0x42, 0x03, 0x05, 0x15, 0x29, 0xaf, 0x28, 0x2e +}; +static const u8 enc_assoc055[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_nonce055[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key055[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input056[] __initconst = { + 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, + 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, + 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, + 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41 +}; +static const u8 enc_output056[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xb3, 0x89, 0x1c, 0x84, 0x9c, 0xb5, 0x2c, 0x27, + 0x74, 0x7e, 0xdf, 0xcf, 0x31, 0x21, 0x3b, 0xb6 +}; +static const u8 enc_assoc056[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce056[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key056[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input057[] __initconst = { + 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, + 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, + 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, + 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41, + 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01, + 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4, + 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a, + 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42 +}; +static const u8 enc_output057[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xf0, 0xc1, 0x2d, 0x26, 0xef, 0x03, 0x02, 0x9b, + 0x62, 0xc0, 0x08, 0xda, 0x27, 0xc5, 0xdc, 0x68 +}; +static const u8 enc_assoc057[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce057[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key057[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input058[] __initconst = { + 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, + 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, + 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, + 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41, + 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01, + 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4, + 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a, + 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42, + 0x85, 0x25, 0xbb, 0xbd, 0xbd, 0x96, 0x40, 0x05, + 0xaa, 0xd8, 0x0d, 0x8f, 0x53, 0x09, 0x7a, 0xfd, + 0x48, 0xb3, 0xa5, 0x1d, 0x19, 0xf3, 0xfa, 0x7f, + 0x67, 0xe5, 0xb6, 0xc7, 0xba, 0x6c, 0x6d, 0x3b, + 0x64, 0x4d, 0x0d, 0x7b, 0x49, 0xb9, 0x10, 0x38, + 0x0c, 0x0f, 0x4e, 0xc9, 0xe2, 0x3c, 0xb7, 0x12, + 0x88, 0x2c, 0xf4, 0x3a, 0x89, 0x6d, 0x12, 0xc7, + 0x04, 0x53, 0xfe, 0x77, 0xc7, 0xfb, 0x77, 0x38 +}; +static const u8 enc_output058[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xee, 0x65, 0x78, 0x30, 0x01, 0xc2, 0x56, 0x91, + 0xfa, 0x28, 0xd0, 0xf5, 0xf1, 0xc1, 0xd7, 0x62 +}; +static const u8 enc_assoc058[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce058[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key058[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input059[] __initconst = { + 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, + 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, + 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, + 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e +}; +static const u8 enc_output059[] __initconst = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x79, 0xba, 0x7a, 0x29, 0xf5, 0xa7, 0xbb, 0x75, + 0x79, 0x7a, 0xf8, 0x7a, 0x61, 0x01, 0x29, 0xa4 +}; +static const u8 enc_assoc059[] __initconst = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 +}; +static const u8 enc_nonce059[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key059[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input060[] __initconst = { + 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, + 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, + 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, + 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e, + 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e, + 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab, + 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65, + 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d +}; +static const u8 enc_output060[] __initconst = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x36, 0xb1, 0x74, 0x38, 0x19, 0xe1, 0xb9, 0xba, + 0x15, 0x51, 0xe8, 0xed, 0x92, 0x2a, 0x95, 0x9a +}; +static const u8 enc_assoc060[] __initconst = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 +}; +static const u8 enc_nonce060[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key060[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input061[] __initconst = { + 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, + 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, + 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, + 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e, + 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e, + 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab, + 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65, + 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d, + 0x7a, 0xda, 0x44, 0xc2, 0x42, 0x69, 0xbf, 0x7a, + 0x55, 0x27, 0xf2, 0xf0, 0xac, 0xf6, 0x85, 0x82, + 0xb7, 0x4c, 0x5a, 0x62, 0xe6, 0x0c, 0x05, 0x00, + 0x98, 0x1a, 0x49, 0xb8, 0x45, 0x93, 0x92, 0x44, + 0x9b, 0xb2, 0xf2, 0x04, 0xb6, 0x46, 0xef, 0x47, + 0xf3, 0xf0, 0xb1, 0xb6, 0x1d, 0xc3, 0x48, 0x6d, + 0x77, 0xd3, 0x0b, 0x45, 0x76, 0x92, 0xed, 0xb8, + 0xfb, 0xac, 0x01, 0x08, 0x38, 0x04, 0x88, 0x47 +}; +static const u8 enc_output061[] __initconst = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0xfe, 0xac, 0x49, 0x55, 0x55, 0x4e, 0x80, 0x6f, + 0x3a, 0x19, 0x02, 0xe2, 0x44, 0x32, 0xc0, 0x8a +}; +static const u8 enc_assoc061[] __initconst = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 +}; +static const u8 enc_nonce061[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key061[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input062[] __initconst = { + 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, + 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, + 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, + 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1 +}; +static const u8 enc_output062[] __initconst = { + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0x20, 0xa3, 0x79, 0x8d, 0xf1, 0x29, 0x2c, 0x59, + 0x72, 0xbf, 0x97, 0x41, 0xae, 0xc3, 0x8a, 0x19 +}; +static const u8 enc_assoc062[] __initconst = { + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f +}; +static const u8 enc_nonce062[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key062[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input063[] __initconst = { + 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, + 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, + 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, + 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1, + 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81, + 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54, + 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a, + 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2 +}; +static const u8 enc_output063[] __initconst = { + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xc0, 0x3d, 0x9f, 0x67, 0x35, 0x4a, 0x97, 0xb2, + 0xf0, 0x74, 0xf7, 0x55, 0x15, 0x57, 0xe4, 0x9c +}; +static const u8 enc_assoc063[] __initconst = { + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f +}; +static const u8 enc_nonce063[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key063[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input064[] __initconst = { + 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, + 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, + 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, + 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1, + 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81, + 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54, + 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a, + 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2, + 0x85, 0x25, 0xbb, 0x3d, 0xbd, 0x96, 0x40, 0x85, + 0xaa, 0xd8, 0x0d, 0x0f, 0x53, 0x09, 0x7a, 0x7d, + 0x48, 0xb3, 0xa5, 0x9d, 0x19, 0xf3, 0xfa, 0xff, + 0x67, 0xe5, 0xb6, 0x47, 0xba, 0x6c, 0x6d, 0xbb, + 0x64, 0x4d, 0x0d, 0xfb, 0x49, 0xb9, 0x10, 0xb8, + 0x0c, 0x0f, 0x4e, 0x49, 0xe2, 0x3c, 0xb7, 0x92, + 0x88, 0x2c, 0xf4, 0xba, 0x89, 0x6d, 0x12, 0x47, + 0x04, 0x53, 0xfe, 0xf7, 0xc7, 0xfb, 0x77, 0xb8 +}; +static const u8 enc_output064[] __initconst = { + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xc8, 0x6d, 0xa8, 0xdd, 0x65, 0x22, 0x86, 0xd5, + 0x02, 0x13, 0xd3, 0x28, 0xd6, 0x3e, 0x40, 0x06 +}; +static const u8 enc_assoc064[] __initconst = { + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f +}; +static const u8 enc_nonce064[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key064[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input065[] __initconst = { + 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, + 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, + 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, + 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41 +}; +static const u8 enc_output065[] __initconst = { + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0xbe, 0xde, 0x90, 0x83, 0xce, 0xb3, 0x6d, 0xdf, + 0xe5, 0xfa, 0x81, 0x1f, 0x95, 0x47, 0x1c, 0x67 +}; +static const u8 enc_assoc065[] __initconst = { + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce065[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key065[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input066[] __initconst = { + 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, + 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, + 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, + 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41, + 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01, + 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4, + 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a, + 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42 +}; +static const u8 enc_output066[] __initconst = { + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x30, 0x08, 0x74, 0xbb, 0x06, 0x92, 0xb6, 0x89, + 0xde, 0xad, 0x9a, 0xe1, 0x5b, 0x06, 0x73, 0x90 +}; +static const u8 enc_assoc066[] __initconst = { + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce066[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key066[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input067[] __initconst = { + 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, + 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, + 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, + 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41, + 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01, + 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4, + 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a, + 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42, + 0x05, 0x25, 0xbb, 0xbd, 0x3d, 0x96, 0x40, 0x05, + 0x2a, 0xd8, 0x0d, 0x8f, 0xd3, 0x09, 0x7a, 0xfd, + 0xc8, 0xb3, 0xa5, 0x1d, 0x99, 0xf3, 0xfa, 0x7f, + 0xe7, 0xe5, 0xb6, 0xc7, 0x3a, 0x6c, 0x6d, 0x3b, + 0xe4, 0x4d, 0x0d, 0x7b, 0xc9, 0xb9, 0x10, 0x38, + 0x8c, 0x0f, 0x4e, 0xc9, 0x62, 0x3c, 0xb7, 0x12, + 0x08, 0x2c, 0xf4, 0x3a, 0x09, 0x6d, 0x12, 0xc7, + 0x84, 0x53, 0xfe, 0x77, 0x47, 0xfb, 0x77, 0x38 +}; +static const u8 enc_output067[] __initconst = { + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x99, 0xca, 0xd8, 0x5f, 0x45, 0xca, 0x40, 0x94, + 0x2d, 0x0d, 0x4d, 0x5e, 0x95, 0x0a, 0xde, 0x22 +}; +static const u8 enc_assoc067[] __initconst = { + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce067[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key067[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input068[] __initconst = { + 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, + 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, + 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, + 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41 +}; +static const u8 enc_output068[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x8b, 0xbe, 0x14, 0x52, 0x72, 0xe7, 0xc2, 0xd9, + 0xa1, 0x89, 0x1a, 0x3a, 0xb0, 0x98, 0x3d, 0x9d +}; +static const u8 enc_assoc068[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce068[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key068[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input069[] __initconst = { + 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, + 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, + 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, + 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41, + 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01, + 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4, + 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a, + 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42 +}; +static const u8 enc_output069[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x3b, 0x41, 0x86, 0x19, 0x13, 0xa8, 0xf6, 0xde, + 0x7f, 0x61, 0xe2, 0x25, 0x63, 0x1b, 0xc3, 0x82 +}; +static const u8 enc_assoc069[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce069[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key069[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input070[] __initconst = { + 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, + 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, + 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, + 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41, + 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01, + 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4, + 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a, + 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42, + 0x7a, 0xda, 0x44, 0x42, 0xbd, 0x96, 0x40, 0x05, + 0x55, 0x27, 0xf2, 0x70, 0x53, 0x09, 0x7a, 0xfd, + 0xb7, 0x4c, 0x5a, 0xe2, 0x19, 0xf3, 0xfa, 0x7f, + 0x98, 0x1a, 0x49, 0x38, 0xba, 0x6c, 0x6d, 0x3b, + 0x9b, 0xb2, 0xf2, 0x84, 0x49, 0xb9, 0x10, 0x38, + 0xf3, 0xf0, 0xb1, 0x36, 0xe2, 0x3c, 0xb7, 0x12, + 0x77, 0xd3, 0x0b, 0xc5, 0x89, 0x6d, 0x12, 0xc7, + 0xfb, 0xac, 0x01, 0x88, 0xc7, 0xfb, 0x77, 0x38 +}; +static const u8 enc_output070[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x84, 0x28, 0xbc, 0xf0, 0x23, 0xec, 0x6b, 0xf3, + 0x1f, 0xd9, 0xef, 0xb2, 0x03, 0xff, 0x08, 0x71 +}; +static const u8 enc_assoc070[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce070[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key070[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input071[] __initconst = { + 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, + 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, + 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, + 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe +}; +static const u8 enc_output071[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x13, 0x9f, 0xdf, 0x64, 0x74, 0xea, 0x24, 0xf5, + 0x49, 0xb0, 0x75, 0x82, 0x5f, 0x2c, 0x76, 0x20 +}; +static const u8 enc_assoc071[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_nonce071[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key071[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input072[] __initconst = { + 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, + 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, + 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, + 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe, + 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe, + 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b, + 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5, + 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd +}; +static const u8 enc_output072[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xbb, 0xad, 0x8d, 0x86, 0x3b, 0x83, 0x5a, 0x8e, + 0x86, 0x64, 0xfd, 0x1d, 0x45, 0x66, 0xb6, 0xb4 +}; +static const u8 enc_assoc072[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_nonce072[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key072[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - misc */ +static const u8 enc_input073[] __initconst = { + 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, + 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, + 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, + 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe, + 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe, + 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b, + 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5, + 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd, + 0x85, 0x25, 0xbb, 0xbd, 0x42, 0x69, 0xbf, 0xfa, + 0xaa, 0xd8, 0x0d, 0x8f, 0xac, 0xf6, 0x85, 0x02, + 0x48, 0xb3, 0xa5, 0x1d, 0xe6, 0x0c, 0x05, 0x80, + 0x67, 0xe5, 0xb6, 0xc7, 0x45, 0x93, 0x92, 0xc4, + 0x64, 0x4d, 0x0d, 0x7b, 0xb6, 0x46, 0xef, 0xc7, + 0x0c, 0x0f, 0x4e, 0xc9, 0x1d, 0xc3, 0x48, 0xed, + 0x88, 0x2c, 0xf4, 0x3a, 0x76, 0x92, 0xed, 0x38, + 0x04, 0x53, 0xfe, 0x77, 0x38, 0x04, 0x88, 0xc7 +}; +static const u8 enc_output073[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x42, 0xf2, 0x35, 0x42, 0x97, 0x84, 0x9a, 0x51, + 0x1d, 0x53, 0xe5, 0x57, 0x17, 0x72, 0xf7, 0x1f +}; +static const u8 enc_assoc073[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_nonce073[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 +}; +static const u8 enc_key073[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input074[] __initconst = { + 0xd4, 0x50, 0x0b, 0xf0, 0x09, 0x49, 0x35, 0x51, + 0xc3, 0x80, 0xad, 0xf5, 0x2c, 0x57, 0x3a, 0x69, + 0xdf, 0x7e, 0x8b, 0x76, 0x24, 0x63, 0x33, 0x0f, + 0xac, 0xc1, 0x6a, 0x57, 0x26, 0xbe, 0x71, 0x90, + 0xc6, 0x3c, 0x5a, 0x1c, 0x92, 0x65, 0x84, 0xa0, + 0x96, 0x75, 0x68, 0x28, 0xdc, 0xdc, 0x64, 0xac, + 0xdf, 0x96, 0x3d, 0x93, 0x1b, 0xf1, 0xda, 0xe2, + 0x38, 0xf3, 0xf1, 0x57, 0x22, 0x4a, 0xc4, 0xb5, + 0x42, 0xd7, 0x85, 0xb0, 0xdd, 0x84, 0xdb, 0x6b, + 0xe3, 0xbc, 0x5a, 0x36, 0x63, 0xe8, 0x41, 0x49, + 0xff, 0xbe, 0xd0, 0x9e, 0x54, 0xf7, 0x8f, 0x16, + 0xa8, 0x22, 0x3b, 0x24, 0xcb, 0x01, 0x9f, 0x58, + 0xb2, 0x1b, 0x0e, 0x55, 0x1e, 0x7a, 0xa0, 0x73, + 0x27, 0x62, 0x95, 0x51, 0x37, 0x6c, 0xcb, 0xc3, + 0x93, 0x76, 0x71, 0xa0, 0x62, 0x9b, 0xd9, 0x5c, + 0x99, 0x15, 0xc7, 0x85, 0x55, 0x77, 0x1e, 0x7a +}; +static const u8 enc_output074[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0b, 0x30, 0x0d, 0x8d, 0xa5, 0x6c, 0x21, 0x85, + 0x75, 0x52, 0x79, 0x55, 0x3c, 0x4c, 0x82, 0xca +}; +static const u8 enc_assoc074[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce074[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x00, 0x02, 0x50, 0x6e +}; +static const u8 enc_key074[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input075[] __initconst = { + 0x7d, 0xe8, 0x7f, 0x67, 0x29, 0x94, 0x52, 0x75, + 0xd0, 0x65, 0x5d, 0xa4, 0xc7, 0xfd, 0xe4, 0x56, + 0x9e, 0x16, 0xf1, 0x11, 0xb5, 0xeb, 0x26, 0xc2, + 0x2d, 0x85, 0x9e, 0x3f, 0xf8, 0x22, 0xec, 0xed, + 0x3a, 0x6d, 0xd9, 0xa6, 0x0f, 0x22, 0x95, 0x7f, + 0x7b, 0x7c, 0x85, 0x7e, 0x88, 0x22, 0xeb, 0x9f, + 0xe0, 0xb8, 0xd7, 0x02, 0x21, 0x41, 0xf2, 0xd0, + 0xb4, 0x8f, 0x4b, 0x56, 0x12, 0xd3, 0x22, 0xa8, + 0x8d, 0xd0, 0xfe, 0x0b, 0x4d, 0x91, 0x79, 0x32, + 0x4f, 0x7c, 0x6c, 0x9e, 0x99, 0x0e, 0xfb, 0xd8, + 0x0e, 0x5e, 0xd6, 0x77, 0x58, 0x26, 0x49, 0x8b, + 0x1e, 0xfe, 0x0f, 0x71, 0xa0, 0xf3, 0xec, 0x5b, + 0x29, 0xcb, 0x28, 0xc2, 0x54, 0x0a, 0x7d, 0xcd, + 0x51, 0xb7, 0xda, 0xae, 0xe0, 0xff, 0x4a, 0x7f, + 0x3a, 0xc1, 0xee, 0x54, 0xc2, 0x9e, 0xe4, 0xc1, + 0x70, 0xde, 0x40, 0x8f, 0x66, 0x69, 0x21, 0x94 +}; +static const u8 enc_output075[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc5, 0x78, 0xe2, 0xaa, 0x44, 0xd3, 0x09, 0xb7, + 0xb6, 0xa5, 0x19, 0x3b, 0xdc, 0x61, 0x18, 0xf5 +}; +static const u8 enc_assoc075[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce075[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x00, 0x03, 0x18, 0xa5 +}; +static const u8 enc_key075[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input076[] __initconst = { + 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85, + 0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02, + 0xce, 0x03, 0xa0, 0xfa, 0xf5, 0x99, 0x2a, 0x09, + 0x52, 0x2c, 0xdd, 0x12, 0x06, 0xd2, 0x20, 0xb8, + 0xf8, 0xbd, 0x07, 0xd1, 0xf1, 0xf5, 0xa1, 0xbd, + 0x9a, 0x71, 0xd1, 0x1c, 0x7f, 0x57, 0x9b, 0x85, + 0x58, 0x18, 0xc0, 0x8d, 0x4d, 0xe0, 0x36, 0x39, + 0x31, 0x83, 0xb7, 0xf5, 0x90, 0xb3, 0x35, 0xae, + 0xd8, 0xde, 0x5b, 0x57, 0xb1, 0x3c, 0x5f, 0xed, + 0xe2, 0x44, 0x1c, 0x3e, 0x18, 0x4a, 0xa9, 0xd4, + 0x6e, 0x61, 0x59, 0x85, 0x06, 0xb3, 0xe1, 0x1c, + 0x43, 0xc6, 0x2c, 0xbc, 0xac, 0xec, 0xed, 0x33, + 0x19, 0x08, 0x75, 0xb0, 0x12, 0x21, 0x8b, 0x19, + 0x30, 0xfb, 0x7c, 0x38, 0xec, 0x45, 0xac, 0x11, + 0xc3, 0x53, 0xd0, 0xcf, 0x93, 0x8d, 0xcc, 0xb9, + 0xef, 0xad, 0x8f, 0xed, 0xbe, 0x46, 0xda, 0xa5 +}; +static const u8 enc_output076[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x4b, 0x0b, 0xda, 0x8a, 0xd0, 0x43, 0x83, 0x0d, + 0x83, 0x19, 0xab, 0x82, 0xc5, 0x0c, 0x76, 0x63 +}; +static const u8 enc_assoc076[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce076[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xb4, 0xf0 +}; +static const u8 enc_key076[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input077[] __initconst = { + 0x86, 0xcb, 0xac, 0xae, 0x4d, 0x3f, 0x74, 0xae, + 0x01, 0x21, 0x3e, 0x05, 0x51, 0xcc, 0x15, 0x16, + 0x0e, 0xa1, 0xbe, 0x84, 0x08, 0xe3, 0xd5, 0xd7, + 0x4f, 0x01, 0x46, 0x49, 0x95, 0xa6, 0x9e, 0x61, + 0x76, 0xcb, 0x9e, 0x02, 0xb2, 0x24, 0x7e, 0xd2, + 0x99, 0x89, 0x2f, 0x91, 0x82, 0xa4, 0x5c, 0xaf, + 0x4c, 0x69, 0x40, 0x56, 0x11, 0x76, 0x6e, 0xdf, + 0xaf, 0xdc, 0x28, 0x55, 0x19, 0xea, 0x30, 0x48, + 0x0c, 0x44, 0xf0, 0x5e, 0x78, 0x1e, 0xac, 0xf8, + 0xfc, 0xec, 0xc7, 0x09, 0x0a, 0xbb, 0x28, 0xfa, + 0x5f, 0xd5, 0x85, 0xac, 0x8c, 0xda, 0x7e, 0x87, + 0x72, 0xe5, 0x94, 0xe4, 0xce, 0x6c, 0x88, 0x32, + 0x81, 0x93, 0x2e, 0x0f, 0x89, 0xf8, 0x77, 0xa1, + 0xf0, 0x4d, 0x9c, 0x32, 0xb0, 0x6c, 0xf9, 0x0b, + 0x0e, 0x76, 0x2b, 0x43, 0x0c, 0x4d, 0x51, 0x7c, + 0x97, 0x10, 0x70, 0x68, 0xf4, 0x98, 0xef, 0x7f +}; +static const u8 enc_output077[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x4b, 0xc9, 0x8f, 0x72, 0xc4, 0x94, 0xc2, 0xa4, + 0x3c, 0x2b, 0x15, 0xa1, 0x04, 0x3f, 0x1c, 0xfa +}; +static const u8 enc_assoc077[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce077[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xfb, 0x66 +}; +static const u8 enc_key077[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input078[] __initconst = { + 0xfa, 0xb1, 0xcd, 0xdf, 0x4f, 0xe1, 0x98, 0xef, + 0x63, 0xad, 0xd8, 0x81, 0xd6, 0xea, 0xd6, 0xc5, + 0x76, 0x37, 0xbb, 0xe9, 0x20, 0x18, 0xca, 0x7c, + 0x0b, 0x96, 0xfb, 0xa0, 0x87, 0x1e, 0x93, 0x2d, + 0xb1, 0xfb, 0xf9, 0x07, 0x61, 0xbe, 0x25, 0xdf, + 0x8d, 0xfa, 0xf9, 0x31, 0xce, 0x57, 0x57, 0xe6, + 0x17, 0xb3, 0xd7, 0xa9, 0xf0, 0xbf, 0x0f, 0xfe, + 0x5d, 0x59, 0x1a, 0x33, 0xc1, 0x43, 0xb8, 0xf5, + 0x3f, 0xd0, 0xb5, 0xa1, 0x96, 0x09, 0xfd, 0x62, + 0xe5, 0xc2, 0x51, 0xa4, 0x28, 0x1a, 0x20, 0x0c, + 0xfd, 0xc3, 0x4f, 0x28, 0x17, 0x10, 0x40, 0x6f, + 0x4e, 0x37, 0x62, 0x54, 0x46, 0xff, 0x6e, 0xf2, + 0x24, 0x91, 0x3d, 0xeb, 0x0d, 0x89, 0xaf, 0x33, + 0x71, 0x28, 0xe3, 0xd1, 0x55, 0xd1, 0x6d, 0x3e, + 0xc3, 0x24, 0x60, 0x41, 0x43, 0x21, 0x43, 0xe9, + 0xab, 0x3a, 0x6d, 0x2c, 0xcc, 0x2f, 0x4d, 0x62 +}; +static const u8 enc_output078[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xf7, 0xe9, 0xe1, 0x51, 0xb0, 0x25, 0x33, 0xc7, + 0x46, 0x58, 0xbf, 0xc7, 0x73, 0x7c, 0x68, 0x0d +}; +static const u8 enc_assoc078[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce078[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xbb, 0x90 +}; +static const u8 enc_key078[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input079[] __initconst = { + 0x22, 0x72, 0x02, 0xbe, 0x7f, 0x35, 0x15, 0xe9, + 0xd1, 0xc0, 0x2e, 0xea, 0x2f, 0x19, 0x50, 0xb6, + 0x48, 0x1b, 0x04, 0x8a, 0x4c, 0x91, 0x50, 0x6c, + 0xb4, 0x0d, 0x50, 0x4e, 0x6c, 0x94, 0x9f, 0x82, + 0xd1, 0x97, 0xc2, 0x5a, 0xd1, 0x7d, 0xc7, 0x21, + 0x65, 0x11, 0x25, 0x78, 0x2a, 0xc7, 0xa7, 0x12, + 0x47, 0xfe, 0xae, 0xf3, 0x2f, 0x1f, 0x25, 0x0c, + 0xe4, 0xbb, 0x8f, 0x79, 0xac, 0xaa, 0x17, 0x9d, + 0x45, 0xa7, 0xb0, 0x54, 0x5f, 0x09, 0x24, 0x32, + 0x5e, 0xfa, 0x87, 0xd5, 0xe4, 0x41, 0xd2, 0x84, + 0x78, 0xc6, 0x1f, 0x22, 0x23, 0xee, 0x67, 0xc3, + 0xb4, 0x1f, 0x43, 0x94, 0x53, 0x5e, 0x2a, 0x24, + 0x36, 0x9a, 0x2e, 0x16, 0x61, 0x3c, 0x45, 0x94, + 0x90, 0xc1, 0x4f, 0xb1, 0xd7, 0x55, 0xfe, 0x53, + 0xfb, 0xe1, 0xee, 0x45, 0xb1, 0xb2, 0x1f, 0x71, + 0x62, 0xe2, 0xfc, 0xaa, 0x74, 0x2a, 0xbe, 0xfd +}; +static const u8 enc_output079[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x79, 0x5b, 0xcf, 0xf6, 0x47, 0xc5, 0x53, 0xc2, + 0xe4, 0xeb, 0x6e, 0x0e, 0xaf, 0xd9, 0xe0, 0x4e +}; +static const u8 enc_assoc079[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce079[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x48, 0x4a +}; +static const u8 enc_key079[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input080[] __initconst = { + 0xfa, 0xe5, 0x83, 0x45, 0xc1, 0x6c, 0xb0, 0xf5, + 0xcc, 0x53, 0x7f, 0x2b, 0x1b, 0x34, 0x69, 0xc9, + 0x69, 0x46, 0x3b, 0x3e, 0xa7, 0x1b, 0xcf, 0x6b, + 0x98, 0xd6, 0x69, 0xa8, 0xe6, 0x0e, 0x04, 0xfc, + 0x08, 0xd5, 0xfd, 0x06, 0x9c, 0x36, 0x26, 0x38, + 0xe3, 0x40, 0x0e, 0xf4, 0xcb, 0x24, 0x2e, 0x27, + 0xe2, 0x24, 0x5e, 0x68, 0xcb, 0x9e, 0xc5, 0x83, + 0xda, 0x53, 0x40, 0xb1, 0x2e, 0xdf, 0x42, 0x3b, + 0x73, 0x26, 0xad, 0x20, 0xfe, 0xeb, 0x57, 0xda, + 0xca, 0x2e, 0x04, 0x67, 0xa3, 0x28, 0x99, 0xb4, + 0x2d, 0xf8, 0xe5, 0x6d, 0x84, 0xe0, 0x06, 0xbc, + 0x8a, 0x7a, 0xcc, 0x73, 0x1e, 0x7c, 0x1f, 0x6b, + 0xec, 0xb5, 0x71, 0x9f, 0x70, 0x77, 0xf0, 0xd4, + 0xf4, 0xc6, 0x1a, 0xb1, 0x1e, 0xba, 0xc1, 0x00, + 0x18, 0x01, 0xce, 0x33, 0xc4, 0xe4, 0xa7, 0x7d, + 0x83, 0x1d, 0x3c, 0xe3, 0x4e, 0x84, 0x10, 0xe1 +}; +static const u8 enc_output080[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x19, 0x46, 0xd6, 0x53, 0x96, 0x0f, 0x94, 0x7a, + 0x74, 0xd3, 0xe8, 0x09, 0x3c, 0xf4, 0x85, 0x02 +}; +static const u8 enc_assoc080[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce080[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x2f, 0x40 +}; +static const u8 enc_key080[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input081[] __initconst = { + 0xeb, 0xb2, 0x16, 0xdd, 0xd7, 0xca, 0x70, 0x92, + 0x15, 0xf5, 0x03, 0xdf, 0x9c, 0xe6, 0x3c, 0x5c, + 0xd2, 0x19, 0x4e, 0x7d, 0x90, 0x99, 0xe8, 0xa9, + 0x0b, 0x2a, 0xfa, 0xad, 0x5e, 0xba, 0x35, 0x06, + 0x99, 0x25, 0xa6, 0x03, 0xfd, 0xbc, 0x34, 0x1a, + 0xae, 0xd4, 0x15, 0x05, 0xb1, 0x09, 0x41, 0xfa, + 0x38, 0x56, 0xa7, 0xe2, 0x47, 0xb1, 0x04, 0x07, + 0x09, 0x74, 0x6c, 0xfc, 0x20, 0x96, 0xca, 0xa6, + 0x31, 0xb2, 0xff, 0xf4, 0x1c, 0x25, 0x05, 0x06, + 0xd8, 0x89, 0xc1, 0xc9, 0x06, 0x71, 0xad, 0xe8, + 0x53, 0xee, 0x63, 0x94, 0xc1, 0x91, 0x92, 0xa5, + 0xcf, 0x37, 0x10, 0xd1, 0x07, 0x30, 0x99, 0xe5, + 0xbc, 0x94, 0x65, 0x82, 0xfc, 0x0f, 0xab, 0x9f, + 0x54, 0x3c, 0x71, 0x6a, 0xe2, 0x48, 0x6a, 0x86, + 0x83, 0xfd, 0xca, 0x39, 0xd2, 0xe1, 0x4f, 0x23, + 0xd0, 0x0a, 0x58, 0x26, 0x64, 0xf4, 0xec, 0xb1 +}; +static const u8 enc_output081[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x36, 0xc3, 0x00, 0x29, 0x85, 0xdd, 0x21, 0xba, + 0xf8, 0x95, 0xd6, 0x33, 0x57, 0x3f, 0x12, 0xc0 +}; +static const u8 enc_assoc081[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce081[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x93, 0x35 +}; +static const u8 enc_key081[] __initconst = { + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input082[] __initconst = { + 0x40, 0x8a, 0xe6, 0xef, 0x1c, 0x7e, 0xf0, 0xfb, + 0x2c, 0x2d, 0x61, 0x08, 0x16, 0xfc, 0x78, 0x49, + 0xef, 0xa5, 0x8f, 0x78, 0x27, 0x3f, 0x5f, 0x16, + 0x6e, 0xa6, 0x5f, 0x81, 0xb5, 0x75, 0x74, 0x7d, + 0x03, 0x5b, 0x30, 0x40, 0xfe, 0xde, 0x1e, 0xb9, + 0x45, 0x97, 0x88, 0x66, 0x97, 0x88, 0x40, 0x8e, + 0x00, 0x41, 0x3b, 0x3e, 0x37, 0x6d, 0x15, 0x2d, + 0x20, 0x4a, 0xa2, 0xb7, 0xa8, 0x35, 0x58, 0xfc, + 0xd4, 0x8a, 0x0e, 0xf7, 0xa2, 0x6b, 0x1c, 0xd6, + 0xd3, 0x5d, 0x23, 0xb3, 0xf5, 0xdf, 0xe0, 0xca, + 0x77, 0xa4, 0xce, 0x32, 0xb9, 0x4a, 0xbf, 0x83, + 0xda, 0x2a, 0xef, 0xca, 0xf0, 0x68, 0x38, 0x08, + 0x79, 0xe8, 0x9f, 0xb0, 0xa3, 0x82, 0x95, 0x95, + 0xcf, 0x44, 0xc3, 0x85, 0x2a, 0xe2, 0xcc, 0x66, + 0x2b, 0x68, 0x9f, 0x93, 0x55, 0xd9, 0xc1, 0x83, + 0x80, 0x1f, 0x6a, 0xcc, 0x31, 0x3f, 0x89, 0x07 +}; +static const u8 enc_output082[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x65, 0x14, 0x51, 0x8e, 0x0a, 0x26, 0x41, 0x42, + 0xe0, 0xb7, 0x35, 0x1f, 0x96, 0x7f, 0xc2, 0xae +}; +static const u8 enc_assoc082[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce082[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf7, 0xd5 +}; +static const u8 enc_key082[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input083[] __initconst = { + 0x0a, 0x0a, 0x24, 0x49, 0x9b, 0xca, 0xde, 0x58, + 0xcf, 0x15, 0x76, 0xc3, 0x12, 0xac, 0xa9, 0x84, + 0x71, 0x8c, 0xb4, 0xcc, 0x7e, 0x01, 0x53, 0xf5, + 0xa9, 0x01, 0x58, 0x10, 0x85, 0x96, 0x44, 0xdf, + 0xc0, 0x21, 0x17, 0x4e, 0x0b, 0x06, 0x0a, 0x39, + 0x74, 0x48, 0xde, 0x8b, 0x48, 0x4a, 0x86, 0x03, + 0xbe, 0x68, 0x0a, 0x69, 0x34, 0xc0, 0x90, 0x6f, + 0x30, 0xdd, 0x17, 0xea, 0xe2, 0xd4, 0xc5, 0xfa, + 0xa7, 0x77, 0xf8, 0xca, 0x53, 0x37, 0x0e, 0x08, + 0x33, 0x1b, 0x88, 0xc3, 0x42, 0xba, 0xc9, 0x59, + 0x78, 0x7b, 0xbb, 0x33, 0x93, 0x0e, 0x3b, 0x56, + 0xbe, 0x86, 0xda, 0x7f, 0x2a, 0x6e, 0xb1, 0xf9, + 0x40, 0x89, 0xd1, 0xd1, 0x81, 0x07, 0x4d, 0x43, + 0x02, 0xf8, 0xe0, 0x55, 0x2d, 0x0d, 0xe1, 0xfa, + 0xb3, 0x06, 0xa2, 0x1b, 0x42, 0xd4, 0xc3, 0xba, + 0x6e, 0x6f, 0x0c, 0xbc, 0xc8, 0x1e, 0x87, 0x7a +}; +static const u8 enc_output083[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x4c, 0x19, 0x4d, 0xa6, 0xa9, 0x9f, 0xd6, 0x5b, + 0x40, 0xe9, 0xca, 0xd7, 0x98, 0xf4, 0x4b, 0x19 +}; +static const u8 enc_assoc083[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce083[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xfc, 0xe4 +}; +static const u8 enc_key083[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input084[] __initconst = { + 0x4a, 0x0a, 0xaf, 0xf8, 0x49, 0x47, 0x29, 0x18, + 0x86, 0x91, 0x70, 0x13, 0x40, 0xf3, 0xce, 0x2b, + 0x8a, 0x78, 0xee, 0xd3, 0xa0, 0xf0, 0x65, 0x99, + 0x4b, 0x72, 0x48, 0x4e, 0x79, 0x91, 0xd2, 0x5c, + 0x29, 0xaa, 0x07, 0x5e, 0xb1, 0xfc, 0x16, 0xde, + 0x93, 0xfe, 0x06, 0x90, 0x58, 0x11, 0x2a, 0xb2, + 0x84, 0xa3, 0xed, 0x18, 0x78, 0x03, 0x26, 0xd1, + 0x25, 0x8a, 0x47, 0x22, 0x2f, 0xa6, 0x33, 0xd8, + 0xb2, 0x9f, 0x3b, 0xd9, 0x15, 0x0b, 0x23, 0x9b, + 0x15, 0x46, 0xc2, 0xbb, 0x9b, 0x9f, 0x41, 0x0f, + 0xeb, 0xea, 0xd3, 0x96, 0x00, 0x0e, 0xe4, 0x77, + 0x70, 0x15, 0x32, 0xc3, 0xd0, 0xf5, 0xfb, 0xf8, + 0x95, 0xd2, 0x80, 0x19, 0x6d, 0x2f, 0x73, 0x7c, + 0x5e, 0x9f, 0xec, 0x50, 0xd9, 0x2b, 0xb0, 0xdf, + 0x5d, 0x7e, 0x51, 0x3b, 0xe5, 0xb8, 0xea, 0x97, + 0x13, 0x10, 0xd5, 0xbf, 0x16, 0xba, 0x7a, 0xee +}; +static const u8 enc_output084[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc8, 0xae, 0x77, 0x88, 0xcd, 0x28, 0x74, 0xab, + 0xc1, 0x38, 0x54, 0x1e, 0x11, 0xfd, 0x05, 0x87 +}; +static const u8 enc_assoc084[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce084[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x84, 0x86, 0xa8 +}; +static const u8 enc_key084[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - checking for int overflows */ +static const u8 enc_input085[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x78, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x9c, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0xd4, 0xd2, 0x06, 0x61, 0x6f, 0x92, 0x93, 0xf6, + 0x5b, 0x45, 0xdb, 0xbc, 0x74, 0xe7, 0xc2, 0xed, + 0xfb, 0xcb, 0xbf, 0x1c, 0xfb, 0x67, 0x9b, 0xb7, + 0x39, 0xa5, 0x86, 0x2d, 0xe2, 0xbc, 0xb9, 0x37, + 0xf7, 0x4d, 0x5b, 0xf8, 0x67, 0x1c, 0x5a, 0x8a, + 0x50, 0x92, 0xf6, 0x1d, 0x54, 0xc9, 0xaa, 0x5b +}; +static const u8 enc_output085[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x93, 0x3a, 0x51, 0x63, 0xc7, 0xf6, 0x23, 0x68, + 0x32, 0x7b, 0x3f, 0xbc, 0x10, 0x36, 0xc9, 0x43 +}; +static const u8 enc_assoc085[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce085[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key085[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input086[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output086[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +}; +static const u8 enc_assoc086[] __initconst = { + 0x85, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xa6, 0x90, 0x2f, 0xcb, 0xc8, 0x83, 0xbb, 0xc1, + 0x80, 0xb2, 0x56, 0xae, 0x34, 0xad, 0x7f, 0x00 +}; +static const u8 enc_nonce086[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key086[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input087[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output087[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_assoc087[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x24, 0x7e, 0x50, 0x64, 0x2a, 0x1c, 0x0a, 0x2f, + 0x8f, 0x77, 0x21, 0x96, 0x09, 0xdb, 0xa9, 0x58 +}; +static const u8 enc_nonce087[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key087[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input088[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output088[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_assoc088[] __initconst = { + 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xd9, 0xe7, 0x2c, 0x06, 0x4a, 0xc8, 0x96, 0x1f, + 0x3f, 0xa5, 0x85, 0xe0, 0xe2, 0xab, 0xd6, 0x00 +}; +static const u8 enc_nonce088[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key088[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input089[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output089[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 +}; +static const u8 enc_assoc089[] __initconst = { + 0x65, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x95, 0xaf, 0x0f, 0x4d, 0x0b, 0x68, 0x6e, 0xae, + 0xcc, 0xca, 0x43, 0x07, 0xd5, 0x96, 0xf5, 0x02 +}; +static const u8 enc_nonce089[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key089[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input090[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output090[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f +}; +static const u8 enc_assoc090[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x85, 0x40, 0xb4, 0x64, 0x35, 0x77, 0x07, 0xbe, + 0x3a, 0x39, 0xd5, 0x5c, 0x34, 0xf8, 0xbc, 0xb3 +}; +static const u8 enc_nonce090[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key090[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input091[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output091[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 +}; +static const u8 enc_assoc091[] __initconst = { + 0x4f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x66, 0x23, 0xd9, 0x90, 0xb8, 0x98, 0xd8, 0x30, + 0xd2, 0x12, 0xaf, 0x23, 0x83, 0x33, 0x07, 0x01 +}; +static const u8 enc_nonce091[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key091[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - special case tag */ +static const u8 enc_input092[] __initconst = { + 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, + 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, + 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, + 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, + 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, + 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, + 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, + 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d +}; +static const u8 enc_output092[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 enc_assoc092[] __initconst = { + 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x5f, 0x16, 0xd0, 0x9f, 0x17, 0x78, 0x72, 0x11, + 0xb7, 0xd4, 0x84, 0xe0, 0x24, 0xf8, 0x97, 0x01 +}; +static const u8 enc_nonce092[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b +}; +static const u8 enc_key092[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input093[] __initconst = { + 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d, + 0x3d, 0xb7, 0x66, 0x4a, 0x34, 0xae, 0x6b, 0x44, + 0x4d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x5b, 0x8b, 0x94, 0x50, 0x9e, 0x2b, 0x74, 0xa3, + 0x6d, 0x34, 0x6e, 0x33, 0xd5, 0x72, 0x65, 0x9b, + 0xa9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0x83, 0xdc, 0xe9, 0xf3, 0x07, 0x3e, 0xfa, 0xdb, + 0x7d, 0x23, 0xb8, 0x7a, 0xce, 0x35, 0x16, 0x8c +}; +static const u8 enc_output093[] __initconst = { + 0x00, 0x39, 0xe2, 0xfd, 0x2f, 0xd3, 0x12, 0x14, + 0x9e, 0x98, 0x98, 0x80, 0x88, 0x48, 0x13, 0xe7, + 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96, + 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00, + 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96, + 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00, + 0xa5, 0x19, 0xac, 0x1a, 0x35, 0xb4, 0xa5, 0x77, + 0x87, 0x51, 0x0a, 0xf7, 0x8d, 0x8d, 0x20, 0x0a +}; +static const u8 enc_assoc093[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce093[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key093[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input094[] __initconst = { + 0xd3, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xe5, 0xda, 0x78, 0x76, 0x6f, 0xa1, 0x92, 0x90, + 0xc0, 0x31, 0xf7, 0x52, 0x08, 0x50, 0x67, 0x45, + 0xae, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x49, 0x6d, 0xde, 0xb0, 0x55, 0x09, 0xc6, 0xef, + 0xff, 0xab, 0x75, 0xeb, 0x2d, 0xf4, 0xab, 0x09, + 0x76, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x01, 0x49, 0xef, 0x50, 0x4b, 0x71, 0xb1, 0x20, + 0xca, 0x4f, 0xf3, 0x95, 0x19, 0xc2, 0xc2, 0x10 +}; +static const u8 enc_output094[] __initconst = { + 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x62, 0x18, 0xb2, 0x7f, 0x83, 0xb8, 0xb4, 0x66, + 0x02, 0xf6, 0xe1, 0xd8, 0x34, 0x20, 0x7b, 0x02, + 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29, + 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02, + 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29, + 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02, + 0x30, 0x2f, 0xe8, 0x2a, 0xb0, 0xa0, 0x9a, 0xf6, + 0x44, 0x00, 0xd0, 0x15, 0xae, 0x83, 0xd9, 0xcc +}; +static const u8 enc_assoc094[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce094[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key094[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input095[] __initconst = { + 0xe9, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x6d, 0xf1, 0x39, 0x4e, 0xdc, 0x53, 0x9b, 0x5b, + 0x3a, 0x09, 0x57, 0xbe, 0x0f, 0xb8, 0x59, 0x46, + 0x80, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0xd1, 0x76, 0x9f, 0xe8, 0x06, 0xbb, 0xfe, 0xb6, + 0xf5, 0x90, 0x95, 0x0f, 0x2e, 0xac, 0x9e, 0x0a, + 0x58, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x99, 0x52, 0xae, 0x08, 0x18, 0xc3, 0x89, 0x79, + 0xc0, 0x74, 0x13, 0x71, 0x1a, 0x9a, 0xf7, 0x13 +}; +static const u8 enc_output095[] __initconst = { + 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xea, 0x33, 0xf3, 0x47, 0x30, 0x4a, 0xbd, 0xad, + 0xf8, 0xce, 0x41, 0x34, 0x33, 0xc8, 0x45, 0x01, + 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70, + 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01, + 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70, + 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01, + 0x98, 0xa7, 0xe8, 0x36, 0xe0, 0xee, 0x4d, 0x02, + 0x35, 0x00, 0xd0, 0x55, 0x7e, 0xc2, 0xcb, 0xe0 +}; +static const u8 enc_assoc095[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce095[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key095[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input096[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x64, 0xf9, 0x0f, 0x5b, 0x26, 0x92, 0xb8, 0x60, + 0xd4, 0x59, 0x6f, 0xf4, 0xb3, 0x40, 0x2c, 0x5c, + 0x00, 0xb9, 0xbb, 0x53, 0x70, 0x7a, 0xa6, 0x67, + 0xd3, 0x56, 0xfe, 0x50, 0xc7, 0x19, 0x96, 0x94, + 0x03, 0x35, 0x61, 0xe7, 0xca, 0xca, 0x6d, 0x94, + 0x1d, 0xc3, 0xcd, 0x69, 0x14, 0xad, 0x69, 0x04 +}; +static const u8 enc_output096[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xe3, 0x3b, 0xc5, 0x52, 0xca, 0x8b, 0x9e, 0x96, + 0x16, 0x9e, 0x79, 0x7e, 0x8f, 0x30, 0x30, 0x1b, + 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52, + 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f, + 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52, + 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f, + 0x6a, 0xb8, 0xdc, 0xe2, 0xc5, 0x9d, 0xa4, 0x73, + 0x71, 0x30, 0xb0, 0x25, 0x2f, 0x68, 0xa8, 0xd8 +}; +static const u8 enc_assoc096[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce096[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key096[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input097[] __initconst = { + 0x68, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xb0, 0x8f, 0x25, 0x67, 0x5b, 0x9b, 0xcb, 0xf6, + 0xe3, 0x84, 0x07, 0xde, 0x2e, 0xc7, 0x5a, 0x47, + 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x2d, 0x2a, 0xf7, 0xcd, 0x6b, 0x08, 0x05, 0x01, + 0xd3, 0x1b, 0xa5, 0x4f, 0xb2, 0xeb, 0x75, 0x96, + 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x65, 0x0e, 0xc6, 0x2d, 0x75, 0x70, 0x72, 0xce, + 0xe6, 0xff, 0x23, 0x31, 0x86, 0xdd, 0x1c, 0x8f +}; +static const u8 enc_output097[] __initconst = { + 0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x37, 0x4d, 0xef, 0x6e, 0xb7, 0x82, 0xed, 0x00, + 0x21, 0x43, 0x11, 0x54, 0x12, 0xb7, 0x46, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7, + 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7, + 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d, + 0x04, 0x4d, 0xea, 0x60, 0x88, 0x80, 0x41, 0x2b, + 0xfd, 0xff, 0xcf, 0x35, 0x57, 0x9e, 0x9b, 0x26 +}; +static const u8 enc_assoc097[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce097[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key097[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input098[] __initconst = { + 0x6d, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xa1, 0x61, 0xb5, 0xab, 0x04, 0x09, 0x00, 0x62, + 0x9e, 0xfe, 0xff, 0x78, 0xd7, 0xd8, 0x6b, 0x45, + 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0xc6, 0xf8, 0x07, 0x8c, 0xc8, 0xef, 0x12, 0xa0, + 0xff, 0x65, 0x7d, 0x6d, 0x08, 0xdb, 0x10, 0xb8, + 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x8e, 0xdc, 0x36, 0x6c, 0xd6, 0x97, 0x65, 0x6f, + 0xca, 0x81, 0xfb, 0x13, 0x3c, 0xed, 0x79, 0xa1 +}; +static const u8 enc_output098[] __initconst = { + 0x6d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x26, 0xa3, 0x7f, 0xa2, 0xe8, 0x10, 0x26, 0x94, + 0x5c, 0x39, 0xe9, 0xf2, 0xeb, 0xa8, 0x77, 0x02, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66, + 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66, + 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3, + 0x1e, 0x6b, 0xea, 0x63, 0x14, 0x54, 0x2e, 0x2e, + 0xf9, 0xff, 0xcf, 0x45, 0x0b, 0x2e, 0x98, 0x2b +}; +static const u8 enc_assoc098[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce098[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key098[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input099[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xfc, 0x01, 0xb8, 0x91, 0xe5, 0xf0, 0xf9, 0x12, + 0x8d, 0x7d, 0x1c, 0x57, 0x91, 0x92, 0xb6, 0x98, + 0x63, 0x41, 0x44, 0x15, 0xb6, 0x99, 0x68, 0x95, + 0x9a, 0x72, 0x91, 0xb7, 0xa5, 0xaf, 0x13, 0x48, + 0x60, 0xcd, 0x9e, 0xa1, 0x0c, 0x29, 0xa3, 0x66, + 0x54, 0xe7, 0xa2, 0x8e, 0x76, 0x1b, 0xec, 0xd8 +}; +static const u8 enc_output099[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7b, 0xc3, 0x72, 0x98, 0x09, 0xe9, 0xdf, 0xe4, + 0x4f, 0xba, 0x0a, 0xdd, 0xad, 0xe2, 0xaa, 0xdf, + 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0, + 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3, + 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0, + 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3, + 0xed, 0x20, 0x17, 0xc8, 0xdb, 0xa4, 0x77, 0x56, + 0x29, 0x04, 0x9d, 0x78, 0x6e, 0x3b, 0xce, 0xb1 +}; +static const u8 enc_assoc099[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce099[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key099[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input100[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x6b, 0x6d, 0xc9, 0xd2, 0x1a, 0x81, 0x9e, 0x70, + 0xb5, 0x77, 0xf4, 0x41, 0x37, 0xd3, 0xd6, 0xbd, + 0x13, 0x35, 0xf5, 0xeb, 0x44, 0x49, 0x40, 0x77, + 0xb2, 0x64, 0x49, 0xa5, 0x4b, 0x6c, 0x7c, 0x75, + 0x10, 0xb9, 0x2f, 0x5f, 0xfe, 0xf9, 0x8b, 0x84, + 0x7c, 0xf1, 0x7a, 0x9c, 0x98, 0xd8, 0x83, 0xe5 +}; +static const u8 enc_output100[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xec, 0xaf, 0x03, 0xdb, 0xf6, 0x98, 0xb8, 0x86, + 0x77, 0xb0, 0xe2, 0xcb, 0x0b, 0xa3, 0xca, 0xfa, + 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42, + 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee, + 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42, + 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee, + 0x07, 0x3f, 0x17, 0xcb, 0x67, 0x78, 0x64, 0x59, + 0x25, 0x04, 0x9d, 0x88, 0x22, 0xcb, 0xca, 0xb6 +}; +static const u8 enc_assoc100[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce100[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key100[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input101[] __initconst = { + 0xff, 0xcb, 0x2b, 0x11, 0x06, 0xf8, 0x23, 0x4c, + 0x5e, 0x99, 0xd4, 0xdb, 0x4c, 0x70, 0x48, 0xde, + 0x32, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x16, 0xe9, 0x88, 0x4a, 0x11, 0x4f, 0x0e, 0x92, + 0x66, 0xce, 0xa3, 0x88, 0x5f, 0xe3, 0x6b, 0x9f, + 0xd6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0xce, 0xbe, 0xf5, 0xe9, 0x88, 0x5a, 0x80, 0xea, + 0x76, 0xd9, 0x75, 0xc1, 0x44, 0xa4, 0x18, 0x88 +}; +static const u8 enc_output101[] __initconst = { + 0xff, 0xa0, 0xfc, 0x3e, 0x80, 0x32, 0xc3, 0xd5, + 0xfd, 0xb6, 0x2a, 0x11, 0xf0, 0x96, 0x30, 0x7d, + 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7, + 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04, + 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7, + 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04, + 0x8b, 0x9b, 0xb4, 0xb4, 0x86, 0x12, 0x89, 0x65, + 0x8c, 0x69, 0x6a, 0x83, 0x40, 0x15, 0x04, 0x05 +}; +static const u8 enc_assoc101[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce101[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key101[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input102[] __initconst = { + 0x6f, 0x9e, 0x70, 0xed, 0x3b, 0x8b, 0xac, 0xa0, + 0x26, 0xe4, 0x6a, 0x5a, 0x09, 0x43, 0x15, 0x8d, + 0x21, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x0c, 0x61, 0x2c, 0x5e, 0x8d, 0x89, 0xa8, 0x73, + 0xdb, 0xca, 0xad, 0x5b, 0x73, 0x46, 0x42, 0x9b, + 0xc5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0xd4, 0x36, 0x51, 0xfd, 0x14, 0x9c, 0x26, 0x0b, + 0xcb, 0xdd, 0x7b, 0x12, 0x68, 0x01, 0x31, 0x8c +}; +static const u8 enc_output102[] __initconst = { + 0x6f, 0xf5, 0xa7, 0xc2, 0xbd, 0x41, 0x4c, 0x39, + 0x85, 0xcb, 0x94, 0x90, 0xb5, 0xa5, 0x6d, 0x2e, + 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46, + 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00, + 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46, + 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00, + 0x8b, 0x3b, 0xbd, 0x51, 0x64, 0x44, 0x59, 0x56, + 0x8d, 0x81, 0xca, 0x1f, 0xa7, 0x2c, 0xe4, 0x04 +}; +static const u8 enc_assoc102[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce102[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key102[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input103[] __initconst = { + 0x41, 0x2b, 0x08, 0x0a, 0x3e, 0x19, 0xc1, 0x0d, + 0x44, 0xa1, 0xaf, 0x1e, 0xab, 0xde, 0xb4, 0xce, + 0x35, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x6b, 0x83, 0x94, 0x33, 0x09, 0x21, 0x48, 0x6c, + 0xa1, 0x1d, 0x29, 0x1c, 0x3e, 0x97, 0xee, 0x9a, + 0xd1, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0xb3, 0xd4, 0xe9, 0x90, 0x90, 0x34, 0xc6, 0x14, + 0xb1, 0x0a, 0xff, 0x55, 0x25, 0xd0, 0x9d, 0x8d +}; +static const u8 enc_output103[] __initconst = { + 0x41, 0x40, 0xdf, 0x25, 0xb8, 0xd3, 0x21, 0x94, + 0xe7, 0x8e, 0x51, 0xd4, 0x17, 0x38, 0xcc, 0x6d, + 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59, + 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01, + 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59, + 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01, + 0x86, 0xfb, 0xab, 0x2b, 0x4a, 0x94, 0xf4, 0x7a, + 0xa5, 0x6f, 0x0a, 0xea, 0x65, 0xd1, 0x10, 0x08 +}; +static const u8 enc_assoc103[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce103[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key103[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input104[] __initconst = { + 0xb2, 0x47, 0xa7, 0x47, 0x23, 0x49, 0x1a, 0xac, + 0xac, 0xaa, 0xd7, 0x09, 0xc9, 0x1e, 0x93, 0x2b, + 0x31, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x9a, 0xde, 0x04, 0xe7, 0x5b, 0xb7, 0x01, 0xd9, + 0x66, 0x06, 0x01, 0xb3, 0x47, 0x65, 0xde, 0x98, + 0xd5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0x42, 0x89, 0x79, 0x44, 0xc2, 0xa2, 0x8f, 0xa1, + 0x76, 0x11, 0xd7, 0xfa, 0x5c, 0x22, 0xad, 0x8f +}; +static const u8 enc_output104[] __initconst = { + 0xb2, 0x2c, 0x70, 0x68, 0xa5, 0x83, 0xfa, 0x35, + 0x0f, 0x85, 0x29, 0xc3, 0x75, 0xf8, 0xeb, 0x88, + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec, + 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03, + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec, + 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03, + 0xa0, 0x19, 0xac, 0x2e, 0xd6, 0x67, 0xe1, 0x7d, + 0xa1, 0x6f, 0x0a, 0xfa, 0x19, 0x61, 0x0d, 0x0d +}; +static const u8 enc_assoc104[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce104[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key104[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input105[] __initconst = { + 0x74, 0x0f, 0x9e, 0x49, 0xf6, 0x10, 0xef, 0xa5, + 0x85, 0xb6, 0x59, 0xca, 0x6e, 0xd8, 0xb4, 0x99, + 0x2d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x41, 0x2d, 0x96, 0xaf, 0xbe, 0x80, 0xec, 0x3e, + 0x79, 0xd4, 0x51, 0xb0, 0x0a, 0x2d, 0xb2, 0x9a, + 0xc9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0x99, 0x7a, 0xeb, 0x0c, 0x27, 0x95, 0x62, 0x46, + 0x69, 0xc3, 0x87, 0xf9, 0x11, 0x6a, 0xc1, 0x8d +}; +static const u8 enc_output105[] __initconst = { + 0x74, 0x64, 0x49, 0x66, 0x70, 0xda, 0x0f, 0x3c, + 0x26, 0x99, 0xa7, 0x00, 0xd2, 0x3e, 0xcc, 0x3a, + 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b, + 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01, + 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b, + 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01, + 0x73, 0x6e, 0x18, 0x18, 0x16, 0x96, 0xa5, 0x88, + 0x9c, 0x31, 0x59, 0xfa, 0xab, 0xab, 0x20, 0xfd +}; +static const u8 enc_assoc105[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce105[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key105[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input106[] __initconst = { + 0xad, 0xba, 0x5d, 0x10, 0x5b, 0xc8, 0xaa, 0x06, + 0x2c, 0x23, 0x36, 0xcb, 0x88, 0x9d, 0xdb, 0xd5, + 0x37, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x17, 0x7c, 0x5f, 0xfe, 0x28, 0x75, 0xf4, 0x68, + 0xf6, 0xc2, 0x96, 0x57, 0x48, 0xf3, 0x59, 0x9a, + 0xd3, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0xcf, 0x2b, 0x22, 0x5d, 0xb1, 0x60, 0x7a, 0x10, + 0xe6, 0xd5, 0x40, 0x1e, 0x53, 0xb4, 0x2a, 0x8d +}; +static const u8 enc_output106[] __initconst = { + 0xad, 0xd1, 0x8a, 0x3f, 0xdd, 0x02, 0x4a, 0x9f, + 0x8f, 0x0c, 0xc8, 0x01, 0x34, 0x7b, 0xa3, 0x76, + 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d, + 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01, + 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d, + 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01, + 0xba, 0xd5, 0x8f, 0x10, 0xa9, 0x1e, 0x6a, 0x88, + 0x9a, 0xba, 0x32, 0xfd, 0x17, 0xd8, 0x33, 0x1a +}; +static const u8 enc_assoc106[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce106[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key106[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input107[] __initconst = { + 0xfe, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xc0, 0x01, 0xed, 0xc5, 0xda, 0x44, 0x2e, 0x71, + 0x9b, 0xce, 0x9a, 0xbe, 0x27, 0x3a, 0xf1, 0x44, + 0xb4, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x48, 0x02, 0x5f, 0x41, 0xfa, 0x4e, 0x33, 0x6c, + 0x78, 0x69, 0x57, 0xa2, 0xa7, 0xc4, 0x93, 0x0a, + 0x6c, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x00, 0x26, 0x6e, 0xa1, 0xe4, 0x36, 0x44, 0xa3, + 0x4d, 0x8d, 0xd1, 0xdc, 0x93, 0xf2, 0xfa, 0x13 +}; +static const u8 enc_output107[] __initconst = { + 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x47, 0xc3, 0x27, 0xcc, 0x36, 0x5d, 0x08, 0x87, + 0x59, 0x09, 0x8c, 0x34, 0x1b, 0x4a, 0xed, 0x03, + 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa, + 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01, + 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa, + 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01, + 0xd6, 0x8c, 0xe1, 0x74, 0x07, 0x9a, 0xdd, 0x02, + 0x8d, 0xd0, 0x5c, 0xf8, 0x14, 0x63, 0x04, 0x88 +}; +static const u8 enc_assoc107[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce107[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key107[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input108[] __initconst = { + 0xb5, 0x13, 0xb0, 0x6a, 0xb9, 0xac, 0x14, 0x43, + 0x5a, 0xcb, 0x8a, 0xa3, 0xa3, 0x7a, 0xfd, 0xb6, + 0x54, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x61, 0x95, 0x01, 0x93, 0xb1, 0xbf, 0x03, 0x11, + 0xff, 0x11, 0x79, 0x89, 0xae, 0xd9, 0xa9, 0x99, + 0xb0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0xb9, 0xc2, 0x7c, 0x30, 0x28, 0xaa, 0x8d, 0x69, + 0xef, 0x06, 0xaf, 0xc0, 0xb5, 0x9e, 0xda, 0x8e +}; +static const u8 enc_output108[] __initconst = { + 0xb5, 0x78, 0x67, 0x45, 0x3f, 0x66, 0xf4, 0xda, + 0xf9, 0xe4, 0x74, 0x69, 0x1f, 0x9c, 0x85, 0x15, + 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24, + 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02, + 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24, + 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02, + 0xaa, 0x48, 0xa3, 0x88, 0x7d, 0x4b, 0x05, 0x96, + 0x99, 0xc2, 0xfd, 0xf9, 0xc6, 0x78, 0x7e, 0x0a +}; +static const u8 enc_assoc108[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce108[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key108[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input109[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xd4, 0xf1, 0x09, 0xe8, 0x14, 0xce, 0xa8, 0x5a, + 0x08, 0xc0, 0x11, 0xd8, 0x50, 0xdd, 0x1d, 0xcb, + 0xcf, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x53, 0x40, 0xb8, 0x5a, 0x9a, 0xa0, 0x82, 0x96, + 0xb7, 0x7a, 0x5f, 0xc3, 0x96, 0x1f, 0x66, 0x0f, + 0x17, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x1b, 0x64, 0x89, 0xba, 0x84, 0xd8, 0xf5, 0x59, + 0x82, 0x9e, 0xd9, 0xbd, 0xa2, 0x29, 0x0f, 0x16 +}; +static const u8 enc_output109[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x53, 0x33, 0xc3, 0xe1, 0xf8, 0xd7, 0x8e, 0xac, + 0xca, 0x07, 0x07, 0x52, 0x6c, 0xad, 0x01, 0x8c, + 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50, + 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04, + 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50, + 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04, + 0xb9, 0x36, 0xa8, 0x17, 0xf2, 0x21, 0x1a, 0xf1, + 0x29, 0xe2, 0xcf, 0x16, 0x0f, 0xd4, 0x2b, 0xcb +}; +static const u8 enc_assoc109[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce109[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key109[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input110[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xdf, 0x4c, 0x62, 0x03, 0x2d, 0x41, 0x19, 0xb5, + 0x88, 0x47, 0x7e, 0x99, 0x92, 0x5a, 0x56, 0xd9, + 0xd6, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0xfa, 0x84, 0xf0, 0x64, 0x55, 0x36, 0x42, 0x1b, + 0x2b, 0xb9, 0x24, 0x6e, 0xc2, 0x19, 0xed, 0x0b, + 0x0e, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0xb2, 0xa0, 0xc1, 0x84, 0x4b, 0x4e, 0x35, 0xd4, + 0x1e, 0x5d, 0xa2, 0x10, 0xf6, 0x2f, 0x84, 0x12 +}; +static const u8 enc_output110[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x58, 0x8e, 0xa8, 0x0a, 0xc1, 0x58, 0x3f, 0x43, + 0x4a, 0x80, 0x68, 0x13, 0xae, 0x2a, 0x4a, 0x9e, + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd, + 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00, + 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd, + 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00, + 0x9f, 0x7a, 0xc4, 0x35, 0x1f, 0x6b, 0x91, 0xe6, + 0x30, 0x97, 0xa7, 0x13, 0x11, 0x5d, 0x05, 0xbe +}; +static const u8 enc_assoc110[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce110[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key110[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input111[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x13, 0xf8, 0x0a, 0x00, 0x6d, 0xc1, 0xbb, 0xda, + 0xd6, 0x39, 0xa9, 0x2f, 0xc7, 0xec, 0xa6, 0x55, + 0xf7, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x63, 0x48, 0xb8, 0xfd, 0x29, 0xbf, 0x96, 0xd5, + 0x63, 0xa5, 0x17, 0xe2, 0x7d, 0x7b, 0xfc, 0x0f, + 0x2f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x2b, 0x6c, 0x89, 0x1d, 0x37, 0xc7, 0xe1, 0x1a, + 0x56, 0x41, 0x91, 0x9c, 0x49, 0x4d, 0x95, 0x16 +}; +static const u8 enc_output111[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x94, 0x3a, 0xc0, 0x09, 0x81, 0xd8, 0x9d, 0x2c, + 0x14, 0xfe, 0xbf, 0xa5, 0xfb, 0x9c, 0xba, 0x12, + 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13, + 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04, + 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13, + 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04, + 0x9a, 0x18, 0xa8, 0x28, 0x07, 0x02, 0x69, 0xf4, + 0x47, 0x00, 0xd0, 0x09, 0xe7, 0x17, 0x1c, 0xc9 +}; +static const u8 enc_assoc111[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce111[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key111[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input112[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x82, 0xe5, 0x9b, 0x45, 0x82, 0x91, 0x50, 0x38, + 0xf9, 0x33, 0x81, 0x1e, 0x65, 0x2d, 0xc6, 0x6a, + 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0xb6, 0x71, 0xc8, 0xca, 0xc2, 0x70, 0xc2, 0x65, + 0xa0, 0xac, 0x2f, 0x53, 0x57, 0x99, 0x88, 0x0a, + 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0xfe, 0x55, 0xf9, 0x2a, 0xdc, 0x08, 0xb5, 0xaa, + 0x95, 0x48, 0xa9, 0x2d, 0x63, 0xaf, 0xe1, 0x13 +}; +static const u8 enc_output112[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x05, 0x27, 0x51, 0x4c, 0x6e, 0x88, 0x76, 0xce, + 0x3b, 0xf4, 0x97, 0x94, 0x59, 0x5d, 0xda, 0x2d, + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3, + 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01, + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3, + 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01, + 0xb4, 0x36, 0xa8, 0x2b, 0x93, 0xd5, 0x55, 0xf7, + 0x43, 0x00, 0xd0, 0x19, 0x9b, 0xa7, 0x18, 0xce +}; +static const u8 enc_assoc112[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce112[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key112[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input113[] __initconst = { + 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0xf1, 0xd1, 0x28, 0x87, 0xb7, 0x21, 0x69, 0x86, + 0xa1, 0x2d, 0x79, 0x09, 0x8b, 0x6d, 0xe6, 0x0f, + 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0xa7, 0xc7, 0x58, 0x99, 0xf3, 0xe6, 0x0a, 0xf1, + 0xfc, 0xb6, 0xc7, 0x30, 0x7d, 0x87, 0x59, 0x0f, + 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0xef, 0xe3, 0x69, 0x79, 0xed, 0x9e, 0x7d, 0x3e, + 0xc9, 0x52, 0x41, 0x4e, 0x49, 0xb1, 0x30, 0x16 +}; +static const u8 enc_output113[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x76, 0x13, 0xe2, 0x8e, 0x5b, 0x38, 0x4f, 0x70, + 0x63, 0xea, 0x6f, 0x83, 0xb7, 0x1d, 0xfa, 0x48, + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37, + 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04, + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37, + 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04, + 0xce, 0x54, 0xa8, 0x2e, 0x1f, 0xa9, 0x42, 0xfa, + 0x3f, 0x00, 0xd0, 0x29, 0x4f, 0x37, 0x15, 0xd3 +}; +static const u8 enc_assoc113[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce113[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key113[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input114[] __initconst = { + 0xcb, 0xf1, 0xda, 0x9e, 0x0b, 0xa9, 0x37, 0x73, + 0x74, 0xe6, 0x9e, 0x1c, 0x0e, 0x60, 0x0c, 0xfc, + 0x34, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0xbe, 0x3f, 0xa6, 0x6b, 0x6c, 0xe7, 0x80, 0x8a, + 0xa3, 0xe4, 0x59, 0x49, 0xf9, 0x44, 0x64, 0x9f, + 0xd0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0x66, 0x68, 0xdb, 0xc8, 0xf5, 0xf2, 0x0e, 0xf2, + 0xb3, 0xf3, 0x8f, 0x00, 0xe2, 0x03, 0x17, 0x88 +}; +static const u8 enc_output114[] __initconst = { + 0xcb, 0x9a, 0x0d, 0xb1, 0x8d, 0x63, 0xd7, 0xea, + 0xd7, 0xc9, 0x60, 0xd6, 0xb2, 0x86, 0x74, 0x5f, + 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf, + 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04, + 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf, + 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04, + 0x23, 0x83, 0xab, 0x0b, 0x79, 0x92, 0x05, 0x69, + 0x9b, 0x51, 0x0a, 0xa7, 0x09, 0xbf, 0x31, 0xf1 +}; +static const u8 enc_assoc114[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce114[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key114[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input115[] __initconst = { + 0x8f, 0x27, 0x86, 0x94, 0xc4, 0xe9, 0xda, 0xeb, + 0xd5, 0x8d, 0x3e, 0x5b, 0x96, 0x6e, 0x8b, 0x68, + 0x42, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, + 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, + 0x06, 0x53, 0xe7, 0xa3, 0x31, 0x71, 0x88, 0x33, + 0xac, 0xc3, 0xb9, 0xad, 0xff, 0x1c, 0x31, 0x98, + 0xa6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, + 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, + 0xde, 0x04, 0x9a, 0x00, 0xa8, 0x64, 0x06, 0x4b, + 0xbc, 0xd4, 0x6f, 0xe4, 0xe4, 0x5b, 0x42, 0x8f +}; +static const u8 enc_output115[] __initconst = { + 0x8f, 0x4c, 0x51, 0xbb, 0x42, 0x23, 0x3a, 0x72, + 0x76, 0xa2, 0xc0, 0x91, 0x2a, 0x88, 0xf3, 0xcb, + 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06, + 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03, + 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06, + 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03, + 0x8b, 0xfb, 0xab, 0x17, 0xa9, 0xe0, 0xb8, 0x74, + 0x8b, 0x51, 0x0a, 0xe7, 0xd9, 0xfd, 0x23, 0x05 +}; +static const u8 enc_assoc115[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce115[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key115[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input116[] __initconst = { + 0xd5, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x9a, 0x22, 0xd7, 0x0a, 0x48, 0xe2, 0x4f, 0xdd, + 0xcd, 0xd4, 0x41, 0x9d, 0xe6, 0x4c, 0x8f, 0x44, + 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x77, 0xb5, 0xc9, 0x07, 0xd9, 0xc9, 0xe1, 0xea, + 0x51, 0x85, 0x1a, 0x20, 0x4a, 0xad, 0x9f, 0x0a, + 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x3f, 0x91, 0xf8, 0xe7, 0xc7, 0xb1, 0x96, 0x25, + 0x64, 0x61, 0x9c, 0x5e, 0x7e, 0x9b, 0xf6, 0x13 +}; +static const u8 enc_output116[] __initconst = { + 0xd5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x1d, 0xe0, 0x1d, 0x03, 0xa4, 0xfb, 0x69, 0x2b, + 0x0f, 0x13, 0x57, 0x17, 0xda, 0x3c, 0x93, 0x03, + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c, + 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01, + 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c, + 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01, + 0x49, 0xbc, 0x6e, 0x9f, 0xc5, 0x1c, 0x4d, 0x50, + 0x30, 0x36, 0x64, 0x4d, 0x84, 0x27, 0x73, 0xd2 +}; +static const u8 enc_assoc116[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce116[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key116[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input117[] __initconst = { + 0xdb, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x75, 0xd5, 0x64, 0x3a, 0xa5, 0xaf, 0x93, 0x4d, + 0x8c, 0xce, 0x39, 0x2c, 0xc3, 0xee, 0xdb, 0x47, + 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0x60, 0x1b, 0x5a, 0xd2, 0x06, 0x7f, 0x28, 0x06, + 0x6a, 0x8f, 0x32, 0x81, 0x71, 0x5b, 0xa8, 0x08, + 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x28, 0x3f, 0x6b, 0x32, 0x18, 0x07, 0x5f, 0xc9, + 0x5f, 0x6b, 0xb4, 0xff, 0x45, 0x6d, 0xc1, 0x11 +}; +static const u8 enc_output117[] __initconst = { + 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xf2, 0x17, 0xae, 0x33, 0x49, 0xb6, 0xb5, 0xbb, + 0x4e, 0x09, 0x2f, 0xa6, 0xff, 0x9e, 0xc7, 0x00, + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0, + 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03, + 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0, + 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03, + 0x63, 0xda, 0x6e, 0xa2, 0x51, 0xf0, 0x39, 0x53, + 0x2c, 0x36, 0x64, 0x5d, 0x38, 0xb7, 0x6f, 0xd7 +}; +static const u8 enc_assoc117[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce117[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key117[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +/* wycheproof - edge case intermediate sums in poly1305 */ +static const u8 enc_input118[] __initconst = { + 0x93, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, + 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, + 0x62, 0x48, 0x39, 0x60, 0x42, 0x16, 0xe4, 0x03, + 0xeb, 0xcc, 0x6a, 0xf5, 0x59, 0xec, 0x8b, 0x43, + 0x97, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, + 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, + 0xd8, 0xc8, 0xc3, 0xfa, 0x1a, 0x9e, 0x47, 0x4a, + 0xbe, 0x52, 0xd0, 0x2c, 0x81, 0x87, 0xe9, 0x0f, + 0x4f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, + 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, + 0x90, 0xec, 0xf2, 0x1a, 0x04, 0xe6, 0x30, 0x85, + 0x8b, 0xb6, 0x56, 0x52, 0xb5, 0xb1, 0x80, 0x16 +}; +static const u8 enc_output118[] __initconst = { + 0x93, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xe5, 0x8a, 0xf3, 0x69, 0xae, 0x0f, 0xc2, 0xf5, + 0x29, 0x0b, 0x7c, 0x7f, 0x65, 0x9c, 0x97, 0x04, + 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c, + 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04, + 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c, + 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04, + 0x73, 0xeb, 0x27, 0x24, 0xb5, 0xc4, 0x05, 0xf0, + 0x4d, 0x00, 0xd0, 0xf1, 0x58, 0x40, 0xa1, 0xc1 +}; +static const u8 enc_assoc118[] __initconst = { + 0xff, 0xff, 0xff, 0xff +}; +static const u8 enc_nonce118[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 +}; +static const u8 enc_key118[] __initconst = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f +}; + +static const struct chacha20poly1305_testvec +chacha20poly1305_enc_vectors[] __initconst = { + { enc_input001, enc_output001, enc_assoc001, enc_nonce001, enc_key001, + sizeof(enc_input001), sizeof(enc_assoc001), sizeof(enc_nonce001) }, + { enc_input002, enc_output002, enc_assoc002, enc_nonce002, enc_key002, + sizeof(enc_input002), sizeof(enc_assoc002), sizeof(enc_nonce002) }, + { enc_input003, enc_output003, enc_assoc003, enc_nonce003, enc_key003, + sizeof(enc_input003), sizeof(enc_assoc003), sizeof(enc_nonce003) }, + { enc_input004, enc_output004, enc_assoc004, enc_nonce004, enc_key004, + sizeof(enc_input004), sizeof(enc_assoc004), sizeof(enc_nonce004) }, + { enc_input005, enc_output005, enc_assoc005, enc_nonce005, enc_key005, + sizeof(enc_input005), sizeof(enc_assoc005), sizeof(enc_nonce005) }, + { enc_input006, enc_output006, enc_assoc006, enc_nonce006, enc_key006, + sizeof(enc_input006), sizeof(enc_assoc006), sizeof(enc_nonce006) }, + { enc_input007, enc_output007, enc_assoc007, enc_nonce007, enc_key007, + sizeof(enc_input007), sizeof(enc_assoc007), sizeof(enc_nonce007) }, + { enc_input008, enc_output008, enc_assoc008, enc_nonce008, enc_key008, + sizeof(enc_input008), sizeof(enc_assoc008), sizeof(enc_nonce008) }, + { enc_input009, enc_output009, enc_assoc009, enc_nonce009, enc_key009, + sizeof(enc_input009), sizeof(enc_assoc009), sizeof(enc_nonce009) }, + { enc_input010, enc_output010, enc_assoc010, enc_nonce010, enc_key010, + sizeof(enc_input010), sizeof(enc_assoc010), sizeof(enc_nonce010) }, + { enc_input011, enc_output011, enc_assoc011, enc_nonce011, enc_key011, + sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) }, + { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012, + sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) }, + { enc_input013, enc_output013, enc_assoc013, enc_nonce013, enc_key013, + sizeof(enc_input013), sizeof(enc_assoc013), sizeof(enc_nonce013) }, + { enc_input014, enc_output014, enc_assoc014, enc_nonce014, enc_key014, + sizeof(enc_input014), sizeof(enc_assoc014), sizeof(enc_nonce014) }, + { enc_input015, enc_output015, enc_assoc015, enc_nonce015, enc_key015, + sizeof(enc_input015), sizeof(enc_assoc015), sizeof(enc_nonce015) }, + { enc_input016, enc_output016, enc_assoc016, enc_nonce016, enc_key016, + sizeof(enc_input016), sizeof(enc_assoc016), sizeof(enc_nonce016) }, + { enc_input017, enc_output017, enc_assoc017, enc_nonce017, enc_key017, + sizeof(enc_input017), sizeof(enc_assoc017), sizeof(enc_nonce017) }, + { enc_input018, enc_output018, enc_assoc018, enc_nonce018, enc_key018, + sizeof(enc_input018), sizeof(enc_assoc018), sizeof(enc_nonce018) }, + { enc_input019, enc_output019, enc_assoc019, enc_nonce019, enc_key019, + sizeof(enc_input019), sizeof(enc_assoc019), sizeof(enc_nonce019) }, + { enc_input020, enc_output020, enc_assoc020, enc_nonce020, enc_key020, + sizeof(enc_input020), sizeof(enc_assoc020), sizeof(enc_nonce020) }, + { enc_input021, enc_output021, enc_assoc021, enc_nonce021, enc_key021, + sizeof(enc_input021), sizeof(enc_assoc021), sizeof(enc_nonce021) }, + { enc_input022, enc_output022, enc_assoc022, enc_nonce022, enc_key022, + sizeof(enc_input022), sizeof(enc_assoc022), sizeof(enc_nonce022) }, + { enc_input023, enc_output023, enc_assoc023, enc_nonce023, enc_key023, + sizeof(enc_input023), sizeof(enc_assoc023), sizeof(enc_nonce023) }, + { enc_input024, enc_output024, enc_assoc024, enc_nonce024, enc_key024, + sizeof(enc_input024), sizeof(enc_assoc024), sizeof(enc_nonce024) }, + { enc_input025, enc_output025, enc_assoc025, enc_nonce025, enc_key025, + sizeof(enc_input025), sizeof(enc_assoc025), sizeof(enc_nonce025) }, + { enc_input026, enc_output026, enc_assoc026, enc_nonce026, enc_key026, + sizeof(enc_input026), sizeof(enc_assoc026), sizeof(enc_nonce026) }, + { enc_input027, enc_output027, enc_assoc027, enc_nonce027, enc_key027, + sizeof(enc_input027), sizeof(enc_assoc027), sizeof(enc_nonce027) }, + { enc_input028, enc_output028, enc_assoc028, enc_nonce028, enc_key028, + sizeof(enc_input028), sizeof(enc_assoc028), sizeof(enc_nonce028) }, + { enc_input029, enc_output029, enc_assoc029, enc_nonce029, enc_key029, + sizeof(enc_input029), sizeof(enc_assoc029), sizeof(enc_nonce029) }, + { enc_input030, enc_output030, enc_assoc030, enc_nonce030, enc_key030, + sizeof(enc_input030), sizeof(enc_assoc030), sizeof(enc_nonce030) }, + { enc_input031, enc_output031, enc_assoc031, enc_nonce031, enc_key031, + sizeof(enc_input031), sizeof(enc_assoc031), sizeof(enc_nonce031) }, + { enc_input032, enc_output032, enc_assoc032, enc_nonce032, enc_key032, + sizeof(enc_input032), sizeof(enc_assoc032), sizeof(enc_nonce032) }, + { enc_input033, enc_output033, enc_assoc033, enc_nonce033, enc_key033, + sizeof(enc_input033), sizeof(enc_assoc033), sizeof(enc_nonce033) }, + { enc_input034, enc_output034, enc_assoc034, enc_nonce034, enc_key034, + sizeof(enc_input034), sizeof(enc_assoc034), sizeof(enc_nonce034) }, + { enc_input035, enc_output035, enc_assoc035, enc_nonce035, enc_key035, + sizeof(enc_input035), sizeof(enc_assoc035), sizeof(enc_nonce035) }, + { enc_input036, enc_output036, enc_assoc036, enc_nonce036, enc_key036, + sizeof(enc_input036), sizeof(enc_assoc036), sizeof(enc_nonce036) }, + { enc_input037, enc_output037, enc_assoc037, enc_nonce037, enc_key037, + sizeof(enc_input037), sizeof(enc_assoc037), sizeof(enc_nonce037) }, + { enc_input038, enc_output038, enc_assoc038, enc_nonce038, enc_key038, + sizeof(enc_input038), sizeof(enc_assoc038), sizeof(enc_nonce038) }, + { enc_input039, enc_output039, enc_assoc039, enc_nonce039, enc_key039, + sizeof(enc_input039), sizeof(enc_assoc039), sizeof(enc_nonce039) }, + { enc_input040, enc_output040, enc_assoc040, enc_nonce040, enc_key040, + sizeof(enc_input040), sizeof(enc_assoc040), sizeof(enc_nonce040) }, + { enc_input041, enc_output041, enc_assoc041, enc_nonce041, enc_key041, + sizeof(enc_input041), sizeof(enc_assoc041), sizeof(enc_nonce041) }, + { enc_input042, enc_output042, enc_assoc042, enc_nonce042, enc_key042, + sizeof(enc_input042), sizeof(enc_assoc042), sizeof(enc_nonce042) }, + { enc_input043, enc_output043, enc_assoc043, enc_nonce043, enc_key043, + sizeof(enc_input043), sizeof(enc_assoc043), sizeof(enc_nonce043) }, + { enc_input044, enc_output044, enc_assoc044, enc_nonce044, enc_key044, + sizeof(enc_input044), sizeof(enc_assoc044), sizeof(enc_nonce044) }, + { enc_input045, enc_output045, enc_assoc045, enc_nonce045, enc_key045, + sizeof(enc_input045), sizeof(enc_assoc045), sizeof(enc_nonce045) }, + { enc_input046, enc_output046, enc_assoc046, enc_nonce046, enc_key046, + sizeof(enc_input046), sizeof(enc_assoc046), sizeof(enc_nonce046) }, + { enc_input047, enc_output047, enc_assoc047, enc_nonce047, enc_key047, + sizeof(enc_input047), sizeof(enc_assoc047), sizeof(enc_nonce047) }, + { enc_input048, enc_output048, enc_assoc048, enc_nonce048, enc_key048, + sizeof(enc_input048), sizeof(enc_assoc048), sizeof(enc_nonce048) }, + { enc_input049, enc_output049, enc_assoc049, enc_nonce049, enc_key049, + sizeof(enc_input049), sizeof(enc_assoc049), sizeof(enc_nonce049) }, + { enc_input050, enc_output050, enc_assoc050, enc_nonce050, enc_key050, + sizeof(enc_input050), sizeof(enc_assoc050), sizeof(enc_nonce050) }, + { enc_input051, enc_output051, enc_assoc051, enc_nonce051, enc_key051, + sizeof(enc_input051), sizeof(enc_assoc051), sizeof(enc_nonce051) }, + { enc_input052, enc_output052, enc_assoc052, enc_nonce052, enc_key052, + sizeof(enc_input052), sizeof(enc_assoc052), sizeof(enc_nonce052) }, + { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053, + sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) }, + { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054, + sizeof(enc_input054), sizeof(enc_assoc054), sizeof(enc_nonce054) }, + { enc_input055, enc_output055, enc_assoc055, enc_nonce055, enc_key055, + sizeof(enc_input055), sizeof(enc_assoc055), sizeof(enc_nonce055) }, + { enc_input056, enc_output056, enc_assoc056, enc_nonce056, enc_key056, + sizeof(enc_input056), sizeof(enc_assoc056), sizeof(enc_nonce056) }, + { enc_input057, enc_output057, enc_assoc057, enc_nonce057, enc_key057, + sizeof(enc_input057), sizeof(enc_assoc057), sizeof(enc_nonce057) }, + { enc_input058, enc_output058, enc_assoc058, enc_nonce058, enc_key058, + sizeof(enc_input058), sizeof(enc_assoc058), sizeof(enc_nonce058) }, + { enc_input059, enc_output059, enc_assoc059, enc_nonce059, enc_key059, + sizeof(enc_input059), sizeof(enc_assoc059), sizeof(enc_nonce059) }, + { enc_input060, enc_output060, enc_assoc060, enc_nonce060, enc_key060, + sizeof(enc_input060), sizeof(enc_assoc060), sizeof(enc_nonce060) }, + { enc_input061, enc_output061, enc_assoc061, enc_nonce061, enc_key061, + sizeof(enc_input061), sizeof(enc_assoc061), sizeof(enc_nonce061) }, + { enc_input062, enc_output062, enc_assoc062, enc_nonce062, enc_key062, + sizeof(enc_input062), sizeof(enc_assoc062), sizeof(enc_nonce062) }, + { enc_input063, enc_output063, enc_assoc063, enc_nonce063, enc_key063, + sizeof(enc_input063), sizeof(enc_assoc063), sizeof(enc_nonce063) }, + { enc_input064, enc_output064, enc_assoc064, enc_nonce064, enc_key064, + sizeof(enc_input064), sizeof(enc_assoc064), sizeof(enc_nonce064) }, + { enc_input065, enc_output065, enc_assoc065, enc_nonce065, enc_key065, + sizeof(enc_input065), sizeof(enc_assoc065), sizeof(enc_nonce065) }, + { enc_input066, enc_output066, enc_assoc066, enc_nonce066, enc_key066, + sizeof(enc_input066), sizeof(enc_assoc066), sizeof(enc_nonce066) }, + { enc_input067, enc_output067, enc_assoc067, enc_nonce067, enc_key067, + sizeof(enc_input067), sizeof(enc_assoc067), sizeof(enc_nonce067) }, + { enc_input068, enc_output068, enc_assoc068, enc_nonce068, enc_key068, + sizeof(enc_input068), sizeof(enc_assoc068), sizeof(enc_nonce068) }, + { enc_input069, enc_output069, enc_assoc069, enc_nonce069, enc_key069, + sizeof(enc_input069), sizeof(enc_assoc069), sizeof(enc_nonce069) }, + { enc_input070, enc_output070, enc_assoc070, enc_nonce070, enc_key070, + sizeof(enc_input070), sizeof(enc_assoc070), sizeof(enc_nonce070) }, + { enc_input071, enc_output071, enc_assoc071, enc_nonce071, enc_key071, + sizeof(enc_input071), sizeof(enc_assoc071), sizeof(enc_nonce071) }, + { enc_input072, enc_output072, enc_assoc072, enc_nonce072, enc_key072, + sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) }, + { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073, + sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) }, + { enc_input074, enc_output074, enc_assoc074, enc_nonce074, enc_key074, + sizeof(enc_input074), sizeof(enc_assoc074), sizeof(enc_nonce074) }, + { enc_input075, enc_output075, enc_assoc075, enc_nonce075, enc_key075, + sizeof(enc_input075), sizeof(enc_assoc075), sizeof(enc_nonce075) }, + { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076, + sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) }, + { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077, + sizeof(enc_input077), sizeof(enc_assoc077), sizeof(enc_nonce077) }, + { enc_input078, enc_output078, enc_assoc078, enc_nonce078, enc_key078, + sizeof(enc_input078), sizeof(enc_assoc078), sizeof(enc_nonce078) }, + { enc_input079, enc_output079, enc_assoc079, enc_nonce079, enc_key079, + sizeof(enc_input079), sizeof(enc_assoc079), sizeof(enc_nonce079) }, + { enc_input080, enc_output080, enc_assoc080, enc_nonce080, enc_key080, + sizeof(enc_input080), sizeof(enc_assoc080), sizeof(enc_nonce080) }, + { enc_input081, enc_output081, enc_assoc081, enc_nonce081, enc_key081, + sizeof(enc_input081), sizeof(enc_assoc081), sizeof(enc_nonce081) }, + { enc_input082, enc_output082, enc_assoc082, enc_nonce082, enc_key082, + sizeof(enc_input082), sizeof(enc_assoc082), sizeof(enc_nonce082) }, + { enc_input083, enc_output083, enc_assoc083, enc_nonce083, enc_key083, + sizeof(enc_input083), sizeof(enc_assoc083), sizeof(enc_nonce083) }, + { enc_input084, enc_output084, enc_assoc084, enc_nonce084, enc_key084, + sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) }, + { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085, + sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) }, + { enc_input086, enc_output086, enc_assoc086, enc_nonce086, enc_key086, + sizeof(enc_input086), sizeof(enc_assoc086), sizeof(enc_nonce086) }, + { enc_input087, enc_output087, enc_assoc087, enc_nonce087, enc_key087, + sizeof(enc_input087), sizeof(enc_assoc087), sizeof(enc_nonce087) }, + { enc_input088, enc_output088, enc_assoc088, enc_nonce088, enc_key088, + sizeof(enc_input088), sizeof(enc_assoc088), sizeof(enc_nonce088) }, + { enc_input089, enc_output089, enc_assoc089, enc_nonce089, enc_key089, + sizeof(enc_input089), sizeof(enc_assoc089), sizeof(enc_nonce089) }, + { enc_input090, enc_output090, enc_assoc090, enc_nonce090, enc_key090, + sizeof(enc_input090), sizeof(enc_assoc090), sizeof(enc_nonce090) }, + { enc_input091, enc_output091, enc_assoc091, enc_nonce091, enc_key091, + sizeof(enc_input091), sizeof(enc_assoc091), sizeof(enc_nonce091) }, + { enc_input092, enc_output092, enc_assoc092, enc_nonce092, enc_key092, + sizeof(enc_input092), sizeof(enc_assoc092), sizeof(enc_nonce092) }, + { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093, + sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) }, + { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094, + sizeof(enc_input094), sizeof(enc_assoc094), sizeof(enc_nonce094) }, + { enc_input095, enc_output095, enc_assoc095, enc_nonce095, enc_key095, + sizeof(enc_input095), sizeof(enc_assoc095), sizeof(enc_nonce095) }, + { enc_input096, enc_output096, enc_assoc096, enc_nonce096, enc_key096, + sizeof(enc_input096), sizeof(enc_assoc096), sizeof(enc_nonce096) }, + { enc_input097, enc_output097, enc_assoc097, enc_nonce097, enc_key097, + sizeof(enc_input097), sizeof(enc_assoc097), sizeof(enc_nonce097) }, + { enc_input098, enc_output098, enc_assoc098, enc_nonce098, enc_key098, + sizeof(enc_input098), sizeof(enc_assoc098), sizeof(enc_nonce098) }, + { enc_input099, enc_output099, enc_assoc099, enc_nonce099, enc_key099, + sizeof(enc_input099), sizeof(enc_assoc099), sizeof(enc_nonce099) }, + { enc_input100, enc_output100, enc_assoc100, enc_nonce100, enc_key100, + sizeof(enc_input100), sizeof(enc_assoc100), sizeof(enc_nonce100) }, + { enc_input101, enc_output101, enc_assoc101, enc_nonce101, enc_key101, + sizeof(enc_input101), sizeof(enc_assoc101), sizeof(enc_nonce101) }, + { enc_input102, enc_output102, enc_assoc102, enc_nonce102, enc_key102, + sizeof(enc_input102), sizeof(enc_assoc102), sizeof(enc_nonce102) }, + { enc_input103, enc_output103, enc_assoc103, enc_nonce103, enc_key103, + sizeof(enc_input103), sizeof(enc_assoc103), sizeof(enc_nonce103) }, + { enc_input104, enc_output104, enc_assoc104, enc_nonce104, enc_key104, + sizeof(enc_input104), sizeof(enc_assoc104), sizeof(enc_nonce104) }, + { enc_input105, enc_output105, enc_assoc105, enc_nonce105, enc_key105, + sizeof(enc_input105), sizeof(enc_assoc105), sizeof(enc_nonce105) }, + { enc_input106, enc_output106, enc_assoc106, enc_nonce106, enc_key106, + sizeof(enc_input106), sizeof(enc_assoc106), sizeof(enc_nonce106) }, + { enc_input107, enc_output107, enc_assoc107, enc_nonce107, enc_key107, + sizeof(enc_input107), sizeof(enc_assoc107), sizeof(enc_nonce107) }, + { enc_input108, enc_output108, enc_assoc108, enc_nonce108, enc_key108, + sizeof(enc_input108), sizeof(enc_assoc108), sizeof(enc_nonce108) }, + { enc_input109, enc_output109, enc_assoc109, enc_nonce109, enc_key109, + sizeof(enc_input109), sizeof(enc_assoc109), sizeof(enc_nonce109) }, + { enc_input110, enc_output110, enc_assoc110, enc_nonce110, enc_key110, + sizeof(enc_input110), sizeof(enc_assoc110), sizeof(enc_nonce110) }, + { enc_input111, enc_output111, enc_assoc111, enc_nonce111, enc_key111, + sizeof(enc_input111), sizeof(enc_assoc111), sizeof(enc_nonce111) }, + { enc_input112, enc_output112, enc_assoc112, enc_nonce112, enc_key112, + sizeof(enc_input112), sizeof(enc_assoc112), sizeof(enc_nonce112) }, + { enc_input113, enc_output113, enc_assoc113, enc_nonce113, enc_key113, + sizeof(enc_input113), sizeof(enc_assoc113), sizeof(enc_nonce113) }, + { enc_input114, enc_output114, enc_assoc114, enc_nonce114, enc_key114, + sizeof(enc_input114), sizeof(enc_assoc114), sizeof(enc_nonce114) }, + { enc_input115, enc_output115, enc_assoc115, enc_nonce115, enc_key115, + sizeof(enc_input115), sizeof(enc_assoc115), sizeof(enc_nonce115) }, + { enc_input116, enc_output116, enc_assoc116, enc_nonce116, enc_key116, + sizeof(enc_input116), sizeof(enc_assoc116), sizeof(enc_nonce116) }, + { enc_input117, enc_output117, enc_assoc117, enc_nonce117, enc_key117, + sizeof(enc_input117), sizeof(enc_assoc117), sizeof(enc_nonce117) }, + { enc_input118, enc_output118, enc_assoc118, enc_nonce118, enc_key118, + sizeof(enc_input118), sizeof(enc_assoc118), sizeof(enc_nonce118) } +}; + +static const u8 dec_input001[] __initconst = { + 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, + 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, + 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, + 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2, + 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, + 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, + 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, + 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf, + 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, + 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, + 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, + 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, + 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, + 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, + 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, + 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, + 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, + 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, + 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, + 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, + 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, + 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, + 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, + 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, + 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, + 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, + 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, + 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, + 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, + 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, + 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, + 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, + 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, + 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, + 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, + 0x38 +}; +static const u8 dec_output001[] __initconst = { + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, + 0x9d +}; +static const u8 dec_assoc001[] __initconst = { + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x4e, 0x91 +}; +static const u8 dec_nonce001[] __initconst = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 +}; +static const u8 dec_key001[] __initconst = { + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 +}; + +static const u8 dec_input002[] __initconst = { + 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1, + 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92 +}; +static const u8 dec_output002[] __initconst = { }; +static const u8 dec_assoc002[] __initconst = { }; +static const u8 dec_nonce002[] __initconst = { + 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e +}; +static const u8 dec_key002[] __initconst = { + 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f, + 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86, + 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef, + 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68 +}; + +static const u8 dec_input003[] __initconst = { + 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6, + 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77 +}; +static const u8 dec_output003[] __initconst = { }; +static const u8 dec_assoc003[] __initconst = { + 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b +}; +static const u8 dec_nonce003[] __initconst = { + 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d +}; +static const u8 dec_key003[] __initconst = { + 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88, + 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a, + 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08, + 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d +}; + +static const u8 dec_input004[] __initconst = { + 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2, + 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac, + 0x89 +}; +static const u8 dec_output004[] __initconst = { + 0xa4 +}; +static const u8 dec_assoc004[] __initconst = { + 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40 +}; +static const u8 dec_nonce004[] __initconst = { + 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4 +}; +static const u8 dec_key004[] __initconst = { + 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8, + 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1, + 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d, + 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e +}; + +static const u8 dec_input005[] __initconst = { + 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e, + 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c, + 0xac +}; +static const u8 dec_output005[] __initconst = { + 0x2d +}; +static const u8 dec_assoc005[] __initconst = { }; +static const u8 dec_nonce005[] __initconst = { + 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30 +}; +static const u8 dec_key005[] __initconst = { + 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31, + 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87, + 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01, + 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87 +}; + +static const u8 dec_input006[] __initconst = { + 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1, + 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15, + 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c, + 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda, + 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11, + 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8, + 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc, + 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3, + 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5, + 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02, + 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93, + 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78, + 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1, + 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66, + 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc, + 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0, + 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d, + 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a, + 0xeb +}; +static const u8 dec_output006[] __initconst = { + 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a, + 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92, + 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37, + 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50, + 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec, + 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb, + 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66, + 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb, + 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b, + 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e, + 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3, + 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0, + 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb, + 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41, + 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc, + 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde, + 0x8f +}; +static const u8 dec_assoc006[] __initconst = { + 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b +}; +static const u8 dec_nonce006[] __initconst = { + 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c +}; +static const u8 dec_key006[] __initconst = { + 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae, + 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78, + 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9, + 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01 +}; + +static const u8 dec_input007[] __initconst = { + 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c, + 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8, + 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c, + 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb, + 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0, + 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21, + 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70, + 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac, + 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99, + 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9, + 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f, + 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7, + 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53, + 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12, + 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6, + 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0, + 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54, + 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6, + 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e, + 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb, + 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30, + 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f, + 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2, + 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e, + 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34, + 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39, + 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7, + 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9, + 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82, + 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04, + 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34, + 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef, + 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42, + 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53 +}; +static const u8 dec_output007[] __initconst = { + 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5, + 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a, + 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1, + 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17, + 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c, + 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1, + 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51, + 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1, + 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86, + 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a, + 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a, + 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98, + 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36, + 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34, + 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57, + 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84, + 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4, + 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80, + 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82, + 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5, + 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d, + 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c, + 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf, + 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc, + 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3, + 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14, + 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81, + 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77, + 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3, + 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2, + 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b, + 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3 +}; +static const u8 dec_assoc007[] __initconst = { }; +static const u8 dec_nonce007[] __initconst = { + 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0 +}; +static const u8 dec_key007[] __initconst = { + 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd, + 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c, + 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80, + 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01 +}; + +static const u8 dec_input008[] __initconst = { + 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd, + 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1, + 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93, + 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d, + 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c, + 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6, + 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4, + 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5, + 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84, + 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd, + 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed, + 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab, + 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13, + 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49, + 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6, + 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8, + 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2, + 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94, + 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18, + 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60, + 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8, + 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b, + 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f, + 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c, + 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20, + 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff, + 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9, + 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c, + 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9, + 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6, + 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea, + 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e, + 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82, + 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1, + 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70, + 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1, + 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c, + 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7, + 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc, + 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc, + 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3, + 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb, + 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97, + 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f, + 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39, + 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f, + 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d, + 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2, + 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d, + 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96, + 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b, + 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20, + 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95, + 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb, + 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35, + 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62, + 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9, + 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6, + 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8, + 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a, + 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93, + 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14, + 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99, + 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86, + 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f, + 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54 +}; +static const u8 dec_output008[] __initconst = { + 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10, + 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2, + 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c, + 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb, + 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12, + 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa, + 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6, + 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4, + 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91, + 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb, + 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47, + 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15, + 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f, + 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a, + 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3, + 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97, + 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80, + 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e, + 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f, + 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10, + 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a, + 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0, + 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35, + 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d, + 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d, + 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57, + 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4, + 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f, + 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39, + 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda, + 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17, + 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43, + 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19, + 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09, + 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21, + 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07, + 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f, + 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b, + 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a, + 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed, + 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2, + 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca, + 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff, + 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b, + 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b, + 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b, + 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6, + 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04, + 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48, + 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b, + 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13, + 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8, + 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f, + 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0, + 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92, + 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a, + 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41, + 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17, + 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30, + 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20, + 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49, + 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a, + 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b, + 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3 +}; +static const u8 dec_assoc008[] __initconst = { }; +static const u8 dec_nonce008[] __initconst = { + 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02 +}; +static const u8 dec_key008[] __initconst = { + 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53, + 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0, + 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86, + 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba +}; + +static const u8 dec_input009[] __initconst = { + 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf, + 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66, + 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72, + 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd, + 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28, + 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe, + 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06, + 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5, + 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7, + 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09, + 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a, + 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00, + 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62, + 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb, + 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2, + 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28, + 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e, + 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a, + 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6, + 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83, + 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9, + 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a, + 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79, + 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a, + 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea, + 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b, + 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52, + 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb, + 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89, + 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad, + 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19, + 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71, + 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d, + 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54, + 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a, + 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d, + 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95, + 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42, + 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16, + 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6, + 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf, + 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d, + 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f, + 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b, + 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e, + 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4, + 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c, + 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4, + 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1, + 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb, + 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff, + 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2, + 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06, + 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66, + 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90, + 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55, + 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc, + 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8, + 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62, + 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba, + 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2, + 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89, + 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06, + 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90, + 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf, + 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8, + 0xae +}; +static const u8 dec_output009[] __initconst = { + 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b, + 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8, + 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca, + 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09, + 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5, + 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85, + 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44, + 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97, + 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77, + 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41, + 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c, + 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00, + 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82, + 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f, + 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e, + 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55, + 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab, + 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17, + 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e, + 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f, + 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82, + 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3, + 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f, + 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0, + 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08, + 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b, + 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85, + 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28, + 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c, + 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62, + 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2, + 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3, + 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62, + 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40, + 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f, + 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b, + 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91, + 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5, + 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c, + 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4, + 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49, + 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04, + 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03, + 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa, + 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec, + 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6, + 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69, + 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36, + 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8, + 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf, + 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe, + 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82, + 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab, + 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d, + 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3, + 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5, + 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34, + 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49, + 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f, + 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d, + 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42, + 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef, + 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27, + 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52, + 0x65 +}; +static const u8 dec_assoc009[] __initconst = { + 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e, + 0xef +}; +static const u8 dec_nonce009[] __initconst = { + 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78 +}; +static const u8 dec_key009[] __initconst = { + 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5, + 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86, + 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2, + 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b +}; + +static const u8 dec_input010[] __initconst = { + 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b, + 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74, + 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1, + 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd, + 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6, + 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5, + 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96, + 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02, + 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30, + 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57, + 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53, + 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65, + 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71, + 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9, + 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18, + 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce, + 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a, + 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69, + 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2, + 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95, + 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49, + 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e, + 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a, + 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a, + 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e, + 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19, + 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b, + 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75, + 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d, + 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d, + 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f, + 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a, + 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d, + 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5, + 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c, + 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77, + 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46, + 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43, + 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe, + 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8, + 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76, + 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47, + 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8, + 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32, + 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59, + 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae, + 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a, + 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3, + 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74, + 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75, + 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2, + 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e, + 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2, + 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9, + 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1, + 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07, + 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79, + 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71, + 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad, + 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a, + 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c, + 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9, + 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79, + 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27, + 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90, + 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe, + 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99, + 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1, + 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9, + 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0, + 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28, + 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e, + 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20, + 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60, + 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47, + 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68, + 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe, + 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33, + 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8, + 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38, + 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7, + 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04, + 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c, + 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f, + 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c, + 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77, + 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54, + 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5, + 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4, + 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2, + 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e, + 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27, + 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f, + 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92, + 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55, + 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe, + 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04, + 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4, + 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56, + 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02, + 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2, + 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8, + 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27, + 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47, + 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10, + 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43, + 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0, + 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee, + 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47, + 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6, + 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d, + 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c, + 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3, + 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b, + 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09, + 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d, + 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1, + 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd, + 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4, + 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63, + 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87, + 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd, + 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e, + 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a, + 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c, + 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38, + 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a, + 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5, + 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9, + 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0 +}; +static const u8 dec_output010[] __initconst = { + 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf, + 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c, + 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22, + 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc, + 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16, + 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7, + 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4, + 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d, + 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5, + 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46, + 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82, + 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b, + 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a, + 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf, + 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca, + 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95, + 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09, + 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3, + 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3, + 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f, + 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58, + 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad, + 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde, + 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44, + 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a, + 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9, + 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26, + 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc, + 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74, + 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b, + 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93, + 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37, + 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f, + 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d, + 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca, + 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73, + 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f, + 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1, + 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9, + 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76, + 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac, + 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7, + 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce, + 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30, + 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb, + 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa, + 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd, + 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f, + 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb, + 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34, + 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e, + 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f, + 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53, + 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41, + 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e, + 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d, + 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27, + 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e, + 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8, + 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a, + 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12, + 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3, + 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66, + 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0, + 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c, + 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4, + 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49, + 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90, + 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11, + 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c, + 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b, + 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74, + 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c, + 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27, + 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1, + 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27, + 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88, + 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27, + 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b, + 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39, + 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7, + 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc, + 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe, + 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5, + 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf, + 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05, + 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73, + 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda, + 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe, + 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71, + 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed, + 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d, + 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33, + 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f, + 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a, + 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa, + 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e, + 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e, + 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87, + 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5, + 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4, + 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38, + 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34, + 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f, + 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36, + 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69, + 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44, + 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5, + 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce, + 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd, + 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27, + 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f, + 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8, + 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a, + 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5, + 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca, + 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e, + 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92, + 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13, + 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf, + 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6, + 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3, + 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b, + 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d, + 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f, + 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40, + 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c, + 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f +}; +static const u8 dec_assoc010[] __initconst = { + 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27, + 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2 +}; +static const u8 dec_nonce010[] __initconst = { + 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30 +}; +static const u8 dec_key010[] __initconst = { + 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44, + 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf, + 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74, + 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7 +}; + +static const u8 dec_input011[] __initconst = { + 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8, + 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc, + 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74, + 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73, + 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e, + 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9, + 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e, + 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd, + 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57, + 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19, + 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f, + 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45, + 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e, + 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39, + 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03, + 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f, + 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0, + 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce, + 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb, + 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52, + 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21, + 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a, + 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35, + 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91, + 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b, + 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e, + 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19, + 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07, + 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18, + 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96, + 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68, + 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4, + 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57, + 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c, + 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23, + 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8, + 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6, + 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40, + 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab, + 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb, + 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea, + 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8, + 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31, + 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0, + 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc, + 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94, + 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1, + 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46, + 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6, + 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7, + 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71, + 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a, + 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33, + 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38, + 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23, + 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb, + 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65, + 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73, + 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8, + 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb, + 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a, + 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca, + 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5, + 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71, + 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8, + 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d, + 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6, + 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d, + 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7, + 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5, + 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8, + 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd, + 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29, + 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22, + 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5, + 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67, + 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11, + 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e, + 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09, + 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4, + 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f, + 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa, + 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec, + 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b, + 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d, + 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b, + 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48, + 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3, + 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63, + 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd, + 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78, + 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed, + 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82, + 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f, + 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3, + 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9, + 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72, + 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74, + 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40, + 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b, + 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a, + 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5, + 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98, + 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71, + 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e, + 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4, + 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46, + 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e, + 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f, + 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93, + 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0, + 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5, + 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61, + 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64, + 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85, + 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20, + 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6, + 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc, + 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8, + 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50, + 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4, + 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80, + 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0, + 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a, + 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35, + 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43, + 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12, + 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7, + 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34, + 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42, + 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0, + 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95, + 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74, + 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5, + 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12, + 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6, + 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86, + 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97, + 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45, + 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19, + 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86, + 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c, + 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba, + 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29, + 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6, + 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6, + 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09, + 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31, + 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99, + 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b, + 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca, + 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00, + 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93, + 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3, + 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07, + 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda, + 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90, + 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b, + 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a, + 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6, + 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c, + 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57, + 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15, + 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e, + 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51, + 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75, + 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19, + 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08, + 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14, + 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba, + 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff, + 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90, + 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e, + 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93, + 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad, + 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2, + 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac, + 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d, + 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06, + 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c, + 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91, + 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17, + 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20, + 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7, + 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf, + 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c, + 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2, + 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e, + 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a, + 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05, + 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58, + 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8, + 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d, + 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71, + 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3, + 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe, + 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62, + 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16, + 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66, + 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4, + 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2, + 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35, + 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3, + 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4, + 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f, + 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe, + 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56, + 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b, + 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37, + 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3, + 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f, + 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f, + 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0, + 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70, + 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd, + 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f, + 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e, + 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67, + 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51, + 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23, + 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3, + 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5, + 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09, + 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7, + 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed, + 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb, + 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6, + 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5, + 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96, + 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe, + 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44, + 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6, + 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e, + 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0, + 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79, + 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f, + 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d, + 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82, + 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47, + 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93, + 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6, + 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69, + 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e, + 0x2b, 0xdf, 0xcd, 0xf9, 0x3c +}; +static const u8 dec_output011[] __initconst = { + 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b, + 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b, + 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d, + 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee, + 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30, + 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20, + 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f, + 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e, + 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66, + 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46, + 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35, + 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6, + 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0, + 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15, + 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13, + 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7, + 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3, + 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37, + 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc, + 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95, + 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8, + 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac, + 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45, + 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf, + 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d, + 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc, + 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45, + 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a, + 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec, + 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e, + 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10, + 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8, + 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66, + 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0, + 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62, + 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b, + 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4, + 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96, + 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7, + 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74, + 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8, + 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b, + 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70, + 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95, + 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3, + 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9, + 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d, + 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e, + 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32, + 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5, + 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80, + 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3, + 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad, + 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d, + 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20, + 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17, + 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6, + 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d, + 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82, + 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c, + 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9, + 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb, + 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96, + 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9, + 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f, + 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40, + 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc, + 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce, + 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71, + 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f, + 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35, + 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90, + 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8, + 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01, + 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1, + 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe, + 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4, + 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf, + 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9, + 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f, + 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04, + 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7, + 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15, + 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc, + 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0, + 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae, + 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb, + 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed, + 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51, + 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52, + 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84, + 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5, + 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4, + 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e, + 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74, + 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f, + 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13, + 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea, + 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b, + 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef, + 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09, + 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe, + 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1, + 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9, + 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15, + 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a, + 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab, + 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36, + 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd, + 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde, + 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd, + 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47, + 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5, + 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69, + 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21, + 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98, + 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07, + 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57, + 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd, + 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03, + 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11, + 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96, + 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91, + 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d, + 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0, + 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9, + 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42, + 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a, + 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18, + 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc, + 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce, + 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc, + 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0, + 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf, + 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7, + 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80, + 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c, + 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82, + 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9, + 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20, + 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58, + 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6, + 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc, + 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50, + 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86, + 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a, + 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80, + 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec, + 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08, + 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c, + 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde, + 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d, + 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17, + 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f, + 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26, + 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96, + 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97, + 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6, + 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55, + 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e, + 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88, + 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5, + 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b, + 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15, + 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1, + 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4, + 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3, + 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf, + 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e, + 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb, + 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76, + 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5, + 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c, + 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde, + 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f, + 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51, + 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9, + 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99, + 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6, + 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04, + 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31, + 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a, + 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56, + 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e, + 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78, + 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a, + 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7, + 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb, + 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6, + 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8, + 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc, + 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84, + 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86, + 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76, + 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a, + 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73, + 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8, + 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6, + 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2, + 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56, + 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb, + 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab, + 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76, + 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69, + 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d, + 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc, + 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22, + 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39, + 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6, + 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9, + 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f, + 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1, + 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83, + 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc, + 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4, + 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59, + 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68, + 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef, + 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1, + 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3, + 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44, + 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09, + 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8, + 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a, + 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d, + 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae, + 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2, + 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10, + 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a, + 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34, + 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f, + 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9, + 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b, + 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d, + 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57, + 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03, + 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87, + 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca, + 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53, + 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f, + 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61, + 0x10, 0x1e, 0xbf, 0xec, 0xa8 +}; +static const u8 dec_assoc011[] __initconst = { + 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7 +}; +static const u8 dec_nonce011[] __initconst = { + 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa +}; +static const u8 dec_key011[] __initconst = { + 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85, + 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca, + 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52, + 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38 +}; + +static const u8 dec_input012[] __initconst = { + 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, + 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, + 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, + 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, + 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, + 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, + 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, + 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, + 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, + 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, + 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, + 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, + 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, + 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, + 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, + 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, + 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, + 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, + 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, + 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, + 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, + 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, + 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, + 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, + 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, + 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, + 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, + 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, + 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, + 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, + 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, + 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, + 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, + 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, + 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, + 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, + 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, + 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, + 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, + 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, + 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, + 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, + 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, + 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, + 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, + 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, + 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, + 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, + 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, + 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, + 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, + 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, + 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, + 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, + 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, + 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, + 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, + 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, + 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, + 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, + 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, + 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, + 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, + 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, + 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, + 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, + 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, + 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, + 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, + 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, + 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, + 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, + 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, + 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, + 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, + 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, + 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, + 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, + 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, + 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, + 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, + 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, + 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, + 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, + 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, + 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, + 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, + 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, + 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, + 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, + 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, + 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, + 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, + 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, + 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, + 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, + 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, + 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, + 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, + 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, + 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, + 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, + 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, + 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, + 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, + 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, + 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, + 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, + 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, + 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, + 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, + 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, + 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, + 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, + 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, + 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, + 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, + 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, + 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, + 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, + 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, + 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, + 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, + 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, + 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, + 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, + 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, + 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, + 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, + 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, + 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, + 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, + 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, + 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, + 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, + 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, + 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, + 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, + 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, + 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, + 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, + 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, + 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, + 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, + 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, + 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, + 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, + 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, + 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, + 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, + 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, + 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, + 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, + 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, + 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, + 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, + 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, + 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, + 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, + 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, + 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, + 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, + 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, + 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, + 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, + 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, + 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, + 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, + 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, + 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, + 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, + 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, + 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, + 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, + 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, + 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, + 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, + 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, + 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, + 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, + 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, + 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, + 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, + 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, + 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, + 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, + 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, + 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, + 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, + 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, + 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, + 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, + 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, + 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, + 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, + 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, + 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, + 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, + 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, + 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, + 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, + 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, + 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, + 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, + 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, + 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, + 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, + 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, + 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, + 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, + 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, + 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, + 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, + 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, + 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, + 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, + 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, + 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, + 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, + 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, + 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, + 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, + 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, + 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, + 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, + 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, + 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, + 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, + 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, + 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, + 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, + 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, + 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, + 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, + 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, + 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, + 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, + 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, + 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, + 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, + 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, + 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, + 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, + 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, + 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, + 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, + 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, + 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, + 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, + 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, + 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, + 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, + 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, + 0x70, 0xcf, 0xd6 +}; +static const u8 dec_output012[] __initconst = { + 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, + 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, + 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, + 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, + 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, + 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, + 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, + 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, + 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, + 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, + 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, + 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, + 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, + 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, + 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, + 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, + 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, + 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, + 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, + 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, + 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, + 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, + 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, + 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, + 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, + 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, + 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, + 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, + 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, + 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, + 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, + 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, + 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, + 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, + 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, + 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, + 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, + 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, + 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, + 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, + 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, + 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, + 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, + 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, + 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, + 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, + 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, + 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, + 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, + 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, + 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, + 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, + 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, + 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, + 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, + 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, + 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, + 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, + 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, + 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, + 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, + 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, + 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, + 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, + 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, + 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, + 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, + 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, + 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, + 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, + 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, + 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, + 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, + 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, + 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, + 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, + 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, + 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, + 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, + 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, + 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, + 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, + 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, + 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, + 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, + 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, + 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, + 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, + 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, + 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, + 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, + 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, + 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, + 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, + 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, + 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, + 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, + 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, + 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, + 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, + 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, + 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, + 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, + 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, + 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, + 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, + 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, + 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, + 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, + 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, + 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, + 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, + 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, + 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, + 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, + 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, + 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, + 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, + 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, + 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, + 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, + 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, + 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, + 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, + 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, + 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, + 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, + 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, + 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, + 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, + 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, + 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, + 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, + 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, + 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, + 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, + 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, + 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, + 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, + 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, + 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, + 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, + 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, + 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, + 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, + 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, + 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, + 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, + 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, + 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, + 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, + 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, + 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, + 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, + 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, + 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, + 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, + 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, + 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, + 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, + 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, + 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, + 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, + 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, + 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, + 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, + 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, + 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, + 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, + 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, + 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, + 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, + 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, + 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, + 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, + 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, + 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, + 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, + 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, + 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, + 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, + 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, + 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, + 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, + 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, + 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, + 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, + 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, + 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, + 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, + 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, + 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, + 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, + 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, + 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, + 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, + 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, + 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, + 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, + 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, + 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, + 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, + 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, + 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, + 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, + 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, + 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, + 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, + 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, + 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, + 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, + 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, + 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, + 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, + 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, + 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, + 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, + 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, + 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, + 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, + 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, + 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, + 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, + 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, + 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, + 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, + 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, + 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, + 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, + 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, + 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, + 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, + 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, + 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, + 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, + 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, + 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, + 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, + 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, + 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, + 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, + 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, + 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, + 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, + 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, + 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, + 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, + 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, + 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, + 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, + 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, + 0x78, 0xec, 0x00 +}; +static const u8 dec_assoc012[] __initconst = { + 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, + 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, + 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, + 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, + 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, + 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, + 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, + 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 +}; +static const u8 dec_nonce012[] __initconst = { + 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 +}; +static const u8 dec_key012[] __initconst = { + 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, + 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, + 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 +}; + +static const u8 dec_input013[] __initconst = { + 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, + 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, + 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, + 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, + 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, + 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, + 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, + 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, + 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, + 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, + 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, + 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, + 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, + 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, + 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, + 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, + 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, + 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, + 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, + 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, + 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, + 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, + 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, + 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, + 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, + 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, + 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, + 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, + 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, + 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, + 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, + 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, + 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, + 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, + 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, + 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, + 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, + 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, + 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, + 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, + 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, + 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, + 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, + 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, + 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, + 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, + 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, + 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, + 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, + 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, + 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, + 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, + 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, + 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, + 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, + 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, + 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, + 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, + 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, + 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, + 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, + 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, + 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, + 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, + 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, + 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, + 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, + 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, + 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, + 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, + 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, + 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, + 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, + 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, + 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, + 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, + 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, + 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, + 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, + 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, + 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, + 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, + 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, + 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, + 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, + 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, + 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, + 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, + 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, + 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, + 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, + 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, + 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, + 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, + 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, + 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, + 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, + 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, + 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, + 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, + 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, + 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, + 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, + 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, + 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, + 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, + 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, + 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, + 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, + 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, + 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, + 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, + 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, + 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, + 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, + 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, + 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, + 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, + 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, + 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, + 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, + 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, + 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, + 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, + 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, + 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, + 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, + 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, + 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, + 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, + 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, + 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, + 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, + 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, + 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, + 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, + 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, + 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, + 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, + 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, + 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, + 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, + 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, + 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, + 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, + 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, + 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, + 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, + 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, + 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, + 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, + 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, + 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, + 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, + 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, + 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, + 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, + 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, + 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, + 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, + 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, + 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, + 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, + 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, + 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, + 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, + 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, + 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, + 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, + 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, + 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, + 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, + 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, + 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, + 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, + 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, + 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, + 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, + 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, + 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, + 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, + 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, + 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, + 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, + 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, + 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, + 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, + 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, + 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, + 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, + 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, + 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, + 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, + 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, + 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, + 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, + 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, + 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, + 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, + 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, + 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, + 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, + 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, + 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, + 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, + 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, + 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, + 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, + 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, + 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, + 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, + 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, + 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, + 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, + 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, + 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, + 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, + 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, + 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, + 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, + 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, + 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, + 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, + 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, + 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, + 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, + 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, + 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, + 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, + 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, + 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, + 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, + 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, + 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, + 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, + 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, + 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, + 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, + 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, + 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, + 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, + 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, + 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, + 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, + 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, + 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, + 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, + 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, + 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, + 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, + 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, + 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, + 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, + 0x70, 0xcf, 0xd7 +}; +static const u8 dec_output013[] __initconst = { + 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, + 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, + 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, + 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, + 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, + 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, + 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, + 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, + 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, + 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, + 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, + 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, + 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, + 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, + 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, + 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, + 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, + 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, + 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, + 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, + 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, + 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, + 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, + 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, + 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, + 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, + 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, + 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, + 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, + 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, + 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, + 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, + 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, + 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, + 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, + 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, + 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, + 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, + 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, + 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, + 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, + 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, + 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, + 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, + 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, + 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, + 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, + 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, + 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, + 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, + 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, + 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, + 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, + 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, + 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, + 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, + 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, + 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, + 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, + 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, + 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, + 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, + 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, + 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, + 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, + 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, + 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, + 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, + 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, + 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, + 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, + 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, + 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, + 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, + 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, + 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, + 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, + 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, + 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, + 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, + 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, + 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, + 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, + 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, + 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, + 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, + 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, + 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, + 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, + 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, + 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, + 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, + 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, + 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, + 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, + 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, + 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, + 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, + 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, + 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, + 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, + 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, + 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, + 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, + 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, + 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, + 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, + 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, + 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, + 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, + 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, + 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, + 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, + 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, + 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, + 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, + 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, + 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, + 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, + 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, + 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, + 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, + 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, + 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, + 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, + 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, + 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, + 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, + 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, + 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, + 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, + 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, + 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, + 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, + 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, + 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, + 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, + 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, + 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, + 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, + 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, + 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, + 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, + 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, + 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, + 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, + 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, + 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, + 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, + 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, + 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, + 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, + 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, + 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, + 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, + 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, + 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, + 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, + 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, + 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, + 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, + 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, + 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, + 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, + 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, + 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, + 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, + 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, + 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, + 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, + 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, + 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, + 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, + 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, + 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, + 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, + 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, + 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, + 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, + 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, + 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, + 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, + 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, + 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, + 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, + 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, + 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, + 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, + 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, + 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, + 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, + 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, + 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, + 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, + 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, + 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, + 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, + 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, + 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, + 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, + 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, + 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, + 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, + 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, + 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, + 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, + 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, + 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, + 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, + 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, + 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, + 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, + 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, + 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, + 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, + 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, + 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, + 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, + 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, + 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, + 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, + 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, + 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, + 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, + 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, + 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, + 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, + 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, + 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, + 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, + 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, + 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, + 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, + 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, + 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, + 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, + 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, + 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, + 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, + 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, + 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, + 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, + 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, + 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, + 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, + 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, + 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, + 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, + 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, + 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, + 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, + 0x78, 0xec, 0x00 +}; +static const u8 dec_assoc013[] __initconst = { + 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, + 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, + 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, + 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, + 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, + 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, + 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, + 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 +}; +static const u8 dec_nonce013[] __initconst = { + 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 +}; +static const u8 dec_key013[] __initconst = { + 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, + 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, + 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 +}; + +static const struct chacha20poly1305_testvec +chacha20poly1305_dec_vectors[] __initconst = { + { dec_input001, dec_output001, dec_assoc001, dec_nonce001, dec_key001, + sizeof(dec_input001), sizeof(dec_assoc001), sizeof(dec_nonce001) }, + { dec_input002, dec_output002, dec_assoc002, dec_nonce002, dec_key002, + sizeof(dec_input002), sizeof(dec_assoc002), sizeof(dec_nonce002) }, + { dec_input003, dec_output003, dec_assoc003, dec_nonce003, dec_key003, + sizeof(dec_input003), sizeof(dec_assoc003), sizeof(dec_nonce003) }, + { dec_input004, dec_output004, dec_assoc004, dec_nonce004, dec_key004, + sizeof(dec_input004), sizeof(dec_assoc004), sizeof(dec_nonce004) }, + { dec_input005, dec_output005, dec_assoc005, dec_nonce005, dec_key005, + sizeof(dec_input005), sizeof(dec_assoc005), sizeof(dec_nonce005) }, + { dec_input006, dec_output006, dec_assoc006, dec_nonce006, dec_key006, + sizeof(dec_input006), sizeof(dec_assoc006), sizeof(dec_nonce006) }, + { dec_input007, dec_output007, dec_assoc007, dec_nonce007, dec_key007, + sizeof(dec_input007), sizeof(dec_assoc007), sizeof(dec_nonce007) }, + { dec_input008, dec_output008, dec_assoc008, dec_nonce008, dec_key008, + sizeof(dec_input008), sizeof(dec_assoc008), sizeof(dec_nonce008) }, + { dec_input009, dec_output009, dec_assoc009, dec_nonce009, dec_key009, + sizeof(dec_input009), sizeof(dec_assoc009), sizeof(dec_nonce009) }, + { dec_input010, dec_output010, dec_assoc010, dec_nonce010, dec_key010, + sizeof(dec_input010), sizeof(dec_assoc010), sizeof(dec_nonce010) }, + { dec_input011, dec_output011, dec_assoc011, dec_nonce011, dec_key011, + sizeof(dec_input011), sizeof(dec_assoc011), sizeof(dec_nonce011) }, + { dec_input012, dec_output012, dec_assoc012, dec_nonce012, dec_key012, + sizeof(dec_input012), sizeof(dec_assoc012), sizeof(dec_nonce012) }, + { dec_input013, dec_output013, dec_assoc013, dec_nonce013, dec_key013, + sizeof(dec_input013), sizeof(dec_assoc013), sizeof(dec_nonce013), + true } +}; + +static const u8 xenc_input001[] __initconst = { + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, + 0x9d +}; +static const u8 xenc_output001[] __initconst = { + 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77, + 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92, + 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18, + 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d, + 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e, + 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86, + 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2, + 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85, + 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09, + 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49, + 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd, + 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8, + 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f, + 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79, + 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8, + 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0, + 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88, + 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71, + 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91, + 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf, + 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89, + 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46, + 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e, + 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90, + 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b, + 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58, + 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54, + 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1, + 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73, + 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69, + 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05, + 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83, + 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13, + 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8, + 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5, + 0x9c +}; +static const u8 xenc_assoc001[] __initconst = { + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x4e, 0x91 +}; +static const u8 xenc_nonce001[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 +}; +static const u8 xenc_key001[] __initconst = { + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 +}; + +static const struct chacha20poly1305_testvec +xchacha20poly1305_enc_vectors[] __initconst = { + { xenc_input001, xenc_output001, xenc_assoc001, xenc_nonce001, xenc_key001, + sizeof(xenc_input001), sizeof(xenc_assoc001), sizeof(xenc_nonce001) } +}; + +static const u8 xdec_input001[] __initconst = { + 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77, + 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92, + 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18, + 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d, + 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e, + 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86, + 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2, + 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85, + 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09, + 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49, + 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd, + 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8, + 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f, + 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79, + 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8, + 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0, + 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88, + 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71, + 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91, + 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf, + 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89, + 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46, + 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e, + 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90, + 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b, + 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58, + 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54, + 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1, + 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73, + 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69, + 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05, + 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83, + 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13, + 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8, + 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5, + 0x9c +}; +static const u8 xdec_output001[] __initconst = { + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, + 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, + 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, + 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, + 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, + 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, + 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, + 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, + 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, + 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, + 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, + 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, + 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, + 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, + 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, + 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, + 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, + 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, + 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, + 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, + 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, + 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, + 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, + 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, + 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, + 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, + 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, + 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, + 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, + 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, + 0x9d +}; +static const u8 xdec_assoc001[] __initconst = { + 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x4e, 0x91 +}; +static const u8 xdec_nonce001[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 +}; +static const u8 xdec_key001[] __initconst = { + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 +}; + +static const struct chacha20poly1305_testvec +xchacha20poly1305_dec_vectors[] __initconst = { + { xdec_input001, xdec_output001, xdec_assoc001, xdec_nonce001, xdec_key001, + sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) } +}; + +static void __init +chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 *src, + const size_t src_len, const u8 *ad, + const size_t ad_len, + const u8 nonce[12], + const u8 key[CHACHA20POLY1305_KEY_SIZE]) +{ + simd_context_t simd_context; + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + union { + u8 block0[POLY1305_KEY_SIZE]; + __le64 lens[2]; + } b = {{ 0 }}; + + simd_get(&simd_context); + chacha20_init(&chacha20_state, key, 0); + chacha20_state.counter[1] = get_unaligned_le32(nonce + 0); + chacha20_state.counter[2] = get_unaligned_le32(nonce + 4); + chacha20_state.counter[3] = get_unaligned_le32(nonce + 8); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + &simd_context); + poly1305_init(&poly1305_state, b.block0); + poly1305_update(&poly1305_state, ad, ad_len, &simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + &simd_context); + chacha20(&chacha20_state, dst, src, src_len, &simd_context); + poly1305_update(&poly1305_state, dst, src_len, &simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, + &simd_context); + b.lens[0] = cpu_to_le64(ad_len); + b.lens[1] = cpu_to_le64(src_len); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + &simd_context); + poly1305_final(&poly1305_state, dst + src_len, &simd_context); + simd_put(&simd_context); + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + memzero_explicit(&b, sizeof(b)); +} + +static void __init +chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, + const u8 *nonce, const size_t nonce_len, + const u8 key[CHACHA20POLY1305_KEY_SIZE]) +{ + if (nonce_len == 8) + chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, + get_unaligned_le64(nonce), key); + else if (nonce_len == 12) + chacha20poly1305_selftest_encrypt_bignonce(dst, src, src_len, + ad, ad_len, nonce, + key); + else + BUG(); +} + +static bool __init +decryption_success(bool func_ret, bool expect_failure, int memcmp_result) +{ + if (expect_failure) + return !func_ret; + return func_ret && !memcmp_result; +} + +static bool __init chacha20poly1305_selftest(void) +{ + enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; + size_t i, j, k, total_len; + u8 *computed_output = NULL, *input = NULL; + bool success = true, ret; + simd_context_t simd_context; + struct scatterlist sg_src[3]; + + computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); + input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); + if (!computed_output || !input) { + pr_err("chacha20poly1305 self-test malloc: FAIL\n"); + success = false; + goto out; + } + + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); + chacha20poly1305_selftest_encrypt(computed_output, + chacha20poly1305_enc_vectors[i].input, + chacha20poly1305_enc_vectors[i].ilen, + chacha20poly1305_enc_vectors[i].assoc, + chacha20poly1305_enc_vectors[i].alen, + chacha20poly1305_enc_vectors[i].nonce, + chacha20poly1305_enc_vectors[i].nlen, + chacha20poly1305_enc_vectors[i].key); + if (memcmp(computed_output, + chacha20poly1305_enc_vectors[i].output, + chacha20poly1305_enc_vectors[i].ilen + + POLY1305_MAC_SIZE)) { + pr_err("chacha20poly1305 encryption self-test %zu: FAIL\n", + i + 1); + success = false; + } + } + simd_get(&simd_context); + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { + if (chacha20poly1305_enc_vectors[i].nlen != 8) + continue; + memcpy(computed_output, chacha20poly1305_enc_vectors[i].input, + chacha20poly1305_enc_vectors[i].ilen); + sg_init_one(sg_src, computed_output, + chacha20poly1305_enc_vectors[i].ilen + + POLY1305_MAC_SIZE); + ret = chacha20poly1305_encrypt_sg_inplace(sg_src, + chacha20poly1305_enc_vectors[i].ilen, + chacha20poly1305_enc_vectors[i].assoc, + chacha20poly1305_enc_vectors[i].alen, + get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce), + chacha20poly1305_enc_vectors[i].key, + &simd_context); + if (!ret || memcmp(computed_output, + chacha20poly1305_enc_vectors[i].output, + chacha20poly1305_enc_vectors[i].ilen + + POLY1305_MAC_SIZE)) { + pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n", + i + 1); + success = false; + } + } + simd_put(&simd_context); + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); + ret = chacha20poly1305_decrypt(computed_output, + chacha20poly1305_dec_vectors[i].input, + chacha20poly1305_dec_vectors[i].ilen, + chacha20poly1305_dec_vectors[i].assoc, + chacha20poly1305_dec_vectors[i].alen, + get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce), + chacha20poly1305_dec_vectors[i].key); + if (!decryption_success(ret, + chacha20poly1305_dec_vectors[i].failure, + memcmp(computed_output, + chacha20poly1305_dec_vectors[i].output, + chacha20poly1305_dec_vectors[i].ilen - + POLY1305_MAC_SIZE))) { + pr_err("chacha20poly1305 decryption self-test %zu: FAIL\n", + i + 1); + success = false; + } + } + simd_get(&simd_context); + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { + memcpy(computed_output, chacha20poly1305_dec_vectors[i].input, + chacha20poly1305_dec_vectors[i].ilen); + sg_init_one(sg_src, computed_output, + chacha20poly1305_dec_vectors[i].ilen); + ret = chacha20poly1305_decrypt_sg_inplace(sg_src, + chacha20poly1305_dec_vectors[i].ilen, + chacha20poly1305_dec_vectors[i].assoc, + chacha20poly1305_dec_vectors[i].alen, + get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce), + chacha20poly1305_dec_vectors[i].key, &simd_context); + if (!decryption_success(ret, + chacha20poly1305_dec_vectors[i].failure, + memcmp(computed_output, chacha20poly1305_dec_vectors[i].output, + chacha20poly1305_dec_vectors[i].ilen - + POLY1305_MAC_SIZE))) { + pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n", + i + 1); + success = false; + } + } + simd_put(&simd_context); + for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); + xchacha20poly1305_encrypt(computed_output, + xchacha20poly1305_enc_vectors[i].input, + xchacha20poly1305_enc_vectors[i].ilen, + xchacha20poly1305_enc_vectors[i].assoc, + xchacha20poly1305_enc_vectors[i].alen, + xchacha20poly1305_enc_vectors[i].nonce, + xchacha20poly1305_enc_vectors[i].key); + if (memcmp(computed_output, + xchacha20poly1305_enc_vectors[i].output, + xchacha20poly1305_enc_vectors[i].ilen + + POLY1305_MAC_SIZE)) { + pr_err("xchacha20poly1305 encryption self-test %zu: FAIL\n", + i + 1); + success = false; + } + } + for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) { + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); + ret = xchacha20poly1305_decrypt(computed_output, + xchacha20poly1305_dec_vectors[i].input, + xchacha20poly1305_dec_vectors[i].ilen, + xchacha20poly1305_dec_vectors[i].assoc, + xchacha20poly1305_dec_vectors[i].alen, + xchacha20poly1305_dec_vectors[i].nonce, + xchacha20poly1305_dec_vectors[i].key); + if (!decryption_success(ret, + xchacha20poly1305_dec_vectors[i].failure, + memcmp(computed_output, + xchacha20poly1305_dec_vectors[i].output, + xchacha20poly1305_dec_vectors[i].ilen - + POLY1305_MAC_SIZE))) { + pr_err("xchacha20poly1305 decryption self-test %zu: FAIL\n", + i + 1); + success = false; + } + } + + simd_get(&simd_context); + for (total_len = POLY1305_MAC_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST) + && total_len <= 1 << 10; ++total_len) { + for (i = 0; i <= total_len; ++i) { + for (j = i; j <= total_len; ++j) { + sg_init_table(sg_src, 3); + sg_set_buf(&sg_src[0], input, i); + sg_set_buf(&sg_src[1], input + i, j - i); + sg_set_buf(&sg_src[2], input + j, total_len - j); + memset(computed_output, 0, total_len); + memset(input, 0, total_len); + + if (!chacha20poly1305_encrypt_sg_inplace(sg_src, + total_len - POLY1305_MAC_SIZE, NULL, 0, + 0, enc_key001, &simd_context)) + goto chunkfail; + chacha20poly1305_encrypt(computed_output, + computed_output, + total_len - POLY1305_MAC_SIZE, NULL, 0, 0, + enc_key001); + if (memcmp(computed_output, input, total_len)) + goto chunkfail;; + if (!chacha20poly1305_decrypt(computed_output, + input, total_len, NULL, 0, 0, enc_key001)) + goto chunkfail; + for (k = 0; k < total_len - POLY1305_MAC_SIZE; ++k) { + if (computed_output[k]) + goto chunkfail; + } + if (!chacha20poly1305_decrypt_sg_inplace(sg_src, + total_len, NULL, 0, 0, enc_key001, + &simd_context)) + goto chunkfail; + for (k = 0; k < total_len - POLY1305_MAC_SIZE; ++k) { + if (input[k]) + goto chunkfail; + } + continue; + + chunkfail: + pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n", + total_len, i, j); + success = false; + } + + } + } + simd_put(&simd_context); + +out: + kfree(computed_output); + kfree(input); + return success; +} diff --git a/net/wireguard/crypto/zinc/selftest/curve25519.c b/net/wireguard/crypto/zinc/selftest/curve25519.c new file mode 100644 index 000000000000..0e3e3af06ba4 --- /dev/null +++ b/net/wireguard/crypto/zinc/selftest/curve25519.c @@ -0,0 +1,1315 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +struct curve25519_test_vector { + u8 private[CURVE25519_KEY_SIZE]; + u8 public[CURVE25519_KEY_SIZE]; + u8 result[CURVE25519_KEY_SIZE]; + bool valid; +}; +static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = { + { + .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, + 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, + 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, + 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, + .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, + 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, + 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, + 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, + .valid = true + }, + { + .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, + 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, + 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, + 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, + .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, + 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, + 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, + 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, + .valid = true + }, + { + .private = { 1 }, + .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, + 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, + 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, + 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, + .valid = true + }, + { + .private = { 1 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, + 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, + 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, + 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, + .valid = true + }, + { + .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, + .valid = true + }, + { + .private = { 1, 2, 3, 4 }, + .public = { 0 }, + .result = { 0 }, + .valid = false + }, + { + .private = { 2, 4, 6, 8 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, + .result = { 0 }, + .valid = false + }, + { + .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, + .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, + 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, + 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, + 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, + .valid = true + }, + { + .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, + .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, + 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, + 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, + 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, + .valid = true + }, + /* wycheproof - normal case */ + { + .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, + 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, + 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, + 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, + .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, + 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, + 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, + 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, + .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, + 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, + 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, + 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, + 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, + 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, + 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, + .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, + 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, + 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, + 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, + .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, + 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, + 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, + 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, + 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, + 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, + 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, + .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, + 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, + 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, + 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, + .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, + 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, + 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, + 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, + 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, + 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, + 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, + .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, + 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, + 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, + 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, + .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, + 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, + 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, + 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, + 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, + 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, + 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, + .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, + 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, + 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, + 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, + .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, + 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, + 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, + 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, + 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, + 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, + 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, + .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, + 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, + 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, + 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, + .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, + 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, + 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, + 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, + .valid = true + }, + /* wycheproof - public key = 0 */ + { + .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, + 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, + 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, + 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key = 1 */ + { + .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, + 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, + 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, + 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, + 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, + 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, + 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, + 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, + 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, + 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, + 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, + 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, + 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, + 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, + 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, + 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, + 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, + 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, + 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, + .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, + 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, + 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, + .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, + 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, + 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, + 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, + 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, + 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, + 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, + .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, + 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, + 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, + .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, + 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, + 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, + 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, + 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, + 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, + 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, + .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, + 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, + 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, + 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, + 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, + 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, + 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, + 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, + 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, + 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, + 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, + 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, + 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, + 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, + 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, + 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, + 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, + 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, + 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, + 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, + 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, + 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, + 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, + 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, + 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, + .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, + 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, + 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, + 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, + 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, + 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, + 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, + .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, + 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, + 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, + .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, + 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, + 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, + 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, + 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, + 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, + 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, + 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, + 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, + 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, + 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, + 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, + 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, + .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, + 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, + 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, + 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, + 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, + 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, + 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, + 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, + 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, + 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, + .valid = true + }, + /* wycheproof - public key with low order */ + { + .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, + 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, + 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, + 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, + 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, + 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, + 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, + 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, + 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, + 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, + 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, + 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, + 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, + 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, + 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, + 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, + 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, + 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c, + 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, + 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, + 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, + 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, + 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, + 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, + 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, + 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, + 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, + 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, + 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, + 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6, + 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, + 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, + 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, + 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, + 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, + 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, + 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, + 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, + 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, + 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, + 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, + 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, + 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, + 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, + 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, + 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, + 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, + 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, + 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, + 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, + 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, + 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, + 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, + 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, + 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, + 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, + 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, + 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, + 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, + 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, + 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, + 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, + 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, + 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, + 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, + 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, + 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, + 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, + 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, + 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, + 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, + 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, + 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, + 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, + 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, + 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, + 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, + 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, + 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, + 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, + 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, + 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, + .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, + 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, + 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, + 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, + 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, + 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, + 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, + .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, + 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, + 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, + 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, + 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, + 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, + 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, + .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, + 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, + 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, + 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, + 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, + 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, + 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, + 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, + 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, + 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, + 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, + 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, + 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, + 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, + 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, + 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, + 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, + 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, + 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, + 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, + 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, + 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, + 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, + 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, + 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, + 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, + 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, + 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, + 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, + 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, + 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, + 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, + 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, + 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, + 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, + 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, + 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, + 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, + 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, + 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, + .valid = true + }, + /* wycheproof - RFC 7748 */ + { + .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, + .valid = true + }, + /* wycheproof - RFC 7748 */ + { + .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, + 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, + 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, + 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, + .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, + 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, + 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, + 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, + .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, + 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, + 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, + 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, + 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, + 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, + 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, + .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, + 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, + 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, + 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, + .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, + 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, + 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, + 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, + .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, + 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, + 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, + 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, + .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, + 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, + 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, + 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, + .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, + 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, + 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, + 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, + .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, + 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, + 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, + 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, + .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, + 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, + 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, + 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, + .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, + 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, + 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, + 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, + .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, + 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, + 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, + 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, + .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, + 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, + 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, + 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, + .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, + 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, + 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, + 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, + .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, + 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, + 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, + 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, + 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, + 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, + 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, + 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, + 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, + 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, + .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, + 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, + 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, + 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, + 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, + 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, + 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, + .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, + 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, + 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, + 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, + 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, + 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, + 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, + .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, + 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, + 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, + 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, + 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, + 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, + 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, + .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, + 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, + 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, + 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, + 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, + 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, + 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, + .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, + 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, + 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, + 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, + .valid = true + }, + /* wycheproof - private key == -1 (mod order) */ + { + .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, + 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, + .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, + .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, + .valid = true + }, + /* wycheproof - private key == 1 (mod order) on twist */ + { + .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, + 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, + .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, + .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, + .valid = true + } +}; + +static bool __init curve25519_selftest(void) +{ + bool success = true, ret, ret2; + size_t i = 0, j; + u8 in[CURVE25519_KEY_SIZE]; + u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE]; + + for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { + memset(out, 0, CURVE25519_KEY_SIZE); + ret = curve25519(out, curve25519_test_vectors[i].private, + curve25519_test_vectors[i].public); + if (ret != curve25519_test_vectors[i].valid || + memcmp(out, curve25519_test_vectors[i].result, + CURVE25519_KEY_SIZE)) { + pr_err("curve25519 self-test %zu: FAIL\n", i + 1); + success = false; + } + } + + for (i = 0; i < 5; ++i) { + get_random_bytes(in, sizeof(in)); + ret = curve25519_generate_public(out, in); + ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); + if (ret != ret2 || memcmp(out, out2, CURVE25519_KEY_SIZE)) { + pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x", + i + 1); + for (j = CURVE25519_KEY_SIZE; j-- > 0;) + printk(KERN_CONT "%02x", in[j]); + printk(KERN_CONT "\n"); + success = false; + } + } + + return success; +} diff --git a/net/wireguard/crypto/zinc/selftest/poly1305.c b/net/wireguard/crypto/zinc/selftest/poly1305.c new file mode 100644 index 000000000000..b4d7a9c2f6ec --- /dev/null +++ b/net/wireguard/crypto/zinc/selftest/poly1305.c @@ -0,0 +1,1107 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +struct poly1305_testvec { + const u8 *input, *output, *key; + size_t ilen; +}; + +/* RFC7539 */ +static const u8 input01[] __initconst = { + 0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72, + 0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f, + 0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65, + 0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f, + 0x75, 0x70 +}; +static const u8 output01[] __initconst = { + 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6, + 0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9 +}; +static const u8 key01[] __initconst = { + 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, + 0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8, + 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd, + 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b +}; + +/* "The Poly1305-AES message-authentication code" */ +static const u8 input02[] __initconst = { + 0xf3, 0xf6 +}; +static const u8 output02[] __initconst = { + 0xf4, 0xc6, 0x33, 0xc3, 0x04, 0x4f, 0xc1, 0x45, + 0xf8, 0x4f, 0x33, 0x5c, 0xb8, 0x19, 0x53, 0xde +}; +static const u8 key02[] __initconst = { + 0x85, 0x1f, 0xc4, 0x0c, 0x34, 0x67, 0xac, 0x0b, + 0xe0, 0x5c, 0xc2, 0x04, 0x04, 0xf3, 0xf7, 0x00, + 0x58, 0x0b, 0x3b, 0x0f, 0x94, 0x47, 0xbb, 0x1e, + 0x69, 0xd0, 0x95, 0xb5, 0x92, 0x8b, 0x6d, 0xbc +}; + +static const u8 input03[] __initconst = { }; +static const u8 output03[] __initconst = { + 0xdd, 0x3f, 0xab, 0x22, 0x51, 0xf1, 0x1a, 0xc7, + 0x59, 0xf0, 0x88, 0x71, 0x29, 0xcc, 0x2e, 0xe7 +}; +static const u8 key03[] __initconst = { + 0xa0, 0xf3, 0x08, 0x00, 0x00, 0xf4, 0x64, 0x00, + 0xd0, 0xc7, 0xe9, 0x07, 0x6c, 0x83, 0x44, 0x03, + 0xdd, 0x3f, 0xab, 0x22, 0x51, 0xf1, 0x1a, 0xc7, + 0x59, 0xf0, 0x88, 0x71, 0x29, 0xcc, 0x2e, 0xe7 +}; + +static const u8 input04[] __initconst = { + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 +}; +static const u8 output04[] __initconst = { + 0x0e, 0xe1, 0xc1, 0x6b, 0xb7, 0x3f, 0x0f, 0x4f, + 0xd1, 0x98, 0x81, 0x75, 0x3c, 0x01, 0xcd, 0xbe +}; +static const u8 key04[] __initconst = { + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef +}; + +static const u8 input05[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9 +}; +static const u8 output05[] __initconst = { + 0x51, 0x54, 0xad, 0x0d, 0x2c, 0xb2, 0x6e, 0x01, + 0x27, 0x4f, 0xc5, 0x11, 0x48, 0x49, 0x1f, 0x1b +}; +static const u8 key05[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +/* self-generated vectors exercise "significant" lengths, such that they + * are handled by different code paths */ +static const u8 input06[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf +}; +static const u8 output06[] __initconst = { + 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37, + 0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66 +}; +static const u8 key06[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input07[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67 +}; +static const u8 output07[] __initconst = { + 0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2, + 0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61 +}; +static const u8 key07[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input08[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 +}; +static const u8 output08[] __initconst = { + 0xbb, 0xb6, 0x13, 0xb2, 0xb6, 0xd7, 0x53, 0xba, + 0x07, 0x39, 0x5b, 0x91, 0x6a, 0xae, 0xce, 0x15 +}; +static const u8 key08[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input09[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24 +}; +static const u8 output09[] __initconst = { + 0xc7, 0x94, 0xd7, 0x05, 0x7d, 0x17, 0x78, 0xc4, + 0xbb, 0xee, 0x0a, 0x39, 0xb3, 0xd9, 0x73, 0x42 +}; +static const u8 key09[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input10[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 +}; +static const u8 output10[] __initconst = { + 0xff, 0xbc, 0xb9, 0xb3, 0x71, 0x42, 0x31, 0x52, + 0xd7, 0xfc, 0xa5, 0xad, 0x04, 0x2f, 0xba, 0xa9 +}; +static const u8 key10[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input11[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36, + 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37, + 0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66 +}; +static const u8 output11[] __initconst = { + 0x06, 0x9e, 0xd6, 0xb8, 0xef, 0x0f, 0x20, 0x7b, + 0x3e, 0x24, 0x3b, 0xb1, 0x01, 0x9f, 0xe6, 0x32 +}; +static const u8 key11[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input12[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36, + 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37, + 0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66, + 0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2, + 0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61 +}; +static const u8 output12[] __initconst = { + 0xcc, 0xa3, 0x39, 0xd9, 0xa4, 0x5f, 0xa2, 0x36, + 0x8c, 0x2c, 0x68, 0xb3, 0xa4, 0x17, 0x91, 0x33 +}; +static const u8 key12[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input13[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36, + 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37, + 0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66, + 0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2, + 0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61, + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 +}; +static const u8 output13[] __initconst = { + 0x53, 0xf6, 0xe8, 0x28, 0xa2, 0xf0, 0xfe, 0x0e, + 0xe8, 0x15, 0xbf, 0x0b, 0xd5, 0x84, 0x1a, 0x34 +}; +static const u8 key13[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +static const u8 input14[] __initconst = { + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36, + 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37, + 0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66, + 0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2, + 0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61, + 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34, + 0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1, + 0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81, + 0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0, + 0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2, + 0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67, + 0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61, + 0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf, + 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09, + 0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08, + 0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88, + 0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef, + 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8, + 0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24, + 0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb, + 0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36, + 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37, + 0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66, + 0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2, + 0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61 +}; +static const u8 output14[] __initconst = { + 0xb8, 0x46, 0xd4, 0x4e, 0x9b, 0xbd, 0x53, 0xce, + 0xdf, 0xfb, 0xfb, 0xb6, 0xb7, 0xfa, 0x49, 0x33 +}; +static const u8 key14[] __initconst = { + 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c, + 0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07, + 0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1, + 0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 +}; + +/* 4th power of the key spills to 131th bit in SIMD key setup */ +static const u8 input15[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 output15[] __initconst = { + 0x07, 0x14, 0x5a, 0x4c, 0x02, 0xfe, 0x5f, 0xa3, + 0x20, 0x36, 0xde, 0x68, 0xfa, 0xbe, 0x90, 0x66 +}; +static const u8 key15[] __initconst = { + 0xad, 0x62, 0x81, 0x07, 0xe8, 0x35, 0x1d, 0x0f, + 0x2c, 0x23, 0x1a, 0x05, 0xdc, 0x4a, 0x41, 0x06, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* OpenSSL's poly1305_ieee754.c failed this in final stage */ +static const u8 input16[] __initconst = { + 0x84, 0x23, 0x64, 0xe1, 0x56, 0x33, 0x6c, 0x09, + 0x98, 0xb9, 0x33, 0xa6, 0x23, 0x77, 0x26, 0x18, + 0x0d, 0x9e, 0x3f, 0xdc, 0xbd, 0xe4, 0xcd, 0x5d, + 0x17, 0x08, 0x0f, 0xc3, 0xbe, 0xb4, 0x96, 0x14, + 0xd7, 0x12, 0x2c, 0x03, 0x74, 0x63, 0xff, 0x10, + 0x4d, 0x73, 0xf1, 0x9c, 0x12, 0x70, 0x46, 0x28, + 0xd4, 0x17, 0xc4, 0xc5, 0x4a, 0x3f, 0xe3, 0x0d, + 0x3c, 0x3d, 0x77, 0x14, 0x38, 0x2d, 0x43, 0xb0, + 0x38, 0x2a, 0x50, 0xa5, 0xde, 0xe5, 0x4b, 0xe8, + 0x44, 0xb0, 0x76, 0xe8, 0xdf, 0x88, 0x20, 0x1a, + 0x1c, 0xd4, 0x3b, 0x90, 0xeb, 0x21, 0x64, 0x3f, + 0xa9, 0x6f, 0x39, 0xb5, 0x18, 0xaa, 0x83, 0x40, + 0xc9, 0x42, 0xff, 0x3c, 0x31, 0xba, 0xf7, 0xc9, + 0xbd, 0xbf, 0x0f, 0x31, 0xae, 0x3f, 0xa0, 0x96, + 0xbf, 0x8c, 0x63, 0x03, 0x06, 0x09, 0x82, 0x9f, + 0xe7, 0x2e, 0x17, 0x98, 0x24, 0x89, 0x0b, 0xc8, + 0xe0, 0x8c, 0x31, 0x5c, 0x1c, 0xce, 0x2a, 0x83, + 0x14, 0x4d, 0xbb, 0xff, 0x09, 0xf7, 0x4e, 0x3e, + 0xfc, 0x77, 0x0b, 0x54, 0xd0, 0x98, 0x4a, 0x8f, + 0x19, 0xb1, 0x47, 0x19, 0xe6, 0x36, 0x35, 0x64, + 0x1d, 0x6b, 0x1e, 0xed, 0xf6, 0x3e, 0xfb, 0xf0, + 0x80, 0xe1, 0x78, 0x3d, 0x32, 0x44, 0x54, 0x12, + 0x11, 0x4c, 0x20, 0xde, 0x0b, 0x83, 0x7a, 0x0d, + 0xfa, 0x33, 0xd6, 0xb8, 0x28, 0x25, 0xff, 0xf4, + 0x4c, 0x9a, 0x70, 0xea, 0x54, 0xce, 0x47, 0xf0, + 0x7d, 0xf6, 0x98, 0xe6, 0xb0, 0x33, 0x23, 0xb5, + 0x30, 0x79, 0x36, 0x4a, 0x5f, 0xc3, 0xe9, 0xdd, + 0x03, 0x43, 0x92, 0xbd, 0xde, 0x86, 0xdc, 0xcd, + 0xda, 0x94, 0x32, 0x1c, 0x5e, 0x44, 0x06, 0x04, + 0x89, 0x33, 0x6c, 0xb6, 0x5b, 0xf3, 0x98, 0x9c, + 0x36, 0xf7, 0x28, 0x2c, 0x2f, 0x5d, 0x2b, 0x88, + 0x2c, 0x17, 0x1e, 0x74 +}; +static const u8 output16[] __initconst = { + 0xf2, 0x48, 0x31, 0x2e, 0x57, 0x8d, 0x9d, 0x58, + 0xf8, 0xb7, 0xbb, 0x4d, 0x19, 0x10, 0x54, 0x31 +}; +static const u8 key16[] __initconst = { + 0x95, 0xd5, 0xc0, 0x05, 0x50, 0x3e, 0x51, 0x0d, + 0x8c, 0xd0, 0xaa, 0x07, 0x2c, 0x4a, 0x4d, 0x06, + 0x6e, 0xab, 0xc5, 0x2d, 0x11, 0x65, 0x3d, 0xf4, + 0x7f, 0xbf, 0x63, 0xab, 0x19, 0x8b, 0xcc, 0x26 +}; + +/* AVX2 in OpenSSL's poly1305-x86.pl failed this with 176+32 split */ +static const u8 input17[] __initconst = { + 0x24, 0x8a, 0xc3, 0x10, 0x85, 0xb6, 0xc2, 0xad, + 0xaa, 0xa3, 0x82, 0x59, 0xa0, 0xd7, 0x19, 0x2c, + 0x5c, 0x35, 0xd1, 0xbb, 0x4e, 0xf3, 0x9a, 0xd9, + 0x4c, 0x38, 0xd1, 0xc8, 0x24, 0x79, 0xe2, 0xdd, + 0x21, 0x59, 0xa0, 0x77, 0x02, 0x4b, 0x05, 0x89, + 0xbc, 0x8a, 0x20, 0x10, 0x1b, 0x50, 0x6f, 0x0a, + 0x1a, 0xd0, 0xbb, 0xab, 0x76, 0xe8, 0x3a, 0x83, + 0xf1, 0xb9, 0x4b, 0xe6, 0xbe, 0xae, 0x74, 0xe8, + 0x74, 0xca, 0xb6, 0x92, 0xc5, 0x96, 0x3a, 0x75, + 0x43, 0x6b, 0x77, 0x61, 0x21, 0xec, 0x9f, 0x62, + 0x39, 0x9a, 0x3e, 0x66, 0xb2, 0xd2, 0x27, 0x07, + 0xda, 0xe8, 0x19, 0x33, 0xb6, 0x27, 0x7f, 0x3c, + 0x85, 0x16, 0xbc, 0xbe, 0x26, 0xdb, 0xbd, 0x86, + 0xf3, 0x73, 0x10, 0x3d, 0x7c, 0xf4, 0xca, 0xd1, + 0x88, 0x8c, 0x95, 0x21, 0x18, 0xfb, 0xfb, 0xd0, + 0xd7, 0xb4, 0xbe, 0xdc, 0x4a, 0xe4, 0x93, 0x6a, + 0xff, 0x91, 0x15, 0x7e, 0x7a, 0xa4, 0x7c, 0x54, + 0x44, 0x2e, 0xa7, 0x8d, 0x6a, 0xc2, 0x51, 0xd3, + 0x24, 0xa0, 0xfb, 0xe4, 0x9d, 0x89, 0xcc, 0x35, + 0x21, 0xb6, 0x6d, 0x16, 0xe9, 0xc6, 0x6a, 0x37, + 0x09, 0x89, 0x4e, 0x4e, 0xb0, 0xa4, 0xee, 0xdc, + 0x4a, 0xe1, 0x94, 0x68, 0xe6, 0x6b, 0x81, 0xf2, + 0x71, 0x35, 0x1b, 0x1d, 0x92, 0x1e, 0xa5, 0x51, + 0x04, 0x7a, 0xbc, 0xc6, 0xb8, 0x7a, 0x90, 0x1f, + 0xde, 0x7d, 0xb7, 0x9f, 0xa1, 0x81, 0x8c, 0x11, + 0x33, 0x6d, 0xbc, 0x07, 0x24, 0x4a, 0x40, 0xeb +}; +static const u8 output17[] __initconst = { + 0xbc, 0x93, 0x9b, 0xc5, 0x28, 0x14, 0x80, 0xfa, + 0x99, 0xc6, 0xd6, 0x8c, 0x25, 0x8e, 0xc4, 0x2f +}; +static const u8 key17[] __initconst = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* test vectors from Google */ +static const u8 input18[] __initconst = { }; +static const u8 output18[] __initconst = { + 0x47, 0x10, 0x13, 0x0e, 0x9f, 0x6f, 0xea, 0x8d, + 0x72, 0x29, 0x38, 0x50, 0xa6, 0x67, 0xd8, 0x6c +}; +static const u8 key18[] __initconst = { + 0xc8, 0xaf, 0xaa, 0xc3, 0x31, 0xee, 0x37, 0x2c, + 0xd6, 0x08, 0x2d, 0xe1, 0x34, 0x94, 0x3b, 0x17, + 0x47, 0x10, 0x13, 0x0e, 0x9f, 0x6f, 0xea, 0x8d, + 0x72, 0x29, 0x38, 0x50, 0xa6, 0x67, 0xd8, 0x6c +}; + +static const u8 input19[] __initconst = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, 0x21 +}; +static const u8 output19[] __initconst = { + 0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16, + 0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0 +}; +static const u8 key19[] __initconst = { + 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, + 0x33, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, + 0x6b, 0x65, 0x79, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x50, 0x6f, 0x6c, 0x79, 0x31, 0x33, 0x30, 0x35 +}; + +static const u8 input20[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 output20[] __initconst = { + 0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6, + 0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07 +}; +static const u8 key20[] __initconst = { + 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, + 0x33, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, + 0x6b, 0x65, 0x79, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x50, 0x6f, 0x6c, 0x79, 0x31, 0x33, 0x30, 0x35 +}; + +static const u8 input21[] __initconst = { + 0x89, 0xda, 0xb8, 0x0b, 0x77, 0x17, 0xc1, 0xdb, + 0x5d, 0xb4, 0x37, 0x86, 0x0a, 0x3f, 0x70, 0x21, + 0x8e, 0x93, 0xe1, 0xb8, 0xf4, 0x61, 0xfb, 0x67, + 0x7f, 0x16, 0xf3, 0x5f, 0x6f, 0x87, 0xe2, 0xa9, + 0x1c, 0x99, 0xbc, 0x3a, 0x47, 0xac, 0xe4, 0x76, + 0x40, 0xcc, 0x95, 0xc3, 0x45, 0xbe, 0x5e, 0xcc, + 0xa5, 0xa3, 0x52, 0x3c, 0x35, 0xcc, 0x01, 0x89, + 0x3a, 0xf0, 0xb6, 0x4a, 0x62, 0x03, 0x34, 0x27, + 0x03, 0x72, 0xec, 0x12, 0x48, 0x2d, 0x1b, 0x1e, + 0x36, 0x35, 0x61, 0x69, 0x8a, 0x57, 0x8b, 0x35, + 0x98, 0x03, 0x49, 0x5b, 0xb4, 0xe2, 0xef, 0x19, + 0x30, 0xb1, 0x7a, 0x51, 0x90, 0xb5, 0x80, 0xf1, + 0x41, 0x30, 0x0d, 0xf3, 0x0a, 0xdb, 0xec, 0xa2, + 0x8f, 0x64, 0x27, 0xa8, 0xbc, 0x1a, 0x99, 0x9f, + 0xd5, 0x1c, 0x55, 0x4a, 0x01, 0x7d, 0x09, 0x5d, + 0x8c, 0x3e, 0x31, 0x27, 0xda, 0xf9, 0xf5, 0x95 +}; +static const u8 output21[] __initconst = { + 0xc8, 0x5d, 0x15, 0xed, 0x44, 0xc3, 0x78, 0xd6, + 0xb0, 0x0e, 0x23, 0x06, 0x4c, 0x7b, 0xcd, 0x51 +}; +static const u8 key21[] __initconst = { + 0x2d, 0x77, 0x3b, 0xe3, 0x7a, 0xdb, 0x1e, 0x4d, + 0x68, 0x3b, 0xf0, 0x07, 0x5e, 0x79, 0xc4, 0xee, + 0x03, 0x79, 0x18, 0x53, 0x5a, 0x7f, 0x99, 0xcc, + 0xb7, 0x04, 0x0f, 0xb5, 0xf5, 0xf4, 0x3a, 0xea +}; + +static const u8 input22[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, + 0x17, 0x03, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00, + 0x06, 0xdb, 0x1f, 0x1f, 0x36, 0x8d, 0x69, 0x6a, + 0x81, 0x0a, 0x34, 0x9c, 0x0c, 0x71, 0x4c, 0x9a, + 0x5e, 0x78, 0x50, 0xc2, 0x40, 0x7d, 0x72, 0x1a, + 0xcd, 0xed, 0x95, 0xe0, 0x18, 0xd7, 0xa8, 0x52, + 0x66, 0xa6, 0xe1, 0x28, 0x9c, 0xdb, 0x4a, 0xeb, + 0x18, 0xda, 0x5a, 0xc8, 0xa2, 0xb0, 0x02, 0x6d, + 0x24, 0xa5, 0x9a, 0xd4, 0x85, 0x22, 0x7f, 0x3e, + 0xae, 0xdb, 0xb2, 0xe7, 0xe3, 0x5e, 0x1c, 0x66, + 0xcd, 0x60, 0xf9, 0xab, 0xf7, 0x16, 0xdc, 0xc9, + 0xac, 0x42, 0x68, 0x2d, 0xd7, 0xda, 0xb2, 0x87, + 0xa7, 0x02, 0x4c, 0x4e, 0xef, 0xc3, 0x21, 0xcc, + 0x05, 0x74, 0xe1, 0x67, 0x93, 0xe3, 0x7c, 0xec, + 0x03, 0xc5, 0xbd, 0xa4, 0x2b, 0x54, 0xc1, 0x14, + 0xa8, 0x0b, 0x57, 0xaf, 0x26, 0x41, 0x6c, 0x7b, + 0xe7, 0x42, 0x00, 0x5e, 0x20, 0x85, 0x5c, 0x73, + 0xe2, 0x1d, 0xc8, 0xe2, 0xed, 0xc9, 0xd4, 0x35, + 0xcb, 0x6f, 0x60, 0x59, 0x28, 0x00, 0x11, 0xc2, + 0x70, 0xb7, 0x15, 0x70, 0x05, 0x1c, 0x1c, 0x9b, + 0x30, 0x52, 0x12, 0x66, 0x20, 0xbc, 0x1e, 0x27, + 0x30, 0xfa, 0x06, 0x6c, 0x7a, 0x50, 0x9d, 0x53, + 0xc6, 0x0e, 0x5a, 0xe1, 0xb4, 0x0a, 0xa6, 0xe3, + 0x9e, 0x49, 0x66, 0x92, 0x28, 0xc9, 0x0e, 0xec, + 0xb4, 0xa5, 0x0d, 0xb3, 0x2a, 0x50, 0xbc, 0x49, + 0xe9, 0x0b, 0x4f, 0x4b, 0x35, 0x9a, 0x1d, 0xfd, + 0x11, 0x74, 0x9c, 0xd3, 0x86, 0x7f, 0xcf, 0x2f, + 0xb7, 0xbb, 0x6c, 0xd4, 0x73, 0x8f, 0x6a, 0x4a, + 0xd6, 0xf7, 0xca, 0x50, 0x58, 0xf7, 0x61, 0x88, + 0x45, 0xaf, 0x9f, 0x02, 0x0f, 0x6c, 0x3b, 0x96, + 0x7b, 0x8f, 0x4c, 0xd4, 0xa9, 0x1e, 0x28, 0x13, + 0xb5, 0x07, 0xae, 0x66, 0xf2, 0xd3, 0x5c, 0x18, + 0x28, 0x4f, 0x72, 0x92, 0x18, 0x60, 0x62, 0xe1, + 0x0f, 0xd5, 0x51, 0x0d, 0x18, 0x77, 0x53, 0x51, + 0xef, 0x33, 0x4e, 0x76, 0x34, 0xab, 0x47, 0x43, + 0xf5, 0xb6, 0x8f, 0x49, 0xad, 0xca, 0xb3, 0x84, + 0xd3, 0xfd, 0x75, 0xf7, 0x39, 0x0f, 0x40, 0x06, + 0xef, 0x2a, 0x29, 0x5c, 0x8c, 0x7a, 0x07, 0x6a, + 0xd5, 0x45, 0x46, 0xcd, 0x25, 0xd2, 0x10, 0x7f, + 0xbe, 0x14, 0x36, 0xc8, 0x40, 0x92, 0x4a, 0xae, + 0xbe, 0x5b, 0x37, 0x08, 0x93, 0xcd, 0x63, 0xd1, + 0x32, 0x5b, 0x86, 0x16, 0xfc, 0x48, 0x10, 0x88, + 0x6b, 0xc1, 0x52, 0xc5, 0x32, 0x21, 0xb6, 0xdf, + 0x37, 0x31, 0x19, 0x39, 0x32, 0x55, 0xee, 0x72, + 0xbc, 0xaa, 0x88, 0x01, 0x74, 0xf1, 0x71, 0x7f, + 0x91, 0x84, 0xfa, 0x91, 0x64, 0x6f, 0x17, 0xa2, + 0x4a, 0xc5, 0x5d, 0x16, 0xbf, 0xdd, 0xca, 0x95, + 0x81, 0xa9, 0x2e, 0xda, 0x47, 0x92, 0x01, 0xf0, + 0xed, 0xbf, 0x63, 0x36, 0x00, 0xd6, 0x06, 0x6d, + 0x1a, 0xb3, 0x6d, 0x5d, 0x24, 0x15, 0xd7, 0x13, + 0x51, 0xbb, 0xcd, 0x60, 0x8a, 0x25, 0x10, 0x8d, + 0x25, 0x64, 0x19, 0x92, 0xc1, 0xf2, 0x6c, 0x53, + 0x1c, 0xf9, 0xf9, 0x02, 0x03, 0xbc, 0x4c, 0xc1, + 0x9f, 0x59, 0x27, 0xd8, 0x34, 0xb0, 0xa4, 0x71, + 0x16, 0xd3, 0x88, 0x4b, 0xbb, 0x16, 0x4b, 0x8e, + 0xc8, 0x83, 0xd1, 0xac, 0x83, 0x2e, 0x56, 0xb3, + 0x91, 0x8a, 0x98, 0x60, 0x1a, 0x08, 0xd1, 0x71, + 0x88, 0x15, 0x41, 0xd5, 0x94, 0xdb, 0x39, 0x9c, + 0x6a, 0xe6, 0x15, 0x12, 0x21, 0x74, 0x5a, 0xec, + 0x81, 0x4c, 0x45, 0xb0, 0xb0, 0x5b, 0x56, 0x54, + 0x36, 0xfd, 0x6f, 0x13, 0x7a, 0xa1, 0x0a, 0x0c, + 0x0b, 0x64, 0x37, 0x61, 0xdb, 0xd6, 0xf9, 0xa9, + 0xdc, 0xb9, 0x9b, 0x1a, 0x6e, 0x69, 0x08, 0x54, + 0xce, 0x07, 0x69, 0xcd, 0xe3, 0x97, 0x61, 0xd8, + 0x2f, 0xcd, 0xec, 0x15, 0xf0, 0xd9, 0x2d, 0x7d, + 0x8e, 0x94, 0xad, 0xe8, 0xeb, 0x83, 0xfb, 0xe0 +}; +static const u8 output22[] __initconst = { + 0x26, 0x37, 0x40, 0x8f, 0xe1, 0x30, 0x86, 0xea, + 0x73, 0xf9, 0x71, 0xe3, 0x42, 0x5e, 0x28, 0x20 +}; +static const u8 key22[] __initconst = { + 0x99, 0xe5, 0x82, 0x2d, 0xd4, 0x17, 0x3c, 0x99, + 0x5e, 0x3d, 0xae, 0x0d, 0xde, 0xfb, 0x97, 0x74, + 0x3f, 0xde, 0x3b, 0x08, 0x01, 0x34, 0xb3, 0x9f, + 0x76, 0xe9, 0xbf, 0x8d, 0x0e, 0x88, 0xd5, 0x46 +}; + +/* test vectors from Hanno Böck */ +static const u8 input23[] __initconst = { + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0x80, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xc5, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xe3, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xac, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xe6, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x00, 0x00, 0x00, + 0xaf, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xff, 0xff, 0xff, 0xf5, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0xff, 0xff, 0xe7, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x71, 0x92, 0x05, 0xa8, 0x52, 0x1d, + 0xfc +}; +static const u8 output23[] __initconst = { + 0x85, 0x59, 0xb8, 0x76, 0xec, 0xee, 0xd6, 0x6e, + 0xb3, 0x77, 0x98, 0xc0, 0x45, 0x7b, 0xaf, 0xf9 +}; +static const u8 key23[] __initconst = { + 0x7f, 0x1b, 0x02, 0x64, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc +}; + +static const u8 input24[] __initconst = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x02, 0x64 +}; +static const u8 output24[] __initconst = { + 0x00, 0xbd, 0x12, 0x58, 0x97, 0x8e, 0x20, 0x54, + 0x44, 0xc9, 0xaa, 0xaa, 0x82, 0x00, 0x6f, 0xed +}; +static const u8 key24[] __initconst = { + 0xe0, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa +}; + +static const u8 input25[] __initconst = { + 0x02, 0xfc +}; +static const u8 output25[] __initconst = { + 0x06, 0x12, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c +}; +static const u8 key25[] __initconst = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c +}; + +static const u8 input26[] __initconst = { + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7a, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x5c, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x6e, 0x7b, 0x00, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7a, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x5c, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, + 0x7b, 0x6e, 0x7b, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x20, 0x00, 0xef, 0xff, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x64, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, + 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0xef, 0xff, 0x00, 0x09, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x7a, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x09, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc +}; +static const u8 output26[] __initconst = { + 0x33, 0x20, 0x5b, 0xbf, 0x9e, 0x9f, 0x8f, 0x72, + 0x12, 0xab, 0x9e, 0x2a, 0xb9, 0xb7, 0xe4, 0xa5 +}; +static const u8 key26[] __initconst = { + 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7b, 0x7b +}; + +static const u8 input27[] __initconst = { + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xe9, + 0xe9, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, + 0xac, 0xac, 0xac, 0xac, 0x00, 0x00, 0xac, 0xac, + 0xec, 0x01, 0x00, 0xac, 0xac, 0xac, 0x2c, 0xac, + 0xa2, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, + 0xac, 0xac, 0xac, 0xac, 0x64, 0xf2 +}; +static const u8 output27[] __initconst = { + 0x02, 0xee, 0x7c, 0x8c, 0x54, 0x6d, 0xde, 0xb1, + 0xa4, 0x67, 0xe4, 0xc3, 0x98, 0x11, 0x58, 0xb9 +}; +static const u8 key27[] __initconst = { + 0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x7f, + 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xcf, 0x77, 0x77, 0x77, 0x77, 0x77, + 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77 +}; + +/* nacl */ +static const u8 input28[] __initconst = { + 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, + 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, + 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, + 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, + 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, + 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72, + 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, + 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, + 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, + 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, + 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, + 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, + 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, + 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, + 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, + 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, + 0xe3, 0x55, 0xa5 +}; +static const u8 output28[] __initconst = { + 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, + 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 +}; +static const u8 key28[] __initconst = { + 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, + 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, + 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, + 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80 +}; + +/* wrap 2^130-5 */ +static const u8 input29[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 output29[] __initconst = { + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 key29[] __initconst = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* wrap 2^128 */ +static const u8 input30[] __initconst = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 output30[] __initconst = { + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 key30[] __initconst = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +/* limb carry */ +static const u8 input31[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 output31[] __initconst = { + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 key31[] __initconst = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* 2^130-5 */ +static const u8 input32[] __initconst = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xfb, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 +}; +static const u8 output32[] __initconst = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 key32[] __initconst = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* 2^130-6 */ +static const u8 input33[] __initconst = { + 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 output33[] __initconst = { + 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; +static const u8 key33[] __initconst = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* 5*H+L reduction intermediate */ +static const u8 input34[] __initconst = { + 0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 output34[] __initconst = { + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 key34[] __initconst = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* 5*H+L reduction final */ +static const u8 input35[] __initconst = { + 0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 output35[] __initconst = { + 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +static const u8 key35[] __initconst = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const struct poly1305_testvec poly1305_testvecs[] __initconst = { + { input01, output01, key01, sizeof(input01) }, + { input02, output02, key02, sizeof(input02) }, + { input03, output03, key03, sizeof(input03) }, + { input04, output04, key04, sizeof(input04) }, + { input05, output05, key05, sizeof(input05) }, + { input06, output06, key06, sizeof(input06) }, + { input07, output07, key07, sizeof(input07) }, + { input08, output08, key08, sizeof(input08) }, + { input09, output09, key09, sizeof(input09) }, + { input10, output10, key10, sizeof(input10) }, + { input11, output11, key11, sizeof(input11) }, + { input12, output12, key12, sizeof(input12) }, + { input13, output13, key13, sizeof(input13) }, + { input14, output14, key14, sizeof(input14) }, + { input15, output15, key15, sizeof(input15) }, + { input16, output16, key16, sizeof(input16) }, + { input17, output17, key17, sizeof(input17) }, + { input18, output18, key18, sizeof(input18) }, + { input19, output19, key19, sizeof(input19) }, + { input20, output20, key20, sizeof(input20) }, + { input21, output21, key21, sizeof(input21) }, + { input22, output22, key22, sizeof(input22) }, + { input23, output23, key23, sizeof(input23) }, + { input24, output24, key24, sizeof(input24) }, + { input25, output25, key25, sizeof(input25) }, + { input26, output26, key26, sizeof(input26) }, + { input27, output27, key27, sizeof(input27) }, + { input28, output28, key28, sizeof(input28) }, + { input29, output29, key29, sizeof(input29) }, + { input30, output30, key30, sizeof(input30) }, + { input31, output31, key31, sizeof(input31) }, + { input32, output32, key32, sizeof(input32) }, + { input33, output33, key33, sizeof(input33) }, + { input34, output34, key34, sizeof(input34) }, + { input35, output35, key35, sizeof(input35) } +}; + +static bool __init poly1305_selftest(void) +{ + simd_context_t simd_context; + bool success = true; + size_t i, j; + + simd_get(&simd_context); + for (i = 0; i < ARRAY_SIZE(poly1305_testvecs); ++i) { + struct poly1305_ctx poly1305; + u8 out[POLY1305_MAC_SIZE]; + + memset(out, 0, sizeof(out)); + memset(&poly1305, 0, sizeof(poly1305)); + poly1305_init(&poly1305, poly1305_testvecs[i].key); + poly1305_update(&poly1305, poly1305_testvecs[i].input, + poly1305_testvecs[i].ilen, &simd_context); + poly1305_final(&poly1305, out, &simd_context); + if (memcmp(out, poly1305_testvecs[i].output, + POLY1305_MAC_SIZE)) { + pr_err("poly1305 self-test %zu: FAIL\n", i + 1); + success = false; + } + simd_relax(&simd_context); + + if (poly1305_testvecs[i].ilen <= 1) + continue; + + for (j = 1; j < poly1305_testvecs[i].ilen - 1; ++j) { + memset(out, 0, sizeof(out)); + memset(&poly1305, 0, sizeof(poly1305)); + poly1305_init(&poly1305, poly1305_testvecs[i].key); + poly1305_update(&poly1305, poly1305_testvecs[i].input, + j, &simd_context); + poly1305_update(&poly1305, + poly1305_testvecs[i].input + j, + poly1305_testvecs[i].ilen - j, + &simd_context); + poly1305_final(&poly1305, out, &simd_context); + if (memcmp(out, poly1305_testvecs[i].output, + POLY1305_MAC_SIZE)) { + pr_err("poly1305 self-test %zu (split %zu): FAIL\n", + i + 1, j); + success = false; + } + + memset(out, 0, sizeof(out)); + memset(&poly1305, 0, sizeof(poly1305)); + poly1305_init(&poly1305, poly1305_testvecs[i].key); + poly1305_update(&poly1305, poly1305_testvecs[i].input, + j, &simd_context); + poly1305_update(&poly1305, + poly1305_testvecs[i].input + j, + poly1305_testvecs[i].ilen - j, + DONT_USE_SIMD); + poly1305_final(&poly1305, out, &simd_context); + if (memcmp(out, poly1305_testvecs[i].output, + POLY1305_MAC_SIZE)) { + pr_err("poly1305 self-test %zu (split %zu, mixed simd): FAIL\n", + i + 1, j); + success = false; + } + simd_relax(&simd_context); + } + } + simd_put(&simd_context); + + return success; +} diff --git a/net/wireguard/crypto/zinc/selftest/run.h b/net/wireguard/crypto/zinc/selftest/run.h new file mode 100644 index 000000000000..4ffaf6089eea --- /dev/null +++ b/net/wireguard/crypto/zinc/selftest/run.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _ZINC_SELFTEST_RUN_H +#define _ZINC_SELFTEST_RUN_H + +#include +#include +#include + +static inline bool selftest_run(const char *name, bool (*selftest)(void), + bool *const nobs[], unsigned int nobs_len) +{ + unsigned long set = 0, subset = 0, largest_subset = 0; + unsigned int i; + + BUILD_BUG_ON(!__builtin_constant_p(nobs_len) || + nobs_len >= BITS_PER_LONG); + + if (!IS_ENABLED(CONFIG_ZINC_SELFTEST)) + return true; + + for (i = 0; i < nobs_len; ++i) + set |= ((unsigned long)*nobs[i]) << i; + + do { + for (i = 0; i < nobs_len; ++i) + *nobs[i] = BIT(i) & subset; + if (selftest()) + largest_subset = max(subset, largest_subset); + else + pr_err("%s self-test combination 0x%lx: FAIL\n", name, + subset); + subset = (subset - set) & set; + } while (subset); + + for (i = 0; i < nobs_len; ++i) + *nobs[i] = BIT(i) & largest_subset; + + if (largest_subset == set) + pr_info("%s self-tests: pass\n", name); + + return !WARN_ON(largest_subset != set); +} + +#endif diff --git a/net/wireguard/device.c b/net/wireguard/device.c new file mode 100644 index 000000000000..062490f1b8a7 --- /dev/null +++ b/net/wireguard/device.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" +#include "socket.h" +#include "timers.h" +#include "device.h" +#include "ratelimiter.h" +#include "peer.h" +#include "messages.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(device_list); + +static int wg_open(struct net_device *dev) +{ + struct in_device *dev_v4 = __in_dev_get_rtnl(dev); +#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET + struct inet6_dev *dev_v6 = __in6_dev_get(dev); +#endif + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + int ret; + + if (dev_v4) { + /* At some point we might put this check near the ip_rt_send_ + * redirect call of ip_forward in net/ipv4/ip_forward.c, similar + * to the current secpath check. + */ + IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); + IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false; + } +#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET + if (dev_v6) +#ifndef COMPAT_CANNOT_USE_DEV_CNF + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; +#else + dev_v6->addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; +#endif +#endif + + mutex_lock(&wg->device_update_lock); + ret = wg_socket_init(wg, wg->incoming_port); + if (ret < 0) + goto out; + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_send_staged_packets(peer); + if (peer->persistent_keepalive_interval) + wg_packet_send_keepalive(peer); + } +out: + mutex_unlock(&wg->device_update_lock); + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int wg_pm_notification(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct wg_device *wg; + struct wg_peer *peer; + + /* If the machine is constantly suspending and resuming, as part of + * its normal operation rather than as a somewhat rare event, then we + * don't actually want to clear keys. + */ + if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID)) + return 0; + + if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE) + return 0; + + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + del_timer(&peer->timer_zero_key_material); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + } + mutex_unlock(&wg->device_update_lock); + } + rtnl_unlock(); + rcu_barrier(); + return 0; +} + +static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; +#endif + +static int wg_stop(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + struct sk_buff *skb; + + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_purge_staged_packets(peer); + wg_timers_stop(peer); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + } + mutex_unlock(&wg->device_update_lock); + while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL) + kfree_skb(skb); + atomic_set(&wg->handshake_queue_len, 0); + wg_socket_reinit(wg, NULL, NULL); + return 0; +} + +static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct sk_buff_head packets; + struct wg_peer *peer; + struct sk_buff *next; + sa_family_t family; + u32 mtu; + int ret; + + if (unlikely(!wg_check_packet_protocol(skb))) { + ret = -EPROTONOSUPPORT; + net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); + goto err; + } + + peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb); + if (unlikely(!peer)) { + ret = -ENOKEY; + if (skb->protocol == htons(ETH_P_IP)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n", + dev->name, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", + dev->name, &ipv6_hdr(skb)->daddr); + goto err_icmp; + } + + family = READ_ONCE(peer->endpoint.addr.sa_family); + if (unlikely(family != AF_INET && family != AF_INET6)) { + ret = -EDESTADDRREQ; + net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", + dev->name, peer->internal_id); + goto err_peer; + } + + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + __skb_queue_head_init(&packets); + if (!skb_is_gso(skb)) { + skb_mark_not_on_list(skb); + } else { + struct sk_buff *segs = skb_gso_segment(skb, 0); + + if (IS_ERR(segs)) { + ret = PTR_ERR(segs); + goto err_peer; + } + dev_kfree_skb(skb); + skb = segs; + } + + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + continue; + + /* We only need to keep the original dst around for icmp, + * so at this point we're in a position to drop it. + */ + skb_dst_drop(skb); + + PACKET_CB(skb)->mtu = mtu; + + __skb_queue_tail(&packets, skb); + } + + spin_lock_bh(&peer->staged_packet_queue.lock); + /* If the queue is getting too big, we start removing the oldest packets + * until it's small again. We do this before adding the new packet, so + * we don't remove GSO segments that are in excess. + */ + while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { + dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); + ++dev->stats.tx_dropped; + } + skb_queue_splice_tail(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + wg_packet_send_staged_packets(peer); + + wg_peer_put(peer); + return NETDEV_TX_OK; + +err_peer: + wg_peer_put(peer); +err_icmp: + if (skb->protocol == htons(ETH_P_IP)) + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); +err: + ++dev->stats.tx_errors; + kfree_skb(skb); + return ret; +} + +static const struct net_device_ops netdev_ops = { + .ndo_open = wg_open, + .ndo_stop = wg_stop, + .ndo_start_xmit = wg_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64 +}; + +static void wg_destruct(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + + rtnl_lock(); + list_del(&wg->device_list); + rtnl_unlock(); + mutex_lock(&wg->device_update_lock); + rcu_assign_pointer(wg->creating_net, NULL); + wg->incoming_port = 0; + wg_socket_reinit(wg, NULL, NULL); + /* The final references are cleared in the below calls to destroy_workqueue. */ + wg_peer_remove_all(wg); + destroy_workqueue(wg->handshake_receive_wq); + destroy_workqueue(wg->handshake_send_wq); + destroy_workqueue(wg->packet_crypt_wq); + wg_packet_queue_free(&wg->handshake_queue, true); + wg_packet_queue_free(&wg->decrypt_queue, false); + wg_packet_queue_free(&wg->encrypt_queue, false); + rcu_barrier(); /* Wait for all the peers to be actually freed. */ + wg_ratelimiter_uninit(); + memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); + free_percpu(dev->tstats); + kvfree(wg->index_hashtable); + kvfree(wg->peer_hashtable); + mutex_unlock(&wg->device_update_lock); + + pr_debug("%s: Interface destroyed\n", dev->name); + free_netdev(dev); +} + +static const struct device_type device_type = { .name = KBUILD_MODNAME }; + +static void wg_setup(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + + dev->netdev_ops = &netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->needed_headroom = DATA_PACKET_HEAD_ROOM; + dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE); + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; +#ifndef COMPAT_CANNOT_USE_IFF_NO_QUEUE + dev->priv_flags |= IFF_NO_QUEUE; +#else + dev->tx_queue_len = 0; +#endif + dev->features |= NETIF_F_LLTX; + dev->features |= WG_NETDEV_FEATURES; + dev->hw_features |= WG_NETDEV_FEATURES; + dev->hw_enc_features |= WG_NETDEV_FEATURES; + dev->mtu = ETH_DATA_LEN - overhead; +#ifndef COMPAT_CANNOT_USE_MAX_MTU + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; +#endif + + SET_NETDEV_DEVTYPE(dev, &device_type); + + /* We need to keep the dst around in case of icmp replies. */ + netif_keep_dst(dev); + + memset(wg, 0, sizeof(*wg)); + wg->dev = dev; +} + +static int wg_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct wg_device *wg = netdev_priv(dev); + int ret = -ENOMEM; + + rcu_assign_pointer(wg->creating_net, src_net); + init_rwsem(&wg->static_identity.lock); + mutex_init(&wg->socket_update_lock); + mutex_init(&wg->device_update_lock); + wg_allowedips_init(&wg->peer_allowedips); + wg_cookie_checker_init(&wg->cookie_checker, wg); + INIT_LIST_HEAD(&wg->peer_list); + wg->device_update_gen = 1; + + wg->peer_hashtable = wg_pubkey_hashtable_alloc(); + if (!wg->peer_hashtable) + return ret; + + wg->index_hashtable = wg_index_hashtable_alloc(); + if (!wg->index_hashtable) + goto err_free_peer_hashtable; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + goto err_free_index_hashtable; + + wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", + WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_receive_wq) + goto err_free_tstats; + + wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", + WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_send_wq) + goto err_destroy_handshake_receive; + + wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); + if (!wg->packet_crypt_wq) + goto err_destroy_handshake_send; + + ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, + MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_destroy_packet_crypt; + + ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, + MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_free_encrypt_queue; + + ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker, + MAX_QUEUED_INCOMING_HANDSHAKES); + if (ret < 0) + goto err_free_decrypt_queue; + + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_handshake_queue; + + ret = register_netdevice(dev); + if (ret < 0) + goto err_uninit_ratelimiter; + + list_add(&wg->device_list, &device_list); + + /* We wait until the end to assign priv_destructor, so that + * register_netdevice doesn't call it for us if it fails. + */ + dev->priv_destructor = wg_destruct; + + pr_debug("%s: Interface created\n", dev->name); + return ret; + +err_uninit_ratelimiter: + wg_ratelimiter_uninit(); +err_free_handshake_queue: + wg_packet_queue_free(&wg->handshake_queue, false); +err_free_decrypt_queue: + wg_packet_queue_free(&wg->decrypt_queue, false); +err_free_encrypt_queue: + wg_packet_queue_free(&wg->encrypt_queue, false); +err_destroy_packet_crypt: + destroy_workqueue(wg->packet_crypt_wq); +err_destroy_handshake_send: + destroy_workqueue(wg->handshake_send_wq); +err_destroy_handshake_receive: + destroy_workqueue(wg->handshake_receive_wq); +err_free_tstats: + free_percpu(dev->tstats); +err_free_index_hashtable: + kvfree(wg->index_hashtable); +err_free_peer_hashtable: + kvfree(wg->peer_hashtable); + return ret; +} + +static struct rtnl_link_ops link_ops __read_mostly = { + .kind = KBUILD_MODNAME, + .priv_size = sizeof(struct wg_device), + .setup = wg_setup, + .newlink = wg_newlink, +}; + +static void wg_netns_pre_exit(struct net *net) +{ + struct wg_device *wg; + struct wg_peer *peer; + + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + if (rcu_access_pointer(wg->creating_net) == net) { + pr_debug("%s: Creating namespace exiting\n", wg->dev->name); + netif_carrier_off(wg->dev); + mutex_lock(&wg->device_update_lock); + rcu_assign_pointer(wg->creating_net, NULL); + wg_socket_reinit(wg, NULL, NULL); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + mutex_unlock(&wg->device_update_lock); + } + } + rtnl_unlock(); +} + +static struct pernet_operations pernet_ops = { + .pre_exit = wg_netns_pre_exit +}; + +int __init wg_device_init(void) +{ + int ret; + +#ifdef CONFIG_PM_SLEEP + ret = register_pm_notifier(&pm_notifier); + if (ret) + return ret; +#endif + + ret = register_pernet_device(&pernet_ops); + if (ret) + goto error_pm; + + ret = rtnl_link_register(&link_ops); + if (ret) + goto error_pernet; + + return 0; + +error_pernet: + unregister_pernet_device(&pernet_ops); +error_pm: +#ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +#endif + return ret; +} + +void wg_device_uninit(void) +{ + rtnl_link_unregister(&link_ops); + unregister_pernet_device(&pernet_ops); +#ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +#endif + rcu_barrier(); +} diff --git a/net/wireguard/device.h b/net/wireguard/device.h new file mode 100644 index 000000000000..43c7cebbf50b --- /dev/null +++ b/net/wireguard/device.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_DEVICE_H +#define _WG_DEVICE_H + +#include "noise.h" +#include "allowedips.h" +#include "peerlookup.h" +#include "cookie.h" + +#include +#include +#include +#include +#include +#include + +struct wg_device; + +struct multicore_worker { + void *ptr; + struct work_struct work; +}; + +struct crypt_queue { + struct ptr_ring ring; + struct multicore_worker __percpu *worker; + int last_cpu; +}; + +struct prev_queue { + struct sk_buff *head, *tail, *peeked; + struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff. + atomic_t count; +}; + +struct wg_device { + struct net_device *dev; + struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue; + struct sock __rcu *sock4, *sock6; + struct net __rcu *creating_net; + struct noise_static_identity static_identity; + struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq; + struct cookie_checker cookie_checker; + struct pubkey_hashtable *peer_hashtable; + struct index_hashtable *index_hashtable; + struct allowedips peer_allowedips; + struct mutex device_update_lock, socket_update_lock; + struct list_head device_list, peer_list; + atomic_t handshake_queue_len; + unsigned int num_peers, device_update_gen; + u32 fwmark; + u16 incoming_port; +}; + +int wg_device_init(void); +void wg_device_uninit(void); + +#endif /* _WG_DEVICE_H */ diff --git a/net/wireguard/dkms.conf b/net/wireguard/dkms.conf new file mode 100644 index 000000000000..947bc0ce0c9b --- /dev/null +++ b/net/wireguard/dkms.conf @@ -0,0 +1,9 @@ +PACKAGE_NAME="wireguard" +PACKAGE_VERSION="1.0.20220627" +AUTOINSTALL=yes + +BUILT_MODULE_NAME="wireguard" +DEST_MODULE_LOCATION="/kernel/net" + +# requires kernel 3.10 - 5.5, inclusive: +BUILD_EXCLUSIVE_KERNEL="^((5\.[0-5]($|[.-]))|(4\.)|(3\.1[0-9]))" diff --git a/net/wireguard/main.c b/net/wireguard/main.c new file mode 100644 index 000000000000..d5ce491e822e --- /dev/null +++ b/net/wireguard/main.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "version.h" +#include "device.h" +#include "noise.h" +#include "queueing.h" +#include "ratelimiter.h" +#include "netlink.h" +#include "uapi/wireguard.h" +#include "crypto/zinc.h" + +#include +#include +#include +#include + +static int __init wg_mod_init(void) +{ + int ret; + + if ((ret = chacha20_mod_init()) || (ret = poly1305_mod_init()) || + (ret = chacha20poly1305_mod_init()) || (ret = blake2s_mod_init()) || + (ret = curve25519_mod_init())) + return ret; + + ret = wg_allowedips_slab_init(); + if (ret < 0) + goto err_allowedips; + +#ifdef DEBUG + ret = -ENOTRECOVERABLE; + if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || + !wg_ratelimiter_selftest()) + goto err_peer; +#endif + wg_noise_init(); + + ret = wg_peer_init(); + if (ret < 0) + goto err_peer; + + ret = wg_device_init(); + if (ret < 0) + goto err_device; + + ret = wg_genetlink_init(); + if (ret < 0) + goto err_netlink; + + pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n"); + pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved.\n"); + + return 0; + +err_netlink: + wg_device_uninit(); +err_device: + wg_peer_uninit(); +err_peer: + wg_allowedips_slab_uninit(); +err_allowedips: + return ret; +} + +static void __exit wg_mod_exit(void) +{ + wg_genetlink_uninit(); + wg_device_uninit(); + wg_peer_uninit(); + wg_allowedips_slab_uninit(); +} + +module_init(wg_mod_init); +module_exit(wg_mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("WireGuard secure network tunnel"); +MODULE_AUTHOR("Jason A. Donenfeld "); +MODULE_VERSION(WIREGUARD_VERSION); +MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME); +MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME); +MODULE_INFO(intree, "Y"); diff --git a/net/wireguard/messages.h b/net/wireguard/messages.h new file mode 100644 index 000000000000..1d1ed18f11f8 --- /dev/null +++ b/net/wireguard/messages.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_MESSAGES_H +#define _WG_MESSAGES_H + +#include +#include +#include + +#include +#include +#include + +enum noise_lengths { + NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE, + NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE, + NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32), + NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE, + NOISE_HASH_LEN = BLAKE2S_HASH_SIZE +}; + +#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN) + +enum cookie_values { + COOKIE_SECRET_MAX_AGE = 2 * 60, + COOKIE_SECRET_LATENCY = 5, + COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE, + COOKIE_LEN = 16 +}; + +enum counter_values { + COUNTER_BITS_TOTAL = 8192, + COUNTER_REDUNDANT_BITS = BITS_PER_LONG, + COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS +}; + +enum limits { + REKEY_AFTER_MESSAGES = 1ULL << 60, + REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1, + REKEY_TIMEOUT = 5, + REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3, + REKEY_AFTER_TIME = 120, + REJECT_AFTER_TIME = 180, + INITIATIONS_PER_SECOND = 50, + MAX_PEERS_PER_DEVICE = 1U << 20, + KEEPALIVE_TIMEOUT = 10, + MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT, + MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */ + MAX_STAGED_PACKETS = 128, + MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */ +}; + +enum message_type { + MESSAGE_INVALID = 0, + MESSAGE_HANDSHAKE_INITIATION = 1, + MESSAGE_HANDSHAKE_RESPONSE = 2, + MESSAGE_HANDSHAKE_COOKIE = 3, + MESSAGE_DATA = 4 +}; + +struct message_header { + /* The actual layout of this that we want is: + * u8 type + * u8 reserved_zero[3] + * + * But it turns out that by encoding this as little endian, + * we achieve the same thing, and it makes checking faster. + */ + __le32 type; +}; + +struct message_macs { + u8 mac1[COOKIE_LEN]; + u8 mac2[COOKIE_LEN]; +}; + +struct message_handshake_initiation { + struct message_header header; + __le32 sender_index; + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)]; + u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)]; + struct message_macs macs; +}; + +struct message_handshake_response { + struct message_header header; + __le32 sender_index; + __le32 receiver_index; + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 encrypted_nothing[noise_encrypted_len(0)]; + struct message_macs macs; +}; + +struct message_handshake_cookie { + struct message_header header; + __le32 receiver_index; + u8 nonce[COOKIE_NONCE_LEN]; + u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)]; +}; + +struct message_data { + struct message_header header; + __le32 key_idx; + __le64 counter; + u8 encrypted_data[]; +}; + +#define message_data_len(plain_len) \ + (noise_encrypted_len(plain_len) + sizeof(struct message_data)) + +enum message_alignments { + MESSAGE_PADDING_MULTIPLE = 16, + MESSAGE_MINIMUM_LENGTH = message_data_len(0) +}; + +#define SKB_HEADER_LEN \ + (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \ + sizeof(struct udphdr) + NET_SKB_PAD) +#define DATA_PACKET_HEAD_ROOM \ + ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4) + +enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ }; + +#endif /* _WG_MESSAGES_H */ diff --git a/net/wireguard/netlink.c b/net/wireguard/netlink.c new file mode 100644 index 000000000000..ef239ab1e2d6 --- /dev/null +++ b/net/wireguard/netlink.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "netlink.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" +#include "uapi/wireguard.h" +#include +#include +#include +#include + +static struct genl_family genl_family; + +static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { + [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, + [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [WGDEVICE_A_PRIVATE_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), + [WGDEVICE_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), + [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, + [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, + [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, + [WGDEVICE_A_PEERS] = { .type = NLA_NESTED } +}; + +static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { + [WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), + [WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN), + [WGPEER_A_FLAGS] = { .type = NLA_U32 }, + [WGPEER_A_ENDPOINT] = NLA_POLICY_MIN_LEN(sizeof(struct sockaddr)), + [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, + [WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)), + [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, + [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, + [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, + [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 } +}; + +static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { + [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, + [WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)), + [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } +}; + +static struct wg_device *lookup_interface(struct nlattr **attrs, + struct sk_buff *skb) +{ + struct net_device *dev = NULL; + + if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME]) + return ERR_PTR(-EBADR); + if (attrs[WGDEVICE_A_IFINDEX]) + dev = dev_get_by_index(sock_net(skb->sk), + nla_get_u32(attrs[WGDEVICE_A_IFINDEX])); + else if (attrs[WGDEVICE_A_IFNAME]) + dev = dev_get_by_name(sock_net(skb->sk), + nla_data(attrs[WGDEVICE_A_IFNAME])); + if (!dev) + return ERR_PTR(-ENODEV); + if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind || + strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) { + dev_put(dev); + return ERR_PTR(-EOPNOTSUPP); + } + return netdev_priv(dev); +} + +static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, + int family) +{ + struct nlattr *allowedip_nest; + + allowedip_nest = nla_nest_start(skb, 0); + if (!allowedip_nest) + return -EMSGSIZE; + + if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) || + nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) || + nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ? + sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) { + nla_nest_cancel(skb, allowedip_nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, allowedip_nest); + return 0; +} + +struct dump_ctx { + struct wg_device *wg; + struct wg_peer *next_peer; + u64 allowedips_seq; + struct allowedips_node *next_allowedip; +}; + +#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) + +static int +get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) +{ + + struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); + struct allowedips_node *allowedips_node = ctx->next_allowedip; + bool fail; + + if (!peer_nest) + return -EMSGSIZE; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, + peer->handshake.remote_static); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (!allowedips_node) { + const struct __kernel_timespec last_handshake = { + .tv_sec = peer->walltime_last_handshake.tv_sec, + .tv_nsec = peer->walltime_last_handshake.tv_nsec + }; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, + NOISE_SYMMETRIC_KEY_LEN, + peer->handshake.preshared_key); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, + sizeof(last_handshake), &last_handshake) || + nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + peer->persistent_keepalive_interval) || + nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) + goto err; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr4), + &peer->endpoint.addr4); + else if (peer->endpoint.addr.sa_family == AF_INET6) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr6), + &peer->endpoint.addr6); + read_unlock_bh(&peer->endpoint_lock); + if (fail) + goto err; + allowedips_node = + list_first_entry_or_null(&peer->allowedips_list, + struct allowedips_node, peer_list); + } + if (!allowedips_node) + goto no_allowedips; + if (!ctx->allowedips_seq) + ctx->allowedips_seq = peer->device->peer_allowedips.seq; + else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + goto no_allowedips; + + allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); + if (!allowedips_nest) + goto err; + + list_for_each_entry_from(allowedips_node, &peer->allowedips_list, + peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family; + + family = wg_allowedips_read_node(allowedips_node, ip, &cidr); + if (get_allowedips(skb, ip, cidr, family)) { + nla_nest_end(skb, allowedips_nest); + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = allowedips_node; + return -EMSGSIZE; + } + } + nla_nest_end(skb, allowedips_nest); +no_allowedips: + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = NULL; + ctx->allowedips_seq = 0; + return 0; +err: + nla_nest_cancel(skb, peer_nest); + return -EMSGSIZE; +} + +static int wg_get_device_start(struct netlink_callback *cb) +{ + struct wg_device *wg; + + wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb); + if (IS_ERR(wg)) + return PTR_ERR(wg); + DUMP_CTX(cb)->wg = wg; + return 0; +} + +static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct wg_peer *peer, *next_peer_cursor; + struct dump_ctx *ctx = DUMP_CTX(cb); + struct wg_device *wg = ctx->wg; + struct nlattr *peers_nest; + int ret = -EMSGSIZE; + bool done = true; + void *hdr; + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + cb->seq = wg->device_update_gen; + next_peer_cursor = ctx->next_peer; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); + if (!hdr) + goto out; + genl_dump_check_consistent(cb, hdr); + + if (!ctx->next_peer) { + if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, + wg->incoming_port) || + nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || + nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || + nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) + goto out; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity) { + if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_private) || + nla_put(skb, WGDEVICE_A_PUBLIC_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_public)) { + up_read(&wg->static_identity.lock); + goto out; + } + } + up_read(&wg->static_identity.lock); + } + + peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); + if (!peers_nest) + goto out; + ret = 0; + /* If the last cursor was removed via list_del_init in peer_remove, then + * we just treat this the same as there being no more peers left. The + * reason is that seq_nr should indicate to userspace that this isn't a + * coherent dump anyway, so they'll try again. + */ + if (list_empty(&wg->peer_list) || + (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + nla_nest_cancel(skb, peers_nest); + goto out; + } + lockdep_assert_held(&wg->device_update_lock); + peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); + list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { + if (get_peer(peer, skb, ctx)) { + done = false; + break; + } + next_peer_cursor = peer; + } + nla_nest_end(skb, peers_nest); + +out: + if (!ret && !done && next_peer_cursor) + wg_peer_get(next_peer_cursor); + wg_peer_put(ctx->next_peer); + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + + if (ret) { + genlmsg_cancel(skb, hdr); + return ret; + } + genlmsg_end(skb, hdr); + if (done) { + ctx->next_peer = NULL; + return 0; + } + ctx->next_peer = next_peer_cursor; + return skb->len; + + /* At this point, we can't really deal ourselves with safely zeroing out + * the private key material after usage. This will need an additional API + * in the kernel for marking skbs as zero_on_free. + */ +} + +static int wg_get_device_done(struct netlink_callback *cb) +{ + struct dump_ctx *ctx = DUMP_CTX(cb); + + if (ctx->wg) + dev_put(ctx->wg->dev); + wg_peer_put(ctx->next_peer); + return 0; +} + +static int set_port(struct wg_device *wg, u16 port) +{ + struct wg_peer *peer; + + if (wg->incoming_port == port) + return 0; + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + if (!netif_running(wg->dev)) { + wg->incoming_port = port; + return 0; + } + return wg_socket_init(wg, port); +} + +static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) +{ + int ret = -EINVAL; + u16 family; + u8 cidr; + + if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || + !attrs[WGALLOWEDIP_A_CIDR_MASK]) + return ret; + family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); + cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); + + if (family == AF_INET && cidr <= 32 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) + ret = wg_allowedips_insert_v4( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + else if (family == AF_INET6 && cidr <= 128 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) + ret = wg_allowedips_insert_v6( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + + return ret; +} + +static int set_peer(struct wg_device *wg, struct nlattr **attrs) +{ + u8 *public_key = NULL, *preshared_key = NULL; + struct wg_peer *peer = NULL; + u32 flags = 0; + int ret; + + ret = -EINVAL; + if (attrs[WGPEER_A_PUBLIC_KEY] && + nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) + public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); + else + goto out; + if (attrs[WGPEER_A_PRESHARED_KEY] && + nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) + preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); + + if (attrs[WGPEER_A_FLAGS]) + flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGPEER_F_ALL) + goto out; + + ret = -EPFNOSUPPORT; + if (attrs[WGPEER_A_PROTOCOL_VERSION]) { + if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) + goto out; + } + + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + nla_data(attrs[WGPEER_A_PUBLIC_KEY])); + ret = 0; + if (!peer) { /* Peer doesn't exist yet. Add a new one. */ + if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) + goto out; + + /* The peer is new, so there aren't allowed IPs to remove. */ + flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity && + !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), + wg->static_identity.static_public, + NOISE_PUBLIC_KEY_LEN)) { + /* We silently ignore peers that have the same public + * key as the device. The reason we do it silently is + * that we'd like for people to be able to reuse the + * same set of API calls across peers. + */ + up_read(&wg->static_identity.lock); + ret = 0; + goto out; + } + up_read(&wg->static_identity.lock); + + peer = wg_peer_create(wg, public_key, preshared_key); + if (IS_ERR(peer)) { + ret = PTR_ERR(peer); + peer = NULL; + goto out; + } + /* Take additional reference, as though we've just been + * looked up. + */ + wg_peer_get(peer); + } + + if (flags & WGPEER_F_REMOVE_ME) { + wg_peer_remove(peer); + goto out; + } + + if (preshared_key) { + down_write(&peer->handshake.lock); + memcpy(&peer->handshake.preshared_key, preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + up_write(&peer->handshake.lock); + } + + if (attrs[WGPEER_A_ENDPOINT]) { + struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); + size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); + + if ((len == sizeof(struct sockaddr_in) && + addr->sa_family == AF_INET) || + (len == sizeof(struct sockaddr_in6) && + addr->sa_family == AF_INET6)) { + struct endpoint endpoint = { { { 0 } } }; + + memcpy(&endpoint.addr, addr, len); + wg_socket_set_peer_endpoint(peer, &endpoint); + } + } + + if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) + wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, + &wg->device_update_lock); + + if (attrs[WGPEER_A_ALLOWEDIPS]) { + struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { + ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, + attr, allowedip_policy, NULL); + if (ret < 0) + goto out; + ret = set_allowedip(peer, allowedip); + if (ret < 0) + goto out; + } + } + + if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { + const u16 persistent_keepalive_interval = nla_get_u16( + attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); + const bool send_keepalive = + !peer->persistent_keepalive_interval && + persistent_keepalive_interval && + netif_running(wg->dev); + + peer->persistent_keepalive_interval = persistent_keepalive_interval; + if (send_keepalive) + wg_packet_send_keepalive(peer); + } + + if (netif_running(wg->dev)) + wg_packet_send_staged_packets(peer); + +out: + wg_peer_put(peer); + if (attrs[WGPEER_A_PRESHARED_KEY]) + memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), + nla_len(attrs[WGPEER_A_PRESHARED_KEY])); + return ret; +} + +static int wg_set_device(struct sk_buff *skb, struct genl_info *info) +{ + struct wg_device *wg = lookup_interface(info->attrs, skb); + u32 flags = 0; + int ret; + + if (IS_ERR(wg)) { + ret = PTR_ERR(wg); + goto out_nodev; + } + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + + if (info->attrs[WGDEVICE_A_FLAGS]) + flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGDEVICE_F_ALL) + goto out; + + if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) { + struct net *net; + rcu_read_lock(); + net = rcu_dereference(wg->creating_net); + ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0; + rcu_read_unlock(); + if (ret) + goto out; + } + + ++wg->device_update_gen; + + if (info->attrs[WGDEVICE_A_FWMARK]) { + struct wg_peer *peer; + + wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + } + + if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { + ret = set_port(wg, + nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); + if (ret) + goto out; + } + + if (flags & WGDEVICE_F_REPLACE_PEERS) + wg_peer_remove_all(wg); + + if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == + NOISE_PUBLIC_KEY_LEN) { + u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); + u8 public_key[NOISE_PUBLIC_KEY_LEN]; + struct wg_peer *peer, *temp; + + if (!crypto_memneq(wg->static_identity.static_private, + private_key, NOISE_PUBLIC_KEY_LEN)) + goto skip_set_private_key; + + /* We remove before setting, to prevent race, which means doing + * two 25519-genpub ops. + */ + if (curve25519_generate_public(public_key, private_key)) { + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + public_key); + if (peer) { + wg_peer_put(peer); + wg_peer_remove(peer); + } + } + + down_write(&wg->static_identity.lock); + wg_noise_set_static_identity_private_key(&wg->static_identity, + private_key); + list_for_each_entry_safe(peer, temp, &wg->peer_list, + peer_list) { + wg_noise_precompute_static_static(peer); + wg_noise_expire_current_peer_keypairs(peer); + } + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); + up_write(&wg->static_identity.lock); + } +skip_set_private_key: + + if (info->attrs[WGDEVICE_A_PEERS]) { + struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) { + ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, + peer_policy, NULL); + if (ret < 0) + goto out; + ret = set_peer(wg, peer); + if (ret < 0) + goto out; + } + } + ret = 0; + +out: + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + dev_put(wg->dev); +out_nodev: + if (info->attrs[WGDEVICE_A_PRIVATE_KEY]) + memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY])); + return ret; +} + +#ifndef COMPAT_CANNOT_USE_CONST_GENL_OPS +static const +#else +static +#endif +struct genl_ops genl_ops[] = { + { + .cmd = WG_CMD_GET_DEVICE, +#ifndef COMPAT_CANNOT_USE_NETLINK_START + .start = wg_get_device_start, +#endif + .dumpit = wg_get_device_dump, + .done = wg_get_device_done, +#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY + .policy = device_policy, +#endif + .flags = GENL_UNS_ADMIN_PERM + }, { + .cmd = WG_CMD_SET_DEVICE, + .doit = wg_set_device, +#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY + .policy = device_policy, +#endif + .flags = GENL_UNS_ADMIN_PERM + } +}; + +static struct genl_family genl_family +#ifndef COMPAT_CANNOT_USE_GENL_NOPS +__ro_after_init = { + .ops = genl_ops, + .n_ops = ARRAY_SIZE(genl_ops), +#else += { +#endif + .name = WG_GENL_NAME, + .version = WG_GENL_VERSION, + .maxattr = WGDEVICE_A_MAX, + .module = THIS_MODULE, +#ifndef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY + .policy = device_policy, +#endif + .netnsok = true +}; + +int __init wg_genetlink_init(void) +{ + return genl_register_family(&genl_family); +} + +void __exit wg_genetlink_uninit(void) +{ + genl_unregister_family(&genl_family); +} diff --git a/net/wireguard/netlink.h b/net/wireguard/netlink.h new file mode 100644 index 000000000000..15100d92e2e3 --- /dev/null +++ b/net/wireguard/netlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_NETLINK_H +#define _WG_NETLINK_H + +int wg_genetlink_init(void); +void wg_genetlink_uninit(void); + +#endif /* _WG_NETLINK_H */ diff --git a/net/wireguard/noise.c b/net/wireguard/noise.c new file mode 100644 index 000000000000..baf455e21e79 --- /dev/null +++ b/net/wireguard/noise.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "noise.h" +#include "device.h" +#include "peer.h" +#include "messages.h" +#include "queueing.h" +#include "peerlookup.h" + +#include +#include +#include +#include +#include +#include + +/* This implements Noise_IKpsk2: + * + * <- s + * ****** + * -> e, es, s, ss, {t} + * <- e, ee, se, psk, {} + */ + +static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"; +static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com"; +static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init; +static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init; +static atomic64_t keypair_counter = ATOMIC64_INIT(0); + +void __init wg_noise_init(void) +{ + struct blake2s_state blake; + + blake2s(handshake_init_chaining_key, handshake_name, NULL, + NOISE_HASH_LEN, sizeof(handshake_name), 0); + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN); + blake2s_update(&blake, identifier_name, sizeof(identifier_name)); + blake2s_final(&blake, handshake_init_hash); +} + +/* Must hold peer->handshake.static_identity->lock */ +void wg_noise_precompute_static_static(struct wg_peer *peer) +{ + down_write(&peer->handshake.lock); + if (!peer->handshake.static_identity->has_identity || + !curve25519(peer->handshake.precomputed_static_static, + peer->handshake.static_identity->static_private, + peer->handshake.remote_static)) + memset(peer->handshake.precomputed_static_static, 0, + NOISE_PUBLIC_KEY_LEN); + up_write(&peer->handshake.lock); +} + +void wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer) +{ + memset(handshake, 0, sizeof(*handshake)); + init_rwsem(&handshake->lock); + handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE; + handshake->entry.peer = peer; + memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN); + if (peer_preshared_key) + memcpy(handshake->preshared_key, peer_preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + handshake->static_identity = static_identity; + handshake->state = HANDSHAKE_ZEROED; + wg_noise_precompute_static_static(peer); +} + +static void handshake_zero(struct noise_handshake *handshake) +{ + memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN); + memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN); + memset(&handshake->hash, 0, NOISE_HASH_LEN); + memset(&handshake->chaining_key, 0, NOISE_HASH_LEN); + handshake->remote_index = 0; + handshake->state = HANDSHAKE_ZEROED; +} + +void wg_noise_handshake_clear(struct noise_handshake *handshake) +{ + down_write(&handshake->lock); + wg_index_hashtable_remove( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + handshake_zero(handshake); + up_write(&handshake->lock); +} + +static struct noise_keypair *keypair_create(struct wg_peer *peer) +{ + struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL); + + if (unlikely(!keypair)) + return NULL; + spin_lock_init(&keypair->receiving_counter.lock); + keypair->internal_id = atomic64_inc_return(&keypair_counter); + keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; + keypair->entry.peer = peer; + kref_init(&keypair->refcount); + return keypair; +} + +static void keypair_free_rcu(struct rcu_head *rcu) +{ + kfree_sensitive(container_of(rcu, struct noise_keypair, rcu)); +} + +static void keypair_free_kref(struct kref *kref) +{ + struct noise_keypair *keypair = + container_of(kref, struct noise_keypair, refcount); + + net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", + keypair->entry.peer->device->dev->name, + keypair->internal_id, + keypair->entry.peer->internal_id); + wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, + &keypair->entry); + call_rcu(&keypair->rcu, keypair_free_rcu); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) +{ + if (unlikely(!keypair)) + return; + if (unlikely(unreference_now)) + wg_index_hashtable_remove( + keypair->entry.peer->device->index_hashtable, + &keypair->entry); + kref_put(&keypair->refcount, keypair_free_kref); +} + +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking noise keypair reference without holding the RCU BH read lock"); + if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) + return NULL; + return keypair; +} + +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) +{ + struct noise_keypair *old; + + spin_lock_bh(&keypairs->keypair_update_lock); + + /* We zero the next_keypair before zeroing the others, so that + * wg_noise_received_with_keypair returns early before subsequent ones + * are zeroed. + */ + old = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->current_keypair, NULL); + wg_noise_keypair_put(old, true); + + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + + wg_noise_handshake_clear(&peer->handshake); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + + spin_lock_bh(&peer->keypairs.keypair_update_lock); + keypair = rcu_dereference_protected(peer->keypairs.next_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + keypair = rcu_dereference_protected(peer->keypairs.current_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + spin_unlock_bh(&peer->keypairs.keypair_update_lock); +} + +static void add_new_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *new_keypair) +{ + struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; + + spin_lock_bh(&keypairs->keypair_update_lock); + previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + next_keypair = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + current_keypair = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + if (new_keypair->i_am_the_initiator) { + /* If we're the initiator, it means we've sent a handshake, and + * received a confirmation response, which means this new + * keypair can now be used. + */ + if (next_keypair) { + /* If there already was a next keypair pending, we + * demote it to be the previous keypair, and free the + * existing current. Note that this means KCI can result + * in this transition. It would perhaps be more sound to + * always just get rid of the unused next keypair + * instead of putting it in the previous slot, but this + * might be a bit less robust. Something to think about + * for the future. + */ + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + rcu_assign_pointer(keypairs->previous_keypair, + next_keypair); + wg_noise_keypair_put(current_keypair, true); + } else /* If there wasn't an existing next keypair, we replace + * the previous with the current one. + */ + rcu_assign_pointer(keypairs->previous_keypair, + current_keypair); + /* At this point we can get rid of the old previous keypair, and + * set up the new keypair. + */ + wg_noise_keypair_put(previous_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, new_keypair); + } else { + /* If we're the responder, it means we can't use the new keypair + * until we receive confirmation via the first data packet, so + * we get rid of the existing previous one, the possibly + * existing next one, and slide in the new next one. + */ + rcu_assign_pointer(keypairs->next_keypair, new_keypair); + wg_noise_keypair_put(next_keypair, true); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(previous_keypair, true); + } + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair) +{ + struct noise_keypair *old_keypair; + bool key_is_new; + + /* We first check without taking the spinlock. */ + key_is_new = received_keypair == + rcu_access_pointer(keypairs->next_keypair); + if (likely(!key_is_new)) + return false; + + spin_lock_bh(&keypairs->keypair_update_lock); + /* After locking, we double check that things didn't change from + * beneath us. + */ + if (unlikely(received_keypair != + rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)))) { + spin_unlock_bh(&keypairs->keypair_update_lock); + return false; + } + + /* When we've finally received the confirmation, we slide the next + * into the current, the current into the previous, and get rid of + * the old previous. + */ + old_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + rcu_assign_pointer(keypairs->previous_keypair, + rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock))); + wg_noise_keypair_put(old_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, received_keypair); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + + spin_unlock_bh(&keypairs->keypair_update_lock); + return true; +} + +/* Must hold static_identity->lock */ +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(static_identity->static_private, private_key, + NOISE_PUBLIC_KEY_LEN); + curve25519_clamp_secret(static_identity->static_private); + static_identity->has_identity = curve25519_generate_public( + static_identity->static_public, private_key); +} + +/* This is Hugo Krawczyk's HKDF: + * - https://eprint.iacr.org/2010/264.pdf + * - https://tools.ietf.org/html/rfc5869 + */ +static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, + size_t first_len, size_t second_len, size_t third_len, + size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) +{ + u8 output[BLAKE2S_HASH_SIZE + 1]; + u8 secret[BLAKE2S_HASH_SIZE]; + + WARN_ON(IS_ENABLED(DEBUG) && + (first_len > BLAKE2S_HASH_SIZE || + second_len > BLAKE2S_HASH_SIZE || + third_len > BLAKE2S_HASH_SIZE || + ((second_len || second_dst || third_len || third_dst) && + (!first_len || !first_dst)) || + ((third_len || third_dst) && (!second_len || !second_dst)))); + + /* Extract entropy from data into secret */ + blake2s_hmac(secret, data, chaining_key, BLAKE2S_HASH_SIZE, data_len, + NOISE_HASH_LEN); + + if (!first_dst || !first_len) + goto out; + + /* Expand first key: key = secret, data = 0x1 */ + output[0] = 1; + blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE, 1, + BLAKE2S_HASH_SIZE); + memcpy(first_dst, output, first_len); + + if (!second_dst || !second_len) + goto out; + + /* Expand second key: key = secret, data = first-key || 0x2 */ + output[BLAKE2S_HASH_SIZE] = 2; + blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE, + BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); + memcpy(second_dst, output, second_len); + + if (!third_dst || !third_len) + goto out; + + /* Expand third key: key = secret, data = second-key || 0x3 */ + output[BLAKE2S_HASH_SIZE] = 3; + blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE, + BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); + memcpy(third_dst, output, third_len); + +out: + /* Clear sensitive data from stack */ + memzero_explicit(secret, BLAKE2S_HASH_SIZE); + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); +} + +static void derive_keys(struct noise_symmetric_key *first_dst, + struct noise_symmetric_key *second_dst, + const u8 chaining_key[NOISE_HASH_LEN]) +{ + u64 birthdate = ktime_get_coarse_boottime_ns(); + kdf(first_dst->key, second_dst->key, NULL, NULL, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, + chaining_key); + first_dst->birthdate = second_dst->birthdate = birthdate; + first_dst->is_valid = second_dst->is_valid = true; +} + +static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 private[NOISE_PUBLIC_KEY_LEN], + const u8 public[NOISE_PUBLIC_KEY_LEN]) +{ + u8 dh_calculation[NOISE_PUBLIC_KEY_LEN]; + + if (unlikely(!curve25519(dh_calculation, private, public))) + return false; + kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); + memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); + return true; +} + +static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 precomputed[NOISE_PUBLIC_KEY_LEN]) +{ + static u8 zero_point[NOISE_PUBLIC_KEY_LEN]; + if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN))) + return false; + kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + chaining_key); + return true; +} + +static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, hash, NOISE_HASH_LEN); + blake2s_update(&blake, src, src_len); + blake2s_final(&blake, hash); +} + +static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) +{ + u8 temp_hash[NOISE_HASH_LEN]; + + kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); + mix_hash(hash, temp_hash, NOISE_HASH_LEN); + memzero_explicit(temp_hash, NOISE_HASH_LEN); +} + +static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN], + const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); + mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); +} + +static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, + NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key); + mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); +} + +static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, + hash, NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key)) + return false; + mix_hash(hash, src_ciphertext, src_len); + return true; +} + +static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], + const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], + u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (ephemeral_dst != ephemeral_src) + memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, + NOISE_PUBLIC_KEY_LEN, chaining_key); +} + +static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) +{ + struct timespec64 now; + + ktime_get_real_ts64(&now); + + /* In order to prevent some sort of infoleak from precise timers, we + * round down the nanoseconds part to the closest rounded-down power of + * two to the maximum initiations per second allowed anyway by the + * implementation. + */ + now.tv_nsec = ALIGN_DOWN(now.tv_nsec, + rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); + + /* https://cr.yp.to/libtai/tai64.html */ + *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); + *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); +} + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake) +{ + u8 timestamp[NOISE_TIMESTAMP_LEN]; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (unlikely(!handshake->static_identity->has_identity)) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); + + handshake_init(handshake->chaining_key, handshake->hash, + handshake->remote_static); + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* es */ + if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* s */ + message_encrypt(dst->encrypted_static, + handshake->static_identity->static_public, + NOISE_PUBLIC_KEY_LEN, key, handshake->hash); + + /* ss */ + if (!mix_precomputed_dh(handshake->chaining_key, key, + handshake->precomputed_static_static)) + goto out; + + /* {t} */ + tai64n_now(timestamp); + message_encrypt(dst->encrypted_timestamp, timestamp, + NOISE_TIMESTAMP_LEN, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_INITIATION; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + bool replay_attack, flood_attack; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 s[NOISE_PUBLIC_KEY_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 t[NOISE_TIMESTAMP_LEN]; + u64 initiation_consumption; + + down_read(&wg->static_identity.lock); + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake_init(chaining_key, hash, wg->static_identity.static_public); + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* es */ + if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) + goto out; + + /* s */ + if (!message_decrypt(s, src->encrypted_static, + sizeof(src->encrypted_static), key, hash)) + goto out; + + /* Lookup which peer we're actually talking to */ + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); + if (!peer) + goto out; + handshake = &peer->handshake; + + /* ss */ + if (!mix_precomputed_dh(chaining_key, key, + handshake->precomputed_static_static)) + goto out; + + /* {t} */ + if (!message_decrypt(t, src->encrypted_timestamp, + sizeof(src->encrypted_timestamp), key, hash)) + goto out; + + down_read(&handshake->lock); + replay_attack = memcmp(t, handshake->latest_timestamp, + NOISE_TIMESTAMP_LEN) <= 0; + flood_attack = (s64)handshake->last_initiation_consumption + + NSEC_PER_SEC / INITIATIONS_PER_SECOND > + (s64)ktime_get_coarse_boottime_ns(); + up_read(&handshake->lock); + if (replay_attack || flood_attack) + goto out; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) + memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + initiation_consumption = ktime_get_coarse_boottime_ns(); + if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0) + handshake->last_initiation_consumption = initiation_consumption; + handshake->state = HANDSHAKE_CONSUMED_INITIATION; + up_write(&handshake->lock); + ret_peer = peer; + +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + up_read(&wg->static_identity.lock); + if (!ret_peer) + wg_peer_put(peer); + return ret_peer; +} + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake) +{ + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); + dst->receiver_index = handshake->remote_index; + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* ee */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_ephemeral)) + goto out; + + /* se */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* psk */ + mix_psk(handshake->chaining_key, handshake->hash, key, + handshake->preshared_key); + + /* {} */ + message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_RESPONSE; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg) +{ + enum noise_handshake_state state = HANDSHAKE_ZEROED; + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; + + down_read(&wg->static_identity.lock); + + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake = (struct noise_handshake *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, + src->receiver_index, &peer); + if (unlikely(!handshake)) + goto out; + + down_read(&handshake->lock); + state = handshake->state; + memcpy(hash, handshake->hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); + memcpy(ephemeral_private, handshake->ephemeral_private, + NOISE_PUBLIC_KEY_LEN); + memcpy(preshared_key, handshake->preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + up_read(&handshake->lock); + + if (state != HANDSHAKE_CREATED_INITIATION) + goto fail; + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* ee */ + if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) + goto fail; + + /* se */ + if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) + goto fail; + + /* psk */ + mix_psk(chaining_key, hash, key, preshared_key); + + /* {} */ + if (!message_decrypt(NULL, src->encrypted_nothing, + sizeof(src->encrypted_nothing), key, hash)) + goto fail; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + /* It's important to check that the state is still the same, while we + * have an exclusive lock. + */ + if (handshake->state != state) { + up_write(&handshake->lock); + goto fail; + } + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + handshake->state = HANDSHAKE_CONSUMED_RESPONSE; + up_write(&handshake->lock); + ret_peer = peer; + goto out; + +fail: + wg_peer_put(peer); +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); + up_read(&wg->static_identity.lock); + return ret_peer; +} + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs) +{ + struct noise_keypair *new_keypair; + bool ret = false; + + down_write(&handshake->lock); + if (handshake->state != HANDSHAKE_CREATED_RESPONSE && + handshake->state != HANDSHAKE_CONSUMED_RESPONSE) + goto out; + + new_keypair = keypair_create(handshake->entry.peer); + if (!new_keypair) + goto out; + new_keypair->i_am_the_initiator = handshake->state == + HANDSHAKE_CONSUMED_RESPONSE; + new_keypair->remote_index = handshake->remote_index; + + if (new_keypair->i_am_the_initiator) + derive_keys(&new_keypair->sending, &new_keypair->receiving, + handshake->chaining_key); + else + derive_keys(&new_keypair->receiving, &new_keypair->sending, + handshake->chaining_key); + + handshake_zero(handshake); + rcu_read_lock_bh(); + if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, + handshake)->is_dead))) { + add_new_keypair(keypairs, new_keypair); + net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", + handshake->entry.peer->device->dev->name, + new_keypair->internal_id, + handshake->entry.peer->internal_id); + ret = wg_index_hashtable_replace( + handshake->entry.peer->device->index_hashtable, + &handshake->entry, &new_keypair->entry); + } else { + kfree_sensitive(new_keypair); + } + rcu_read_unlock_bh(); + +out: + up_write(&handshake->lock); + return ret; +} diff --git a/net/wireguard/noise.h b/net/wireguard/noise.h new file mode 100644 index 000000000000..c527253dba80 --- /dev/null +++ b/net/wireguard/noise.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ +#ifndef _WG_NOISE_H +#define _WG_NOISE_H + +#include "messages.h" +#include "peerlookup.h" + +#include +#include +#include +#include +#include +#include + +struct noise_replay_counter { + u64 counter; + spinlock_t lock; + unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; +}; + +struct noise_symmetric_key { + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u64 birthdate; + bool is_valid; +}; + +struct noise_keypair { + struct index_hashtable_entry entry; + struct noise_symmetric_key sending; + atomic64_t sending_counter; + struct noise_symmetric_key receiving; + struct noise_replay_counter receiving_counter; + __le32 remote_index; + bool i_am_the_initiator; + struct kref refcount; + struct rcu_head rcu; + u64 internal_id; +}; + +struct noise_keypairs { + struct noise_keypair __rcu *current_keypair; + struct noise_keypair __rcu *previous_keypair; + struct noise_keypair __rcu *next_keypair; + spinlock_t keypair_update_lock; +}; + +struct noise_static_identity { + u8 static_public[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + struct rw_semaphore lock; + bool has_identity; +}; + +enum noise_handshake_state { + HANDSHAKE_ZEROED, + HANDSHAKE_CREATED_INITIATION, + HANDSHAKE_CONSUMED_INITIATION, + HANDSHAKE_CREATED_RESPONSE, + HANDSHAKE_CONSUMED_RESPONSE +}; + +struct noise_handshake { + struct index_hashtable_entry entry; + + enum noise_handshake_state state; + u64 last_initiation_consumption; + + struct noise_static_identity *static_identity; + + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 remote_static[NOISE_PUBLIC_KEY_LEN]; + u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN]; + + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; + + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + + u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; + __le32 remote_index; + + /* Protects all members except the immutable (after noise_handshake_ + * init): remote_static, precomputed_static_static, static_identity. + */ + struct rw_semaphore lock; +}; + +struct wg_device; + +void wg_noise_init(void); +void wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer); +void wg_noise_handshake_clear(struct noise_handshake *handshake); +static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) +{ + atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - + (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair); +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); + +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]); +void wg_noise_precompute_static_static(struct wg_peer *peer); + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg); + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg); + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs); + +#endif /* _WG_NOISE_H */ diff --git a/net/wireguard/peer.c b/net/wireguard/peer.c new file mode 100644 index 000000000000..1acd00ab2fbc --- /dev/null +++ b/net/wireguard/peer.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "peer.h" +#include "device.h" +#include "queueing.h" +#include "timers.h" +#include "peerlookup.h" +#include "noise.h" + +#include +#include +#include +#include + +static struct kmem_cache *peer_cache; +static atomic64_t peer_counter = ATOMIC64_INIT(0); + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) +{ + struct wg_peer *peer; + int ret = -ENOMEM; + + lockdep_assert_held(&wg->device_update_lock); + + if (wg->num_peers >= MAX_PEERS_PER_DEVICE) + return ERR_PTR(ret); + + peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL); + if (unlikely(!peer)) + return ERR_PTR(ret); + if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))) + goto err; + + peer->device = wg; + wg_noise_handshake_init(&peer->handshake, &wg->static_identity, + public_key, preshared_key, peer); + peer->internal_id = atomic64_inc_return(&peer_counter); + peer->serial_work_cpu = nr_cpumask_bits; + wg_cookie_init(&peer->latest_cookie); + wg_timers_init(peer); + wg_cookie_checker_precompute_peer_keys(peer); + spin_lock_init(&peer->keypairs.keypair_update_lock); + INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker); + INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker); + wg_prev_queue_init(&peer->tx_queue); + wg_prev_queue_init(&peer->rx_queue); + rwlock_init(&peer->endpoint_lock); + kref_init(&peer->refcount); + skb_queue_head_init(&peer->staged_packet_queue); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); + netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll, + NAPI_POLL_WEIGHT); + napi_enable(&peer->napi); + list_add_tail(&peer->peer_list, &wg->peer_list); + INIT_LIST_HEAD(&peer->allowedips_list); + wg_pubkey_hashtable_add(wg->peer_hashtable, peer); + ++wg->num_peers; + pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); + return peer; + +err: + kmem_cache_free(peer_cache, peer); + return ERR_PTR(ret); +} + +struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking peer reference without holding the RCU read lock"); + if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount))) + return NULL; + return peer; +} + +static void peer_make_dead(struct wg_peer *peer) +{ + /* Remove from configuration-time lookup structures. */ + list_del_init(&peer->peer_list); + wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, + &peer->device->device_update_lock); + wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer); + + /* Mark as dead, so that we don't allow jumping contexts after. */ + WRITE_ONCE(peer->is_dead, true); + + /* The caller must now synchronize_net() for this to take effect. */ +} + +static void peer_remove_after_dead(struct wg_peer *peer) +{ + WARN_ON(!peer->is_dead); + + /* No more keypairs can be created for this peer, since is_dead protects + * add_new_keypair, so we can now destroy existing ones. + */ + wg_noise_keypairs_clear(&peer->keypairs); + + /* Destroy all ongoing timers that were in-flight at the beginning of + * this function. + */ + wg_timers_stop(peer); + + /* The transition between packet encryption/decryption queues isn't + * guarded by is_dead, but each reference's life is strictly bounded by + * two generations: once for parallel crypto and once for serial + * ingestion, so we can simply flush twice, and be sure that we no + * longer have references inside these queues. + */ + + /* a) For encrypt/decrypt. */ + flush_workqueue(peer->device->packet_crypt_wq); + /* b.1) For send (but not receive, since that's napi). */ + flush_workqueue(peer->device->packet_crypt_wq); + /* b.2.1) For receive (but not send, since that's wq). */ + napi_disable(&peer->napi); + /* b.2.1) It's now safe to remove the napi struct, which must be done + * here from process context. + */ + netif_napi_del(&peer->napi); + + /* Ensure any workstructs we own (like transmit_handshake_work or + * clear_peer_work) no longer are in use. + */ + flush_workqueue(peer->device->handshake_send_wq); + + /* After the above flushes, a peer might still be active in a few + * different contexts: 1) from xmit(), before hitting is_dead and + * returning, 2) from wg_packet_consume_data(), before hitting is_dead + * and returning, 3) from wg_receive_handshake_packet() after a point + * where it has processed an incoming handshake packet, but where + * all calls to pass it off to timers fails because of is_dead. We won't + * have new references in (1) eventually, because we're removed from + * allowedips; we won't have new references in (2) eventually, because + * wg_index_hashtable_lookup will always return NULL, since we removed + * all existing keypairs and no more can be created; we won't have new + * references in (3) eventually, because we're removed from the pubkey + * hash table, which allows for a maximum of one handshake response, + * via the still-uncleared index hashtable entry, but not more than one, + * and in wg_cookie_message_consume, the lookup eventually gets a peer + * with a refcount of zero, so no new reference is taken. + */ + + --peer->device->num_peers; + wg_peer_put(peer); +} + +/* We have a separate "remove" function make sure that all active places where + * a peer is currently operating will eventually come to an end and not pass + * their reference onto another context. + */ +void wg_peer_remove(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + lockdep_assert_held(&peer->device->device_update_lock); + + peer_make_dead(peer); + synchronize_net(); + peer_remove_after_dead(peer); +} + +void wg_peer_remove_all(struct wg_device *wg) +{ + struct wg_peer *peer, *temp; + LIST_HEAD(dead_peers); + + lockdep_assert_held(&wg->device_update_lock); + + /* Avoid having to traverse individually for each one. */ + wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock); + + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { + peer_make_dead(peer); + list_add_tail(&peer->peer_list, &dead_peers); + } + synchronize_net(); + list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) + peer_remove_after_dead(peer); +} + +static void rcu_release(struct rcu_head *rcu) +{ + struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); + + dst_cache_destroy(&peer->endpoint_cache); + WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue)); + + /* The final zeroing takes care of clearing any remaining handshake key + * material and other potentially sensitive information. + */ + memzero_explicit(peer, sizeof(*peer)); + kmem_cache_free(peer_cache, peer); +} + +static void kref_release(struct kref *refcount) +{ + struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); + + pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + /* Remove ourself from dynamic runtime lookup structures, now that the + * last reference is gone. + */ + wg_index_hashtable_remove(peer->device->index_hashtable, + &peer->handshake.entry); + + /* Remove any lingering packets that didn't have a chance to be + * transmitted. + */ + wg_packet_purge_staged_packets(peer); + + /* Free the memory used. */ + call_rcu(&peer->rcu, rcu_release); +} + +void wg_peer_put(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + kref_put(&peer->refcount, kref_release); +} + +int __init wg_peer_init(void) +{ + peer_cache = KMEM_CACHE(wg_peer, 0); + return peer_cache ? 0 : -ENOMEM; +} + +void wg_peer_uninit(void) +{ + kmem_cache_destroy(peer_cache); +} diff --git a/net/wireguard/peer.h b/net/wireguard/peer.h new file mode 100644 index 000000000000..76e4d3128ad4 --- /dev/null +++ b/net/wireguard/peer.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_PEER_H +#define _WG_PEER_H + +#include "device.h" +#include "noise.h" +#include "cookie.h" + +#include +#include +#include +#include +#include + +struct wg_device; + +struct endpoint { + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + }; + union { + struct { + struct in_addr src4; + /* Essentially the same as addr6->scope_id */ + int src_if4; + }; + struct in6_addr src6; + }; +}; + +struct wg_peer { + struct wg_device *device; + struct prev_queue tx_queue, rx_queue; + struct sk_buff_head staged_packet_queue; + int serial_work_cpu; + bool is_dead; + struct noise_keypairs keypairs; + struct endpoint endpoint; + struct dst_cache endpoint_cache; + rwlock_t endpoint_lock; + struct noise_handshake handshake; + atomic64_t last_sent_handshake; + struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work; + struct cookie latest_cookie; + struct hlist_node pubkey_hash; + u64 rx_bytes, tx_bytes; + struct timer_list timer_retransmit_handshake, timer_send_keepalive; + struct timer_list timer_new_handshake, timer_zero_key_material; + struct timer_list timer_persistent_keepalive; + unsigned int timer_handshake_attempts; + u16 persistent_keepalive_interval; + bool timer_need_another_keepalive; + bool sent_lastminute_handshake; + struct timespec64 walltime_last_handshake; + struct kref refcount; + struct rcu_head rcu; + struct list_head peer_list; + struct list_head allowedips_list; + struct napi_struct napi; + u64 internal_id; +}; + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); + +struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer); +static inline struct wg_peer *wg_peer_get(struct wg_peer *peer) +{ + kref_get(&peer->refcount); + return peer; +} +void wg_peer_put(struct wg_peer *peer); +void wg_peer_remove(struct wg_peer *peer); +void wg_peer_remove_all(struct wg_device *wg); + +int wg_peer_init(void); +void wg_peer_uninit(void); + +#endif /* _WG_PEER_H */ diff --git a/net/wireguard/peerlookup.c b/net/wireguard/peerlookup.c new file mode 100644 index 000000000000..f2783aa7a88f --- /dev/null +++ b/net/wireguard/peerlookup.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "peerlookup.h" +#include "peer.h" +#include "noise.h" + +static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + /* siphash gives us a secure 64bit number based on a random key. Since + * the bits are uniformly distributed, we can then mask off to get the + * bits we need. + */ + const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key); + + return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)]; +} + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void) +{ + struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + get_random_bytes(&table->key, sizeof(table->key)); + hash_init(table->hashtable); + mutex_init(&table->lock); + return table; +} + +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_add_head_rcu(&peer->pubkey_hash, + pubkey_bucket(table, peer->handshake.remote_static)); + mutex_unlock(&table->lock); +} + +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_del_init_rcu(&peer->pubkey_hash); + mutex_unlock(&table->lock); +} + +/* Returns a strong reference to a peer */ +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + struct wg_peer *iter_peer, *peer = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey), + pubkey_hash) { + if (!memcmp(pubkey, iter_peer->handshake.remote_static, + NOISE_PUBLIC_KEY_LEN)) { + peer = iter_peer; + break; + } + } + peer = wg_peer_get_maybe_zero(peer); + rcu_read_unlock_bh(); + return peer; +} + +static struct hlist_head *index_bucket(struct index_hashtable *table, + const __le32 index) +{ + /* Since the indices are random and thus all bits are uniformly + * distributed, we can find its bucket simply by masking. + */ + return &table->hashtable[(__force u32)index & + (HASH_SIZE(table->hashtable) - 1)]; +} + +struct index_hashtable *wg_index_hashtable_alloc(void) +{ + struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + hash_init(table->hashtable); + spin_lock_init(&table->lock); + return table; +} + +/* At the moment, we limit ourselves to 2^20 total peers, which generally might + * amount to 2^20*3 items in this hashtable. The algorithm below works by + * picking a random number and testing it. We can see that these limits mean we + * usually succeed pretty quickly: + * + * >>> def calculation(tries, size): + * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32)) + * ... + * >>> calculation(1, 2**20 * 3) + * 0.999267578125 + * >>> calculation(2, 2**20 * 3) + * 0.0007318854331970215 + * >>> calculation(3, 2**20 * 3) + * 5.360489012673497e-07 + * >>> calculation(4, 2**20 * 3) + * 3.9261394135792216e-10 + * + * At the moment, we don't do any masking, so this algorithm isn't exactly + * constant time in either the random guessing or in the hash list lookup. We + * could require a minimum of 3 tries, which would successfully mask the + * guessing. this would not, however, help with the growing hash lengths, which + * is another thing to consider moving forward. + */ + +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + struct index_hashtable_entry *existing_entry; + + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); + + rcu_read_lock_bh(); + +search_unused_slot: + /* First we try to find an unused slot, randomly, while unlocked. */ + entry->index = (__force __le32)get_random_u32(); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) + /* If it's already in use, we continue searching. */ + goto search_unused_slot; + } + + /* Once we've found an unused slot, we lock it, and then double-check + * that nobody else stole it from us. + */ + spin_lock_bh(&table->lock); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) { + spin_unlock_bh(&table->lock); + /* If it was stolen, we start over. */ + goto search_unused_slot; + } + } + /* Otherwise, we know we have it exclusively (since we're locked), + * so we insert. + */ + hlist_add_head_rcu(&entry->index_hash, + index_bucket(table, entry->index)); + spin_unlock_bh(&table->lock); + + rcu_read_unlock_bh(); + + return entry->index; +} + +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new) +{ + bool ret; + + spin_lock_bh(&table->lock); + ret = !hlist_unhashed(&old->index_hash); + if (unlikely(!ret)) + goto out; + + new->index = old->index; + hlist_replace_rcu(&old->index_hash, &new->index_hash); + + /* Calling init here NULLs out index_hash, and in fact after this + * function returns, it's theoretically possible for this to get + * reinserted elsewhere. That means the RCU lookup below might either + * terminate early or jump between buckets, in which case the packet + * simply gets dropped, which isn't terrible. + */ + INIT_HLIST_NODE(&old->index_hash); +out: + spin_unlock_bh(&table->lock); + return ret; +} + +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); +} + +/* Returns a strong reference to a entry->peer */ +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer) +{ + struct index_hashtable_entry *iter_entry, *entry = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), + index_hash) { + if (iter_entry->index == index) { + if (likely(iter_entry->type & type_mask)) + entry = iter_entry; + break; + } + } + if (likely(entry)) { + entry->peer = wg_peer_get_maybe_zero(entry->peer); + if (likely(entry->peer)) + *peer = entry->peer; + else + entry = NULL; + } + rcu_read_unlock_bh(); + return entry; +} diff --git a/net/wireguard/peerlookup.h b/net/wireguard/peerlookup.h new file mode 100644 index 000000000000..ced811797680 --- /dev/null +++ b/net/wireguard/peerlookup.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_PEERLOOKUP_H +#define _WG_PEERLOOKUP_H + +#include "messages.h" + +#include +#include +#include + +struct wg_peer; + +struct pubkey_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 11); + siphash_key_t key; + struct mutex lock; +}; + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void); +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer); +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer); +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]); + +struct index_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 13); + spinlock_t lock; +}; + +enum index_hashtable_type { + INDEX_HASHTABLE_HANDSHAKE = 1U << 0, + INDEX_HASHTABLE_KEYPAIR = 1U << 1 +}; + +struct index_hashtable_entry { + struct wg_peer *peer; + struct hlist_node index_hash; + enum index_hashtable_type type; + __le32 index; +}; + +struct index_hashtable *wg_index_hashtable_alloc(void); +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry); +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new); +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry); +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer); + +#endif /* _WG_PEERLOOKUP_H */ diff --git a/net/wireguard/queueing.c b/net/wireguard/queueing.c new file mode 100644 index 000000000000..8084e7408c0a --- /dev/null +++ b/net/wireguard/queueing.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" +#include + +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) +{ + int cpu; + struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker); + + if (!worker) + return NULL; + + for_each_possible_cpu(cpu) { + per_cpu_ptr(worker, cpu)->ptr = ptr; + INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function); + } + return worker; +} + +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + unsigned int len) +{ + int ret; + + memset(queue, 0, sizeof(*queue)); + ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); + if (ret) + return ret; + queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue); + if (!queue->worker) { + ptr_ring_cleanup(&queue->ring, NULL); + return -ENOMEM; + } + return 0; +} + +void wg_packet_queue_free(struct crypt_queue *queue, bool purge) +{ + free_percpu(queue->worker); + WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL); +} + +#define NEXT(skb) ((skb)->prev) +#define STUB(queue) ((struct sk_buff *)&queue->empty) + +void wg_prev_queue_init(struct prev_queue *queue) +{ + NEXT(STUB(queue)) = NULL; + queue->head = queue->tail = STUB(queue); + queue->peeked = NULL; + atomic_set(&queue->count, 0); + BUILD_BUG_ON( + offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) - + offsetof(struct prev_queue, empty) || + offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) - + offsetof(struct prev_queue, empty)); +} + +static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) +{ + WRITE_ONCE(NEXT(skb), NULL); + WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb); +} + +bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) +{ + if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS)) + return false; + __wg_prev_queue_enqueue(queue, skb); + return true; +} + +struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue) +{ + struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail)); + + if (tail == STUB(queue)) { + if (!next) + return NULL; + queue->tail = next; + tail = next; + next = smp_load_acquire(&NEXT(next)); + } + if (next) { + queue->tail = next; + atomic_dec(&queue->count); + return tail; + } + if (tail != READ_ONCE(queue->head)) + return NULL; + __wg_prev_queue_enqueue(queue, STUB(queue)); + next = smp_load_acquire(&NEXT(tail)); + if (next) { + queue->tail = next; + atomic_dec(&queue->count); + return tail; + } + return NULL; +} + +#undef NEXT +#undef STUB diff --git a/net/wireguard/queueing.h b/net/wireguard/queueing.h new file mode 100644 index 000000000000..03850c43ebaf --- /dev/null +++ b/net/wireguard/queueing.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_QUEUEING_H +#define _WG_QUEUEING_H + +#include "peer.h" +#include +#include +#include +#include +#include + +struct wg_device; +struct wg_peer; +struct multicore_worker; +struct crypt_queue; +struct prev_queue; +struct sk_buff; + +/* queueing.c APIs: */ +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + unsigned int len); +void wg_packet_queue_free(struct crypt_queue *queue, bool purge); +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); + +/* receive.c APIs: */ +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb); +void wg_packet_handshake_receive_worker(struct work_struct *work); +/* NAPI poll function: */ +int wg_packet_rx_poll(struct napi_struct *napi, int budget); +/* Workqueue worker: */ +void wg_packet_decrypt_worker(struct work_struct *work); + +/* send.c APIs: */ +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry); +void wg_packet_send_handshake_response(struct wg_peer *peer); +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index); +void wg_packet_send_keepalive(struct wg_peer *peer); +void wg_packet_purge_staged_packets(struct wg_peer *peer); +void wg_packet_send_staged_packets(struct wg_peer *peer); +/* Workqueue workers: */ +void wg_packet_handshake_send_worker(struct work_struct *work); +void wg_packet_tx_worker(struct work_struct *work); +void wg_packet_encrypt_worker(struct work_struct *work); + +enum packet_state { + PACKET_STATE_UNCRYPTED, + PACKET_STATE_CRYPTED, + PACKET_STATE_DEAD +}; + +struct packet_cb { + u64 nonce; + struct noise_keypair *keypair; + atomic_t state; + u32 mtu; + u8 ds; +}; + +#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) +#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) + +static inline bool wg_check_packet_protocol(struct sk_buff *skb) +{ + __be16 real_protocol = ip_tunnel_parse_protocol(skb); + return real_protocol && skb->protocol == real_protocol; +} + +static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating) +{ + const int pfmemalloc = skb->pfmemalloc; + u32 hash = skb->hash; + u8 l4_hash = skb->l4_hash; + u8 sw_hash = skb->sw_hash; + + skb_scrub_packet(skb, true); + memset(&skb->headers_start, 0, + offsetof(struct sk_buff, headers_end) - + offsetof(struct sk_buff, headers_start)); + skb->pfmemalloc = pfmemalloc; + if (encapsulating) { + skb->hash = hash; + skb->l4_hash = l4_hash; + skb->sw_hash = sw_hash; + } + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; + skb->mac_len = 0; + skb->dev = NULL; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif + skb_reset_redirect(skb); + skb->hdr_len = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_probe_transport_header(skb); + skb_reset_inner_headers(skb); +} + +static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) +{ + unsigned int cpu = *stored_cpu, cpu_index, i; + + if (unlikely(cpu == nr_cpumask_bits || + !cpumask_test_cpu(cpu, cpu_online_mask))) { + cpu_index = id % cpumask_weight(cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < cpu_index; ++i) + cpu = cpumask_next(cpu, cpu_online_mask); + *stored_cpu = cpu; + } + return cpu; +} + +/* This function is racy, in the sense that next is unlocked, so it could return + * the same CPU twice. A race-free version of this would be to instead store an + * atomic sequence number, do an increment-and-return, and then iterate through + * every possible CPU until we get to that index -- choose_cpu. However that's + * a bit slower, and it doesn't seem like this potential race actually + * introduces any performance loss, so we live with it. + */ +static inline int wg_cpumask_next_online(int *next) +{ + int cpu = *next; + + while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) + cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + return cpu; +} + +void wg_prev_queue_init(struct prev_queue *queue); + +/* Multi producer */ +bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb); + +/* Single consumer */ +struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue); + +/* Single consumer */ +static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue) +{ + if (queue->peeked) + return queue->peeked; + queue->peeked = wg_prev_queue_dequeue(queue); + return queue->peeked; +} + +/* Single consumer */ +static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue) +{ + queue->peeked = NULL; +} + +static inline int wg_queue_enqueue_per_device_and_peer( + struct crypt_queue *device_queue, struct prev_queue *peer_queue, + struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) +{ + int cpu; + + atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); + /* We first queue this up for the peer ingestion, but the consumer + * will wait for the state to change to CRYPTED or DEAD before. + */ + if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb))) + return -ENOSPC; + + /* Then we queue it up in the device queue, which consumes the + * packet as soon as it can. + */ + cpu = wg_cpumask_next_online(next_cpu); + if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) + return -EPIPE; + queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); + return 0; +} + +static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id), + peer->device->packet_crypt_wq, &peer->transmit_packet_work); + wg_peer_put(peer); +} + +static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + napi_schedule(&peer->napi); + wg_peer_put(peer); +} + +#ifdef DEBUG +bool wg_packet_counter_selftest(void); +#endif + +#endif /* _WG_QUEUEING_H */ diff --git a/net/wireguard/ratelimiter.c b/net/wireguard/ratelimiter.c new file mode 100644 index 000000000000..ecee41f528a5 --- /dev/null +++ b/net/wireguard/ratelimiter.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifdef COMPAT_CANNOT_DEPRECIATE_BH_RCU +/* We normally alias all non-_bh functions to the _bh ones in the compat layer, + * but that's not appropriate here, where we actually do want non-_bh ones. + */ +#undef synchronize_rcu +#define synchronize_rcu old_synchronize_rcu +#undef call_rcu +#define call_rcu old_call_rcu +#undef rcu_barrier +#define rcu_barrier old_rcu_barrier +#endif + +#include "ratelimiter.h" +#include +#include +#include +#include + +static struct kmem_cache *entry_cache; +static hsiphash_key_t key; +static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock"); +static DEFINE_MUTEX(init_lock); +static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */ +static atomic_t total_entries = ATOMIC_INIT(0); +static unsigned int max_entries, table_size; +static void wg_ratelimiter_gc_entries(struct work_struct *); +static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries); +static struct hlist_head *table_v4; +#if IS_ENABLED(CONFIG_IPV6) +static struct hlist_head *table_v6; +#endif + +struct ratelimiter_entry { + u64 last_time_ns, tokens, ip; + void *net; + spinlock_t lock; + struct hlist_node hash; + struct rcu_head rcu; +}; + +enum { + PACKETS_PER_SECOND = 20, + PACKETS_BURSTABLE = 5, + PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, + TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE +}; + +static void entry_free(struct rcu_head *rcu) +{ + kmem_cache_free(entry_cache, + container_of(rcu, struct ratelimiter_entry, rcu)); + atomic_dec(&total_entries); +} + +static void entry_uninit(struct ratelimiter_entry *entry) +{ + hlist_del_rcu(&entry->hash); + call_rcu(&entry->rcu, entry_free); +} + +/* Calling this function with a NULL work uninits all entries. */ +static void wg_ratelimiter_gc_entries(struct work_struct *work) +{ + const u64 now = ktime_get_coarse_boottime_ns(); + struct ratelimiter_entry *entry; + struct hlist_node *temp; + unsigned int i; + + for (i = 0; i < table_size; ++i) { + spin_lock(&table_lock); + hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#if IS_ENABLED(CONFIG_IPV6) + hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#endif + spin_unlock(&table_lock); + if (likely(work)) + cond_resched(); + } + if (likely(work)) + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); +} + +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) +{ + /* We only take the bottom half of the net pointer, so that we can hash + * 3 words in the end. This way, siphash's len param fits into the final + * u32, and we don't incur an extra round. + */ + const u32 net_word = (unsigned long)net; + struct ratelimiter_entry *entry; + struct hlist_head *bucket; + u64 ip; + + if (skb->protocol == htons(ETH_P_IP)) { + ip = (u64 __force)ip_hdr(skb)->saddr; + bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) & + (table_size - 1)]; + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* Only use 64 bits, so as to ratelimit the whole /64. */ + memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip)); + bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) & + (table_size - 1)]; + } +#endif + else + return false; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, bucket, hash) { + if (entry->net == net && entry->ip == ip) { + u64 now, tokens; + bool ret; + /* Quasi-inspired by nft_limit.c, but this is actually a + * slightly different algorithm. Namely, we incorporate + * the burst as part of the maximum tokens, rather than + * as part of the rate. + */ + spin_lock(&entry->lock); + now = ktime_get_coarse_boottime_ns(); + tokens = min_t(u64, TOKEN_MAX, + entry->tokens + now - + entry->last_time_ns); + entry->last_time_ns = now; + ret = tokens >= PACKET_COST; + entry->tokens = ret ? tokens - PACKET_COST : tokens; + spin_unlock(&entry->lock); + rcu_read_unlock(); + return ret; + } + } + rcu_read_unlock(); + + if (atomic_inc_return(&total_entries) > max_entries) + goto err_oom; + + entry = kmem_cache_alloc(entry_cache, GFP_KERNEL); + if (unlikely(!entry)) + goto err_oom; + + entry->net = net; + entry->ip = ip; + INIT_HLIST_NODE(&entry->hash); + spin_lock_init(&entry->lock); + entry->last_time_ns = ktime_get_coarse_boottime_ns(); + entry->tokens = TOKEN_MAX - PACKET_COST; + spin_lock(&table_lock); + hlist_add_head_rcu(&entry->hash, bucket); + spin_unlock(&table_lock); + return true; + +err_oom: + atomic_dec(&total_entries); + return false; +} + +int wg_ratelimiter_init(void) +{ + mutex_lock(&init_lock); + if (++init_refcnt != 1) + goto out; + + entry_cache = KMEM_CACHE(ratelimiter_entry, 0); + if (!entry_cache) + goto err; + + /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting, + * but what it shares in common is that it uses a massive hashtable. So, + * we borrow their wisdom about good table sizes on different systems + * dependent on RAM. This calculation here comes from there. + */ + table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 : + max_t(unsigned long, 16, roundup_pow_of_two( + (totalram_pages() << PAGE_SHIFT) / + (1U << 14) / sizeof(struct hlist_head))); + max_entries = table_size * 8; + + table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL); + if (unlikely(!table_v4)) + goto err_kmemcache; + +#if IS_ENABLED(CONFIG_IPV6) + table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL); + if (unlikely(!table_v6)) { + kvfree(table_v4); + goto err_kmemcache; + } +#endif + + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); + get_random_bytes(&key, sizeof(key)); +out: + mutex_unlock(&init_lock); + return 0; + +err_kmemcache: + kmem_cache_destroy(entry_cache); +err: + --init_refcnt; + mutex_unlock(&init_lock); + return -ENOMEM; +} + +void wg_ratelimiter_uninit(void) +{ + mutex_lock(&init_lock); + if (!init_refcnt || --init_refcnt) + goto out; + + cancel_delayed_work_sync(&gc_work); + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + kvfree(table_v4); +#if IS_ENABLED(CONFIG_IPV6) + kvfree(table_v6); +#endif + kmem_cache_destroy(entry_cache); +out: + mutex_unlock(&init_lock); +} + +#include "selftest/ratelimiter.c" diff --git a/net/wireguard/ratelimiter.h b/net/wireguard/ratelimiter.h new file mode 100644 index 000000000000..83067f71ea99 --- /dev/null +++ b/net/wireguard/ratelimiter.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_RATELIMITER_H +#define _WG_RATELIMITER_H + +#include + +int wg_ratelimiter_init(void); +void wg_ratelimiter_uninit(void); +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net); + +#ifdef DEBUG +bool wg_ratelimiter_selftest(void); +#endif + +#endif /* _WG_RATELIMITER_H */ diff --git a/net/wireguard/receive.c b/net/wireguard/receive.c new file mode 100644 index 000000000000..214889edb48e --- /dev/null +++ b/net/wireguard/receive.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" +#include "device.h" +#include "peer.h" +#include "timers.h" +#include "messages.h" +#include "cookie.h" +#include "socket.h" + +#include +#include +#include +#include +#include + +/* Must be called with bh disabled. */ +static void update_rx_stats(struct wg_peer *peer, size_t len) +{ + struct pcpu_sw_netstats *tstats = + get_cpu_ptr(peer->device->dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + ++tstats->rx_packets; + tstats->rx_bytes += len; + peer->rx_bytes += len; + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); +} + +#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) + +static size_t validate_header_len(struct sk_buff *skb) +{ + if (unlikely(skb->len < sizeof(struct message_header))) + return 0; + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && + skb->len >= MESSAGE_MINIMUM_LENGTH) + return sizeof(struct message_data); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && + skb->len == sizeof(struct message_handshake_initiation)) + return sizeof(struct message_handshake_initiation); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && + skb->len == sizeof(struct message_handshake_response)) + return sizeof(struct message_handshake_response); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && + skb->len == sizeof(struct message_handshake_cookie)) + return sizeof(struct message_handshake_cookie); + return 0; +} + +static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) +{ + size_t data_offset, data_len, header_len; + struct udphdr *udp; + + if (unlikely(!wg_check_packet_protocol(skb) || + skb_transport_header(skb) < skb->head || + (skb_transport_header(skb) + sizeof(struct udphdr)) > + skb_tail_pointer(skb))) + return -EINVAL; /* Bogus IP header */ + udp = udp_hdr(skb); + data_offset = (u8 *)udp - skb->data; + if (unlikely(data_offset > U16_MAX || + data_offset + sizeof(struct udphdr) > skb->len)) + /* Packet has offset at impossible location or isn't big enough + * to have UDP fields. + */ + return -EINVAL; + data_len = ntohs(udp->len); + if (unlikely(data_len < sizeof(struct udphdr) || + data_len > skb->len - data_offset)) + /* UDP packet is reporting too small of a size or lying about + * its size. + */ + return -EINVAL; + data_len -= sizeof(struct udphdr); + data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; + if (unlikely(!pskb_may_pull(skb, + data_offset + sizeof(struct message_header)) || + pskb_trim(skb, data_len + data_offset) < 0)) + return -EINVAL; + skb_pull(skb, data_offset); + if (unlikely(skb->len != data_len)) + /* Final len does not agree with calculated len */ + return -EINVAL; + header_len = validate_header_len(skb); + if (unlikely(!header_len)) + return -EINVAL; + __skb_push(skb, data_offset); + if (unlikely(!pskb_may_pull(skb, data_offset + header_len))) + return -EINVAL; + __skb_pull(skb, data_offset); + return 0; +} + +static void wg_receive_handshake_packet(struct wg_device *wg, + struct sk_buff *skb) +{ + enum cookie_mac_state mac_state; + struct wg_peer *peer = NULL; + /* This is global, so that our load calculation applies to the whole + * system. We don't care about races with it at all. + */ + static u64 last_under_load; + bool packet_needs_cookie; + bool under_load; + + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) { + net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", + wg->dev->name, skb); + wg_cookie_message_consume( + (struct message_handshake_cookie *)skb->data, wg); + return; + } + + under_load = atomic_read(&wg->handshake_queue_len) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; + if (under_load) { + last_under_load = ktime_get_coarse_boottime_ns(); + } else if (last_under_load) { + under_load = !wg_birthdate_has_expired(last_under_load, 1); + if (!under_load) + last_under_load = 0; + } + mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, + under_load); + if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || + (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) { + packet_needs_cookie = false; + } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) { + packet_needs_cookie = true; + } else { + net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", + wg->dev->name, skb); + return; + } + + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): { + struct message_handshake_initiation *message = + (struct message_handshake_initiation *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_initiation(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + wg_packet_send_handshake_response(peer); + break; + } + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): { + struct message_handshake_response *message = + (struct message_handshake_response *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_response(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_handshake_complete(peer); + /* Calling this function will either send any existing + * packets in the queue and not send a keepalive, which + * is the best case, Or, if there's nothing in the + * queue, it will send a keepalive, in order to give + * immediate confirmation of the session. + */ + wg_packet_send_keepalive(peer); + } + break; + } + } + + if (unlikely(!peer)) { + WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n"); + return; + } + + local_bh_disable(); + update_rx_stats(peer, skb->len); + local_bh_enable(); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_peer_put(peer); +} + +void wg_packet_handshake_receive_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue); + struct sk_buff *skb; + + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { + wg_receive_handshake_packet(wg, skb); + dev_kfree_skb(skb); + atomic_dec(&wg->handshake_queue_len); + cond_resched(); + } +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send; + + if (peer->sent_lastminute_handshake) + return; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + send = keypair && READ_ONCE(keypair->sending.is_valid) && + keypair->i_am_the_initiator && + wg_birthdate_has_expired(keypair->sending.birthdate, + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT); + rcu_read_unlock_bh(); + + if (unlikely(send)) { + peer->sent_lastminute_handshake = true; + wg_packet_send_queued_handshake_initiation(peer, false); + } +} + +static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair, + simd_context_t *simd_context) +{ + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct sk_buff *trailer; + unsigned int offset; + int num_frags; + + if (unlikely(!keypair)) + return false; + + if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || + wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || + keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { + WRITE_ONCE(keypair->receiving.is_valid, false); + return false; + } + + PACKET_CB(skb)->nonce = + le64_to_cpu(((struct message_data *)skb->data)->counter); + + /* We ensure that the network header is part of the packet before we + * call skb_cow_data, so that there's no chance that data is removed + * from the skb, so that later we can extract the original endpoint. + */ + offset = skb->data - skb_network_header(skb); + skb_push(skb, offset); + num_frags = skb_cow_data(skb, 0, &trailer); + offset += sizeof(struct message_data); + skb_pull(skb, offset); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) + return false; + + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->receiving.key, + simd_context)) + return false; + + /* Another ugly situation of pushing and pulling the header so as to + * keep endpoint information intact. + */ + skb_push(skb, offset); + if (pskb_trim(skb, skb->len - noise_encrypted_len(0))) + return false; + skb_pull(skb, offset); + + return true; +} + +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ +static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter) +{ + unsigned long index, index_current, top, i; + bool ret = false; + + spin_lock_bh(&counter->lock); + + if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 || + their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < + counter->counter)) + goto out; + + index = their_counter >> ilog2(BITS_PER_LONG); + + if (likely(their_counter > counter->counter)) { + index_current = counter->counter >> ilog2(BITS_PER_LONG); + top = min_t(unsigned long, index - index_current, + COUNTER_BITS_TOTAL / BITS_PER_LONG); + for (i = 1; i <= top; ++i) + counter->backtrack[(i + index_current) & + ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; + counter->counter = their_counter; + } + + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), + &counter->backtrack[index]); + +out: + spin_unlock_bh(&counter->lock); + return ret; +} + +#include "selftest/counter.c" + +static void wg_packet_consume_data_done(struct wg_peer *peer, + struct sk_buff *skb, + struct endpoint *endpoint) +{ + struct net_device *dev = peer->device->dev; + unsigned int len, len_before_trim; + struct wg_peer *routed_peer; + + wg_socket_set_peer_endpoint(peer, endpoint); + + if (unlikely(wg_noise_received_with_keypair(&peer->keypairs, + PACKET_CB(skb)->keypair))) { + wg_timers_handshake_complete(peer); + wg_packet_send_staged_packets(peer); + } + + keep_key_fresh(peer); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + + /* A packet with length 0 is a keepalive packet */ + if (unlikely(!skb->len)) { + update_rx_stats(peer, message_data_len(0)); + net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + goto packet_processed; + } + + wg_timers_data_received(peer); + + if (unlikely(skb_network_header(skb) < skb->head)) + goto dishonest_packet_size; + if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && + (ip_hdr(skb)->version == 4 || + (ip_hdr(skb)->version == 6 && + pskb_network_may_pull(skb, sizeof(struct ipv6hdr))))))) + goto dishonest_packet_type; + + skb->dev = dev; + /* We've already verified the Poly1305 auth tag, which means this packet + * was not modified in transit. We can therefore tell the networking + * stack that all checksums of every layer of encapsulation have already + * been checked "by the hardware" and therefore is unnecessary to check + * again in software. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; +#ifndef COMPAT_CANNOT_USE_CSUM_LEVEL + skb->csum_level = ~0; /* All levels */ +#endif + skb->protocol = ip_tunnel_parse_protocol(skb); + if (skb->protocol == htons(ETH_P_IP)) { + len = ntohs(ip_hdr(skb)->tot_len); + if (unlikely(len < sizeof(struct iphdr))) + goto dishonest_packet_size; + INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + len = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); + INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb))); + } else { + goto dishonest_packet_type; + } + + if (unlikely(len > skb->len)) + goto dishonest_packet_size; + len_before_trim = skb->len; + if (unlikely(pskb_trim(skb, len))) + goto packet_processed; + + routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips, + skb); + wg_peer_put(routed_peer); /* We don't need the extra reference. */ + + if (unlikely(routed_peer != peer)) + goto dishonest_packet_peer; + + napi_gro_receive(&peer->napi, skb); + update_rx_stats(peer, message_data_len(len_before_trim)); + return; + +dishonest_packet_peer: + net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", + dev->name, skb, peer->internal_id, + &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_type: + net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_size: + net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + goto packet_processed; +packet_processed: + dev_kfree_skb(skb); +} + +int wg_packet_rx_poll(struct napi_struct *napi, int budget) +{ + struct wg_peer *peer = container_of(napi, struct wg_peer, napi); + struct noise_keypair *keypair; + struct endpoint endpoint; + enum packet_state state; + struct sk_buff *skb; + int work_done = 0; + bool free; + + if (unlikely(budget <= 0)) + return 0; + + while ((skb = wg_prev_queue_peek(&peer->rx_queue)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != + PACKET_STATE_UNCRYPTED) { + wg_prev_queue_drop_peeked(&peer->rx_queue); + keypair = PACKET_CB(skb)->keypair; + free = true; + + if (unlikely(state != PACKET_STATE_CRYPTED)) + goto next; + + if (unlikely(!counter_validate(&keypair->receiving_counter, + PACKET_CB(skb)->nonce))) { + net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", + peer->device->dev->name, + PACKET_CB(skb)->nonce, + keypair->receiving_counter.counter); + goto next; + } + + if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) + goto next; + + wg_reset_packet(skb, false); + wg_packet_consume_data_done(peer, skb, &endpoint); + free = false; + +next: + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + if (unlikely(free)) + dev_kfree_skb(skb); + + if (++work_done >= budget) + break; + } + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +void wg_packet_decrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + simd_context_t simd_context; + struct sk_buff *skb; + + simd_get(&simd_context); + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = + likely(decrypt_packet(skb, PACKET_CB(skb)->keypair, + &simd_context)) ? + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; + wg_queue_enqueue_per_peer_rx(skb, state); + simd_relax(&simd_context); + } + + simd_put(&simd_context); +} + +static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) +{ + __le32 idx = ((struct message_data *)skb->data)->key_idx; + struct wg_peer *peer = NULL; + int ret; + + rcu_read_lock_bh(); + PACKET_CB(skb)->keypair = + (struct noise_keypair *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx, + &peer); + if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair))) + goto err_keypair; + + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, + wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD); + if (likely(!ret || ret == -EPIPE)) { + rcu_read_unlock_bh(); + return; + } +err: + wg_noise_keypair_put(PACKET_CB(skb)->keypair, false); +err_keypair: + rcu_read_unlock_bh(); + wg_peer_put(peer); + dev_kfree_skb(skb); +} + +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) +{ + if (unlikely(prepare_skb_header(skb, wg) < 0)) + goto err; + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): + case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { + int cpu, ret = -EBUSY; + + if (unlikely(!rng_is_initialized())) + goto drop; + if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) { + if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) { + ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb); + spin_unlock_bh(&wg->handshake_queue.ring.producer_lock); + } + } else + ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb); + if (ret) { + drop: + net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + atomic_inc(&wg->handshake_queue_len); + cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu); + /* Queues up a call to packet_process_queued_handshake_packets(skb): */ + queue_work_on(cpu, wg->handshake_receive_wq, + &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work); + break; + } + case cpu_to_le32(MESSAGE_DATA): + PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); + wg_packet_consume_data(wg, skb); + break; + default: + WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n"); + goto err; + } + return; + +err: + dev_kfree_skb(skb); +} diff --git a/net/wireguard/selftest/allowedips.c b/net/wireguard/selftest/allowedips.c new file mode 100644 index 000000000000..e173204ae7d7 --- /dev/null +++ b/net/wireguard/selftest/allowedips.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This contains some basic static unit tests for the allowedips data structure. + * It also has two additional modes that are disabled and meant to be used by + * folks directly playing with this file. If you define the macro + * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in + * memory, it will be printed out as KERN_DEBUG in a format that can be passed + * to graphviz (the dot command) to visualize it. If you define the macro + * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of + * randomized tests done against a trivial implementation, which may take + * upwards of a half-hour to complete. There's no set of users who should be + * enabling these, and the only developers that should go anywhere near these + * nobs are the ones who are reading this comment. + */ + +#ifdef DEBUG + +#include + +static __init void print_node(struct allowedips_node *node, u8 bits) +{ + char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; + char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; + u8 ip1[16], ip2[16], cidr1, cidr2; + char *style = "dotted"; + u32 color = 0; + + if (node == NULL) + return; + if (bits == 32) { + fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; + fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; + } else if (bits == 128) { + fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; + fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; + } + if (node->peer) { + hsiphash_key_t key = { { 0 } }; + + memcpy(&key, &node->peer, sizeof(node->peer)); + color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | + hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | + hsiphash_1u32(0xabad1dea, &key) % 200; + style = "bold"; + } + wg_allowedips_read_node(node, ip1, &cidr1); + printk(fmt_declaration, ip1, cidr1, style, color); + if (node->bit[0]) { + wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2); + printk(fmt_connection, ip1, cidr1, ip2, cidr2); + } + if (node->bit[1]) { + wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2); + printk(fmt_connection, ip1, cidr1, ip2, cidr2); + } + if (node->bit[0]) + print_node(rcu_dereference_raw(node->bit[0]), bits); + if (node->bit[1]) + print_node(rcu_dereference_raw(node->bit[1]), bits); +} + +static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) +{ + printk(KERN_DEBUG "digraph trie {\n"); + print_node(rcu_dereference_raw(top), bits); + printk(KERN_DEBUG "}\n"); +} + +enum { + NUM_PEERS = 2000, + NUM_RAND_ROUTES = 400, + NUM_MUTATED_ROUTES = 100, + NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30 +}; + +struct horrible_allowedips { + struct hlist_head head; +}; + +struct horrible_allowedips_node { + struct hlist_node table; + union nf_inet_addr ip; + union nf_inet_addr mask; + u8 ip_version; + void *value; +}; + +static __init void horrible_allowedips_init(struct horrible_allowedips *table) +{ + INIT_HLIST_HEAD(&table->head); +} + +static __init void horrible_allowedips_free(struct horrible_allowedips *table) +{ + struct horrible_allowedips_node *node; + struct hlist_node *h; + + hlist_for_each_entry_safe(node, h, &table->head, table) { + hlist_del(&node->table); + kfree(node); + } +} + +static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) +{ + union nf_inet_addr mask; + + memset(&mask, 0, sizeof(mask)); + memset(&mask.all, 0xff, cidr / 8); + if (cidr % 32) + mask.all[cidr / 32] = (__force u32)htonl( + (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); + return mask; +} + +static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet) +{ + return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) + + hweight32(subnet.all[2]) + hweight32(subnet.all[3]); +} + +static __init inline void +horrible_mask_self(struct horrible_allowedips_node *node) +{ + if (node->ip_version == 4) { + node->ip.ip &= node->mask.ip; + } else if (node->ip_version == 6) { + node->ip.ip6[0] &= node->mask.ip6[0]; + node->ip.ip6[1] &= node->mask.ip6[1]; + node->ip.ip6[2] &= node->mask.ip6[2]; + node->ip.ip6[3] &= node->mask.ip6[3]; + } +} + +static __init inline bool +horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip) +{ + return (ip->s_addr & node->mask.ip) == node->ip.ip; +} + +static __init inline bool +horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip) +{ + return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] && + (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] && + (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] && + (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; +} + +static __init void +horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node) +{ + struct horrible_allowedips_node *other = NULL, *where = NULL; + u8 my_cidr = horrible_mask_to_cidr(node->mask); + + hlist_for_each_entry(other, &table->head, table) { + if (other->ip_version == node->ip_version && + !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) && + !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) { + other->value = node->value; + kfree(node); + return; + } + } + hlist_for_each_entry(other, &table->head, table) { + where = other; + if (horrible_mask_to_cidr(other->mask) <= my_cidr) + break; + } + if (!other && !where) + hlist_add_head(&node->table, &table->head); + else if (!other) + hlist_add_behind(&node->table, &where->table); + else + hlist_add_before(&node->table, &where->table); +} + +static __init int +horrible_allowedips_insert_v4(struct horrible_allowedips *table, + struct in_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 4; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init int +horrible_allowedips_insert_v6(struct horrible_allowedips *table, + struct in6_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in6 = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 6; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init void * +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip) +{ + struct horrible_allowedips_node *node; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version == 4 && horrible_match_v4(node, ip)) + return node->value; + } + return NULL; +} + +static __init void * +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip) +{ + struct horrible_allowedips_node *node; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version == 6 && horrible_match_v6(node, ip)) + return node->value; + } + return NULL; +} + + +static __init void +horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value) +{ + struct horrible_allowedips_node *node; + struct hlist_node *h; + + hlist_for_each_entry_safe(node, h, &table->head, table) { + if (node->value != value) + continue; + hlist_del(&node->table); + kfree(node); + } + +} + +static __init bool randomized_test(void) +{ + unsigned int i, j, k, mutate_amount, cidr; + u8 ip[16], mutate_mask[16], mutated[16]; + struct wg_peer **peers, *peer; + struct horrible_allowedips h; + DEFINE_MUTEX(mutex); + struct allowedips t; + bool ret = false; + + mutex_init(&mutex); + + wg_allowedips_init(&t); + horrible_allowedips_init(&h); + + peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL); + if (unlikely(!peers)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + for (i = 0; i < NUM_PEERS; ++i) { + peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL); + if (unlikely(!peers[i])) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + kref_init(&peers[i]->refcount); + INIT_LIST_HEAD(&peers[i]->allowedips_list); + } + + mutex_lock(&mutex); + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 4); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 4); + prandom_bytes(mutate_mask, 4); + mutate_amount = prandom_u32_max(32); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, + (struct in_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, + (struct in_addr *)mutated, cidr, peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 16); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 16); + prandom_bytes(mutate_mask, 16); + mutate_amount = prandom_u32_max(128); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, + (struct in6_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6( + &h, (struct in6_addr *)mutated, cidr, + peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + mutex_unlock(&mutex); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + for (j = 0;; ++j) { + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 4); + if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { + horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip); + pr_err("allowedips random v4 self-test: FAIL\n"); + goto free; + } + prandom_bytes(ip, 16); + if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { + pr_err("allowedips random v6 self-test: FAIL\n"); + goto free; + } + } + if (j >= NUM_PEERS) + break; + mutex_lock(&mutex); + wg_allowedips_remove_by_peer(&t, peers[j], &mutex); + mutex_unlock(&mutex); + horrible_allowedips_remove_by_value(&h, peers[j]); + } + + if (t.root4 || t.root6) { + pr_err("allowedips random self-test removal: FAIL\n"); + goto free; + } + + ret = true; + +free: + mutex_lock(&mutex); +free_locked: + wg_allowedips_free(&t, &mutex); + mutex_unlock(&mutex); + horrible_allowedips_free(&h); + if (peers) { + for (i = 0; i < NUM_PEERS; ++i) + kfree(peers[i]); + } + kfree(peers); + return ret; +} + +static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d) +{ + static struct in_addr ip; + u8 *split = (u8 *)&ip; + + split[0] = a; + split[1] = b; + split[2] = c; + split[3] = d; + return &ip; +} + +static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d) +{ + static struct in6_addr ip; + __be32 *split = (__be32 *)&ip; + + split[0] = cpu_to_be32(a); + split[1] = cpu_to_be32(b); + split[2] = cpu_to_be32(c); + split[3] = cpu_to_be32(d); + return &ip; +} + +static __init struct wg_peer *init_peer(void) +{ + struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL); + + if (!peer) + return NULL; + kref_init(&peer->refcount); + INIT_LIST_HEAD(&peer->allowedips_list); + return peer; +} + +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \ + wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ + cidr, mem, &mutex) + +#define maybe_fail() do { \ + ++i; \ + if (!_s) { \ + pr_info("allowedips self-test %zu: FAIL\n", i); \ + success = false; \ + } \ + } while (0) + +#define test(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) == (mem); \ + maybe_fail(); \ + } while (0) + +#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) != (mem); \ + maybe_fail(); \ + } while (0) + +#define test_boolean(cond) do { \ + bool _s = (cond); \ + maybe_fail(); \ + } while (0) + +bool __init wg_allowedips_selftest(void) +{ + bool found_a = false, found_b = false, found_c = false, found_d = false, + found_e = false, found_other = false; + struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(), + *d = init_peer(), *e = init_peer(), *f = init_peer(), + *g = init_peer(), *h = init_peer(); + struct allowedips_node *iter_node; + bool success = false; + struct allowedips t; + DEFINE_MUTEX(mutex); + struct in6_addr ip; + size_t i = 0, count = 0; + __be64 part; + + mutex_init(&mutex); + mutex_lock(&mutex); + wg_allowedips_init(&t); + + if (!a || !b || !c || !d || !e || !f || !g || !h) { + pr_err("allowedips self-test malloc: FAIL\n"); + goto free; + } + + insert(4, a, 192, 168, 4, 0, 24); + insert(4, b, 192, 168, 4, 4, 32); + insert(4, c, 192, 168, 0, 0, 16); + insert(4, d, 192, 95, 5, 64, 27); + /* replaces previous entry, and maskself is required */ + insert(4, c, 192, 95, 5, 65, 27); + insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); + insert(4, e, 0, 0, 0, 0, 0); + insert(6, e, 0, 0, 0, 0, 0); + /* replaces previous entry */ + insert(6, f, 0, 0, 0, 0, 0); + insert(6, g, 0x24046800, 0, 0, 0, 32); + /* maskself is required */ + insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); + insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); + insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); + insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + insert(4, g, 64, 15, 112, 0, 20); + /* maskself is required */ + insert(4, h, 64, 15, 123, 211, 25); + insert(4, a, 10, 0, 0, 0, 25); + insert(4, b, 10, 0, 0, 128, 25); + insert(4, a, 10, 1, 0, 0, 30); + insert(4, b, 10, 1, 0, 4, 30); + insert(4, c, 10, 1, 0, 8, 29); + insert(4, d, 10, 1, 0, 16, 29); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + success = true; + + test(4, a, 192, 168, 4, 20); + test(4, a, 192, 168, 4, 0); + test(4, b, 192, 168, 4, 4); + test(4, c, 192, 168, 200, 182); + test(4, c, 192, 95, 5, 68); + test(4, e, 192, 95, 5, 96); + test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); + test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); + test(6, f, 0x26075300, 0x60006b01, 0, 0); + test(6, g, 0x24046800, 0x40040806, 0, 0x1006); + test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0, 0); + test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); + test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); + test(4, g, 64, 15, 116, 26); + test(4, g, 64, 15, 127, 3); + test(4, g, 64, 15, 123, 1); + test(4, h, 64, 15, 123, 128); + test(4, h, 64, 15, 123, 129); + test(4, a, 10, 0, 0, 52); + test(4, b, 10, 0, 0, 220); + test(4, a, 10, 1, 0, 2); + test(4, b, 10, 1, 0, 6); + test(4, c, 10, 1, 0, 10); + test(4, d, 10, 1, 0, 20); + + insert(4, a, 1, 0, 0, 0, 32); + insert(4, a, 64, 0, 0, 0, 32); + insert(4, a, 128, 0, 0, 0, 32); + insert(4, a, 192, 0, 0, 0, 32); + insert(4, a, 255, 0, 0, 0, 32); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 1, 0, 0, 0); + test_negative(4, a, 64, 0, 0, 0); + test_negative(4, a, 128, 0, 0, 0); + test_negative(4, a, 192, 0, 0, 0); + test_negative(4, a, 255, 0, 0, 0); + + wg_allowedips_free(&t, &mutex); + wg_allowedips_init(&t); + insert(4, a, 192, 168, 0, 0, 16); + insert(4, a, 192, 168, 0, 0, 24); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 192, 168, 0, 1); + + /* These will hit the WARN_ON(len >= 128) in free_node if something + * goes wrong. + */ + for (i = 0; i < 128; ++i) { + part = cpu_to_be64(~(1LLU << (i % 64))); + memset(&ip, 0xff, 16); + memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); + wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex); + } + + wg_allowedips_free(&t, &mutex); + + wg_allowedips_init(&t); + insert(4, a, 192, 95, 5, 93, 27); + insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(4, a, 10, 1, 0, 20, 29); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21); + list_for_each_entry(iter_node, &a->allowedips_list, peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family = wg_allowedips_read_node(iter_node, ip, &cidr); + + count++; + + if (cidr == 27 && family == AF_INET && + !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr))) + found_a = true; + else if (cidr == 128 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543), + sizeof(struct in6_addr))) + found_b = true; + else if (cidr == 29 && family == AF_INET && + !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr))) + found_c = true; + else if (cidr == 83 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0), + sizeof(struct in6_addr))) + found_d = true; + else if (cidr == 21 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075000, 0, 0, 0), + sizeof(struct in6_addr))) + found_e = true; + else + found_other = true; + } + test_boolean(count == 5); + test_boolean(found_a); + test_boolean(found_b); + test_boolean(found_c); + test_boolean(found_d); + test_boolean(found_e); + test_boolean(!found_other); + + if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success) + success = randomized_test(); + + if (success) + pr_info("allowedips self-tests: pass\n"); + +free: + wg_allowedips_free(&t, &mutex); + kfree(a); + kfree(b); + kfree(c); + kfree(d); + kfree(e); + kfree(f); + kfree(g); + kfree(h); + mutex_unlock(&mutex); + + return success; +} + +#undef test_negative +#undef test +#undef remove +#undef insert +#undef init_peer + +#endif diff --git a/net/wireguard/selftest/counter.c b/net/wireguard/selftest/counter.c new file mode 100644 index 000000000000..ec3c156bf91b --- /dev/null +++ b/net/wireguard/selftest/counter.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifdef DEBUG +bool __init wg_packet_counter_selftest(void) +{ + struct noise_replay_counter *counter; + unsigned int test_num = 0, i; + bool success = true; + + counter = kmalloc(sizeof(*counter), GFP_KERNEL); + if (unlikely(!counter)) { + pr_err("nonce counter self-test malloc: FAIL\n"); + return false; + } + +#define T_INIT do { \ + memset(counter, 0, sizeof(*counter)); \ + spin_lock_init(&counter->lock); \ + } while (0) +#define T_LIM (COUNTER_WINDOW_SIZE + 1) +#define T(n, v) do { \ + ++test_num; \ + if (counter_validate(counter, n) != (v)) { \ + pr_err("nonce counter self-test %u: FAIL\n", \ + test_num); \ + success = false; \ + } \ + } while (0) + + T_INIT; + /* 1 */ T(0, true); + /* 2 */ T(1, true); + /* 3 */ T(1, false); + /* 4 */ T(9, true); + /* 5 */ T(8, true); + /* 6 */ T(7, true); + /* 7 */ T(7, false); + /* 8 */ T(T_LIM, true); + /* 9 */ T(T_LIM - 1, true); + /* 10 */ T(T_LIM - 1, false); + /* 11 */ T(T_LIM - 2, true); + /* 12 */ T(2, true); + /* 13 */ T(2, false); + /* 14 */ T(T_LIM + 16, true); + /* 15 */ T(3, false); + /* 16 */ T(T_LIM + 16, false); + /* 17 */ T(T_LIM * 4, true); + /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true); + /* 19 */ T(10, false); + /* 20 */ T(T_LIM * 4 - T_LIM, false); + /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false); + /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true); + /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false); + /* 24 */ T(0, false); + /* 25 */ T(REJECT_AFTER_MESSAGES, false); + /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true); + /* 27 */ T(REJECT_AFTER_MESSAGES, false); + /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false); + /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true); + /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false); + /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false); + /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false); + /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true); + /* 34 */ T(0, false); + + T_INIT; + for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i) + T(i, true); + T(0, true); + T(0, false); + + T_INIT; + for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i) + T(i, true); + T(1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;) + T(i, true); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;) + T(i, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(COUNTER_WINDOW_SIZE + 1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(0, true); + T(COUNTER_WINDOW_SIZE + 1, true); + +#undef T +#undef T_LIM +#undef T_INIT + + if (success) + pr_info("nonce counter self-tests: pass\n"); + kfree(counter); + return success; +} +#endif diff --git a/net/wireguard/selftest/ratelimiter.c b/net/wireguard/selftest/ratelimiter.c new file mode 100644 index 000000000000..007cd4457c5f --- /dev/null +++ b/net/wireguard/selftest/ratelimiter.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifdef DEBUG + +#include + +static const struct { + bool result; + unsigned int msec_to_sleep_before; +} expected_results[] __initconst = { + [0 ... PACKETS_BURSTABLE - 1] = { true, 0 }, + [PACKETS_BURSTABLE] = { false, 0 }, + [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, + [PACKETS_BURSTABLE + 2] = { false, 0 }, + [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, + [PACKETS_BURSTABLE + 4] = { true, 0 }, + [PACKETS_BURSTABLE + 5] = { false, 0 } +}; + +static __init unsigned int maximum_jiffies_at_index(int index) +{ + unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; + int i; + + for (i = 0; i <= index; ++i) + total_msecs += expected_results[i].msec_to_sleep_before; + return msecs_to_jiffies(total_msecs); +} + +static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, + struct sk_buff *skb6, struct ipv6hdr *hdr6, + int *test) +{ + unsigned long loop_start_time; + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + loop_start_time = jiffies; + + for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { + if (expected_results[i].msec_to_sleep_before) + msleep(expected_results[i].msec_to_sleep_before); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb4, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb4, &init_net)) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); + +#if IS_ENABLED(CONFIG_IPV6) + hdr6->saddr.in6_u.u6_addr32[2] = htonl(i); + hdr6->saddr.in6_u.u6_addr32[3] = htonl(i); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb6, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb6, &init_net)) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; +#endif + } + return 0; +} + +static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4, + int *test) +{ + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + + if (atomic_read(&total_entries)) + return -EXFULL; + ++(*test); + + for (i = 0; i <= max_entries; ++i) { + hdr4->saddr = htonl(i); + if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries)) + return -EXFULL; + ++(*test); + } + return 0; +} + +bool __init wg_ratelimiter_selftest(void) +{ + enum { TRIALS_BEFORE_GIVING_UP = 5000 }; + bool success = false; + int test = 0, trials; + struct sk_buff *skb4, *skb6 = NULL; + struct iphdr *hdr4; + struct ipv6hdr *hdr6 = NULL; + + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) + return true; + + BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); + + if (wg_ratelimiter_init()) + goto out; + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + goto out; + } + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + goto out; + } + ++test; + + skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL); + if (unlikely(!skb4)) + goto err_nofree; + skb4->protocol = htons(ETH_P_IP); + hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4)); + hdr4->saddr = htonl(8182); + skb_reset_network_header(skb4); + ++test; + +#if IS_ENABLED(CONFIG_IPV6) + skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL); + if (unlikely(!skb6)) { + kfree_skb(skb4); + goto err_nofree; + } + skb6->protocol = htons(ETH_P_IPV6); + hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6)); + hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212); + hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188); + skb_reset_network_header(skb6); + ++test; +#endif + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0, ret; + + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); + if (ret == -ETIMEDOUT) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(500); + continue; + } else if (ret < 0) { + test += test_count; + goto err; + } else { + test += test_count; + break; + } + } + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0; + + if (capacity_test(skb4, hdr4, &test_count) < 0) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(50); + continue; + } + test += test_count; + break; + } + + success = true; + +err: + kfree_skb(skb4); +#if IS_ENABLED(CONFIG_IPV6) + kfree_skb(skb6); +#endif +err_nofree: + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + /* Uninit one extra time to check underflow detection. */ + wg_ratelimiter_uninit(); +out: + if (success) + pr_info("ratelimiter self-tests: pass\n"); + else + pr_err("ratelimiter self-test %d: FAIL\n", test); + + return success; +} +#endif diff --git a/net/wireguard/send.c b/net/wireguard/send.c new file mode 100644 index 000000000000..55bb0c9313d7 --- /dev/null +++ b/net/wireguard/send.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "messages.h" +#include "cookie.h" + +#include +#include +#include +#include +#include +#include +#include + +static void wg_packet_send_handshake_initiation(struct wg_peer *peer) +{ + struct message_handshake_initiation packet; + + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT)) + return; /* This function is rate limited. */ + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), + HANDSHAKE_DSCP); + wg_timers_handshake_initiated(peer); + } +} + +void wg_packet_handshake_send_worker(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + transmit_handshake_work); + + wg_packet_send_handshake_initiation(peer); + wg_peer_put(peer); +} + +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry) +{ + if (!is_retry) + peer->timer_handshake_attempts = 0; + + rcu_read_lock_bh(); + /* We check last_sent_handshake here in addition to the actual function + * we're queueing up, so that we don't queue things if not strictly + * necessary: + */ + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT) || + unlikely(READ_ONCE(peer->is_dead))) + goto out; + + wg_peer_get(peer); + /* Queues up calling packet_send_queued_handshakes(peer), where we do a + * peer_put(peer) after: + */ + if (!queue_work(peer->device->handshake_send_wq, + &peer->transmit_handshake_work)) + /* If the work was already queued, we want to drop the + * extra reference: + */ + wg_peer_put(peer); +out: + rcu_read_unlock_bh(); +} + +void wg_packet_send_handshake_response(struct wg_peer *peer) +{ + struct message_handshake_response packet; + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_response(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, + sizeof(packet), + HANDSHAKE_DSCP); + } + } +} + +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index) +{ + struct message_handshake_cookie packet; + + net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", + wg->dev->name, initiating_skb); + wg_cookie_message_create(&packet, initiating_skb, sender_index, + &wg->cookie_checker); + wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, + sizeof(packet)); +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + send = keypair && READ_ONCE(keypair->sending.is_valid) && + (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES || + (keypair->i_am_the_initiator && + wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME))); + rcu_read_unlock_bh(); + + if (unlikely(send)) + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static unsigned int calculate_skb_padding(struct sk_buff *skb) +{ + unsigned int padded_size, last_unit = skb->len; + + if (unlikely(!PACKET_CB(skb)->mtu)) + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; + + /* We do this modulo business with the MTU, just in case the networking + * layer gives us a packet that's bigger than the MTU. In that case, we + * wouldn't want the final subtraction to overflow in the case of the + * padded_size being clamped. Fortunately, that's very rarely the case, + * so we optimize for that not happening. + */ + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) + last_unit %= PACKET_CB(skb)->mtu; + + padded_size = min(PACKET_CB(skb)->mtu, + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); + return padded_size - last_unit; +} + +static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair, + simd_context_t *simd_context) +{ + unsigned int padding_len, plaintext_len, trailer_len; + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct message_data *header; + struct sk_buff *trailer; + int num_frags; + + /* Force hash calculation before encryption so that flow analysis is + * consistent over the inner packet. + */ + skb_get_hash(skb); + + /* Calculate lengths. */ + padding_len = calculate_skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); + plaintext_len = skb->len + padding_len; + + /* Expand data section to have room for padding and auth tag. */ + num_frags = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + /* Set the padding to zeros, and make sure it and the auth tag are part + * of the skb. + */ + memset(skb_tail_pointer(trailer), 0, padding_len); + + /* Expand head section to have room for our header and the network + * stack's headers. + */ + if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) + return false; + + /* Finalize checksum calculation for the inner packet, if required. */ + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) + return false; + + /* Only after checksumming can we safely add on the padding at the end + * and the header. + */ + skb_set_inner_network_header(skb, 0); + header = (struct message_data *)skb_push(skb, sizeof(*header)); + header->header.type = cpu_to_le32(MESSAGE_DATA); + header->key_idx = keypair->remote_index; + header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); + pskb_put(skb, trailer, trailer_len); + + /* Now we can encrypt the scattergather segments */ + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, sizeof(struct message_data), + noise_encrypted_len(plaintext_len)) <= 0) + return false; + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->sending.key, + simd_context); +} + +void wg_packet_send_keepalive(struct wg_peer *peer) +{ + struct sk_buff *skb; + + if (skb_queue_empty(&peer->staged_packet_queue)) { + skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, + GFP_ATOMIC); + if (unlikely(!skb)) + return; + skb_reserve(skb, DATA_PACKET_HEAD_ROOM); + skb->dev = peer->device->dev; + PACKET_CB(skb)->mtu = skb->dev->mtu; + skb_queue_tail(&peer->staged_packet_queue, skb); + net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + } + + wg_packet_send_staged_packets(peer); +} + +static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first) +{ + struct sk_buff *skb, *next; + bool is_keepalive, data_sent = false; + + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + skb_list_walk_safe(first, skb, next) { + is_keepalive = skb->len == message_data_len(0); + if (likely(!wg_socket_send_skb_to_peer(peer, skb, + PACKET_CB(skb)->ds) && !is_keepalive)) + data_sent = true; + } + + if (likely(data_sent)) + wg_timers_data_sent(peer); + + keep_key_fresh(peer); +} + +void wg_packet_tx_worker(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work); + struct noise_keypair *keypair; + enum packet_state state; + struct sk_buff *first; + + while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(first)->state)) != + PACKET_STATE_UNCRYPTED) { + wg_prev_queue_drop_peeked(&peer->tx_queue); + keypair = PACKET_CB(first)->keypair; + + if (likely(state == PACKET_STATE_CRYPTED)) + wg_packet_create_data_done(peer, first); + else + kfree_skb_list(first); + + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + if (need_resched()) + cond_resched(); + } +} + +void wg_packet_encrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *first, *skb, *next; + simd_context_t simd_context; + + simd_get(&simd_context); + while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = PACKET_STATE_CRYPTED; + + skb_list_walk_safe(first, skb, next) { + if (likely(encrypt_packet(skb, + PACKET_CB(first)->keypair, + &simd_context))) { + wg_reset_packet(skb, true); + } else { + state = PACKET_STATE_DEAD; + break; + } + } + wg_queue_enqueue_per_peer_tx(first, state); + + simd_relax(&simd_context); + } + simd_put(&simd_context); +} + +static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) +{ + struct wg_device *wg = peer->device; + int ret = -EINVAL; + + rcu_read_lock_bh(); + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, + wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD); +err: + rcu_read_unlock_bh(); + if (likely(!ret || ret == -EPIPE)) + return; + wg_noise_keypair_put(PACKET_CB(first)->keypair, false); + wg_peer_put(peer); + kfree_skb_list(first); +} + +void wg_packet_purge_staged_packets(struct wg_peer *peer) +{ + spin_lock_bh(&peer->staged_packet_queue.lock); + peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + __skb_queue_purge(&peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); +} + +void wg_packet_send_staged_packets(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + struct sk_buff_head packets; + struct sk_buff *skb; + + /* Steal the current queue into our local one. */ + __skb_queue_head_init(&packets); + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice_init(&peer->staged_packet_queue, &packets); + spin_unlock_bh(&peer->staged_packet_queue.lock); + if (unlikely(skb_queue_empty(&packets))) + return; + + /* First we make sure we have a valid reference to a valid key. */ + rcu_read_lock_bh(); + keypair = wg_noise_keypair_get( + rcu_dereference_bh(peer->keypairs.current_keypair)); + rcu_read_unlock_bh(); + if (unlikely(!keypair)) + goto out_nokey; + if (unlikely(!READ_ONCE(keypair->sending.is_valid))) + goto out_nokey; + if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REJECT_AFTER_TIME))) + goto out_invalid; + + /* After we know we have a somewhat valid key, we now try to assign + * nonces to all of the packets in the queue. If we can't assign nonces + * for all of them, we just consider it a failure and wait for the next + * handshake. + */ + skb_queue_walk(&packets, skb) { + /* 0 for no outer TOS: no leak. TODO: at some later point, we + * might consider using flowi->tos as outer instead. + */ + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); + PACKET_CB(skb)->nonce = + atomic64_inc_return(&keypair->sending_counter) - 1; + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) + goto out_invalid; + } + + packets.prev->next = NULL; + wg_peer_get(keypair->entry.peer); + PACKET_CB(packets.next)->keypair = keypair; + wg_packet_create_data(peer, packets.next); + return; + +out_invalid: + WRITE_ONCE(keypair->sending.is_valid, false); +out_nokey: + wg_noise_keypair_put(keypair, false); + + /* We orphan the packets if we're waiting on a handshake, so that they + * don't block a socket's pool. + */ + skb_queue_walk(&packets, skb) + skb_orphan(skb); + /* Then we put them back on the top of the queue. We're not too + * concerned about accidentally getting things a little out of order if + * packets are being added really fast, because this queue is for before + * packets can even be sent and it's small anyway. + */ + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + /* If we're exiting because there's something wrong with the key, it + * means we should initiate a new handshake. + */ + wg_packet_send_queued_handshake_initiation(peer, false); +} diff --git a/net/wireguard/socket.c b/net/wireguard/socket.c new file mode 100644 index 000000000000..9e0af9320c6b --- /dev/null +++ b/net/wireguard/socket.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include +#include +#include +#include +#include +#include +#include + +static int send4(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ + struct flowi4 fl = { + .saddr = endpoint->src4.s_addr, + .daddr = endpoint->addr4.sin_addr.s_addr, + .fl4_dport = endpoint->addr4.sin_port, + .flowi4_mark = wg->fwmark, + .flowi4_proto = IPPROTO_UDP + }; + struct rtable *rt = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock4); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl4_sport = inet_sk(sock)->inet_sport; + + if (cache) + rt = dst_cache_get_ip4(cache, &fl.saddr); + + if (!rt) { + security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, + fl.saddr, RT_SCOPE_HOST))) { + endpoint->src4.s_addr = 0; + endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + } + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && + PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && + rt->dst.dev->ifindex != endpoint->src_if4)))) { + endpoint->src4.s_addr = 0; + endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + if (!IS_ERR(rt)) + ip_rt_put(rt); + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + } + if (IS_ERR(rt)) { + ret = PTR_ERR(rt); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } + if (cache) + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, + fl.fl4_dport, false, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +} + +static int send6(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl = { + .saddr = endpoint->src6, + .daddr = endpoint->addr6.sin6_addr, + .fl6_dport = endpoint->addr6.sin6_port, + .flowi6_mark = wg->fwmark, + .flowi6_oif = endpoint->addr6.sin6_scope_id, + .flowi6_proto = IPPROTO_UDP + /* TODO: addr->sin6_flowinfo */ + }; + struct dst_entry *dst = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock6); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl6_sport = inet_sk(sock)->inet_sport; + + if (cache) + dst = dst_cache_get_ip6(cache, &fl.saddr); + + if (!dst) { + security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + if (unlikely(!ipv6_addr_any(&fl.saddr) && + !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { + endpoint->src6 = fl.saddr = in6addr_any; + if (cache) + dst_cache_reset(cache); + } + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, + NULL); + if (IS_ERR(dst)) { + ret = PTR_ERR(dst); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } + if (cache) + dst_cache_set_ip6(cache, dst, &fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, + fl.fl6_dport, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +#else + kfree_skb(skb); + return -EAFNOSUPPORT; +#endif +} + +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) +{ + size_t skb_len = skb->len; + int ret = -EAFNOSUPPORT; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + ret = send4(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else if (peer->endpoint.addr.sa_family == AF_INET6) + ret = send6(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else + dev_kfree_skb(skb); + if (likely(!ret)) + peer->tx_bytes += skb_len; + read_unlock_bh(&peer->endpoint_lock); + + return ret; +} + +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, + size_t len, u8 ds) +{ + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + + if (unlikely(!skb)) + return -ENOMEM; + + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + return wg_socket_send_skb_to_peer(peer, skb, ds); +} + +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, void *buffer, + size_t len) +{ + int ret = 0; + struct sk_buff *skb; + struct endpoint endpoint; + + if (unlikely(!in_skb)) + return -EINVAL; + ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); + if (unlikely(ret < 0)) + return ret; + + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + + if (endpoint.addr.sa_family == AF_INET) + ret = send4(wg, skb, &endpoint, 0, NULL); + else if (endpoint.addr.sa_family == AF_INET6) + ret = send6(wg, skb, &endpoint, 0, NULL); + /* No other possibilities if the endpoint is valid, which it is, + * as we checked above. + */ + + return ret; +} + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb) +{ + memset(endpoint, 0, sizeof(*endpoint)); + if (skb->protocol == htons(ETH_P_IP)) { + endpoint->addr4.sin_family = AF_INET; + endpoint->addr4.sin_port = udp_hdr(skb)->source; + endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; + endpoint->src4.s_addr = ip_hdr(skb)->daddr; + endpoint->src_if4 = skb->skb_iif; + } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { + endpoint->addr6.sin6_family = AF_INET6; + endpoint->addr6.sin6_port = udp_hdr(skb)->source; + endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; + endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( + &ipv6_hdr(skb)->saddr, skb->skb_iif); + endpoint->src6 = ipv6_hdr(skb)->daddr; + } else { + return -EINVAL; + } + return 0; +} + +static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) +{ + return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && + a->addr4.sin_port == b->addr4.sin_port && + a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && + a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || + (a->addr.sa_family == AF_INET6 && + b->addr.sa_family == AF_INET6 && + a->addr6.sin6_port == b->addr6.sin6_port && + ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && + a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && + ipv6_addr_equal(&a->src6, &b->src6)) || + unlikely(!a->addr.sa_family && !b->addr.sa_family); +} + +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint) +{ + /* First we check unlocked, in order to optimize, since it's pretty rare + * that an endpoint will change. If we happen to be mid-write, and two + * CPUs wind up writing the same thing or something slightly different, + * it doesn't really matter much either. + */ + if (endpoint_eq(endpoint, &peer->endpoint)) + return; + write_lock_bh(&peer->endpoint_lock); + if (endpoint->addr.sa_family == AF_INET) { + peer->endpoint.addr4 = endpoint->addr4; + peer->endpoint.src4 = endpoint->src4; + peer->endpoint.src_if4 = endpoint->src_if4; + } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { + peer->endpoint.addr6 = endpoint->addr6; + peer->endpoint.src6 = endpoint->src6; + } else { + goto out; + } + dst_cache_reset(&peer->endpoint_cache); +out: + write_unlock_bh(&peer->endpoint_lock); +} + +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb) +{ + struct endpoint endpoint; + + if (!wg_socket_endpoint_from_skb(&endpoint, skb)) + wg_socket_set_peer_endpoint(peer, &endpoint); +} + +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) +{ + write_lock_bh(&peer->endpoint_lock); + memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); + dst_cache_reset_now(&peer->endpoint_cache); + write_unlock_bh(&peer->endpoint_lock); +} + +static int wg_receive(struct sock *sk, struct sk_buff *skb) +{ + struct wg_device *wg; + + if (unlikely(!sk)) + goto err; + wg = sk->sk_user_data; + if (unlikely(!wg)) + goto err; + skb_mark_not_on_list(skb); + wg_packet_receive(wg, skb); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +static void sock_free(struct sock *sock) +{ + if (unlikely(!sock)) + return; + sk_clear_memalloc(sock); + udp_tunnel_sock_release(sock->sk_socket); +} + +static void set_sock_opts(struct socket *sock) +{ + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_sndbuf = INT_MAX; + sk_set_memalloc(sock->sk); +} + +int wg_socket_init(struct wg_device *wg, u16 port) +{ + struct net *net; + int ret; + struct udp_tunnel_sock_cfg cfg = { + .sk_user_data = wg, + .encap_type = 1, + .encap_rcv = wg_receive + }; + struct socket *new4 = NULL, *new6 = NULL; + struct udp_port_cfg port4 = { + .family = AF_INET, + .local_ip.s_addr = htonl(INADDR_ANY), + .local_udp_port = htons(port), + .use_udp_checksums = true + }; +#if IS_ENABLED(CONFIG_IPV6) + int retries = 0; + struct udp_port_cfg port6 = { + .family = AF_INET6, + .local_ip6 = IN6ADDR_ANY_INIT, + .use_udp6_tx_checksums = true, + .use_udp6_rx_checksums = true, + .ipv6_v6only = true + }; +#endif + + rcu_read_lock(); + net = rcu_dereference(wg->creating_net); + net = net ? maybe_get_net(net) : NULL; + rcu_read_unlock(); + if (unlikely(!net)) + return -ENONET; + +#if IS_ENABLED(CONFIG_IPV6) +retry: +#endif + + ret = udp_sock_create(net, &port4, &new4); + if (ret < 0) { + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); + goto out; + } + set_sock_opts(new4); + setup_udp_tunnel_sock(net, new4, &cfg); + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_mod_enabled()) { + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; + ret = udp_sock_create(net, &port6, &new6); + if (ret < 0) { + udp_tunnel_sock_release(new4); + if (ret == -EADDRINUSE && !port && retries++ < 100) + goto retry; + pr_err("%s: Could not create IPv6 socket\n", + wg->dev->name); + goto out; + } + set_sock_opts(new6); + setup_udp_tunnel_sock(net, new6, &cfg); + } +#endif + + wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); + ret = 0; +out: + put_net(net); + return ret; +} + +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6) +{ + struct sock *old4, *old6; + + mutex_lock(&wg->socket_update_lock); + old4 = rcu_dereference_protected(wg->sock4, + lockdep_is_held(&wg->socket_update_lock)); + old6 = rcu_dereference_protected(wg->sock6, + lockdep_is_held(&wg->socket_update_lock)); + rcu_assign_pointer(wg->sock4, new4); + rcu_assign_pointer(wg->sock6, new6); + if (new4) + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); + mutex_unlock(&wg->socket_update_lock); + synchronize_net(); + sock_free(old4); + sock_free(old6); +} diff --git a/net/wireguard/socket.h b/net/wireguard/socket.h new file mode 100644 index 000000000000..bab5848efbcd --- /dev/null +++ b/net/wireguard/socket.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_SOCKET_H +#define _WG_SOCKET_H + +#include +#include +#include +#include + +int wg_socket_init(struct wg_device *wg, u16 port); +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6); +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data, + size_t len, u8 ds); +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, + u8 ds); +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, + void *out_buffer, size_t len); + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb); +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint); +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb); +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer); + +#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \ + struct endpoint __endpoint; \ + wg_socket_endpoint_from_skb(&__endpoint, skb); \ + net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \ + ##__VA_ARGS__); \ + } while (0) +#else +#define net_dbg_skb_ratelimited(fmt, skb, ...) +#endif + +#endif /* _WG_SOCKET_H */ diff --git a/net/wireguard/tests/debug.mk b/net/wireguard/tests/debug.mk new file mode 100644 index 000000000000..de2a8666383b --- /dev/null +++ b/net/wireguard/tests/debug.mk @@ -0,0 +1,73 @@ +REMOTE_HOST1 ?= root@172.16.48.128 +REMOTE_HOST2 ?= root@172.16.48.129 +REMOTE_HOST3 ?= root@172.16.48.130 +PEER1 := [Peer]\nPublicKey=UQGBaem0U6JjIObMQzunZ2Euv8MMYcUUdWKJV87WDE8=\nAllowedIPs=192.168.2.1/32,abcd::1/128\nEndpoint=$(subst root@,,$(REMOTE_HOST1)):12912\n +PEER2 := [Peer]\nPublicKey=tNXrD6GCvHRNgoZ/D/BmTbTbzoVGZh0R2V6rzY6hwl4=\nAllowedIPs=192.168.2.2/32,abcd::2/128\nEndpoint=$(subst root@,,$(REMOTE_HOST2)):21281\n +PEER3 := [Peer]\nPublicKey=gLvFUb1FTyoACC/yZNqGLKnNkt+w30JEvfFChDVuewo=\nAllowedIPs=192.168.2.3/32,abcd::3/128\nEndpoint=$(subst root@,,$(REMOTE_HOST3)):54812\n +SSH_OPTS := -q -o ControlMaster=auto -o ControlPath=.ssh-deployment.sock +SSH_OPTS1 := $(SSH_OPTS)-1 +SSH_OPTS2 := $(SSH_OPTS)-2 +SSH_OPTS3 := $(SSH_OPTS)-3 +RSYNC_OPTS := --exclude="*.o" --exclude="*.mod.c" --exclude="tests/qemu" --exclude "*.ko" --exclude="*.cmd" -avP + +MAYBE_DEBUG := "debug" +ifeq ($(D),0) +MAYBE_DEBUG := +endif + +insert: debug + -modinfo -F depends wireguard.ko | tr ',' '\n' | sudo xargs -n 1 modprobe + -sudo rmmod wireguard + -sudo insmod wireguard.ko + +test: insert + sudo PATH="$$PATH:/usr/sbin:/sbin:/usr/bin:/bin:/usr/local/sbin:/usr/local/bin" ./tests/netns.sh + +test-qemu: + $(MAKE) -C tests/qemu + +remote-test: + ssh $(SSH_OPTS1) -Nf $(REMOTE_HOST1) + rsync --rsh="ssh $(SSH_OPTS1)" $(RSYNC_OPTS) . $(REMOTE_HOST1):wireguard-build/ + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'make -C wireguard-build test -j$$(nproc)' + ssh $(SSH_OPTS1) -O exit $(REMOTE_HOST1) + +remote-run-1: + ssh $(SSH_OPTS1) -Nf $(REMOTE_HOST1) + rsync --rsh="ssh $(SSH_OPTS1)" $(RSYNC_OPTS) . $(REMOTE_HOST1):wireguard-build/ + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip l d wg0; rmmod wireguard; cd wireguard-build && make -j$$(nproc) $(MAYBE_DEBUG) && make install' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip l a wg0 type wireguard' + printf '[Interface]\nListenPort=12912\nPrivateKey=4IoHwlfTyKb9Z9W1YPmBmZvSiU6qcs0oa4xnjAEm/3U=\n$(PEER2)$(PEER3)' | ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'cat > config.conf' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'wg setconf wg0 config.conf' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip l set up dev wg0' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip a a 192.168.2.1/24 dev wg0' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip a a abcd::1/120 dev wg0' + ssh $(SSH_OPTS1) -O exit $(REMOTE_HOST1) + + +remote-run-2: + ssh $(SSH_OPTS2) -Nf $(REMOTE_HOST2) + rsync --rsh="ssh $(SSH_OPTS2)" $(RSYNC_OPTS) . $(REMOTE_HOST2):wireguard-build/ + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip l d wg0; rmmod wireguard; cd wireguard-build && make -j$$(nproc) $(MAYBE_DEBUG) && make install' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip l a wg0 type wireguard' + printf '[Interface]\nListenPort=21281\nPrivateKey=kEKL+m4h5xTn2cYKU6NTEv32kuXHAkuqrjdT9VtsnX8=\n$(PEER1)$(PEER3)' | ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'cat > config.conf' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'wg setconf wg0 config.conf' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip l set up dev wg0' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip a a 192.168.2.2/24 dev wg0' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip a a abcd::2/120 dev wg0' + ssh $(SSH_OPTS2) -O exit $(REMOTE_HOST2) + +remote-run-3: + ssh $(SSH_OPTS3) -Nf $(REMOTE_HOST3) + rsync --rsh="ssh $(SSH_OPTS3)" $(RSYNC_OPTS) . $(REMOTE_HOST3):wireguard-build/ + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip l d wg0; rmmod wireguard; cd wireguard-build && make -j$$(nproc) $(MAYBE_DEBUG) && make install' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip l a wg0 type wireguard' + printf '[Interface]\nListenPort=54812\nPrivateKey=qFunvj5kgENrtWn754hNBLrk5mMA+8+evVtnI2YqWkk=\n$(PEER1)$(PEER2)' | ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'cat > config.conf' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'wg setconf wg0 config.conf' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip l set up dev wg0' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip a a 192.168.2.3/24 dev wg0' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip a a abcd::3/120 dev wg0' + ssh $(SSH_OPTS3) -O exit $(REMOTE_HOST3) + +remote-run: + $(MAKE) -j3 remote-run-1 remote-run-2 remote-run-3 diff --git a/net/wireguard/tests/netns.sh b/net/wireguard/tests/netns.sh new file mode 100644 index 000000000000..f21d59924a44 --- /dev/null +++ b/net/wireguard/tests/netns.sh @@ -0,0 +1,682 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. +# +# This script tests the below topology: +# +# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│ +# ││ wg0 │───────────┌───┌────────────│ lo │────────────┌───┌───────────│ wg0 ││ +# │├────────┮──────────┐│ │ ┌───────┮────────┮────────┐ │ │┌──────────┎─────────│ +# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││ +# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││ +# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│ +# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘ +# └──────────────────────────────────┘ +# +# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the +# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 +# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further +# details on how this is accomplished. +set -e +shopt -s extglob + +exec 3>&1 +export LANG=C +export WG_HIDE_KEYS=never +NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]} +netns0="wg-test-$$-0" +netns1="wg-test-$$-1" +netns2="wg-test-$$-2" +pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } +compatwarn() { echo -e "\x1b[31m\x1b[1m[-] Tolerated failure: $1\x1b[0m" >&3; } +pp() { pretty "" "$*"; "$@"; } +maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } +n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } +n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } +n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } +ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } +ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } +ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } +sleep() { read -t "$1" -N 1 || true; } +waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } + +cleanup() { + set +e + exec 2>/dev/null + printf "$orig_message_cost" > /proc/sys/net/core/message_cost + ip0 link del dev wg0 + ip0 link del dev wg1 + ip1 link del dev wg0 + ip1 link del dev wg1 + ip2 link del dev wg0 + ip2 link del dev wg1 + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" + [[ -n $to_kill ]] && kill $to_kill + pp ip netns del $netns1 + pp ip netns del $netns2 + pp ip netns del $netns0 + exit +} + +orig_message_cost="$(< /proc/sys/net/core/message_cost)" +trap cleanup EXIT +printf 0 > /proc/sys/net/core/message_cost + +ip netns del $netns0 2>/dev/null || true +ip netns del $netns1 2>/dev/null || true +ip netns del $netns2 2>/dev/null || true +pp ip netns add $netns0 +pp ip netns add $netns1 +pp ip netns add $netns2 +ip0 link set up dev lo + +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns1 +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns2 +key1="$(pp wg genkey)" +key2="$(pp wg genkey)" +key3="$(pp wg genkey)" +key4="$(pp wg genkey)" +pub1="$(pp wg pubkey <<<"$key1")" +pub2="$(pp wg pubkey <<<"$key2")" +pub3="$(pp wg pubkey <<<"$key3")" +pub4="$(pp wg pubkey <<<"$key4")" +psk="$(pp wg genpsk)" +[[ -n $key1 && -n $key2 && -n $psk ]] + +configure_peers() { + ip1 addr add 192.168.241.1/24 dev wg0 + ip1 addr add fd00::1/112 dev wg0 + + ip2 addr add 192.168.241.2/24 dev wg0 + ip2 addr add fd00::2/112 dev wg0 + + n1 wg set wg0 \ + private-key <(echo "$key1") \ + listen-port 1 \ + peer "$pub2" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.2/32,fd00::2/128 + n2 wg set wg0 \ + private-key <(echo "$key2") \ + listen-port 2 \ + peer "$pub1" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.1/32,fd00::1/128 + + ip1 link set up dev wg0 + ip2 link set up dev wg0 +} +configure_peers + +tests() { + # Ping over IPv4 + n2 ping -c 10 -f -W 1 192.168.241.1 + n1 ping -c 10 -f -W 1 192.168.241.2 + + # Ping over IPv6 + n2 ping6 -c 10 -f -W 1 fd00::1 + n1 ping6 -c 10 -f -W 1 fd00::2 + + # TCP over IPv4 + n2 iperf3 -s -1 -B 192.168.241.2 & + waitiperf $netns2 $! + n1 iperf3 -Z -t 3 -c 192.168.241.2 + + # TCP over IPv6 + n1 iperf3 -s -1 -B fd00::1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -c fd00::1 + + # UDP over IPv4 + n1 iperf3 -s -1 -B 192.168.241.1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 + + # UDP over IPv6 + n2 iperf3 -s -1 -B fd00::2 & + waitiperf $netns2 $! + n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 + + # Old TCP stack bugs make the below tests problematic + [[ $(< /proc/version) =~ ^Linux\ version\ 5\.4[.\ ] ]] || return 0 + + # TCP over IPv4, in parallel + local pids=( ) i + for ((i=0; i < NPROC; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) + done + for ((i=0; i < NPROC; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" +} + +[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" +big_mtu=$(( 34816 - 1500 + $orig_mtu )) + +# Test using IPv4 as outer transport +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +# Before calling tests, we first make sure that the stats counters and timestamper are working +n2 ping -c 10 -f -W 1 192.168.241.1 +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ timestamp < <(n1 wg show wg0 latest-handshakes) +(( timestamp != 0 )) + +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv6 as outer transport +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n2 wg set wg0 peer "$pub1" endpoint [::1]:1 +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +# Test that route MTUs work with the padding +ip1 link set wg0 mtu 1300 +ip2 link set wg0 mtu 1300 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +n0 iptables -A INPUT -m length --length 1360 -j DROP +n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 +n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 +n2 ping -c 1 -W 1 -s 1269 192.168.241.1 +n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 +n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 +n0 iptables -F INPUT + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv4 that roaming works +ip0 -4 addr del 127.0.0.1/8 dev lo +ip0 -4 addr add 127.212.121.99/8 dev lo +n1 wg set wg0 listen-port 9999 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n1 ping6 -W 1 -c 1 fd00::2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] + +# Test using IPv6 that roaming works +n1 wg set wg0 listen-port 9998 +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] + +# Test that crypto-RP filter works +n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] +kill $ncat_pid +more_specific_key="$(pp wg genkey | pp wg pubkey)" +n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 +n2 wg set wg0 listen-port 9997 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +! read -r -N 1 -t 1 out <&4 || false +kill $ncat_pid +n1 wg set wg0 peer "$more_specific_key" remove +[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] + +# Test that we can change private keys keys and immediately handshake +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 private-key <(echo "$key3") +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove +n1 ping -W 1 -c 1 192.168.241.2 +n2 wg set wg0 peer "$pub3" remove + +# Test that we can route wg through wg +ip1 addr flush dev wg0 +ip2 addr flush dev wg0 +ip1 addr add fd00::5:1/112 dev wg0 +ip2 addr add fd00::5:2/112 dev wg0 +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998 +ip1 link add wg1 type wireguard +ip2 link add wg1 type wireguard +ip1 addr add 192.168.241.1/24 dev wg1 +ip1 addr add fd00::1/112 dev wg1 +ip2 addr add 192.168.241.2/24 dev wg1 +ip2 addr add fd00::2/112 dev wg1 +ip1 link set mtu 1340 up dev wg1 +ip2 link set mtu 1340 up dev wg1 +n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5 +n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5 +tests +# Try to set up a routing loop between the two namespaces +ip1 link set netns $netns0 dev wg1 +ip0 addr add 192.168.241.1/24 dev wg1 +ip0 link set up dev wg1 +n0 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 +ip2 link del wg0 +ip2 link del wg1 +read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false +sleep 1 +read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) +if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then + errstart=$'\x1b[37m\x1b[41m\x1b[1m' + errend=$'\x1b[0m' + echo "${errstart} ${errend}" + echo "${errstart} E R R O R ${errend}" + echo "${errstart} ${errend}" + echo "${errstart} This architecture does not do the right thing ${errend}" + echo "${errstart} with cross-namespace routing loops. This test ${errend}" + echo "${errstart} has thus technically failed but, as this issue ${errend}" + echo "${errstart} is as yet unsolved, these tests will continue ${errend}" + echo "${errstart} onward. :( ${errend}" + echo "${errstart} ${errend}" +fi + +ip0 link del wg1 +ip1 link del wg0 + +# Test using NAT. We now change the topology to this: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│vethc│───────────┌────┌────│vethrc│ │vethrs│──────────────┌─────┌──│veths│────────────│ wg0 │ │ +# │ ├─────┮──────────┐ ├─────┮──────────┐│ │ ├──────┮─────────┐ ├──────┮────────────┐ │ │ ├─────┮──────────┐ ├─────┮──────────┐ │ +# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers + +ip0 link add vethrc type veth peer name vethc +ip0 link add vethrs type veth peer name veths +ip0 link set vethc netns $netns1 +ip0 link set veths netns $netns2 +ip0 link set vethrc up +ip0 link set vethrs up +ip0 addr add 192.168.1.1/24 dev vethrc +ip0 addr add 10.0.0.1/24 dev vethrs +ip1 addr add 192.168.1.100/24 dev vethc +ip1 link set vethc up +ip1 route add default via 192.168.1.1 +ip2 addr add 10.0.0.100/24 dev veths +ip2 link set veths up +waitiface $netns0 vethrc +waitiface $netns0 vethrs +waitiface $netns1 vethc +waitiface $netns2 veths + +n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' +n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 + +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 +n1 ping -W 1 -c 1 192.168.241.2 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). +pp sleep 3 +n2 ping -W 1 -c 1 192.168.241.1 +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 + +# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 || compatwarn "sk_bound_dev_if test failed, but fix not backportable in compat layer" # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + +# Test that onion routing works, even when it loops +n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 +ip1 addr add 192.168.242.1/24 dev wg0 +ip2 link add wg1 type wireguard +ip2 addr add 192.168.242.2/24 dev wg1 +n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32 +ip2 link set wg1 up +n1 ping -W 1 -c 1 192.168.242.2 +ip2 link del wg1 +n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5 +! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel +n1 wg set wg0 peer "$pub3" remove +ip1 addr del 192.168.242.1/24 dev wg0 + +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. +ip1 -6 addr add fc00::9/96 dev vethc +ip1 -6 route add default via fc00::1 +ip2 -4 addr add 192.168.99.7/32 dev wg0 +ip2 -6 addr add abab::1111/128 dev wg0 +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 +ip1 -6 route add default dev wg0 table 51820 +ip1 -6 rule add not fwmark 51820 table 51820 +ip1 -6 rule add table main suppress_prefixlength 0 +ip1 -4 route add default dev wg0 table 51820 +ip1 -4 rule add not fwmark 51820 table 51820 +ip1 -4 rule add table main suppress_prefixlength 0 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter' +# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. +if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then + # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. + n1 ping -W 1 -c 100 -f 192.168.99.7 + # ca7a03c got ported to 5.2 when it shouldn't have. + [[ $(< /proc/version) =~ ^Linux\ version\ 5\.2[.\ ] ]] || n1 ping -W 1 -c 100 -f abab::1111 +fi + +# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. +n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 +n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. +n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +ip0 -4 route add 192.168.241.1 via 10.0.0.100 +n2 wg set wg0 peer "$pub1" remove +[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] + +n0 iptables -t nat -F +n0 iptables -t filter -F +n2 iptables -t nat -F +ip0 link del vethrc +ip0 link del vethrs +ip1 link del wg0 +ip2 link del wg0 + +# Test that saddr routing is sticky but not too sticky, changing to this topology: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns2 namespace │ +# │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│veth1│───────────┌────┌──│veth2│────────────│ wg0 │ │ +# │ ├─────┮──────────┐ ├─────┮──────────┐│ │ ├─────┮──────────┐ ├─────┮──────────┐ │ +# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' +n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' + +# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip2 addr add fd00:aa::2/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add 10.0.0.10/24 dev veth1 +ip1 addr del 10.0.0.1/24 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add fd00:aa::10/96 dev veth1 +ip1 addr del fd00:aa::1/96 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 + +# Now we show that we can successfully do reply to sender routing +ip1 link set veth1 down +ip2 link set veth2 down +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add 10.0.0.2/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip1 addr add fd00:aa::2/96 dev veth1 +ip2 addr add 10.0.0.3/24 dev veth2 +ip2 addr add fd00:aa::3/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] + +# What happens if the inbound destination address belongs to a different interface as the default route? +ip1 link add dummy0 type dummy +ip1 addr add 10.50.0.1/24 dev dummy0 +ip1 link set dummy0 up +ip2 route add 10.50.0.0/24 dev veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] + +ip1 link del dummy0 +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 route flush dev veth1 +ip2 route flush dev veth2 + +# Now we see what happens if another interface route takes precedence over an ongoing one +ip1 link add veth3 type veth peer name veth4 +ip1 link set veth4 netns $netns2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip1 addr add 10.0.0.3/24 dev veth3 +ip1 link set veth1 up +ip2 link set veth2 up +ip1 link set veth3 up +ip2 link set veth4 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +waitiface $netns1 veth3 +waitiface $netns2 veth4 +ip1 route flush dev veth1 +ip1 route flush dev veth3 +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] + +ip1 link del veth1 +ip1 link del veth3 +ip1 link del wg0 +ip2 link del wg0 + +# We test that Netlink/IPC is working properly by doing things that usually cause split responses +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) +for a in {1..255}; do + for b in {0..255}; do + config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +for ip in $(n0 wg show wg0 allowed-ips); do + ((++i)) +done +((i == 255*256*2+1)) +ip0 link del wg0 +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" ) +for a in {1..40}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) + for b in {1..52}; do + config+=( "AllowedIPs=$a.$b.0.0/16" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +while read -r line; do + j=0 + for ip in $line; do + ((++j)) + done + ((j == 53)) + ((++i)) +done < <(n0 wg show wg0 allowed-ips) +((i == 40)) +ip0 link del wg0 +ip0 link add wg0 type wireguard +config=( ) +for i in {1..29}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) +done +config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +n0 wg showconf wg0 > /dev/null +ip0 link del wg0 + +allowedips=( ) +for i in {1..197}; do + allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" +IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" +n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" +{ + read -r pub allowedips + [[ $pub == "$pub1" && $allowedips == "(none)" ]] + read -r pub allowedips + [[ $pub == "$pub2" ]] + i=0 + for _ in $allowedips; do + ((++i)) + done + ((i == 197)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + +! n0 wg show doesnotexist || false + +ip0 link add wg0 type wireguard +n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") +[[ $(n0 wg show wg0 private-key) == "$key1" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] +n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null +[[ $(n0 wg show wg0 private-key) == "(none)" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] +n0 wg set wg0 peer "$pub2" +n0 wg set wg0 private-key <(echo "$key2") +[[ $(n0 wg show wg0 public-key) == "$pub2" ]] +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 peer "$pub2" +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") +n0 wg set wg0 peer "$pub2" +[[ $(n0 wg show wg0 peers) == "$pub2" ]] +n0 wg set wg0 private-key <(echo "/${key1:1}") +[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0 +n0 wg set wg0 peer "$pub2" remove +for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do + n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111 +done +[[ -n $(n0 wg show wg0 peers) ]] +exec 4< <(n0 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns0 $ncat_pid +ip0 link set wg0 up +! read -r -n 1 -t 2 <&4 || false +kill $ncat_pid +ip0 link del wg0 + +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + +# Ensure there aren't circular reference loops +ip1 link add wg1 type wireguard +ip2 link add wg2 type wireguard +ip1 link set wg1 netns $netns2 +ip2 link set wg2 netns $netns1 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + +sleep 2 # Wait for cleanup and grace periods +declare -A objects +while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" +done < /dev/kmsg +alldeleted=1 +for object in "${!objects[@]}"; do + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then + echo "Error: $object: merely ${objects["$object"]}" >&3 + alldeleted=0 + fi +done +[[ $alldeleted -eq 1 ]] +pretty "" "Objects that were created were also destroyed." diff --git a/net/wireguard/tests/qemu/Makefile b/net/wireguard/tests/qemu/Makefile new file mode 100644 index 000000000000..ed44e88441dc --- /dev/null +++ b/net/wireguard/tests/qemu/Makefile @@ -0,0 +1,431 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + +PWD := $(shell pwd) + +CHOST := $(shell gcc -dumpmachine) +HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) +ifneq (,$(ARCH)) +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(filter-out android,$(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))) +endif +ifeq (,$(CBUILD)) +CBUILD := $(CHOST) +endif +ARCH := $(firstword $(subst -, ,$(CBUILD))) + +# Set these from the environment to override +KERNEL_VERSION ?= 5.4.200 +KERNEL_VERSION := $(KERNEL_VERSION)$(if $(DEBUG_KERNEL),$(if $(findstring -debug,$(KERNEL_VERSION)),,-debug),) +BUILD_PATH ?= $(PWD)/../../../qemu-build/$(ARCH) +DISTFILES_PATH ?= $(PWD)/distfiles +NR_CPUS ?= 4 + +MIRROR := https://download.wireguard.com/qemu-test/distfiles/ + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := ../../Kbuild ../../Kconfig $(filter-out ../../tests/%,$(call rwildcard,../../,*.c *.h *.S *.pl *.include)) + +default: qemu + +# variable name, tarball project name, version, tarball extension, default URI base +define tar_download = +$(1)_VERSION := $(3) +$(1)_NAME := $(2)-$$($(1)_VERSION) +$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME:-debug=)$(4) +$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) +$(call file_download,$$($(1)_NAME:-debug=)$(4),$(5)) +endef + +define file_download = +$(DISTFILES_PATH)/$(1): + mkdir -p $(DISTFILES_PATH) + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@ $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@ $(2)$(1) || rm -f $$@' +endef + +ifneq ($(findstring https://,$(KERNEL_VERSION)),) +KERNEL_URL := $(word 1,$(KERNEL_VERSION)) +KERNEL_NAME := $(word 2,$(KERNEL_VERSION)) +KERNEL_TAR := $(DISTFILES_PATH)/linux-$(KERNEL_NAME)-$(notdir $(KERNEL_URL)) +KERNEL_PATH := $(BUILD_PATH)/linux-$(KERNEL_NAME) +KERNEL_VERSION := $(KERNEL_NAME) +$(KERNEL_TAR): + mkdir -p $(DISTFILES_PATH) + flock -x $@.lock -c '[ -f $@ ] && exit 0; wget -O $@ $(KERNEL_URL) || rm -f $@' +else +ifeq ($(findstring -rc,$(KERNEL_VERSION)),) +KERNEL_URL_DIRECTORY := https://cdn.kernel.org/pub/linux/kernel/v$(firstword $(subst ., ,$(KERNEL_VERSION:-debug=))).x/ +else +KERNEL_URL_DIRECTORY := https://git.kernel.org/torvalds/t/ +endif +$(eval $(call tar_download,KERNEL,linux,$(KERNEL_VERSION),.tar.gz,$(KERNEL_URL_DIRECTORY))) +endif +$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/)) +$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/)) +$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/)) +$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/)) +$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/)) +$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/)) +$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/)) + +export CFLAGS ?= -O3 -pipe +export LDFLAGS ?= +export CPPFLAGS := -I$(BUILD_PATH)/include + +ifeq ($(HOST_ARCH),$(ARCH)) +CROSS_COMPILE_FLAG := --host=$(CHOST) +CFLAGS += -march=native +OMIT_AVX512 := $(shell $(CBUILD)-gcc -march=native -Q --help=target | sed -n 's/.*\-m\(avx512[^ ]*\).*\[enabled\].*/-mno-\1/p') +CFLAGS += $(OMIT_AVX512) +STRIP := strip +else +$(info Cross compilation: building for $(CBUILD) using $(CHOST)) +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +export CROSS_COMPILE=$(CBUILD)- +STRIP := $(CBUILD)-strip +endif +QEMU_VPORT_RESULT := +ifeq ($(ARCH),aarch64) +QEMU_ARCH := aarch64 +QEMU_VPORT_RESULT := virtio-serial-device +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),aarch64_be) +QEMU_ARCH := aarch64 +QEMU_VPORT_RESULT := virtio-serial-device +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),arm) +QEMU_ARCH := arm +QEMU_VPORT_RESULT := virtio-serial-device +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux +endif +else ifeq ($(ARCH),armeb) +QEMU_ARCH := arm +QEMU_VPORT_RESULT := virtio-serial-device +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. +LDFLAGS += -Wl,--be8 +endif +else ifeq ($(ARCH),x86_64) +QEMU_ARCH := x86_64 +KERNEL_ARCH := x86_64 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/x86/boot/bzImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu Skylake-Server -machine q35 +CFLAGS += -march=skylake-avx512 +endif +else ifeq ($(ARCH),i686) +QEMU_ARCH := i386 +KERNEL_ARCH := x86 +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/x86/boot/bzImage +ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu coreduo -machine q35 +CFLAGS += -march=prescott +endif +else ifeq ($(ARCH),mips64) +QEMU_ARCH := mips64 +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EB +endif +else ifeq ($(ARCH),mips64el) +QEMU_ARCH := mips64el +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EL +endif +else ifeq ($(ARCH),mips) +QEMU_ARCH := mips +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EB +endif +else ifeq ($(ARCH),mipsel) +QEMU_ARCH := mipsel +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EL +endif +else ifeq ($(ARCH),powerpc64le) +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif +CFLAGS += -mcpu=powerpc64le -mlong-double-64 +else ifeq ($(ARCH),powerpc) +QEMU_ARCH := ppc +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/powerpc/boot/uImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 +else +QEMU_MACHINE := -machine ppce500 +endif +CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt +else ifeq ($(ARCH),m68k) +QEMU_ARCH := m68k +KERNEL_ARCH := m68k +KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE) +else +QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +endif + +REAL_CC := $(CBUILD)-gcc +MUSL_CC := $(BUILD_PATH)/musl-gcc +export CC := $(MUSL_CC) +USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed + +comma := , +build: $(KERNEL_BZIMAGE) +qemu: $(KERNEL_BZIMAGE) + rm -f $(BUILD_PATH)/result + timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ + -nodefaults \ + -nographic \ + -smp $(NR_CPUS) \ + $(QEMU_MACHINE) \ + -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_PATH)/.config && echo 1G || echo 256M) \ + -serial stdio \ + -chardev file,path=$(BUILD_PATH)/result,id=result \ + $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \ + -no-reboot \ + -monitor none \ + -kernel $< + grep -Fq success $(BUILD_PATH)/result + +$(BUILD_PATH)/init-cpio-spec.txt: + mkdir -p $(BUILD_PATH) + echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ + echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ + echo "dir /dev 755 0 0" >> $@ + echo "nod /dev/console 644 0 0 c 5 1" >> $@ + echo "dir /bin 755 0 0" >> $@ + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@ + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ + echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@ + echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ + echo "slink /bin/ping6 ping 777 0 0" >> $@ + echo "dir /lib 755 0 0" >> $@ + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ + echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + +ifeq ($(findstring -git,$(KERNEL_VERSION)),) +$(KERNEL_PATH)/.installed: $(KERNEL_TAR) + mkdir -p $(KERNEL_PATH) + flock -s $<.lock tar --strip-components=1 -C $(KERNEL_PATH) -xf $< + find $(KERNEL_PATH) -type f -name Makefile -exec sed -i 's/-Werror\(\s\|$$\)//g' {} + + sed -i "/printf '\\\\#/d" $(KERNEL_PATH)/tools/build/Build.include || true + sed -i '/jmp early_idt_handler$$/a .fill early_idt_handlers + i*9 - ., 1, 0xcc' $(KERNEL_PATH)/arch/x86/kernel/head_{64,32}.S || true + printf 'ifdef CONFIG_X86_64\nLDFLAGS += $$(call ld-option, -z max-page-size=0x200000)\nendif\n' >> $(KERNEL_PATH)/arch/x86/Makefile + sed -i 's/^Elf_Addr per_cpu_load_addr;$$/static \0/' $(KERNEL_PATH)/arch/x86/tools/relocs.c || true + if grep -sqr UTS_UBUNTU_RELEASE_ABI $(KERNEL_PATH)/debian/rules.d; then echo 'KBUILD_CFLAGS += -DUTS_UBUNTU_RELEASE_ABI=0' >> $(KERNEL_PATH)/Makefile; fi + sed -i "/^if INET\$$/a source \"net/wireguard/Kconfig\"" $(KERNEL_PATH)/net/Kconfig + sed -i "/^obj-\$$(CONFIG_NETFILTER).*+=/a obj-\$$(CONFIG_WIREGUARD) += wireguard/" $(KERNEL_PATH)/net/Makefile + ln -sfT $(shell readlink -f ../..) $(KERNEL_PATH)/net/wireguard + touch $@ +else +$(KERNEL_PATH)/.installed: + mkdir -p $(dir $(KERNEL_PATH)) + flock -s $(dir $(KERNEL_TAR))/$(KERNEL_VERSION).lock git clone $(GIT_URI_$(patsubst %-git,%,$(patsubst %-debug,%,$(KERNEL_VERSION)))) $(KERNEL_PATH) + touch $@ +always-pull: $(KERNEL_PATH)/.installed + flock -s $(dir $(KERNEL_TAR))/$(KERNEL_VERSION).lock git -C $(KERNEL_PATH) fetch + flock -s $(dir $(KERNEL_TAR))/$(KERNEL_VERSION).lock git -C $(KERNEL_PATH) reset --hard FETCH_HEAD +.PHONY: always-pull +$(KERNEL_BZIMAGE): always-pull +endif + +$(KERNEL_PATH)/.config: kernel.config arch/$(ARCH).config | $(KERNEL_PATH)/.installed + cp kernel.config $(KERNEL_PATH)/minimal.config + printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_PATH)/minimal.config + cat arch/$(ARCH).config >> $(KERNEL_PATH)/minimal.config + $(MAKE) -C $(KERNEL_PATH) ARCH=$(KERNEL_ARCH) allnoconfig + cd $(KERNEL_PATH) && ARCH=$(KERNEL_ARCH) scripts/kconfig/merge_config.sh -n .config minimal.config + $(if $(findstring -debug,$(KERNEL_VERSION)),cd $(KERNEL_PATH) && sed -i 's/^EXTRAVERSION =.*/EXTRAVERSION = -debug/' Makefile && ARCH=$(KERNEL_ARCH) scripts/kconfig/merge_config.sh -n .config $(PWD)/debug.config,) + +$(KERNEL_BZIMAGE): $(KERNEL_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) + LOCALVERSION="" $(MAKE) -C $(KERNEL_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(CBUILD)-gcc -fno-PIE" + +$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_PATH)/.config + LOCALVERSION="" $(MAKE) -C $(KERNEL_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install + touch $@ + +$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) + $(MAKE) -C $(MUSL_PATH) + $(STRIP) -s $@ + +$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so + $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + touch $@ + +$(MUSL_CC): $(MUSL_PATH)/lib/libc.so + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc + chmod +x $(BUILD_PATH)/musl-gcc + +$(IPERF_PATH)/.installed: $(IPERF_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i '1s/^/#include /' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h + sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* + touch $@ + +$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no + $(MAKE) -C $(IPERF_PATH) + $(STRIP) -s $@ + +$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) + cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared + $(MAKE) -C $(LIBMNL_PATH) + sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc + +$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg + $(STRIP) -s $@ + +$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) + mkdir -p $(BUILD_PATH) + $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(STRIP) -s $@ + +$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no CFLAGS="-fcommon $(CFLAGS)" ping + $(STRIP) -s $@ + +$(BASH_PATH)/.installed: $(BASH_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + $(MAKE) -C $(BASH_PATH) + $(STRIP) -s $@ + +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + touch $@ + +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $(IPROUTE2_PATH)/ip/ip + +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $(IPROUTE2_PATH)/misc/ss + +$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure + touch $@ + +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include + $(MAKE) -C $(IPTABLES_PATH) + $(STRIP) -s $@ + +$(NMAP_PATH)/.installed: $(NMAP_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh + $(MAKE) -C $(NMAP_PATH)/libpcap + $(MAKE) -C $(NMAP_PATH)/ncat + $(STRIP) -s $@ + +clean: + rm -rf $(BUILD_PATH) + +distclean: clean + rm -rf $(DISTFILES_PATH) + +.PHONY: qemu build clean distclean +.DELETE_ON_ERROR: diff --git a/net/wireguard/tests/qemu/arch/aarch64.config b/net/wireguard/tests/qemu/arch/aarch64.config new file mode 100644 index 000000000000..09016880ce03 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/aarch64.config @@ -0,0 +1,8 @@ +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/aarch64_be.config b/net/wireguard/tests/qemu/arch/aarch64_be.config new file mode 100644 index 000000000000..19ff66e4c602 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/aarch64_be.config @@ -0,0 +1,9 @@ +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/arm.config b/net/wireguard/tests/qemu/arch/arm.config new file mode 100644 index 000000000000..fc7959bef9c2 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/arm.config @@ -0,0 +1,12 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/armeb.config b/net/wireguard/tests/qemu/arch/armeb.config new file mode 100644 index 000000000000..f3066be81c19 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/armeb.config @@ -0,0 +1,13 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/i686.config b/net/wireguard/tests/qemu/arch/i686.config new file mode 100644 index 000000000000..6d90892a85a2 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/i686.config @@ -0,0 +1,6 @@ +CONFIG_ACPI=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/m68k.config b/net/wireguard/tests/qemu/arch/m68k.config new file mode 100644 index 000000000000..82c925e49beb --- /dev/null +++ b/net/wireguard/tests/qemu/arch/m68k.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_M68KCLASSIC=y +CONFIG_M68040=y +CONFIG_MAC=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/mips.config b/net/wireguard/tests/qemu/arch/mips.config new file mode 100644 index 000000000000..d7ec63c17b30 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/mips.config @@ -0,0 +1,11 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/mips64.config b/net/wireguard/tests/qemu/arch/mips64.config new file mode 100644 index 000000000000..0994947e3392 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/mips64.config @@ -0,0 +1,14 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/mips64el.config b/net/wireguard/tests/qemu/arch/mips64el.config new file mode 100644 index 000000000000..591184342f47 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/mips64el.config @@ -0,0 +1,15 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/mipsel.config b/net/wireguard/tests/qemu/arch/mipsel.config new file mode 100644 index 000000000000..18a498293737 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/mipsel.config @@ -0,0 +1,12 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/powerpc.config b/net/wireguard/tests/qemu/arch/powerpc.config new file mode 100644 index 000000000000..5e04882e8e35 --- /dev/null +++ b/net/wireguard/tests/qemu/arch/powerpc.config @@ -0,0 +1,10 @@ +CONFIG_PPC_QEMU_E500=y +CONFIG_FSL_SOC_BOOKE=y +CONFIG_PPC_85xx=y +CONFIG_PHYS_64BIT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_MATH_EMULATION=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/powerpc64le.config b/net/wireguard/tests/qemu/arch/powerpc64le.config new file mode 100644 index 000000000000..8148b9d1220a --- /dev/null +++ b/net/wireguard/tests/qemu/arch/powerpc64le.config @@ -0,0 +1,13 @@ +CONFIG_PPC64=y +CONFIG_PPC_PSERIES=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_HVC_CONSOLE=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_FRAME_WARN=1280 +CONFIG_THREAD_SHIFT=14 diff --git a/net/wireguard/tests/qemu/arch/x86_64.config b/net/wireguard/tests/qemu/arch/x86_64.config new file mode 100644 index 000000000000..efa00693e08b --- /dev/null +++ b/net/wireguard/tests/qemu/arch/x86_64.config @@ -0,0 +1,6 @@ +CONFIG_ACPI=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/debug.config b/net/wireguard/tests/qemu/debug.config new file mode 100644 index 000000000000..5e7fa101c752 --- /dev/null +++ b/net/wireguard/tests/qemu/debug.config @@ -0,0 +1,66 @@ +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_POINTER=y +CONFIG_STACK_VALIDATION=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_PAGE_EXTENSION=y +CONFIG_PAGE_POISONING=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 +CONFIG_SLUB_DEBUG_ON=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_HAVE_DEBUG_STACKOVERFLOW=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_HAVE_ARCH_KMEMCHECK=y +CONFIG_HAVE_ARCH_KASAN=y +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_UBSAN=y +CONFIG_UBSAN_SANITIZE_ALL=y +CONFIG_UBSAN_NO_ALIGNMENT=y +CONFIG_UBSAN_NULL=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_WQ_WATCHDOG=y +CONFIG_SCHED_DEBUG=y +CONFIG_SCHED_INFO=y +CONFIG_SCHEDSTATS=y +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_DEBUG_TIMEKEEPING=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_PREEMPT=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_PROVE_RCU=y +CONFIG_SPARSE_RCU_POINTER=y +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +CONFIG_RCU_TRACE=y +CONFIG_RCU_EQS_DEBUG=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DOUBLEFAULT=y +CONFIG_X86_DEBUG_FPU=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y diff --git a/net/wireguard/tests/qemu/init.c b/net/wireguard/tests/qemu/init.c new file mode 100644 index 000000000000..6be8064cf9c9 --- /dev/null +++ b/net/wireguard/tests/qemu/init.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__attribute__((noreturn)) static void poweroff(void) +{ + fflush(stdout); + fflush(stderr); + reboot(RB_AUTOBOOT); + sleep(30); + fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); + exit(1); +} + +static void panic(const char *what) +{ + fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); + poweroff(); +} + +#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") + +static void print_banner(void) +{ + struct utsname utsname; + int len; + + if (uname(&utsname) < 0) + panic("uname"); + + len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); + printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); +} + +static void seed_rng(void) +{ + int bits = 4096, fd; + + pretty_message("[+] Fake seeding RNG..."); + fd = open("/dev/random", O_WRONLY); + if (fd < 0) + panic("open(random)"); + for (;;) { + if (!getrandom(NULL, 0, GRND_NONBLOCK) || errno == ENOSYS) + break; + if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0) + panic("ioctl(RNDADDTOENTCNT)"); + } + close(fd); +} + +static void mount_filesystems(void) +{ + pretty_message("[+] Mounting filesystems..."); + mkdir("/dev", 0755); + mkdir("/proc", 0755); + mkdir("/sys", 0755); + mkdir("/tmp", 0755); + mkdir("/run", 0755); + mkdir("/var", 0755); + if (mount("none", "/dev", "devtmpfs", 0, NULL)) + panic("devtmpfs mount"); + if (mount("none", "/proc", "proc", 0, NULL)) + panic("procfs mount"); + if (mount("none", "/sys", "sysfs", 0, NULL)) + panic("sysfs mount"); + if (mount("none", "/tmp", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/run", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) + ; /* Not a problem if it fails.*/ + if (symlink("/run", "/var/run")) + panic("run symlink"); + if (symlink("/proc/self/fd", "/dev/fd")) + panic("fd symlink"); +} + +static void enable_logging(void) +{ + int fd; + pretty_message("[+] Enabling logging..."); + fd = open("/proc/sys/kernel/printk", O_WRONLY); + if (fd >= 0) { + if (write(fd, "9\n", 2) != 2) + panic("write(printk)"); + close(fd); + } + fd = open("/proc/sys/debug/exception-trace", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(exception-trace)"); + close(fd); + } +} + +static void kmod_selftests(void) +{ + FILE *file; + char line[2048], *start, *pass; + bool success = true; + pretty_message("[+] Module self-tests:"); + file = fopen("/proc/kmsg", "r"); + if (!file) + panic("fopen(kmsg)"); + if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) + panic("fcntl(kmsg, nonblock)"); + while (fgets(line, sizeof(line), file)) { + start = strstr(line, "wireguard: "); + if (!start) + continue; + start += 11; + *strchrnul(start, '\n') = '\0'; + if (strstr(start, "www.wireguard.com")) + break; + pass = strstr(start, ": pass"); + if (!pass || pass[6] != '\0') { + success = false; + printf(" \x1b[31m* %s\x1b[0m\n", start); + } else + printf(" \x1b[32m* %s\x1b[0m\n", start); + } + fclose(file); + if (!success) { + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); + poweroff(); + } +} + +static void launch_tests(void) +{ + char cmdline[4096], *success_dev; + int status, fd; + pid_t pid; + + pretty_message("[+] Launching tests..."); + pid = fork(); + if (pid == -1) + panic("fork"); + else if (pid == 0) { + execl("/init.sh", "init", NULL); + panic("exec"); + } + if (waitpid(pid, &status, 0) < 0) + panic("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + pretty_message("[+] Tests successful! :-)"); + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + panic("open(/proc/cmdline)"); + if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) + panic("read(/proc/cmdline)"); + cmdline[sizeof(cmdline) - 1] = '\0'; + for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { + if (strncmp(success_dev, "wg.success=", 11)) + continue; + memcpy(success_dev + 11 - 5, "/dev/", 5); + success_dev += 11 - 5; + break; + } + if (!success_dev || !strlen(success_dev)) + panic("Unable to find success device"); + + fd = open(success_dev, O_WRONLY); + if (fd < 0) + panic("open(success_dev)"); + if (write(fd, "success\n", 8) != 8) + panic("write(success_dev)"); + close(fd); + } else { + const char *why = "unknown cause"; + int what = -1; + + if (WIFEXITED(status)) { + why = "exit code"; + what = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + why = "signal"; + what = WTERMSIG(status); + } + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what); + } +} + +static void ensure_console(void) +{ + for (unsigned int i = 0; i < 1000; ++i) { + int fd = open("/dev/console", O_RDWR); + if (fd < 0) { + usleep(50000); + continue; + } + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); + if (write(1, "\0\0\0\0\n", 5) == 5) + return; + } + panic("Unable to open console device"); +} + +static void clear_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Starting memory leak detection..."); + write(fd, "clear\n", 5); + close(fd); +} + +static void check_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Scanning for memory leaks..."); + sleep(2); /* Wait for any grace periods. */ + write(fd, "scan\n", 5); + close(fd); + + fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); + if (fd < 0) + return; + if (sendfile(1, fd, NULL, 0x7ffff000) > 0) + panic("Memory leaks encountered"); + close(fd); +} + +int main(int argc, char *argv[]) +{ + ensure_console(); + print_banner(); + mount_filesystems(); + seed_rng(); + kmod_selftests(); + enable_logging(); + clear_leaks(); + launch_tests(); + check_leaks(); + poweroff(); + return 1; +} diff --git a/net/wireguard/tests/qemu/kernel.config b/net/wireguard/tests/qemu/kernel.config new file mode 100644 index 000000000000..85267f986a44 --- /dev/null +++ b/net/wireguard/tests/qemu/kernel.config @@ -0,0 +1,90 @@ +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NET_CORE=y +CONFIG_NET_IPIP=y +CONFIG_DUMMY=y +CONFIG_VETH=y +CONFIG_MULTIUSER=y +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_NAT_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_TTY=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_SCRIPT=y +CONFIG_VDSO=y +CONFIG_VIRTUALIZATION=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PRINTK=y +CONFIG_KALLSYMS=y +CONFIG_BUG=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +CONFIG_JUMP_LABEL=y +CONFIG_EMBEDDED=n +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_SHMEM=y +CONFIG_SLUB=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_SMP=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +CONFIG_NUMA=y +CONFIG_PREEMPT=y +CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_HZ_PERIODIC=n +CONFIG_HIGH_RES_TIMERS=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_ARCH_RANDOM=y +CONFIG_FILE_LOCKING=y +CONFIG_POSIX_TIMERS=y +CONFIG_DEVTMPFS=y +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_PRINTK_TIME=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_KERNEL_GZIP=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_WQ_WATCHDOG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y +CONFIG_PANIC_TIMEOUT=-1 +CONFIG_STACKTRACE=y +CONFIG_EARLY_PRINTK=y +CONFIG_GDB_SCRIPTS=y +CONFIG_WIREGUARD=y +CONFIG_WIREGUARD_DEBUG=y diff --git a/net/wireguard/timers.c b/net/wireguard/timers.c new file mode 100644 index 000000000000..d54d32ac9bc4 --- /dev/null +++ b/net/wireguard/timers.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "queueing.h" +#include "socket.h" + +/* + * - Timer for retransmitting the handshake if we don't hear back after + * `REKEY_TIMEOUT + jitter` ms. + * + * - Timer for sending empty packet if we have received a packet but after have + * not sent one for `KEEPALIVE_TIMEOUT` ms. + * + * - Timer for initiating new handshake if we have sent a packet but after have + * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + + * jitter` ms. + * + * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms + * if no new keys have been received. + * + * - Timer for, if enabled, sending an empty authenticated packet every user- + * specified seconds. + */ + +static inline void mod_peer_timer(struct wg_peer *peer, + struct timer_list *timer, + unsigned long expires) +{ + rcu_read_lock_bh(); + if (likely(netif_running(peer->device->dev) && + !READ_ONCE(peer->is_dead))) + mod_timer(timer, expires); + rcu_read_unlock_bh(); +} + +static void wg_expired_retransmit_handshake(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, + timer_retransmit_handshake); + + if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); + + del_timer(&peer->timer_send_keepalive); + /* We drop all packets without a keypair and don't try again, + * if we try unsuccessfully for too long to make a handshake. + */ + wg_packet_purge_staged_packets(peer); + + /* We set a timer for destroying any residue that might be left + * of a partial exchange. + */ + if (!timer_pending(&peer->timer_zero_key_material)) + mod_peer_timer(peer, &peer->timer_zero_key_material, + jiffies + REJECT_AFTER_TIME * 3 * HZ); + } else { + ++peer->timer_handshake_attempts; + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, REKEY_TIMEOUT, + peer->timer_handshake_attempts + 1); + + /* We clear the endpoint address src address, in case this is + * the cause of trouble. + */ + wg_socket_clear_peer_endpoint_src(peer); + + wg_packet_send_queued_handshake_initiation(peer, true); + } +} + +static void wg_expired_send_keepalive(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive); + + wg_packet_send_keepalive(peer); + if (peer->timer_need_another_keepalive) { + peer->timer_need_another_keepalive = false; + mod_peer_timer(peer, &peer->timer_send_keepalive, + jiffies + KEEPALIVE_TIMEOUT * HZ); + } +} + +static void wg_expired_new_handshake(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake); + + pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); + /* We clear the endpoint address src address, in case this is the cause + * of trouble. + */ + wg_socket_clear_peer_endpoint_src(peer); + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static void wg_expired_zero_key_material(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material); + + rcu_read_lock_bh(); + if (!READ_ONCE(peer->is_dead)) { + wg_peer_get(peer); + if (!queue_work(peer->device->handshake_send_wq, + &peer->clear_peer_work)) + /* If the work was already on the queue, we want to drop + * the extra reference. + */ + wg_peer_put(peer); + } + rcu_read_unlock_bh(); +} + +static void wg_queued_expired_zero_key_material(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + clear_peer_work); + + pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, REJECT_AFTER_TIME * 3); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_peer_put(peer); +} + +static void wg_expired_send_persistent_keepalive(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, + timer_persistent_keepalive); + + if (likely(peer->persistent_keepalive_interval)) + wg_packet_send_keepalive(peer); +} + +/* Should be called after an authenticated data packet is sent. */ +void wg_timers_data_sent(struct wg_peer *peer) +{ + if (!timer_pending(&peer->timer_new_handshake)) + mod_peer_timer(peer, &peer->timer_new_handshake, + jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); +} + +/* Should be called after an authenticated data packet is received. */ +void wg_timers_data_received(struct wg_peer *peer) +{ + if (likely(netif_running(peer->device->dev))) { + if (!timer_pending(&peer->timer_send_keepalive)) + mod_peer_timer(peer, &peer->timer_send_keepalive, + jiffies + KEEPALIVE_TIMEOUT * HZ); + else + peer->timer_need_another_keepalive = true; + } +} + +/* Should be called after any type of authenticated packet is sent, whether + * keepalive, data, or handshake. + */ +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) +{ + del_timer(&peer->timer_send_keepalive); +} + +/* Should be called after any type of authenticated packet is received, whether + * keepalive, data, or handshake. + */ +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) +{ + del_timer(&peer->timer_new_handshake); +} + +/* Should be called after a handshake initiation message is sent. */ +void wg_timers_handshake_initiated(struct wg_peer *peer) +{ + mod_peer_timer(peer, &peer->timer_retransmit_handshake, + jiffies + REKEY_TIMEOUT * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); +} + +/* Should be called after a handshake response message is received and processed + * or when getting key confirmation via the first data message. + */ +void wg_timers_handshake_complete(struct wg_peer *peer) +{ + del_timer(&peer->timer_retransmit_handshake); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + ktime_get_real_ts64(&peer->walltime_last_handshake); +} + +/* Should be called after an ephemeral key is created, which is before sending a + * handshake response or after receiving a handshake response. + */ +void wg_timers_session_derived(struct wg_peer *peer) +{ + mod_peer_timer(peer, &peer->timer_zero_key_material, + jiffies + REJECT_AFTER_TIME * 3 * HZ); +} + +/* Should be called before a packet with authentication, whether + * keepalive, data, or handshakem is sent, or after one is received. + */ +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) +{ + if (peer->persistent_keepalive_interval) + mod_peer_timer(peer, &peer->timer_persistent_keepalive, + jiffies + peer->persistent_keepalive_interval * HZ); +} + +void wg_timers_init(struct wg_peer *peer) +{ + timer_setup(&peer->timer_retransmit_handshake, + wg_expired_retransmit_handshake, 0); + timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); + timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); + timer_setup(&peer->timer_zero_key_material, + wg_expired_zero_key_material, 0); + timer_setup(&peer->timer_persistent_keepalive, + wg_expired_send_persistent_keepalive, 0); + INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + peer->timer_need_another_keepalive = false; +} + +void wg_timers_stop(struct wg_peer *peer) +{ + del_timer_sync(&peer->timer_retransmit_handshake); + del_timer_sync(&peer->timer_send_keepalive); + del_timer_sync(&peer->timer_new_handshake); + del_timer_sync(&peer->timer_zero_key_material); + del_timer_sync(&peer->timer_persistent_keepalive); + flush_work(&peer->clear_peer_work); +} diff --git a/net/wireguard/timers.h b/net/wireguard/timers.h new file mode 100644 index 000000000000..f0653dcb1326 --- /dev/null +++ b/net/wireguard/timers.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_TIMERS_H +#define _WG_TIMERS_H + +#include + +struct wg_peer; + +void wg_timers_init(struct wg_peer *peer); +void wg_timers_stop(struct wg_peer *peer); +void wg_timers_data_sent(struct wg_peer *peer); +void wg_timers_data_received(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer); +void wg_timers_handshake_initiated(struct wg_peer *peer); +void wg_timers_handshake_complete(struct wg_peer *peer); +void wg_timers_session_derived(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer); + +static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, + u64 expiration_seconds) +{ + return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) + <= (s64)ktime_get_coarse_boottime_ns(); +} + +#endif /* _WG_TIMERS_H */ diff --git a/net/wireguard/uapi/wireguard.h b/net/wireguard/uapi/wireguard.h new file mode 100644 index 000000000000..ae88be14c947 --- /dev/null +++ b/net/wireguard/uapi/wireguard.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Documentation + * ============= + * + * The below enums and macros are for interfacing with WireGuard, using generic + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two + * methods: get and set. Note that while they share many common attributes, + * these two functions actually accept a slightly different set of inputs and + * outputs. + * + * WG_CMD_GET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain + * one but not both of: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * + * The kernel will then return several messages (NLM_F_MULTI) containing the + * following tree of nested items: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_LISTEN_PORT: NLA_U16 + * WGDEVICE_A_FWMARK: NLA_U32 + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec + * WGPEER_A_RX_BYTES: NLA_U64 + * WGPEER_A_TX_BYTES: NLA_U64 + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32 + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that all of the allowed IPs of a single peer will not + * fit within a single netlink message. In that case, the same peer will + * be written in the following message, except it will only contain + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several + * times in a row for the same peer. It is then up to the receiver to + * coalesce adjacent peers. Likewise, it is possible that all peers will + * not fit within a single message. So, subsequent peers will be sent + * in following messages, except those will only contain WGDEVICE_A_IFNAME + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these + * messages to form the complete list of peers. + * + * Since this is an NLA_F_DUMP command, the final message will always be + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message + * contains an integer error code. It is either zero or a negative error + * code corresponding to the errno. + * + * WG_CMD_SET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST. The command should contain the + * following tree of nested items, containing one but not both of + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current + * peers should be removed prior to adding the list below. + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the + * specified peer should not exist at the end of the + * operation, rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed + * IPs of this peer should be removed prior to adding + * the list below and/or WGPEER_F_UPDATE_ONLY if the + * peer should only be set if it already exists. + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at + * all by most users of this API, as the + * most recent protocol will be used when + * this is unset. Otherwise, must be set + * to 1. + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that the amount of configuration data exceeds that of + * the maximum message length accepted by the kernel. In that case, several + * messages should be sent one after another, with each successive one + * filling in information not contained in the prior. Note that if + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably + * should not be specified in fragments that come after, so that the list + * of peers is only cleared the first time but appended after. Likewise for + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message + * of a peer, it likely should not be specified in subsequent fragments. + * + * If an error occurs, NLMSG_ERROR will reply containing an errno. + */ + +#ifndef _WG_UAPI_WIREGUARD_H +#define _WG_UAPI_WIREGUARD_H + +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 + +#define WG_KEY_LEN 32 + +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + +enum wgdevice_flag { + WGDEVICE_F_REPLACE_PEERS = 1U << 0, + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST +}; +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + WGPEER_A_PROTOCOL_VERSION, + __WGPEER_A_LAST +}; +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) + +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, + __WGALLOWEDIP_A_LAST +}; +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) + +#endif /* _WG_UAPI_WIREGUARD_H */ diff --git a/net/wireguard/version.h b/net/wireguard/version.h new file mode 100644 index 000000000000..c7f9028f0177 --- /dev/null +++ b/net/wireguard/version.h @@ -0,0 +1,3 @@ +#ifndef WIREGUARD_VERSION +#define WIREGUARD_VERSION "1.0.20220627" +#endif diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 937f719e3033..b64a07fbd62c 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -197,8 +197,8 @@ cc-ldoption = $(call try-run,\ $(CC) $(CLANG_TARGET) $(1) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) # ld-option -# Usage: LDFLAGS += $(call ld-option, -X) -ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2)) +# Usage: LDFLAGS += $(call ld-option, -X, -Y) +ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2),$(3)) # ar-option # Usage: KBUILD_ARFLAGS := $(call ar-option,D) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index b560f9de83b8..624112961ddd 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -218,12 +218,12 @@ ifdef CONFIG_LTO_CLANG # to the kernel and/or modules at the end. cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \ - if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) 2>/dev/null | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/$(@F).symversions; \ fi; \ else \ - if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) 2>/dev/null | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/$(@F).symversions; \ fi; \ @@ -231,7 +231,7 @@ cmd_modversions_c = \ mv -f $(@D)/.tmp_$(@F) $@; else cmd_modversions_c = \ - if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) 2>/dev/null | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/.tmp_$(@F:.o=.ver); \ \ diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index e78a3a73df3c..0e29d70ead3c 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -73,5 +73,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized) KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +KBUILD_CFLAGS += $(call cc-disable-warning, asm-operand-widths) endif endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 8627e3da5af6..c1e1ab0e163b 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -283,6 +283,9 @@ ifeq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),) DTC_FLAGS += -Wno-unit_address_vs_reg endif +# Silence warnings +DTC_FLAGS += -q + # Generate an assembly file to wrap the output of the device tree compiler quiet_cmd_dt_S_dtb= DTB $@ cmd_dt_S_dtb= \ @@ -329,7 +332,7 @@ dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp) # mkdtimg #---------------------------------------------------------------------------- quiet_cmd_mkdtimg = DTBOIMG $@ -cmd_mkdtimg = mkdtimg cfg_create $@ $< +cmd_mkdtimg = python2 $(srctree)/scripts/dtc/libfdt/mkdtboimg.py cfg_create $@ $< # cat # --------------------------------------------------------------------------- @@ -342,25 +345,7 @@ cmd_cat = (cat $(filter-out FORCE,$^) > $@) || (rm -f $@; false) # Bzip2 and LZMA do not include size in file... so we have to fake that; # append the size as a 32-bit littleendian number as gzip does. -size_append = printf $(shell \ -dec_size=0; \ -for F in $1; do \ - fsize=$$(stat -c "%s" $$F); \ - dec_size=$$(expr $$dec_size + $$fsize); \ -done; \ -printf "%08x\n" $$dec_size | \ - sed 's/\(..\)/\1 /g' | { \ - read ch0 ch1 ch2 ch3; \ - for ch in $$ch3 $$ch2 $$ch1 $$ch0; do \ - printf '%s%03o' '\\' $$((0x$$ch)); \ - done; \ - } \ -) - -quiet_cmd_bzip2 = BZIP2 $@ -cmd_bzip2 = (cat $(filter-out FORCE,$^) | \ - bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ - (rm -f $@ ; false) +size_append = printf "" # Lzma # --------------------------------------------------------------------------- diff --git a/scripts/dtc/dtc.c b/scripts/dtc/dtc.c index 5fa23c406266..da2151a601cc 100644 --- a/scripts/dtc/dtc.c +++ b/scripts/dtc/dtc.c @@ -241,6 +241,10 @@ int main(int argc, char *argv[]) } } + while (!quiet) { + quiet++; + } + if (argc > (optind+1)) usage("missing files"); else if (argc < (optind+1)) diff --git a/scripts/dtc/libfdt/mkdtboimg.py b/scripts/dtc/libfdt/mkdtboimg.py new file mode 100644 index 000000000000..e7ed21c9022b --- /dev/null +++ b/scripts/dtc/libfdt/mkdtboimg.py @@ -0,0 +1,950 @@ +#! /usr/bin/env python +# Copyright 2017, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +"""Tool for packing multiple DTB/DTBO files into a single image""" + +import argparse +import os +from array import array +from collections import namedtuple +import struct +from sys import stdout +import zlib + +class CompressionFormat(object): + """Enum representing DT compression format for a DT entry. + """ + NO_COMPRESSION = 0x00 + ZLIB_COMPRESSION = 0x01 + GZIP_COMPRESSION = 0x02 + +class DtEntry(object): + """Provides individual DT image file arguments to be added to a DTBO. + Attributes: + _REQUIRED_KEYS: 'keys' needed to be present in the dictionary passed to instantiate + an object of this class. + _COMPRESSION_FORMAT_MASK: Mask to retrieve compression info for DT entry from flags field + when a DTBO header of version 1 is used. + """ + _COMPRESSION_FORMAT_MASK = 0x0f + REQUIRED_KEYS = ('dt_file', 'dt_size', 'dt_offset', 'id', 'rev', 'flags', + 'custom0', 'custom1', 'custom2') + + @staticmethod + def __get_number_or_prop(arg): + """Converts string to integer or reads the property from DT image. + Args: + arg: String containing the argument provided on the command line. + Returns: + An integer property read from DT file or argument string + converted to integer + """ + + if not arg or arg[0] == '+' or arg[0] == '-': + raise ValueError('Invalid argument passed to DTImage') + if arg[0] == '/': + # TODO(b/XXX): Use pylibfdt to get property value from DT + raise ValueError('Invalid argument passed to DTImage') + else: + base = 10 + if arg.startswith('0x') or arg.startswith('0X'): + base = 16 + elif arg.startswith('0'): + base = 8 + return int(arg, base) + + def __init__(self, **kwargs): + """Constructor for DtEntry object. + Initializes attributes from dictionary object that contains + values keyed with names equivalent to the class's attributes. + Args: + kwargs: Dictionary object containing values to instantiate + class members with. Expected keys in dictionary are from + the tuple (_REQUIRED_KEYS) + """ + + missing_keys = set(self.REQUIRED_KEYS) - set(kwargs) + if missing_keys: + raise ValueError('Missing keys in DtEntry constructor: %r' % + sorted(missing_keys)) + + self.__dt_file = kwargs['dt_file'] + self.__dt_offset = kwargs['dt_offset'] + self.__dt_size = kwargs['dt_size'] + self.__id = self.__get_number_or_prop(kwargs['id']) + self.__rev = self.__get_number_or_prop(kwargs['rev']) + self.__flags = self.__get_number_or_prop(kwargs['flags']) + self.__custom0 = self.__get_number_or_prop(kwargs['custom0']) + self.__custom1 = self.__get_number_or_prop(kwargs['custom1']) + self.__custom2 = self.__get_number_or_prop(kwargs['custom2']) + + def __str__(self): + sb = [] + sb.append('{key:>20} = {value:d}'.format(key='dt_size', + value=self.__dt_size)) + sb.append('{key:>20} = {value:d}'.format(key='dt_offset', + value=self.__dt_offset)) + sb.append('{key:>20} = {value:08x}'.format(key='id', + value=self.__id)) + sb.append('{key:>20} = {value:08x}'.format(key='rev', + value=self.__rev)) + sb.append('{key:>20} = {value:08x}'.format(key='custom[0]', + value=self.__flags)) + sb.append('{key:>20} = {value:08x}'.format(key='custom[1]', + value=self.__custom0)) + sb.append('{key:>20} = {value:08x}'.format(key='custom[2]', + value=self.__custom1)) + sb.append('{key:>20} = {value:08x}'.format(key='custom[3]', + value=self.__custom2)) + return '\n'.join(sb) + + def compression_info(self, version): + """CompressionFormat: compression format for DT image file. + Args: + version: Version of DTBO header, compression is only + supported from version 1. + """ + if version is 0: + return CompressionFormat.NO_COMPRESSION + return self.flags & self._COMPRESSION_FORMAT_MASK + + @property + def dt_file(self): + """file: File handle to the DT image file.""" + return self.__dt_file + + @property + def size(self): + """int: size in bytes of the DT image file.""" + return self.__dt_size + + @size.setter + def size(self, value): + self.__dt_size = value + + @property + def dt_offset(self): + """int: offset in DTBO file for this DT image.""" + return self.__dt_offset + + @dt_offset.setter + def dt_offset(self, value): + self.__dt_offset = value + + @property + def image_id(self): + """int: DT entry _id for this DT image.""" + return self.__id + + @property + def rev(self): + """int: DT entry _rev for this DT image.""" + return self.__rev + + @property + def flags(self): + """int: DT entry _flags for this DT image.""" + return self.__flags + + @property + def custom0(self): + """int: DT entry _custom0 for this DT image.""" + return self.__custom0 + + @property + def custom1(self): + """int: DT entry _custom1 for this DT image.""" + return self.__custom1 + + @property + def custom2(self): + """int: DT entry custom2 for this DT image.""" + return self.__custom2 + + +class Dtbo(object): + """ + Provides parser, reader, writer for dumping and creating Device Tree Blob + Overlay (DTBO) images. + Attributes: + _DTBO_MAGIC: Device tree table header magic. + _ACPIO_MAGIC: Advanced Configuration and Power Interface table header + magic. + _DT_TABLE_HEADER_SIZE: Size of Device tree table header. + _DT_TABLE_HEADER_INTS: Number of integers in DT table header. + _DT_ENTRY_HEADER_SIZE: Size of Device tree entry header within a DTBO. + _DT_ENTRY_HEADER_INTS: Number of integers in DT entry header. + _GZIP_COMPRESSION_WBITS: Argument 'wbits' for gzip compression + _ZLIB_DECOMPRESSION_WBITS: Argument 'wbits' for zlib/gzip compression + """ + + _DTBO_MAGIC = 0xd7b7ab1e + _ACPIO_MAGIC = 0x41435049 + _DT_TABLE_HEADER_SIZE = struct.calcsize('>8I') + _DT_TABLE_HEADER_INTS = 8 + _DT_ENTRY_HEADER_SIZE = struct.calcsize('>8I') + _DT_ENTRY_HEADER_INTS = 8 + _GZIP_COMPRESSION_WBITS = 31 + _ZLIB_DECOMPRESSION_WBITS = 47 + + def _update_dt_table_header(self): + """Converts header entries into binary data for DTBO header. + Packs the current Device tree table header attribute values in + metadata buffer. + """ + struct.pack_into('>8I', self.__metadata, 0, self.magic, + self.total_size, self.header_size, + self.dt_entry_size, self.dt_entry_count, + self.dt_entries_offset, self.page_size, + self.version) + + def _update_dt_entry_header(self, dt_entry, metadata_offset): + """Converts each DT entry header entry into binary data for DTBO file. + Packs the current device tree table entry attribute into + metadata buffer as device tree entry header. + Args: + dt_entry: DtEntry object for the header to be packed. + metadata_offset: Offset into metadata buffer to begin writing. + dtbo_offset: Offset where the DT image file for this dt_entry can + be found in the resulting DTBO image. + """ + struct.pack_into('>8I', self.__metadata, metadata_offset, dt_entry.size, + dt_entry.dt_offset, dt_entry.image_id, dt_entry.rev, + dt_entry.flags, dt_entry.custom0, dt_entry.custom1, + dt_entry.custom2) + + def _update_metadata(self): + """Updates the DTBO metadata. + Initialize the internal metadata buffer and fill it with all Device + Tree table entries and update the DTBO header. + """ + + self.__metadata = array('c', ' ' * self.__metadata_size) + metadata_offset = self.header_size + for dt_entry in self.__dt_entries: + self._update_dt_entry_header(dt_entry, metadata_offset) + metadata_offset += self.dt_entry_size + self._update_dt_table_header() + + def _read_dtbo_header(self, buf): + """Reads DTBO file header into metadata buffer. + Unpack and read the DTBO table header from given buffer. The + buffer size must exactly be equal to _DT_TABLE_HEADER_SIZE. + Args: + buf: Bytebuffer read directly from the file of size + _DT_TABLE_HEADER_SIZE. + """ + (self.magic, self.total_size, self.header_size, + self.dt_entry_size, self.dt_entry_count, self.dt_entries_offset, + self.page_size, self.version) = struct.unpack_from('>8I', buf, 0) + + # verify the header + if self.magic != self._DTBO_MAGIC and self.magic != self._ACPIO_MAGIC: + raise ValueError('Invalid magic number 0x%x in DTBO/ACPIO file' % + (self.magic)) + + if self.header_size != self._DT_TABLE_HEADER_SIZE: + raise ValueError('Invalid header size (%d) in DTBO/ACPIO file' % + (self.header_size)) + + if self.dt_entry_size != self._DT_ENTRY_HEADER_SIZE: + raise ValueError('Invalid DT entry header size (%d) in DTBO/ACPIO file' % + (self.dt_entry_size)) + + def _read_dt_entries_from_metadata(self): + """Reads individual DT entry headers from metadata buffer. + Unpack and read the DTBO DT entry headers from the internal buffer. + The buffer size must exactly be equal to _DT_TABLE_HEADER_SIZE + + (_DT_ENTRY_HEADER_SIZE * dt_entry_count). The method raises exception + if DT entries have already been set for this object. + """ + + if self.__dt_entries: + raise ValueError('DTBO DT entries can be added only once') + + offset = self.dt_entries_offset / 4 + params = {} + params['dt_file'] = None + for i in range(0, self.dt_entry_count): + dt_table_entry = self.__metadata[offset:offset + self._DT_ENTRY_HEADER_INTS] + params['dt_size'] = dt_table_entry[0] + params['dt_offset'] = dt_table_entry[1] + for j in range(2, self._DT_ENTRY_HEADER_INTS): + params[DtEntry.REQUIRED_KEYS[j + 1]] = str(dt_table_entry[j]) + dt_entry = DtEntry(**params) + self.__dt_entries.append(dt_entry) + offset += self._DT_ENTRY_HEADER_INTS + + def _read_dtbo_image(self): + """Parse the input file and instantiate this object.""" + + # First check if we have enough to read the header + file_size = os.fstat(self.__file.fileno()).st_size + if file_size < self._DT_TABLE_HEADER_SIZE: + raise ValueError('Invalid DTBO file') + + self.__file.seek(0) + buf = self.__file.read(self._DT_TABLE_HEADER_SIZE) + self._read_dtbo_header(buf) + + self.__metadata_size = (self.header_size + + self.dt_entry_count * self.dt_entry_size) + if file_size < self.__metadata_size: + raise ValueError('Invalid or truncated DTBO file of size %d expected %d' % + file_size, self.__metadata_size) + + num_ints = (self._DT_TABLE_HEADER_INTS + + self.dt_entry_count * self._DT_ENTRY_HEADER_INTS) + if self.dt_entries_offset > self._DT_TABLE_HEADER_SIZE: + num_ints += (self.dt_entries_offset - self._DT_TABLE_HEADER_SIZE) / 4 + format_str = '>' + str(num_ints) + 'I' + self.__file.seek(0) + self.__metadata = struct.unpack(format_str, + self.__file.read(self.__metadata_size)) + self._read_dt_entries_from_metadata() + + def _find_dt_entry_with_same_file(self, dt_entry): + """Finds DT Entry that has identical backing DT file. + Args: + dt_entry: DtEntry object whose 'dtfile' we find for existence in the + current 'dt_entries'. + Returns: + If a match by file path is found, the corresponding DtEntry object + from internal list is returned. If not, 'None' is returned. + """ + + dt_entry_path = os.path.realpath(dt_entry.dt_file.name) + for entry in self.__dt_entries: + entry_path = os.path.realpath(entry.dt_file.name) + if entry_path == dt_entry_path: + return entry + return None + + def __init__(self, file_handle, dt_type='dtb', page_size=None, version=0): + """Constructor for Dtbo Object + Args: + file_handle: The Dtbo File handle corresponding to this object. + The file handle can be used to write to (in case of 'create') + or read from (in case of 'dump') + """ + + self.__file = file_handle + self.__dt_entries = [] + self.__metadata = None + self.__metadata_size = 0 + + # if page_size is given, assume the object is being instantiated to + # create a DTBO file + if page_size: + if dt_type == 'acpi': + self.magic = self._ACPIO_MAGIC + else: + self.magic = self._DTBO_MAGIC + self.total_size = self._DT_TABLE_HEADER_SIZE + self.header_size = self._DT_TABLE_HEADER_SIZE + self.dt_entry_size = self._DT_ENTRY_HEADER_SIZE + self.dt_entry_count = 0 + self.dt_entries_offset = self._DT_TABLE_HEADER_SIZE + self.page_size = page_size + self.version = version + self.__metadata_size = self._DT_TABLE_HEADER_SIZE + else: + self._read_dtbo_image() + + def __str__(self): + sb = [] + sb.append('dt_table_header:') + _keys = ('magic', 'total_size', 'header_size', 'dt_entry_size', + 'dt_entry_count', 'dt_entries_offset', 'page_size', 'version') + for key in _keys: + if key == 'magic': + sb.append('{key:>20} = {value:08x}'.format(key=key, + value=self.__dict__[key])) + else: + sb.append('{key:>20} = {value:d}'.format(key=key, + value=self.__dict__[key])) + count = 0 + for dt_entry in self.__dt_entries: + sb.append('dt_table_entry[{0:d}]:'.format(count)) + sb.append(str(dt_entry)) + count = count + 1 + return '\n'.join(sb) + + @property + def dt_entries(self): + """Returns a list of DtEntry objects found in DTBO file.""" + return self.__dt_entries + + def compress_dt_entry(self, compression_format, dt_entry_file): + """Compresses a DT entry. + Args: + compression_format: Compression format for DT Entry + dt_entry_file: File handle to read DT entry from. + Returns: + Compressed DT entry and its length. + Raises: + ValueError if unrecognized compression format is found. + """ + compress_zlib = zlib.compressobj() # zlib + compress_gzip = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, self._GZIP_COMPRESSION_WBITS) # gzip + compression_obj_dict = { + CompressionFormat.NO_COMPRESSION: None, + CompressionFormat.ZLIB_COMPRESSION: compress_zlib, + CompressionFormat.GZIP_COMPRESSION: compress_gzip, + } + + if compression_format not in compression_obj_dict: + ValueError("Bad compression format %d" % compression_format) + + if compression_format is CompressionFormat.NO_COMPRESSION: + dt_entry = dt_entry_file.read() + else: + compression_object = compression_obj_dict[compression_format] + dt_entry_file.seek(0) + dt_entry = compression_object.compress(dt_entry_file.read()) + dt_entry += compression_object.flush() + return dt_entry, len(dt_entry) + + def add_dt_entries(self, dt_entries): + """Adds DT image files to the DTBO object. + Adds a list of Dtentry Objects to the DTBO image. The changes are not + committed to the output file until commit() is called. + Args: + dt_entries: List of DtEntry object to be added. + Returns: + A buffer containing all DT entries. + Raises: + ValueError: if the list of DT entries is empty or if a list of DT entries + has already been added to the DTBO. + """ + if not dt_entries: + raise ValueError('Attempted to add empty list of DT entries') + + if self.__dt_entries: + raise ValueError('DTBO DT entries can be added only once') + + dt_entry_count = len(dt_entries) + dt_offset = (self.header_size + + dt_entry_count * self.dt_entry_size) + + dt_entry_buf = "" + for dt_entry in dt_entries: + if not isinstance(dt_entry, DtEntry): + raise ValueError('Adding invalid DT entry object to DTBO') + entry = self._find_dt_entry_with_same_file(dt_entry) + dt_entry_compression_info = dt_entry.compression_info(self.version) + if entry and (entry.compression_info(self.version) + == dt_entry_compression_info): + dt_entry.dt_offset = entry.dt_offset + dt_entry.size = entry.size + else: + dt_entry.dt_offset = dt_offset + compressed_entry, dt_entry.size = self.compress_dt_entry(dt_entry_compression_info, + dt_entry.dt_file) + dt_entry_buf += compressed_entry + dt_offset += dt_entry.size + self.total_size += dt_entry.size + self.__dt_entries.append(dt_entry) + self.dt_entry_count += 1 + self.__metadata_size += self.dt_entry_size + self.total_size += self.dt_entry_size + + return dt_entry_buf + + def extract_dt_file(self, idx, fout, decompress): + """Extract DT Image files embedded in the DTBO file. + Extracts Device Tree blob image file at given index into a file handle. + Args: + idx: Index of the DT entry in the DTBO file. + fout: File handle where the DTB at index idx to be extracted into. + decompress: If a DT entry is compressed, decompress it before writing + it to the file handle. + Raises: + ValueError: if invalid DT entry index or compression format is detected. + """ + if idx > self.dt_entry_count: + raise ValueError('Invalid index %d of DtEntry' % idx) + + size = self.dt_entries[idx].size + offset = self.dt_entries[idx].dt_offset + self.__file.seek(offset, 0) + fout.seek(0) + compression_format = self.dt_entries[idx].compression_info(self.version) + if decompress and compression_format: + if (compression_format == CompressionFormat.ZLIB_COMPRESSION or + compression_format == CompressionFormat.GZIP_COMPRESSION): + fout.write(zlib.decompress(self.__file.read(size), self._ZLIB_DECOMPRESSION_WBITS)) + else: + raise ValueError("Unknown compression format detected") + else: + fout.write(self.__file.read(size)) + + def commit(self, dt_entry_buf): + """Write out staged changes to the DTBO object to create a DTBO file. + Writes a fully instantiated Dtbo Object into the output file using the + file handle present in '_file'. No checks are performed on the object + except for existence of output file handle on the object before writing + out the file. + Args: + dt_entry_buf: Buffer containing all DT entries. + """ + if not self.__file: + raise ValueError('No file given to write to.') + + if not self.__dt_entries: + raise ValueError('No DT image files to embed into DTBO image given.') + + self._update_metadata() + + self.__file.seek(0) + self.__file.write(self.__metadata) + self.__file.write(dt_entry_buf) + self.__file.flush() + + +def parse_dt_entry(global_args, arglist): + """Parse arguments for single DT entry file. + Parses command line arguments for single DT image file while + creating a Device tree blob overlay (DTBO). + Args: + global_args: Dtbo object containing global default values + for DtEntry attributes. + arglist: Command line argument list for this DtEntry. + Returns: + A Namespace object containing all values to instantiate DtEntry object. + """ + + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('dt_file', nargs='?', + type=argparse.FileType('rb'), + default=None) + parser.add_argument('--id', type=str, dest='id', action='store', + default=global_args.global_id) + parser.add_argument('--rev', type=str, dest='rev', + action='store', default=global_args.global_rev) + parser.add_argument('--flags', type=str, dest='flags', + action='store', + default=global_args.global_flags) + parser.add_argument('--custom0', type=str, dest='custom0', + action='store', + default=global_args.global_custom0) + parser.add_argument('--custom1', type=str, dest='custom1', + action='store', + default=global_args.global_custom1) + parser.add_argument('--custom2', type=str, dest='custom2', + action='store', + default=global_args.global_custom2) + return parser.parse_args(arglist) + + +def parse_dt_entries(global_args, arg_list): + """Parse all DT entries from command line. + Parse all DT image files and their corresponding attribute from + command line + Args: + global_args: Argument containing default global values for _id, + _rev and customX. + arg_list: The remainder of the command line after global options + DTBO creation have been parsed. + Returns: + A List of DtEntry objects created after parsing the command line + given in argument. + """ + dt_entries = [] + img_file_idx = [] + idx = 0 + # find all positional arguments (i.e. DT image file paths) + for arg in arg_list: + if not arg.startswith("--"): + img_file_idx.append(idx) + idx = idx + 1 + + if not img_file_idx: + raise ValueError('Input DT images must be provided') + + total_images = len(img_file_idx) + for idx in xrange(total_images): + start_idx = img_file_idx[idx] + if idx == total_images - 1: + argv = arg_list[start_idx:] + else: + end_idx = img_file_idx[idx + 1] + argv = arg_list[start_idx:end_idx] + args = parse_dt_entry(global_args, argv) + params = vars(args) + params['dt_offset'] = 0 + params['dt_size'] = os.fstat(params['dt_file'].fileno()).st_size + dt_entries.append(DtEntry(**params)) + + return dt_entries + +def parse_config_option(line, is_global, dt_keys, global_key_types): + """Parses a single line from the configuration file. + Args: + line: String containing the key=value line from the file. + is_global: Boolean indicating if we should parse global or DT entry + specific option. + dt_keys: Tuple containing all valid DT entry and global option strings + in configuration file. + global_key_types: A dict of global options and their corresponding types. It + contains all exclusive valid global option strings in configuration + file that are not repeated in dt entry options. + Returns: + Returns a tuple for parsed key and value for the option. Also, checks + the key to make sure its valid. + """ + + if line.find('=') == -1: + raise ValueError('Invalid line (%s) in configuration file' % line) + + key, value = (x.strip() for x in line.split('=')) + if is_global and key in global_key_types: + if global_key_types[key] is int: + value = int(value) + elif key not in dt_keys: + raise ValueError('Invalid option (%s) in configuration file' % key) + + return key, value + +def parse_config_file(fin, dt_keys, global_key_types): + """Parses the configuration file for creating DTBO image. + Args: + fin: File handle for configuration file + is_global: Boolean indicating if we should parse global or DT entry + specific option. + dt_keys: Tuple containing all valid DT entry and global option strings + in configuration file. + global_key_types: A dict of global options and their corresponding types. It + contains all exclusive valid global option strings in configuration + file that are not repeated in dt entry options. + Returns: + global_args, dt_args: Tuple of a dictionary with global arguments + and a list of dictionaries for all DT entry specific arguments the + following format. + global_args: + {'id' : , 'rev' : ...} + dt_args: + [{'filename' : 'dt_file_name', 'id' : , + 'rev' : ...}, + {'filename' : 'dt_file_name2', 'id' : , + 'rev' : ...}, ... + ] + """ + + # set all global defaults + global_args = dict((k, '0') for k in dt_keys) + global_args['dt_type'] = 'dtb' + global_args['page_size'] = 2048 + global_args['version'] = 0 + + dt_args = [] + found_dt_entry = False + count = -1 + for line in fin: + line = line.rstrip() + if line.lstrip().startswith('#'): + continue + comment_idx = line.find('#') + line = line if comment_idx == -1 else line[0:comment_idx] + if not line or line.isspace(): + continue + if line.startswith((' ', '\t')) and not found_dt_entry: + # This is a global argument + key, value = parse_config_option(line, True, dt_keys, global_key_types) + global_args[key] = value + elif line.find('=') != -1: + key, value = parse_config_option(line, False, dt_keys, global_key_types) + dt_args[-1][key] = value + else: + found_dt_entry = True + count += 1 + dt_args.append({}) + dt_args[-1]['filename'] = line.strip() + return global_args, dt_args + +def parse_create_args(arg_list): + """Parse command line arguments for 'create' sub-command. + Args: + arg_list: All command line arguments except the outfile file name. + Returns: + The list of remainder of the command line arguments after parsing + for 'create'. + """ + + image_arg_index = 0 + for arg in arg_list: + if not arg.startswith("--"): + break + image_arg_index = image_arg_index + 1 + + argv = arg_list[0:image_arg_index] + remainder = arg_list[image_arg_index:] + parser = argparse.ArgumentParser(prog='create', add_help=False) + parser.add_argument('--dt_type', type=str, dest='dt_type', + action='store', default='dtb') + parser.add_argument('--page_size', type=int, dest='page_size', + action='store', default=2048) + parser.add_argument('--version', type=int, dest='version', + action='store', default=0) + parser.add_argument('--id', type=str, dest='global_id', + action='store', default='0') + parser.add_argument('--rev', type=str, dest='global_rev', + action='store', default='0') + parser.add_argument('--flags', type=str, dest='global_flags', + action='store', default='0') + parser.add_argument('--custom0', type=str, dest='global_custom0', + action='store', default='0') + parser.add_argument('--custom1', type=str, dest='global_custom1', + action='store', default='0') + parser.add_argument('--custom2', type=str, dest='global_custom2', + action='store', default='0') + args = parser.parse_args(argv) + return args, remainder + +def parse_dump_cmd_args(arglist): + """Parse command line arguments for 'dump' sub-command. + Args: + arglist: List of all command line arguments including the outfile + file name if exists. + Returns: + A namespace object of parsed arguments. + """ + + parser = argparse.ArgumentParser(prog='dump') + parser.add_argument('--output', '-o', nargs='?', + type=argparse.FileType('wb'), + dest='outfile', + default=stdout) + parser.add_argument('--dtb', '-b', nargs='?', type=str, + dest='dtfilename') + parser.add_argument('--decompress', action='store_true', dest='decompress') + return parser.parse_args(arglist) + +def parse_config_create_cmd_args(arglist): + """Parse command line arguments for 'cfg_create subcommand. + Args: + arglist: A list of all command line arguments including the + mandatory input configuration file name. + Returns: + A Namespace object of parsed arguments. + """ + parser = argparse.ArgumentParser(prog='cfg_create') + parser.add_argument('conf_file', nargs='?', + type=argparse.FileType('rb'), + default=None) + cwd = os.getcwd() + parser.add_argument('--dtb-dir', '-d', nargs='?', type=str, + dest='dtbdir', default=cwd) + return parser.parse_args(arglist) + +def create_dtbo_image(fout, argv): + """Create Device Tree Blob Overlay image using provided arguments. + Args: + fout: Output file handle to write to. + argv: list of command line arguments. + """ + + global_args, remainder = parse_create_args(argv) + if not remainder: + raise ValueError('List of dtimages to add to DTBO not provided') + dt_entries = parse_dt_entries(global_args, remainder) + dtbo = Dtbo(fout, global_args.dt_type, global_args.page_size, global_args.version) + dt_entry_buf = dtbo.add_dt_entries(dt_entries) + dtbo.commit(dt_entry_buf) + fout.close() + +def dump_dtbo_image(fin, argv): + """Dump DTBO file. + Dump Device Tree Blob Overlay metadata as output and the device + tree image files embedded in the DTBO image into file(s) provided + as arguments + Args: + fin: Input DTBO image files. + argv: list of command line arguments. + """ + dtbo = Dtbo(fin) + args = parse_dump_cmd_args(argv) + if args.dtfilename: + num_entries = len(dtbo.dt_entries) + for idx in range(0, num_entries): + with open(args.dtfilename + '.{:d}'.format(idx), 'wb') as fout: + dtbo.extract_dt_file(idx, fout, args.decompress) + args.outfile.write(str(dtbo) + '\n') + args.outfile.close() + +def create_dtbo_image_from_config(fout, argv): + """Create DTBO file from a configuration file. + Args: + fout: Output file handle to write to. + argv: list of command line arguments. + """ + args = parse_config_create_cmd_args(argv) + if not args.conf_file: + raise ValueError('Configuration file must be provided') + + _DT_KEYS = ('id', 'rev', 'flags', 'custom0', 'custom1', 'custom2') + _GLOBAL_KEY_TYPES = {'dt_type': str, 'page_size': int, 'version': int} + + global_args, dt_args = parse_config_file(args.conf_file, + _DT_KEYS, _GLOBAL_KEY_TYPES) + params = {} + dt_entries = [] + for dt_arg in dt_args: + filepath = args.dtbdir + os.sep + dt_arg['filename'] + params['dt_file'] = open(filepath, 'rb') + params['dt_offset'] = 0 + params['dt_size'] = os.fstat(params['dt_file'].fileno()).st_size + for key in _DT_KEYS: + if key not in dt_arg: + params[key] = global_args[key] + else: + params[key] = dt_arg[key] + dt_entries.append(DtEntry(**params)) + + # Create and write DTBO file + dtbo = Dtbo(fout, global_args['dt_type'], global_args['page_size'], global_args['version']) + dt_entry_buf = dtbo.add_dt_entries(dt_entries) + dtbo.commit(dt_entry_buf) + fout.close() + +def print_default_usage(progname): + """Prints program's default help string. + Args: + progname: This program's name. + """ + sb = [] + sb.append(' ' + progname + ' help all') + sb.append(' ' + progname + ' help \n') + sb.append(' commands:') + sb.append(' help, dump, create, cfg_create') + print('\n'.join(sb)) + +def print_dump_usage(progname): + """Prints usage for 'dump' sub-command. + Args: + progname: This program's name. + """ + sb = [] + sb.append(' ' + progname + ' dump (