diff --git a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_SPE_PMU b/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_SPE_PMU
deleted file mode 100644
index 8d5ea4e3be935cdb002f3420ad7896f99d5a1f55..0000000000000000000000000000000000000000
--- a/anolis/configs/L0-MANDATORY/arm64/CONFIG_ARM_SPE_PMU
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_ARM_SPE_PMU=y
diff --git a/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_MEMORY_FAILURE b/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_MEMORY_FAILURE
new file mode 100644
index 0000000000000000000000000000000000000000..50d51d72166b2f83c573d5df6ca5647d95d2f57d
--- /dev/null
+++ b/anolis/configs/L0-MANDATORY/sw_64-6b/CONFIG_MEMORY_FAILURE
@@ -0,0 +1 @@
+# CONFIG_MEMORY_FAILURE is not set
diff --git a/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_MEMORY_FAILURE b/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_MEMORY_FAILURE
new file mode 100644
index 0000000000000000000000000000000000000000..50d51d72166b2f83c573d5df6ca5647d95d2f57d
--- /dev/null
+++ b/anolis/configs/L0-MANDATORY/sw_64-8a/CONFIG_MEMORY_FAILURE
@@ -0,0 +1 @@
+# CONFIG_MEMORY_FAILURE is not set
diff --git a/anolis/configs/L1-RECOMMEND/arm64/CONFIG_CMA_SIZE_MBYTES b/anolis/configs/L1-RECOMMEND/arm64/CONFIG_CMA_SIZE_MBYTES
deleted file mode 100644
index cc0c4e7eefc572fb21121d465ba5078dda7a207b..0000000000000000000000000000000000000000
--- a/anolis/configs/L1-RECOMMEND/arm64/CONFIG_CMA_SIZE_MBYTES
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_CMA_SIZE_MBYTES=64
diff --git a/anolis/configs/L1-RECOMMEND/arm64/CONFIG_HISI_PMU b/anolis/configs/L1-RECOMMEND/arm64/CONFIG_HISI_PMU
index f9809111ad969b31ed6cf1713b0e9effeb829c2a..7bd6395da722e6e0f7efc00cab91d2562cf91315 100644
--- a/anolis/configs/L1-RECOMMEND/arm64/CONFIG_HISI_PMU
+++ b/anolis/configs/L1-RECOMMEND/arm64/CONFIG_HISI_PMU
@@ -1 +1 @@
-CONFIG_HISI_PMU=m
+CONFIG_HISI_PMU=y
diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_CMA_SIZE_MBYTES b/anolis/configs/L1-RECOMMEND/default/CONFIG_CMA_SIZE_MBYTES
similarity index 100%
rename from anolis/configs/L1-RECOMMEND/x86/CONFIG_CMA_SIZE_MBYTES
rename to anolis/configs/L1-RECOMMEND/default/CONFIG_CMA_SIZE_MBYTES
diff --git a/anolis/configs/L1-RECOMMEND/arm64/CONFIG_SCHED_CLUSTER b/anolis/configs/L1-RECOMMEND/default/CONFIG_SCHED_CLUSTER
similarity index 100%
rename from anolis/configs/L1-RECOMMEND/arm64/CONFIG_SCHED_CLUSTER
rename to anolis/configs/L1-RECOMMEND/default/CONFIG_SCHED_CLUSTER
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_DMA_RESTRICTED_POOL b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_DMA_RESTRICTED_POOL
new file mode 100644
index 0000000000000000000000000000000000000000..2f680768ebef136e6f821c00e8f2c8278fdf6692
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_DMA_RESTRICTED_POOL
@@ -0,0 +1 @@
+# CONFIG_DMA_RESTRICTED_POOL is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_INFINIBAND_XSC b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_INFINIBAND_XSC
index 734ca6c9dfe0942cb749ff8ed8cd823aca34a51e..a4a1366d8093333847bf2c4a895dab88d56a4d8a 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_INFINIBAND_XSC
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_INFINIBAND_XSC
@@ -1 +1 @@
-CONFIG_INFINIBAND_XSC=m
+# CONFIG_INFINIBAND_XSC is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_KUNPENG_MPAM b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_KUNPENG_MPAM
new file mode 100644
index 0000000000000000000000000000000000000000..45f8ea309e9d4a0b8adbb59020b2283adb18e6c4
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_KUNPENG_MPAM
@@ -0,0 +1 @@
+# CONFIG_KUNPENG_MPAM is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_NET_VENDOR_YUNSILICON b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_NET_VENDOR_YUNSILICON
index f6aca2a290f7ffebb4f6c127a967acca60ded17d..79e2504adcc0afdc1d4e9a60a4b175a0dd48afc2 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_NET_VENDOR_YUNSILICON
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_NET_VENDOR_YUNSILICON
@@ -1 +1 @@
-CONFIG_NET_VENDOR_YUNSILICON=y
+# CONFIG_NET_VENDOR_YUNSILICON is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_PREEMPTIRQ_TRACEPOINTS b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_PREEMPTIRQ_TRACEPOINTS
new file mode 100644
index 0000000000000000000000000000000000000000..cea44236941c39d19bbe4809cf0b0dc18286e1f4
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_PREEMPTIRQ_TRACEPOINTS
@@ -0,0 +1 @@
+CONFIG_PREEMPTIRQ_TRACEPOINTS=y
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_SCHED_CLUSTER b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_SCHED_CLUSTER
new file mode 100644
index 0000000000000000000000000000000000000000..304f56bd92f4a46283edd66c092f0575884664d0
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_SCHED_CLUSTER
@@ -0,0 +1 @@
+# CONFIG_SCHED_CLUSTER is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP
new file mode 100644
index 0000000000000000000000000000000000000000..79fb521f3632205462c82f15c78f27e209a508f7
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP
@@ -0,0 +1 @@
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_ETH b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_ETH
index 343284c7c0de1b02f4b8edc32c91ee4abf953064..d62074efd1dcdd316773ff0591ea0fc7b353df6f 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_ETH
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_ETH
@@ -1 +1 @@
-CONFIG_YUNSILICON_XSC_ETH=m
+# CONFIG_YUNSILICON_XSC_ETH is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_PCI b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_PCI
index 3a3fbc36325a9d9f48e10e158b197dc936d0924e..f152071410d24ff28eb1189f5d3da171571ffd56 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_PCI
+++ b/anolis/configs/L1-RECOMMEND/sw_64-6b/CONFIG_YUNSILICON_XSC_PCI
@@ -1 +1 @@
-CONFIG_YUNSILICON_XSC_PCI=m
+# CONFIG_YUNSILICON_XSC_PCI is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_DMA_RESTRICTED_POOL b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_DMA_RESTRICTED_POOL
new file mode 100644
index 0000000000000000000000000000000000000000..2f680768ebef136e6f821c00e8f2c8278fdf6692
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_DMA_RESTRICTED_POOL
@@ -0,0 +1 @@
+# CONFIG_DMA_RESTRICTED_POOL is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_INFINIBAND_XSC b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_INFINIBAND_XSC
index ef75138910a7f01c7f80a4d5da8ee84416fe663e..a4a1366d8093333847bf2c4a895dab88d56a4d8a 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_INFINIBAND_XSC
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_INFINIBAND_XSC
@@ -1 +1 @@
-CONFIG_INFINIBAND_XSC=y
+# CONFIG_INFINIBAND_XSC is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_KUNPENG_MPAM b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_KUNPENG_MPAM
new file mode 100644
index 0000000000000000000000000000000000000000..45f8ea309e9d4a0b8adbb59020b2283adb18e6c4
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_KUNPENG_MPAM
@@ -0,0 +1 @@
+# CONFIG_KUNPENG_MPAM is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_NET_VENDOR_YUNSILICON b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_NET_VENDOR_YUNSILICON
index f6aca2a290f7ffebb4f6c127a967acca60ded17d..79e2504adcc0afdc1d4e9a60a4b175a0dd48afc2 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_NET_VENDOR_YUNSILICON
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_NET_VENDOR_YUNSILICON
@@ -1 +1 @@
-CONFIG_NET_VENDOR_YUNSILICON=y
+# CONFIG_NET_VENDOR_YUNSILICON is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_PREEMPTIRQ_TRACEPOINTS b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_PREEMPTIRQ_TRACEPOINTS
new file mode 100644
index 0000000000000000000000000000000000000000..cea44236941c39d19bbe4809cf0b0dc18286e1f4
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_PREEMPTIRQ_TRACEPOINTS
@@ -0,0 +1 @@
+CONFIG_PREEMPTIRQ_TRACEPOINTS=y
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_SCHED_CLUSTER b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_SCHED_CLUSTER
new file mode 100644
index 0000000000000000000000000000000000000000..304f56bd92f4a46283edd66c092f0575884664d0
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_SCHED_CLUSTER
@@ -0,0 +1 @@
+# CONFIG_SCHED_CLUSTER is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP
new file mode 100644
index 0000000000000000000000000000000000000000..79fb521f3632205462c82f15c78f27e209a508f7
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP
@@ -0,0 +1 @@
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_ETH b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_ETH
index 343284c7c0de1b02f4b8edc32c91ee4abf953064..d62074efd1dcdd316773ff0591ea0fc7b353df6f 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_ETH
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_ETH
@@ -1 +1 @@
-CONFIG_YUNSILICON_XSC_ETH=m
+# CONFIG_YUNSILICON_XSC_ETH is not set
diff --git a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_PCI b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_PCI
index 3a3fbc36325a9d9f48e10e158b197dc936d0924e..f152071410d24ff28eb1189f5d3da171571ffd56 100644
--- a/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_PCI
+++ b/anolis/configs/L1-RECOMMEND/sw_64-8a/CONFIG_YUNSILICON_XSC_PCI
@@ -1 +1 @@
-CONFIG_YUNSILICON_XSC_PCI=m
+# CONFIG_YUNSILICON_XSC_PCI is not set
diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_SCHED_CLUSTER b/anolis/configs/L1-RECOMMEND/x86/CONFIG_SCHED_CLUSTER
deleted file mode 100644
index 046feb27d436a582b7acecd47a891bbcf3a492b3..0000000000000000000000000000000000000000
--- a/anolis/configs/L1-RECOMMEND/x86/CONFIG_SCHED_CLUSTER
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_SCHED_CLUSTER=y
diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_USING_FPU_IN_KERNEL_NONATOMIC b/anolis/configs/L1-RECOMMEND/x86/CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
new file mode 100644
index 0000000000000000000000000000000000000000..be2ff4beebe72433ebb68b9035f0d4b84b66a20d
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/x86/CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
@@ -0,0 +1 @@
+CONFIG_USING_FPU_IN_KERNEL_NONATOMIC=y
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_TWED b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_TWED
new file mode 100644
index 0000000000000000000000000000000000000000..d76a4f21fe1531afb46133afc5e3e087d7c50ee7
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_ARM64_TWED
@@ -0,0 +1 @@
+CONFIG_ARM64_TWED=y
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISILICON_ERRATUM_162100602 b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISILICON_ERRATUM_162100602
new file mode 100644
index 0000000000000000000000000000000000000000..89fc5be248be7a07a667fe52619b908ebf7f6a22
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISILICON_ERRATUM_162100602
@@ -0,0 +1 @@
+CONFIG_HISILICON_ERRATUM_162100602=y
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISI_PCIE_PMU b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISI_PCIE_PMU
new file mode 100644
index 0000000000000000000000000000000000000000..9216bb64856824838217760978c84f7b5b02e519
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISI_PCIE_PMU
@@ -0,0 +1 @@
+# CONFIG_HISI_PCIE_PMU is not set
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISI_PMU b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISI_PMU
deleted file mode 100644
index 7bd6395da722e6e0f7efc00cab91d2562cf91315..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_HISI_PMU
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_HISI_PMU=y
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SCSI_HISI_SAS b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SCSI_HISI_SAS
deleted file mode 100644
index 778ee9b80f58c82ea3f6a06ce994cf127d4b126e..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SCSI_HISI_SAS
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_SCSI_HISI_SAS=m
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SCSI_HISI_SAS_PCI b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SCSI_HISI_SAS_PCI
deleted file mode 100644
index 601e09bf13805ef8125cc58ea5cde1b81de27584..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SCSI_HISI_SAS_PCI
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_SCSI_HISI_SAS_PCI=m
diff --git a/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SPI_HISI_KUNPENG b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SPI_HISI_KUNPENG
new file mode 100644
index 0000000000000000000000000000000000000000..cb775f3737e5292c4eda33262d00274358d15966
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/arm64/CONFIG_SPI_HISI_KUNPENG
@@ -0,0 +1 @@
+# CONFIG_SPI_HISI_KUNPENG is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE
new file mode 100644
index 0000000000000000000000000000000000000000..77d6d6ad95fdf7ba267991810bb7892ca65b78c8
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE
@@ -0,0 +1 @@
+# CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_IRQSOFF_TRACER b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_IRQSOFF_TRACER
new file mode 100644
index 0000000000000000000000000000000000000000..d2d5198bc1d6d507cae61a098b723e91fb0fa1ab
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_IRQSOFF_TRACER
@@ -0,0 +1 @@
+CONFIG_IRQSOFF_TRACER=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PREEMPTIRQ_DELAY_TEST b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PREEMPTIRQ_DELAY_TEST
new file mode 100644
index 0000000000000000000000000000000000000000..7e4cadf92dd12e5b5efd8483b6fd1d101df939d6
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_PREEMPTIRQ_DELAY_TEST
@@ -0,0 +1 @@
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_RING_BUFFER_ALLOW_SWAP b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_RING_BUFFER_ALLOW_SWAP
new file mode 100644
index 0000000000000000000000000000000000000000..b485248533c8d30af248e59fe7dbbd952e3dd8fb
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_RING_BUFFER_ALLOW_SWAP
@@ -0,0 +1 @@
+CONFIG_RING_BUFFER_ALLOW_SWAP=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLES b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLES
new file mode 100644
index 0000000000000000000000000000000000000000..b41b215110e3c0897c87bbb51404b12b58751297
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLES
@@ -0,0 +1 @@
+CONFIG_SAMPLES=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_AUXDISPLAY b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_AUXDISPLAY
new file mode 100644
index 0000000000000000000000000000000000000000..ec0412f04995f1d58b36810e87d2e757445594e4
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_AUXDISPLAY
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_AUXDISPLAY is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_CONFIGFS b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_CONFIGFS
new file mode 100644
index 0000000000000000000000000000000000000000..26f0903ebb33cf8d956e97789f0ee93128fec5f9
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_CONFIGFS
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_CONFIGFS is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_FTRACE_OPS b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_FTRACE_OPS
new file mode 100644
index 0000000000000000000000000000000000000000..72f8993996c4f98bb541401578ce90e2dd0ea53f
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_FTRACE_OPS
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_FTRACE_OPS is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KDB b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KDB
new file mode 100644
index 0000000000000000000000000000000000000000..16bea232b3d6238c2b7f695a6b23ba4b64300012
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KDB
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KDB is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KFIFO b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KFIFO
new file mode 100644
index 0000000000000000000000000000000000000000..2d119b5ce1f4f0c6da116b1a78d90b54cc58c6dc
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KFIFO
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KFIFO is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KOBJECT b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KOBJECT
new file mode 100644
index 0000000000000000000000000000000000000000..bd43f586d4ae127c5e0675c043694774360ad4a3
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KOBJECT
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KOBJECT is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KPROBES b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KPROBES
new file mode 100644
index 0000000000000000000000000000000000000000..68db752ea61bfbf9ad91eb8595e84384f27c4da6
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_KPROBES
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KPROBES is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_LIVEPATCH b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_LIVEPATCH
new file mode 100644
index 0000000000000000000000000000000000000000..f1647bb8608d620d4b6c125a14cfa6aafa11a0b4
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_LIVEPATCH
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_LIVEPATCH is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_ARRAY b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_ARRAY
new file mode 100644
index 0000000000000000000000000000000000000000..411e84cdca79bd34a9cb2b33b98ed314e76dea42
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_ARRAY
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_TRACE_ARRAY is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_EVENTS b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_EVENTS
new file mode 100644
index 0000000000000000000000000000000000000000..a7fde076012ff46fed75bb586f6a5a6b085fd3ad
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_EVENTS
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_TRACE_EVENTS is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_PRINTK b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_PRINTK
new file mode 100644
index 0000000000000000000000000000000000000000..a28e552a78b1dffdd247c0738d99d483a85aaca1
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_TRACE_PRINTK
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_TRACE_PRINTK is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB
new file mode 100644
index 0000000000000000000000000000000000000000..a20351556d7b9e69411bb144955588d02718e016
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_WATCHDOG b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_WATCHDOG
new file mode 100644
index 0000000000000000000000000000000000000000..d96fd4b642356daef46297cdf53d29037a21a9f2
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_SAMPLE_WATCHDOG
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_WATCHDOG is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_TRACE_IRQFLAGS b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_TRACE_IRQFLAGS
new file mode 100644
index 0000000000000000000000000000000000000000..21bc5bbe018dc4cb72c57e49deb48192d12b1c64
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_TRACE_IRQFLAGS
@@ -0,0 +1 @@
+CONFIG_TRACE_IRQFLAGS=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_VFIO_IOMMU_TYPE1 b/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_VFIO_IOMMU_TYPE1
deleted file mode 100644
index 9f1df8c24020f6f9164ebf58479bccc71709c861..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/sw_64-6b/CONFIG_VFIO_IOMMU_TYPE1
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_VFIO_IOMMU_TYPE1 is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE
new file mode 100644
index 0000000000000000000000000000000000000000..77d6d6ad95fdf7ba267991810bb7892ca65b78c8
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE
@@ -0,0 +1 @@
+# CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_IRQSOFF_TRACER b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_IRQSOFF_TRACER
new file mode 100644
index 0000000000000000000000000000000000000000..d2d5198bc1d6d507cae61a098b723e91fb0fa1ab
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_IRQSOFF_TRACER
@@ -0,0 +1 @@
+CONFIG_IRQSOFF_TRACER=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PREEMPTIRQ_DELAY_TEST b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PREEMPTIRQ_DELAY_TEST
new file mode 100644
index 0000000000000000000000000000000000000000..7e4cadf92dd12e5b5efd8483b6fd1d101df939d6
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_PREEMPTIRQ_DELAY_TEST
@@ -0,0 +1 @@
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_RING_BUFFER_ALLOW_SWAP b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_RING_BUFFER_ALLOW_SWAP
new file mode 100644
index 0000000000000000000000000000000000000000..b485248533c8d30af248e59fe7dbbd952e3dd8fb
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_RING_BUFFER_ALLOW_SWAP
@@ -0,0 +1 @@
+CONFIG_RING_BUFFER_ALLOW_SWAP=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLES b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLES
new file mode 100644
index 0000000000000000000000000000000000000000..b41b215110e3c0897c87bbb51404b12b58751297
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLES
@@ -0,0 +1 @@
+CONFIG_SAMPLES=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_AUXDISPLAY b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_AUXDISPLAY
new file mode 100644
index 0000000000000000000000000000000000000000..ec0412f04995f1d58b36810e87d2e757445594e4
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_AUXDISPLAY
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_AUXDISPLAY is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_CONFIGFS b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_CONFIGFS
new file mode 100644
index 0000000000000000000000000000000000000000..26f0903ebb33cf8d956e97789f0ee93128fec5f9
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_CONFIGFS
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_CONFIGFS is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_FTRACE_OPS b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_FTRACE_OPS
new file mode 100644
index 0000000000000000000000000000000000000000..72f8993996c4f98bb541401578ce90e2dd0ea53f
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_FTRACE_OPS
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_FTRACE_OPS is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KDB b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KDB
new file mode 100644
index 0000000000000000000000000000000000000000..16bea232b3d6238c2b7f695a6b23ba4b64300012
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KDB
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KDB is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KFIFO b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KFIFO
new file mode 100644
index 0000000000000000000000000000000000000000..2d119b5ce1f4f0c6da116b1a78d90b54cc58c6dc
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KFIFO
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KFIFO is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KOBJECT b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KOBJECT
new file mode 100644
index 0000000000000000000000000000000000000000..bd43f586d4ae127c5e0675c043694774360ad4a3
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KOBJECT
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KOBJECT is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KPROBES b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KPROBES
new file mode 100644
index 0000000000000000000000000000000000000000..68db752ea61bfbf9ad91eb8595e84384f27c4da6
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_KPROBES
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_KPROBES is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_LIVEPATCH b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_LIVEPATCH
new file mode 100644
index 0000000000000000000000000000000000000000..f1647bb8608d620d4b6c125a14cfa6aafa11a0b4
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_LIVEPATCH
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_LIVEPATCH is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_ARRAY b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_ARRAY
new file mode 100644
index 0000000000000000000000000000000000000000..411e84cdca79bd34a9cb2b33b98ed314e76dea42
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_ARRAY
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_TRACE_ARRAY is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_EVENTS b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_EVENTS
new file mode 100644
index 0000000000000000000000000000000000000000..a7fde076012ff46fed75bb586f6a5a6b085fd3ad
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_EVENTS
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_TRACE_EVENTS is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_PRINTK b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_PRINTK
new file mode 100644
index 0000000000000000000000000000000000000000..a28e552a78b1dffdd247c0738d99d483a85aaca1
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_TRACE_PRINTK
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_TRACE_PRINTK is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB
new file mode 100644
index 0000000000000000000000000000000000000000..a20351556d7b9e69411bb144955588d02718e016
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_WATCHDOG b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_WATCHDOG
new file mode 100644
index 0000000000000000000000000000000000000000..d96fd4b642356daef46297cdf53d29037a21a9f2
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_SAMPLE_WATCHDOG
@@ -0,0 +1 @@
+# CONFIG_SAMPLE_WATCHDOG is not set
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_TRACE_IRQFLAGS b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_TRACE_IRQFLAGS
new file mode 100644
index 0000000000000000000000000000000000000000..21bc5bbe018dc4cb72c57e49deb48192d12b1c64
--- /dev/null
+++ b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_TRACE_IRQFLAGS
@@ -0,0 +1 @@
+CONFIG_TRACE_IRQFLAGS=y
diff --git a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_VFIO_IOMMU_TYPE1 b/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_VFIO_IOMMU_TYPE1
deleted file mode 100644
index 9f1df8c24020f6f9164ebf58479bccc71709c861..0000000000000000000000000000000000000000
--- a/anolis/configs/L2-OPTIONAL/sw_64-8a/CONFIG_VFIO_IOMMU_TYPE1
+++ /dev/null
@@ -1 +0,0 @@
-# CONFIG_VFIO_IOMMU_TYPE1 is not set
diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig
index f5ed19fae6de977adf9f8d4d1053374a644d819c..f52bea4d7fc4ce91401e7c86856416210effb1e7 100644
--- a/arch/sw_64/Kconfig
+++ b/arch/sw_64/Kconfig
@@ -130,7 +130,6 @@ config SW64
 	select THREAD_INFO_IN_TASK
 	select GENERIC_IRQ_EFFECTIVE_AFF_MASK
 	select IOMMU_DMA if IOMMU_SUPPORT
-	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_NMI
 	select HAVE_DMA_CONTIGUOUS
diff --git a/arch/sw_64/include/asm/ftrace.h b/arch/sw_64/include/asm/ftrace.h
index d211b8ce1d1832b479e298dcc3f7d448fc287fde..54aa68c86b6090b49fcf2e76289081ee9c561bde 100644
--- a/arch/sw_64/include/asm/ftrace.h
+++ b/arch/sw_64/include/asm/ftrace.h
@@ -12,10 +12,17 @@
 #define _ASM_SW64_FTRACE_H
 
 #define MCOUNT_ADDR		((unsigned long)_mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+#define MCOUNT_INSN_SIZE	16	/* 4 * SW64_INSN_SIZE */
+#else
 #define MCOUNT_INSN_SIZE	20	/* 5 * SW64_INSN_SIZE */
+#endif
+
 #define MCOUNT_LDGP_SIZE	8	/* 2 * SW64_INSN_SIZE */
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
 
 #ifndef __ASSEMBLY__
 #include
diff --git a/arch/sw_64/include/asm/ptrace.h b/arch/sw_64/include/asm/ptrace.h
index 1f9371722488615541d9d04d645ff7f538be839b..3aa5ddf11217f126eb5a71df1b1ed307f0b4cdf1 100644
--- a/arch/sw_64/include/asm/ptrace.h
+++ b/arch/sw_64/include/asm/ptrace.h
@@ -6,7 +6,7 @@
 #include
 #include
 
-#define NO_SYSCALL (-1)
+#define NO_SYSCALL (-0xdead)
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/sw_64/kernel/entry-ftrace.S b/arch/sw_64/kernel/entry-ftrace.S
index 73e8e043fc9d14fbbaa50bb164fcc4326329001b..6878b773968dc6bbe719eb00cd9ff750dc1d65b5 100644
--- a/arch/sw_64/kernel/entry-ftrace.S
+++ b/arch/sw_64/kernel/entry-ftrace.S
@@ -206,6 +206,7 @@ END(return_to_handler)
 #endif
 
+	.pushsection ".entry.text", "ax"
 #ifdef CONFIG_DYNAMIC_FTRACE
 	.global _mcount
 	.ent _mcount
@@ -283,8 +284,8 @@ skip_ftrace:
 3:	mcount_end
 	ret	$31, ($28), 1
 	.end	_mcount
-
 #endif	/* CONFIG_DYNAMIC_FTRACE */
+	.popsection
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	.global ftrace_regs_caller
diff --git a/arch/sw_64/kernel/entry_c3.S b/arch/sw_64/kernel/entry_c3.S
index df8bff5d09b37c5b856a752ed153e56781eb76ff..fe1e9a8635a203f2c94185b703e8cef1e8f3425c 100644
--- a/arch/sw_64/kernel/entry_c3.S
+++ b/arch/sw_64/kernel/entry_c3.S
@@ -130,6 +130,7 @@
  * Non-syscall kernel entry points.
  */
 
+	.pushsection ".entry.text", "ax"
 	.align 4
 	.globl entInt
 	.ent entInt
@@ -311,3 +312,4 @@ ret_from_kernel_thread:
 	call	$26, ($9)
 	br	ret_to_user
 	.end	ret_from_kernel_thread
+	.popsection
diff --git a/arch/sw_64/kernel/entry_c4.S b/arch/sw_64/kernel/entry_c4.S
index 219016ef05b20f57bf60e83b0015ff7434bd2e2d..3a8912fb6287807be4afdf2ba4e85e407c0e1b1d 100644
--- a/arch/sw_64/kernel/entry_c4.S
+++ b/arch/sw_64/kernel/entry_c4.S
@@ -263,6 +263,7 @@
  * Non-syscall kernel entry points.
  */
 
+	.pushsection ".entry.text", "ax"
 	.align 4
 	.globl entNMI
 	.ent entNMI
@@ -489,3 +490,4 @@ ret_from_kernel_thread:
 	call	$26, ($9)
 	br	ret_to_user
 	.end	ret_from_kernel_thread
+	.popsection
diff --git a/arch/sw_64/kernel/ftrace.c b/arch/sw_64/kernel/ftrace.c
index fb25ffe3dbdaf4f26bf4389e63d37fd1aaaa754b..511760ac18c2ae5803e43d9fa7daa0e6ce074f7e 100644
--- a/arch/sw_64/kernel/ftrace.c
+++ b/arch/sw_64/kernel/ftrace.c
@@ -72,13 +72,17 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	else
 		offset = TI_FTRACE_REGS_ADDR;
 
-	insn[0] = SW64_NOP;
 	/* ldl r28,(ftrace_addr_offset)(r8) */
-	insn[1] = (0x23U << 26) | (28U << 21) | (8U << 16) | offset;
-	insn[2] = SW64_CALL(R28, R28, 0);
+	insn[0] = (0x23U << 26) | (28U << 21) | (8U << 16) | offset;
+	insn[1] = SW64_CALL(R28, R28, 0);
+	insn[2] = SW64_NOP;
 
-	/* replace the 3 mcount instructions at once */
-	return copy_to_kernel_nofault((void *)pc, insn, 3 * SW64_INSN_SIZE);
+	*((u32 *)pc) = insn[0];
+	mb();
+	*((u32 *)(pc + 4)) = insn[1];
+	*((u32 *)(pc + 8)) = insn[2];
+
+	return 0;
 }
 
 /*
@@ -90,7 +94,12 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	unsigned long pc = rec->ip + MCOUNT_LDGP_SIZE;
 	unsigned int insn[3] = {SW64_NOP, SW64_NOP, SW64_NOP};
 
-	return copy_to_kernel_nofault((void *)pc, insn, 3 * SW64_INSN_SIZE);
+	*((u32 *)(pc + 8)) = insn[2];
+	*((u32 *)(pc + 4)) = insn[1];
+	mb();
+	*((u32 *)pc) = insn[0];
+
+	return 0;
 }
 
 void arch_ftrace_update_code(int command)
@@ -115,6 +124,19 @@ int __init ftrace_dyn_arch_init(void)
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 			unsigned long addr)
 {
+	unsigned int insn[1];
+	unsigned long pc = rec->ip + MCOUNT_LDGP_SIZE;
+	unsigned long offset;
+
+	if (addr == FTRACE_ADDR)
+		offset = TI_FTRACE_ADDR;
+	else
+		offset = TI_FTRACE_REGS_ADDR;
+
+	/* ldl r28,(ftrace_addr_offset)(r8) */
+	insn[0] = (0x23U << 26) | (28U << 21) | (8U << 16) | offset;
+	copy_to_kernel_nofault((void *)pc, insn, SW64_INSN_SIZE);
+
 	return 0;
 }
 #endif
diff --git a/arch/sw_64/kernel/idle.c b/arch/sw_64/kernel/idle.c
index eb4738dedac5fec49f7defb0ed6ed7492d702125..03db27bc6b388e59f0ca7079113b865a192cd7b4 100644
--- a/arch/sw_64/kernel/idle.c
+++ b/arch/sw_64/kernel/idle.c
@@ -9,7 +9,7 @@
 #include
 #include
 
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
 {
 	local_irq_enable();
 	cpu_relax();
diff --git a/arch/sw_64/kernel/kprobes/kprobes.c b/arch/sw_64/kernel/kprobes/kprobes.c
index 3bacfb3b3a4c58592e5e030923f2daf8d0d8f8a3..839134fee52f0c20c7ccf6f38f5626e16df5b287 100644
--- a/arch/sw_64/kernel/kprobes/kprobes.c
+++ b/arch/sw_64/kernel/kprobes/kprobes.c
@@ -274,6 +274,19 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	regs->regs[26] = (unsigned long)kretprobe_trampoline;
 }
 
+/*
+ * Provide a blacklist of symbols identifying ranges which cannot be kprobed.
+ * This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
+ */
+int __init arch_populate_kprobe_blacklist(void)
+{
+	int ret;
+
+	ret = kprobe_add_area_blacklist((unsigned long)__entry_text_start,
+					(unsigned long)__entry_text_end);
+	return ret;
+}
+
 /*
  * Called when the probe at kretprobe trampoline is hit
  */
diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c
index 60b899c4ace604850085e842355ffdc82867302a..eea3bb4fcae4740301229103903ca05afb42d227 100644
--- a/arch/sw_64/kernel/process.c
+++ b/arch/sw_64/kernel/process.c
@@ -21,6 +21,7 @@
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
 	regs->pc = pc;
+	regs->regs[27] = pc;
 	regs->ps = 8;
 	regs->regs[30] = sp;
 }
diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c
index 1a67a8bb854b09aec50587f6a36cb3d106abc1ce..101843d3a92d22d61747fa77414c9311a621bedf 100644
--- a/arch/sw_64/kernel/stacktrace.c
+++ b/arch/sw_64/kernel/stacktrace.c
@@ -61,7 +61,7 @@ int unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 }
 EXPORT_SYMBOL_GPL(unwind_frame);
 
-void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+void noinstr walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
 		     int (*fn)(unsigned long, void *), void *data)
 {
 	unsigned long pc, fp;
@@ -107,7 +107,7 @@ void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
 EXPORT_SYMBOL_GPL(walk_stackframe);
 
 #else /* !CONFIG_FRAME_POINTER */
-void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
+void noinstr walk_stackframe(struct task_struct *tsk, struct pt_regs *regs,
 		     int (*fn)(unsigned long, void *), void *data)
 {
 	unsigned long *ksp;
diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c
index 2883ca92a4239c39b5ce4832c81d83363fea8f0d..7df8e8c4491a55c689d0edaae1422cbde2f5dd17 100644
--- a/arch/sw_64/kernel/traps.c
+++ b/arch/sw_64/kernel/traps.c
@@ -148,7 +148,7 @@ extern long sw64_fp_emul(unsigned long pc);
 #endif
 
 asmlinkage void
-do_entArith(unsigned long exc_sum, unsigned long write_mask,
+noinstr do_entArith(unsigned long exc_sum, unsigned long write_mask,
 		struct pt_regs *regs)
 {
 	long si_code = FPE_FLTINV;
@@ -250,7 +250,7 @@ static int try_fix_rd_f(unsigned int inst, struct pt_regs *regs)
  * do something necessary to handle it correctly.
  */
 asmlinkage void
-do_entIF(unsigned long inst_type, unsigned long va, struct pt_regs *regs)
+noinstr do_entIF(unsigned long inst_type, unsigned long va, struct pt_regs *regs)
 {
 	int signo, code;
 	unsigned int inst, type;
@@ -397,7 +397,7 @@ do_entIF(unsigned long inst_type, unsigned long va, struct pt_regs *regs)
 		1L << 0x3 | 1L << 0x9)	/* ldl_a stl_a */
 
 asmlinkage void
-do_entUna(void *va, unsigned long opcode, unsigned long reg,
+noinstr do_entUna(void *va, unsigned long opcode, unsigned long reg,
 	  struct pt_regs *regs)
 {
 	long error, disp;
@@ -1348,7 +1348,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
  * However, we need to deal with stt/ldt and sts/lds only.
  */
 asmlinkage void
-do_entUnaUser(void __user *va, unsigned long opcode,
+noinstr do_entUnaUser(void __user *va, unsigned long opcode,
 		unsigned long reg, struct pt_regs *regs)
 {
 #ifdef CONFIG_UNA_PRINT
@@ -2508,7 +2508,7 @@ do_entUnaUser(void __user *va, unsigned long opcode,
 	force_sig_fault(SIGBUS, BUS_ADRALN, va);
 }
 
-asmlinkage void do_entSys(struct pt_regs *regs)
+asmlinkage void noinstr do_entSys(struct pt_regs *regs)
 {
 	long ret = -ENOSYS;
 	unsigned long nr;
diff --git a/arch/sw_64/kernel/vmlinux.lds.S b/arch/sw_64/kernel/vmlinux.lds.S
index acdcf3c1ab1f32814c7b5ed20f859253c51232b2..fafcf93a5a3288e2c502a466f482afe86b02b48b 100644
--- a/arch/sw_64/kernel/vmlinux.lds.S
+++ b/arch/sw_64/kernel/vmlinux.lds.S
@@ -29,6 +29,7 @@ SECTIONS
 		IRQENTRY_TEXT
 		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
+		ENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
 	} :text
diff --git a/arch/sw_64/mm/fault.c b/arch/sw_64/mm/fault.c
index 5c9a42c77fb29776aad083adde73e4e0b2db3a00..850ebdd6ab13243f10e9c1a61afb2057c28fc4bb 100644
--- a/arch/sw_64/mm/fault.c
+++ b/arch/sw_64/mm/fault.c
@@ -133,7 +133,7 @@ unsigned long show_va_to_pa(struct mm_struct *mm, unsigned long addr)
 extern int do_match(unsigned long address, unsigned long mmcsr,
 		    long cause, struct pt_regs *regs);
 asmlinkage void notrace
-do_page_fault(unsigned long address, unsigned long mmcsr,
+noinstr do_page_fault(unsigned long address, unsigned long mmcsr,
 		long cause, struct pt_regs *regs)
 {
 	struct vm_area_struct *vma;
diff --git a/arch/sw_64/net/bpf_jit.h b/arch/sw_64/net/bpf_jit.h
index 929036d8ea6b10daec13166c1e87f63165d99f1a..bb6399d61e3842b7c210e12778fdac72b9ad8ec2 100644
--- a/arch/sw_64/net/bpf_jit.h
+++ b/arch/sw_64/net/bpf_jit.h
@@ -75,7 +75,11 @@
 #define SW64_BPF_FUNC_ALU_ADDL		0x08
 #define SW64_BPF_FUNC_ALU_SUBL		0x09
 #define SW64_BPF_FUNC_ALU_MULW		0x10
+#define SW64_BPF_FUNC_ALU_UDIVW		0x12
+#define SW64_BPF_FUNC_ALU_UREMW		0x14
 #define SW64_BPF_FUNC_ALU_MULL		0x18
+#define SW64_BPF_FUNC_ALU_UDIVL		0x1b
+#define SW64_BPF_FUNC_ALU_UREML		0x1d
 #define SW64_BPF_FUNC_ALU_CMPEQ		0x28
 #define SW64_BPF_FUNC_ALU_CMPLT		0x29
 #define SW64_BPF_FUNC_ALU_CMPLE		0x2A
@@ -90,6 +94,9 @@
 #define SW64_BPF_FUNC_ALU_SLL		0x48
 #define SW64_BPF_FUNC_ALU_SRL		0x49
 #define SW64_BPF_FUNC_ALU_SRA		0x4A
+#define SW64_BPF_FUNC_ALU_REVBH		0x5b
+#define SW64_BPF_FUNC_ALU_REVBW		0x5c
+#define SW64_BPF_FUNC_ALU_REVBL		0x5d
 #define SW64_BPF_FUNC_ALU_ZAP		0x68
 #define SW64_BPF_FUNC_ALU_ZAPNOT	0x69
 #define SW64_BPF_FUNC_ALU_SEXTB		0x6A
@@ -193,9 +200,30 @@ enum sw64_bpf_registers {
 #define SW64_BPF_MULW_REG(ra, rb, dst) \
 	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
 			ra, rb, dst, SW64_BPF_FUNC_ALU_MULW)
+#define SW64_BPF_UDIVW_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_UDIVW)
+#define SW64_BPF_UREMW_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_UREMW)
 #define SW64_BPF_MULL_REG(ra, rb, dst) \
 	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
 			ra, rb, dst, SW64_BPF_FUNC_ALU_MULL)
+#define SW64_BPF_UDIVL_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_UDIVL)
+#define SW64_BPF_UREML_REG(ra, rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			ra, rb, dst, SW64_BPF_FUNC_ALU_UREML)
+#define SW64_BPF_REVBH_REG(rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_REVBH)
+#define SW64_BPF_REVBW_REG(rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_REVBW)
+#define SW64_BPF_REVBL_REG(rb, dst) \
+	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
+			SW64_BPF_REG_ZR, rb, dst, SW64_BPF_FUNC_ALU_REVBL)
 #define SW64_BPF_ZAP_REG(ra, rb, dst) \
 	sw64_bpf_gen_format_simple_alu_reg(SW64_BPF_OPCODE_ALU_REG, \
 			ra, rb, dst, SW64_BPF_FUNC_ALU_ZAP)
diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c
index 720ca1f0a3090a916500af50c33c3bd2b15e338a..bbbb96ca5a1f5502874419e2f5f4cc214d621feb 100644
--- a/arch/sw_64/net/bpf_jit_comp.c
+++ b/arch/sw_64/net/bpf_jit_comp.c
@@ -277,6 +277,7 @@ static void emit_sw64_ldu64(const int dst, const u64 imm, struct jit_ctx *ctx)
 	put_tmp_reg(ctx);
 }
 
+#if defined(CONFIG_SUBARCH_C3B)
 /* Do not change!!! See arch/sw_64/lib/divide.S for more detail */
 #define REG(x) "$"str(x)
 #define str(x) #x
@@ -323,6 +324,7 @@ static void emit_sw64_divmod(const int dst, const int src, struct jit_ctx *ctx,
 #undef DIVIDEND
 #undef DIVISOR
 #undef RESULT
+#endif
 
 /* STX XADD: lock *(u32 *)(dst + off) += src */
 static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ctx)
@@ -331,7 +333,7 @@ static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ct
 	int atomic_end;
 	u8 tmp1 = get_tmp_reg(ctx);
 	u8 tmp2 = get_tmp_reg(ctx);
-	u8 tmp3 = get_tmp_reg(ctx);
+	u8 __maybe_unused tmp3 = get_tmp_reg(ctx);
 
 	if (off < -0x800 || off > 0x7ff) {
 		emit(SW64_BPF_LDI(tmp1, dst, off), ctx);
@@ -341,15 +343,19 @@ static void emit_sw64_xadd32(const int src, int dst, s16 off, struct jit_ctx *ct
 	atomic_start = ctx->idx;
 	emit(SW64_BPF_LLDW(tmp2, dst, off), ctx);
+#if defined(CONFIG_SUBARCH_C3B)
 	emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx);
 	emit(SW64_BPF_WR_F(tmp3), ctx);
+#endif
 	emit(SW64_BPF_ADDW_REG(tmp2, src, tmp2), ctx);
 	if (ctx->idx & 1)
 		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
 	emit(SW64_BPF_LSTW(tmp2, dst, off), ctx);
-	emit(SW64_BPF_RD_F(tmp3), ctx);
+#if defined(CONFIG_SUBARCH_C3B)
+	emit(SW64_BPF_RD_F(tmp2), ctx);
+#endif
 	atomic_end = ctx->idx;
-	emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx);
+	emit(SW64_BPF_BEQ(tmp2, atomic_start - atomic_end - 1), ctx);
 
 	put_tmp_reg(ctx);
 	put_tmp_reg(ctx);
@@ -363,7 +369,7 @@ static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ct
 	int atomic_end;
 	u8 tmp1 = get_tmp_reg(ctx);
 	u8 tmp2 = get_tmp_reg(ctx);
-	u8 tmp3 = get_tmp_reg(ctx);
+	u8 __maybe_unused tmp3 = get_tmp_reg(ctx);
 
 	if (off < -0x800 || off > 0x7ff) {
 		emit(SW64_BPF_LDI(tmp1, dst, off), ctx);
@@ -373,21 +379,26 @@ static void emit_sw64_xadd64(const int src, int dst, s16 off, struct jit_ctx *ct
 	atomic_start = ctx->idx;
 	emit(SW64_BPF_LLDL(tmp2, dst, off), ctx);
+#if defined(CONFIG_SUBARCH_C3B)
 	emit(SW64_BPF_LDI(tmp3, SW64_BPF_REG_ZR, 1), ctx);
 	emit(SW64_BPF_WR_F(tmp3), ctx);
+#endif
 	emit(SW64_BPF_ADDL_REG(tmp2, src, tmp2), ctx);
 	if (ctx->idx & 1)
 		emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx);
 	emit(SW64_BPF_LSTL(tmp2, dst, off), ctx);
-	emit(SW64_BPF_RD_F(tmp3), ctx);
+#if defined(CONFIG_SUBARCH_C3B)
+	emit(SW64_BPF_RD_F(tmp2), ctx);
+#endif
 	atomic_end = ctx->idx;
-	emit(SW64_BPF_BEQ(tmp3, atomic_start - atomic_end - 1), ctx);
+	emit(SW64_BPF_BEQ(tmp2, atomic_start - atomic_end - 1), ctx);
 
 	put_tmp_reg(ctx);
 	put_tmp_reg(ctx);
 	put_tmp_reg(ctx);
 }
 
+#if defined(CONFIG_SUBARCH_C3B)
 static void emit_sw64_htobe16(const int dst, struct jit_ctx *ctx)
 {
 	u8 tmp = get_tmp_reg(ctx);
@@ -464,6 +475,7 @@ static void emit_sw64_htobe64(const int dst, struct jit_ctx *ctx)
 	put_tmp_reg(ctx);
 	put_tmp_reg(ctx);
 }
+#endif
 
 static void jit_fill_hole(void *area, unsigned int size)
 {
@@ -707,16 +719,34 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		emit(SW64_BPF_MULL_REG(dst, src, dst), ctx);
 		break;
 	case BPF_ALU | BPF_DIV | BPF_X:
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, src, ctx, code);
+#else
+		emit(SW64_BPF_UDIVW_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+#endif
 		break;
 	case BPF_ALU64 | BPF_DIV | BPF_X:
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, src, ctx, code);
+#else
+		emit(SW64_BPF_UDIVL_REG(dst, src, dst), ctx);
+#endif
 		break;
 	case BPF_ALU | BPF_MOD | BPF_X:
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, src, ctx, code);
+#else
+		emit(SW64_BPF_UREMW_REG(dst, src, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+#endif
 		break;
 	case BPF_ALU64 | BPF_MOD | BPF_X:
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, src, ctx, code);
+#else
+		emit(SW64_BPF_UREML_REG(dst, src, dst), ctx);
+#endif
 		break;
 	case BPF_ALU | BPF_LSH | BPF_X:
 		emit(SW64_BPF_SLL_REG(dst, src, dst), ctx);
@@ -786,13 +816,26 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_ALU | BPF_END | BPF_TO_BE:
 		switch (imm) {
 		case 16:
+#if defined(CONFIG_SUBARCH_C3B)
 			emit_sw64_htobe16(dst, ctx);
+#else
+			emit(SW64_BPF_REVBH_REG(dst, dst), ctx);
+#endif
 			break;
 		case 32:
+#if defined(CONFIG_SUBARCH_C3B)
 			emit_sw64_htobe32(dst, ctx);
+#else
+			emit(SW64_BPF_REVBW_REG(dst, dst), ctx);
+			emit(SW64_BPF_ZAPNOT_IMM(dst, 0xf, dst), ctx);
+#endif
 			break;
 		case 64:
+#if defined(CONFIG_SUBARCH_C3B)
 			emit_sw64_htobe64(dst, ctx);
+#else
+			emit(SW64_BPF_REVBL_REG(dst, dst), ctx);
+#endif
 			break;
 		default:
 			pr_err("eBPF JIT %s[%d]: BPF_TO_BE unknown size\n",
@@ -867,19 +910,37 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		break;
 	case BPF_ALU | BPF_DIV | BPF_K:
 		emit_sw64_ldu32(tmp1, imm, ctx);
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, tmp1, ctx, code);
+#else
+		emit(SW64_BPF_UDIVW_REG(dst, tmp1, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+#endif
 		break;
 	case BPF_ALU64 | BPF_DIV | BPF_K:
 		emit_sw64_lds32(tmp1, imm, ctx);
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, tmp1, ctx, code);
+#else
+		emit(SW64_BPF_UDIVL_REG(dst, tmp1, dst), ctx);
+#endif
 		break;
 	case BPF_ALU | BPF_MOD | BPF_K:
 		emit_sw64_ldu32(tmp1, imm, ctx);
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, tmp1, ctx, code);
+#else
+		emit(SW64_BPF_UREMW_REG(dst, tmp1, dst), ctx);
+		emit(SW64_BPF_ZAP_IMM(dst, 0xf0, dst), ctx);
+#endif
 		break;
 	case BPF_ALU64 | BPF_MOD | BPF_K:
 		emit_sw64_lds32(tmp1, imm, ctx);
+#if defined(CONFIG_SUBARCH_C3B)
 		emit_sw64_divmod(dst, tmp1, ctx, code);
+#else
+		emit(SW64_BPF_UREML_REG(dst, tmp1, dst), ctx);
+#endif
 		break;
 	case BPF_ALU | BPF_LSH | BPF_K:
 		if (imm >= 0 && imm <= U8_MAX) {
diff --git a/arch/x86/Kconfig.fpu b/arch/x86/Kconfig.fpu
index 5410feda1bc770e12e6de371cce05a846fcc249b..9f7507f31bd31f3759c8195fafec51c6c10802b8 100644
--- a/arch/x86/Kconfig.fpu
+++ b/arch/x86/Kconfig.fpu
@@ -8,7 +8,7 @@ if USING_FPU_IN_KERNEL_NONATOMIC
 choice
 	prompt "X86_HYGON_LMC"
 	depends on X86_64 && CPU_SUP_HYGON
-	default X86_HYGON_LMC_SSE2_ON
+	default X86_HYGON_LMC_AVX2_ON
 
 config X86_HYGON_LMC_SSE2_ON
 	bool "Using sse2 nt copy for large memory copy"
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index c3734cdf0cb1d7178125176214afac8201c56bb7..b609da515d807ac145d4cf8dbf7c77f7c377f92d 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -296,6 +296,9 @@ static u64 amd_pmu_event_map(int hw_event)
 	if (cpu_feature_enabled(X86_FEATURE_ZEN1))
 		return amd_zen1_perfmon_event_map[hw_event];
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+		return amd_zen1_perfmon_event_map[hw_event];
+
 	return amd_perfmon_event_map[hw_event];
 }
 
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 4230a80a5a9de2264ba53a96e06aea1ea1709a95..2c940e6d7541e13fa2ca5e223908082f4d6ed237 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -83,6 +83,7 @@ bool amd_nb_has_feature(unsigned int feature);
 struct amd_northbridge *node_to_amd_nb(int node);
 
 bool hygon_f18h_m4h(void);
+bool hygon_f18h_m10h(void);
 u16 hygon_nb_num(void);
 int get_df_id(struct pci_dev *misc, u8 *id);
 
@@ -124,6 +125,7 @@ static inline bool amd_gart_present(void)
 #define amd_gart_present(x)	false
 
 #define hygon_f18h_m4h		false
+#define hygon_f18h_m10h		false
 #define hygon_nb_num(x)		0
 #define get_df_id(x, y)		NULL
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a7d42e1ef6ba9e60a5989c2dc96adc1170d61182..2d6c9099cfe056a5db1d5463239d9447bbe1e987 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -322,6 +322,7 @@
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
 #define X86_FEATURE_USE_IBPB_FW		(11*32+16) /* "" Use IBPB during runtime firmware calls */
 #define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA	(11*32+18) /* "" SGX EDECCSSA user leaf function */
 #define X86_FEATURE_ZEN2		(11*32+28) /* "" CPU based on Zen2 microarchitecture */
 #define X86_FEATURE_ZEN3		(11*32+29) /* "" CPU based on Zen3 microarchitecture */
 #define X86_FEATURE_ZEN4		(11*32+30) /* "" CPU based on Zen4 microarchitecture */
@@ -466,8 +467,8 @@
 #define X86_FEATURE_SRSO_NO		(20*32+29) /* "" CPU is not affected by SRSO */
 
 /* HYGON-defined CPU features, CPUID level 0x8c860000:0 (EDX), word 21 */
-#define X86_FEATURE_SM3			(21*32 + 1) /* SM3 instructions */
-#define X86_FEATURE_SM4			(21*32 + 2) /* SM4 instructions */
+#define X86_FEATURE_HYGON_CIS_SM3	(21*32 + 1) /* "sm3" SM3 instructions */
+#define X86_FEATURE_HYGON_CIS_SM4	(21*32 + 2) /* "sm4" SM4 instructions */
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 460201cbd8570d3d18bdb89c9396bff474533aab..a76c2ef0a037d6a0c2025547dfbbfbfba8e5e885 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -97,17 +97,36 @@ enum sgx_miscselect {
  * %SGX_ATTR_EINITTOKENKEY:	Allow to use token signing key that is used to
  *				sign cryptographic tokens that can be passed to
  *				EINIT as an authorization to run an enclave.
+ * %SGX_ATTR_ASYNC_EXIT_NOTIFY:	Allow enclaves to be notified after an
+ *				asynchronous exit has occurred.
  */
 enum sgx_attribute {
-	SGX_ATTR_INIT = BIT(0),
-	SGX_ATTR_DEBUG = BIT(1),
-	SGX_ATTR_MODE64BIT = BIT(2),
-	SGX_ATTR_PROVISIONKEY = BIT(4),
-	SGX_ATTR_EINITTOKENKEY = BIT(5),
-	SGX_ATTR_KSS = BIT(7),
+	SGX_ATTR_INIT		   = BIT(0),
+	SGX_ATTR_DEBUG		   = BIT(1),
+	SGX_ATTR_MODE64BIT	   = BIT(2),
+	/* BIT(3) is reserved */
+	SGX_ATTR_PROVISIONKEY	   = BIT(4),
+	SGX_ATTR_EINITTOKENKEY	   = BIT(5),
+	/* BIT(6) is for CET */
+	SGX_ATTR_KSS		   = BIT(7),
+	/* BIT(8) is reserved */
+	/* BIT(9) is reserved */
+	SGX_ATTR_ASYNC_EXIT_NOTIFY = BIT(10),
 };
 
-#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | BIT_ULL(6) | GENMASK_ULL(63, 8))
+#define SGX_ATTR_RESERVED_MASK	(BIT_ULL(3) | \
+				 BIT_ULL(6) | \
+				 BIT_ULL(8) | \
+				 BIT_ULL(9) | \
+				 GENMASK_ULL(63, 11))
+
+#define SGX_ATTR_UNPRIV_MASK	(SGX_ATTR_DEBUG | \
+				 SGX_ATTR_MODE64BIT | \
+				 SGX_ATTR_KSS | \
+				 SGX_ATTR_ASYNC_EXIT_NOTIFY)
+
+#define SGX_ATTR_PRIV_MASK	(SGX_ATTR_PROVISIONKEY | \
+				 SGX_ATTR_EINITTOKENKEY)
 
 /**
  * struct sgx_secs - SGX Enclave Control Structure (SECS)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index acdf93a79ff379d4d60e4606e0216f03d7362690..8c8458b2fc884216e937bb025d3f491df761d042 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -229,6 +229,20 @@ bool hygon_f18h_m4h(void)
 }
 EXPORT_SYMBOL_GPL(hygon_f18h_m4h);
 
+bool hygon_f18h_m10h(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+		return false;
+
+	if (boot_cpu_data.x86 == 0x18 &&
+	    boot_cpu_data.x86_model >= 0x10 &&
+	    boot_cpu_data.x86_model <= 0x1f)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(hygon_f18h_m10h);
+
 u16 hygon_nb_num(void)
 {
 	return nb_num;
@@ -252,8 +266,7 @@ static int get_df_register(struct pci_dev *misc, u8 func, int offset, u32 *value
 			else
 				device = PCI_DEVICE_ID_HYGON_18H_M04H_DF_F1;
 			break;
-		case 0x6:
-		case 0x7:
+		case 0x6 ... 0x8:
 			device = PCI_DEVICE_ID_HYGON_18H_M05H_DF_F1;
 			break;
 		default:
@@ -261,8 +274,7 @@ static int get_df_register(struct pci_dev *misc, u8 func, int offset, u32 *value
 	} else if (func == 5) {
 		switch (boot_cpu_data.x86_model) {
-		case 0x6:
-		case 0x7:
+		case 0x6 ... 0x8:
 			device = PCI_DEVICE_ID_HYGON_18H_M06H_DF_F5;
 			break;
 		default:
@@ -295,7 +307,8 @@ int get_df_id(struct pci_dev *misc, u8 *id)
 	u32 value;
 	int ret;
 
-	if (boot_cpu_data.x86_model == 0x6) {
+	if (boot_cpu_data.x86_model >= 0x6 &&
+	    boot_cpu_data.x86_model <= 0xf) {
 		/* F5x180[19:16]: DF ID */
 		ret = get_df_register(misc, 5, 0x180, &value);
 		*id = (value >> 16) & 0xf;
@@ -433,8 +446,9 @@ static int northbridge_init_f18h_m4h(const struct pci_device_id *root_ids,
 	amd_northbridges.nb = NULL;
 
 ret:
-	pr_err("Hygon Fam%xh Model%xh northbridge init failed(%d)!\n",
-	       boot_cpu_data.x86, boot_cpu_data.x86_model, err);
+	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		pr_err("Hygon Fam%xh Model%xh northbridge init failed(%d)!\n",
+		       boot_cpu_data.x86, boot_cpu_data.x86_model, err);
 	return err;
 }
 
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 7895271026219a2112a1f7804e0dbf1c81bf2636..152e00a750b340ce8dd12f612457c0b6056db12e 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -84,6 +84,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_SGX_LC,		X86_FEATURE_SGX       },
 	{ X86_FEATURE_SGX1,		X86_FEATURE_SGX       },
 	{ X86_FEATURE_SGX2,		X86_FEATURE_SGX1      },
+	{ X86_FEATURE_SGX_EDECCSSA,	X86_FEATURE_SGX1      },
 	{ X86_FEATURE_CRC32C_LOW_PERF,	X86_FEATURE_XMM4_2    },
 	{}
 };
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 74117afd2515fc4812f0bb25ded89f76c06bbfa3..986db4152251effd50d6a11eca735ed62f1a7bee 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -85,16 +85,11 @@ static void hygon_get_topology(struct cpuinfo_x86 *c)
 		if (smp_num_siblings > 1)
 			c->x86_max_cores /= smp_num_siblings;
 
-		switch (c->x86_model) {
-		case 0x0 ... 0x3:
-			if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
-				break;
+		if (c->x86 == 0x18 && c->x86_model < 0x4 &&
+		    !boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
 			/* Socket ID is ApicId[6] for these processors. */
 			c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT;
-			break;
-		case 0x4:
-		case 0x5:
-		case 0x6:
+		} else {
 			/*
 			 * In case leaf 0xB is available, use it to derive
 			 * topology information.
@@ -104,9 +99,6 @@ static void hygon_get_topology(struct cpuinfo_x86 *c)
 			c->x86_coreid_bits = get_count_order(c->x86_max_cores);
 			__max_die_per_package = nodes_per_socket;
-			break;
-		default:
-			break;
 		}
 
 	cacheinfo_hygon_init_llc_id(c, cpu);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d9efb7e15d2dc6f84e5ee9a6ade80ebb317474a3..10cd37e096d7bdb6ee597e69122f2daceafa8085 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -39,6 +39,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_PER_THREAD_MBA,	CPUID_ECX,  0, 0x00000010, 3 },
 	{ X86_FEATURE_SGX1,		CPUID_EAX,  0, 0x00000012, 0 },
 	{ X86_FEATURE_SGX2,		CPUID_EAX,  1, 0x00000012, 0 },
+	{ X86_FEATURE_SGX_EDECCSSA,	CPUID_EAX, 11, 0x00000012, 0 },
 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
 	{ X86_FEATURE_PROC_FEEDBACK,	CPUID_EDX, 11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index de026eb92883f9d8c3e7cf791036c2840a85576d..c049a6f3824279f3503834637321f59b33954250 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -12,6 +12,9 @@
 #include "encls.h"
 #include "sgx.h"
 
+static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
+				   struct sgx_backing *backing);
+
 #define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
 /*
  * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
@@ -344,8 +347,11 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	}
 
 	va_page = sgx_encl_grow(encl, false);
-	if (IS_ERR(va_page))
+	if (IS_ERR(va_page)) {
+		if (PTR_ERR(va_page) == -EBUSY)
+			vmret = VM_FAULT_NOPAGE;
 		goto err_out_epc;
+	}
 
 	if (va_page)
 		list_add(&va_page->list, &encl->va_pages);
@@ -674,11 +680,15 @@ const struct vm_operations_struct sgx_vm_ops = {
 void sgx_encl_release(struct kref *ref)
 {
 	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
+	unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
 	struct sgx_va_page *va_page;
 	struct sgx_encl_page *entry;
-	unsigned long index;
+	unsigned long count = 0;
+
+	XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));
 
-	xa_for_each(&encl->page_array, index, entry) {
+	xas_lock(&xas);
+	xas_for_each(&xas, entry, max_page_index) {
 		if (entry->epc_page) {
 			/*
 			 * The page and its radix tree entry cannot be freed
@@ -693,9 +703,20 @@ void sgx_encl_release(struct kref *ref)
 		}
 
 		kfree(entry);
-		/* Invoke scheduler to prevent soft lockups. */
-		cond_resched();
+		/*
+		 * Invoke scheduler on every XA_CHECK_SCHED iteration
+		 * to prevent soft lockups.
+		 */
+		if (!(++count % XA_CHECK_SCHED)) {
+			xas_pause(&xas);
+			xas_unlock(&xas);
+
+			cond_resched();
+
+			xas_lock(&xas);
+		}
 	}
+	xas_unlock(&xas);
 
 	xa_destroy(&encl->page_array);
@@ -914,7 +935,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
 }
 
 /**
- * sgx_encl_get_backing() - Pin the backing storage
+ * __sgx_encl_get_backing() - Pin the backing storage
  * @encl:	an enclave pointer
  * @page_index:	enclave page index
  * @backing:	data for accessing backing storage for the page
@@ -926,7 +947,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
  * 0 on success,
  * -errno otherwise.
  */
-static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
 			 struct sgx_backing *backing)
 {
 	pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
@@ -1001,7 +1022,7 @@ static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
 }
 
 /**
- * sgx_encl_alloc_backing() - allocate a new backing storage page
+ * sgx_encl_alloc_backing() - create a new backing storage page
  * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
@@ -1009,7 +1030,9 @@ static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
 * When called from ksgxd, sets the active memcg from one of the
 * mms in the enclave's mm_list prior to any backing page allocation,
 * in order to ensure that shmem page allocations are charged to the
- * enclave.
+ * enclave. Create a backing page for loading data back into an EPC page with
+ * ELDU. This function takes a reference on a new backing page which
+ * must be dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 * 0 on success,
@@ -1022,7 +1045,7 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
 	struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
 	int ret;
 
-	ret = sgx_encl_get_backing(encl, page_index, backing);
+	ret = __sgx_encl_get_backing(encl, page_index, backing);
 
 	set_active_memcg(memcg);
 	mem_cgroup_put(encl_memcg);
@@ -1040,15 +1063,17 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
 * It is the caller's responsibility to ensure that it is appropriate to use
 * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
 * not used correctly, this will cause an allocation which is not accounted for.
+ * This function takes a reference on an existing backing page which must be
+ * dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 * 0 on success,
 * -errno otherwise.
 */
-int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
+static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
 			   struct sgx_backing *backing)
 {
-	return sgx_encl_get_backing(encl, page_index, backing);
+	return __sgx_encl_get_backing(encl, page_index, backing);
 }
 
 /**
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index a65a952116fd1c2c8fac3db27b9f6453e14f5a59..f94ff14c948698140a1fa84e5efd0d2905ac7951 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -107,8 +107,6 @@ bool current_is_ksgxd(void);
 void sgx_encl_release(struct kref *ref);
 int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
 const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl);
-int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
-			    struct sgx_backing *backing);
 int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
 			   struct sgx_backing *backing);
 void sgx_encl_put_backing(struct sgx_backing *backing);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ebe79d60619f2f161c7a564aed3144572201b5ea..dbc543de89e7df07b9cd9b623744a7eca3fdfa35 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -111,7 +111,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
 	encl->base = secs->base;
 	encl->size = secs->size;
 	encl->attributes = secs->attributes;
-	encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
+	encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
 
 	/* Set only after completion, as encl->lock has not been taken. */
 	set_bit(SGX_ENCL_CREATED, &encl->flags);
@@ -356,6 +356,9 @@ static int sgx_validate_offset_length(struct sgx_encl *encl,
 	if (!length || !IS_ALIGNED(length, PAGE_SIZE))
 		return -EINVAL;
 
+	if (offset + length < offset)
+		return -EINVAL;
+
 	if (offset + length - PAGE_SIZE >= encl->size)
 		return -EINVAL;
 
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 44b83ddc0c8f97d07bc2fae9d58b5b5e11132435..58f57fcff1665f6a5e90b4e8e28202f9f100de5d 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -49,9 +49,13 @@ static LIST_HEAD(sgx_dirty_page_list);
 * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
 * from the input list, and made available for the page allocator. SECS pages
 * prepending their children in the input list are left intact.
+ *
+ * Return 0 when sanitization was successful or kthread was stopped, and the
+ * number of unsanitized pages otherwise.
 */
-static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
+static unsigned long __sgx_sanitize_pages(struct list_head *dirty_page_list)
 {
+	unsigned long left_dirty = 0;
 	struct sgx_epc_page *page;
 	LIST_HEAD(dirty);
 	int ret;
@@ -59,7 +63,7 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
 	/* dirty_page_list is thread-local, no need for a lock: */
 	while (!list_empty(dirty_page_list)) {
 		if (kthread_should_stop())
-			return;
+			return 0;
 
 		page = list_first_entry(dirty_page_list,
 					struct sgx_epc_page, list);
@@ -92,12 +96,14 @@ static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
 		} else {
 			/* The page is not yet clean - move to the dirty list.
*/ list_move_tail(&page->list, &dirty); + left_dirty++; } cond_resched(); } list_splice(&dirty, dirty_page_list); + return left_dirty; } static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page) @@ -395,10 +401,7 @@ static int ksgxd(void *p) * required for SECS pages, whose child pages blocked EREMOVE. */ __sgx_sanitize_pages(&sgx_dirty_page_list); - __sgx_sanitize_pages(&sgx_dirty_page_list); - - /* sanity check: */ - WARN_ON(!list_empty(&sgx_dirty_page_list)); + WARN_ON(__sgx_sanitize_pages(&sgx_dirty_page_list)); while (!kthread_should_stop()) { if (try_to_freeze()) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 34dd24b50e600397b232a8e5dde804255cfe393b..fc9d01588c823105bfa89e70947176a599332841 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -530,7 +530,7 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, - SF(SGX1) | SF(SGX2) + SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) ); kvm_cpu_cap_mask(CPUID_8000_0001_ECX, @@ -895,9 +895,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * userspace. ATTRIBUTES.XFRM is not adjusted as userspace is * expected to derive it from supported XCR0. */ - entry->eax &= SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | - SGX_ATTR_PROVISIONKEY | SGX_ATTR_EINITTOKENKEY | - SGX_ATTR_KSS; + entry->eax &= SGX_ATTR_PRIV_MASK | SGX_ATTR_UNPRIV_MASK; entry->ebx &= 0; break; /* Intel PT */ diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 37904abf4466fa4752402be47bf6fc48d2709be0..444a261b03d4416df2518fb78bf5d38dfbaa9a61 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -37,6 +37,7 @@ enum kvm_only_cpuid_leafs { /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +#define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11) extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; void kvm_set_cpu_caps(void); @@ -135,6 +136,8 @@ static __always_inline u32 __feature_translate(int x86_feature) return KVM_X86_FEATURE_SGX1; else if (x86_feature == X86_FEATURE_SGX2) return KVM_X86_FEATURE_SGX2; + else if (x86_feature == X86_FEATURE_SGX_EDECCSSA) + return KVM_X86_FEATURE_SGX_EDECCSSA; return x86_feature; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dad45bd012dd7000c16ee6e4368dec8990113833..23af817eb77681905d4f63cc53963bb2b11f6396 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -206,6 +206,17 @@ module_param(sev_es, int, 0444); bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); +/* + * Allow setting guest PAT to WB in some non-passthrough + * application scenarios to enhance performance. 
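A note on the write-back value this feature programs (illustrative aside, not part of the patch): x86 PAT packs one memory type per byte, and type 0x06 is write-back, which is where the GUEST_PAT_WB_ATTR constant introduced in svm.h below comes from. A minimal sketch of that encoding:

/* Build the all-write-back PAT value byte by byte. */
#include <stdio.h>

#define PAT_TYPE_WB 0x06ULL	/* x86 PAT memory type 6 = write-back */

int main(void)
{
	unsigned long long pat = 0;

	for (int entry = 0; entry < 8; entry++)
		pat |= PAT_TYPE_WB << (entry * 8);
	printf("g_pat = 0x%016llx\n", pat);	/* 0x0606060606060606 */
	return 0;
}

Since the parameter below is mode 0444, it can only be chosen at module load time, e.g. a hypothetical "modprobe kvm-amd set_guest_pat_wb=1".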
+ * + * Add kernel parameter set_guest_pat_wb (default 0): + * 1 - set guest PAT to WB + * 0 - keep guest PAT at the kernel default value + */ +static int set_guest_pat_wb; +module_param(set_guest_pat_wb, int, 0444); + static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1128,6 +1139,16 @@ static void svm_check_invpcid(struct vcpu_svm *svm) } } +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + *g_pat = GUEST_PAT_WB_ATTR; + else + *g_pat = vcpu->arch.pat; +} + static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1242,6 +1263,9 @@ static void init_vmcb(struct vcpu_svm *svm) svm_clr_intercept(svm, INTERCEPT_CR3_READ); svm_clr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; + if (set_guest_pat_wb) + svm_set_guest_pat(svm, &save->g_pat); + save->cr3 = 0; save->cr4 = 0; } @@ -2863,6 +2887,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) return 1; vcpu->arch.pat = data; svm->vmcb->save.g_pat = data; + if (npt_enabled && set_guest_pat_wb) { + svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); + vcpu->arch.pat = svm->vmcb->save.g_pat; + } vmcb_mark_dirty(svm->vmcb, VMCB_NPT); break; case MSR_IA32_SPEC_CTRL: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index bab44d3291f32d810a7e60079fc339217abfff5c..64847508efeadf6b21e622a7d27fa328839b7aa3 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -43,6 +43,8 @@ static const struct svm_host_save_msrs { }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) +#define GUEST_PAT_WB_ATTR 0x0606060606060606 + #define MAX_DIRECT_ACCESS_MSRS 18 #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 907f9c76a43da0bdc4bb757e916b7e2e370820ec..f674c931198ccb588de9674ad7ccf53a0707dc62 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8104,6 +8104,7 @@ static __init void vmx_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_SGX_LC); kvm_cpu_cap_clear(X86_FEATURE_SGX1); kvm_cpu_cap_clear(X86_FEATURE_SGX2); + kvm_cpu_cap_clear(X86_FEATURE_SGX_EDECCSSA); } if (vmx_umip_emulated()) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2088bf3c9e7b91f56ae27d837e50bdf85b478872..b38b800e33d3053f5a0e28b4479ffa46df3970d2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -862,6 +862,7 @@ static void blkcg_fill_root_iostats(void) } disk_put_part(part); } + class_dev_iter_exit(&iter); } static int blkcg_print_stat(struct seq_file *sf, void *v) diff --git a/block/blk-core.c b/block/blk-core.c index 6699f6f17d6fc61447c004189ba74902942d6d3e..1d17a76c59258c7d207358eba35dab887d9e84bc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1139,6 +1139,7 @@ EXPORT_SYMBOL(submit_bio); /** * bio_poll - poll for BIO completions * @bio: bio to poll for + * @iob: batches of IO * @flags: BLK_POLL_* flags that control the behavior * * Poll for completions on queue associated with the bio. 
Returns number of diff --git a/block/blk-mq.c b/block/blk-mq.c index e46e8c125eee1fd5afdf4461560be827128cfcdf..2ca6eca108af75d11fe851ffb53d888f072fb34f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -647,6 +647,13 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, { struct request_queue *q = hctx->queue; + /* + * All requests should have been marked as RQF_MQ_INFLIGHT, so + * update hctx->nr_active in batch + */ + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_sub_active_requests(hctx, nr_tags); + blk_mq_put_tags(hctx->tags, tag_array, nr_tags); percpu_ref_put_many(&q->q_usage_counter, nr_tags); } @@ -669,13 +676,14 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) if (iob->need_ts) __blk_mq_end_request_acct(rq, now); + rq_qos_done(rq->q, rq); + WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (!refcount_dec_and_test(&rq->ref)) continue; blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); - rq_qos_done(rq->q, rq); if (nr_tags == TAG_COMP_BATCH || (last_hctx && last_hctx != rq->mq_hctx)) { diff --git a/block/blk-mq.h b/block/blk-mq.h index d7f723d04246558128025330e5af0f53f7a97745..cf245e5f4e2e2a373aa2b63a7500c5997407c852 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -235,12 +235,18 @@ static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) atomic_inc(&hctx->nr_active); } -static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) +static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, + int val) { if (blk_mq_is_sbitmap_shared(hctx->flags)) - atomic_dec(&hctx->queue->nr_active_requests_shared_sbitmap); + atomic_sub(val, &hctx->queue->nr_active_requests_shared_sbitmap); else - atomic_dec(&hctx->nr_active); + atomic_sub(val, &hctx->nr_active); +} + +static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) +{ + __blk_mq_sub_active_requests(hctx, 1); } static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d6e3edb40474823870b8ac71920226ef3748fdc0..477600958719020e34b67d74cacd7238a2efcc91 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2065,6 +2065,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) flush_workqueue(nbd->recv_workq); nbd_clear_que(nbd); nbd->task_setup = NULL; + clear_bit(NBD_RT_BOUND, &nbd->config->runtime_flags); mutex_unlock(&nbd->config_lock); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 93f076b578b96beb4fc8423b7e8249c5b05ead65..8510987f0a2addd0d23d2ef16a4ac4dfd68f260d 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -917,6 +917,33 @@ static int sev_get_api_version(void) return 0; } +static int csv_get_api_version_locked(void) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_user_data_status status; + int error = 0, ret; + + ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, + &status, &error); + if (ret) { + dev_err(sev->dev, + "CSV: failed to get status. Error: %#x\n", error); + return ret; + } + + sev->api_major = status.api_major; + sev->api_minor = status.api_minor; + sev->build = status.build; + sev->state = status.state; + + /* + * The api version fields of HYGON CSV firmware are not consistent + * with AMD SEV firmware. 
+ */ + csv_update_api_version(&status); + + return 0; +} static int sev_get_firmware(struct device *dev, const struct firmware **firmware) { @@ -1318,11 +1345,12 @@ static int csv_ioctl_do_download_firmware(struct sev_issue_cmd *argp) } /* - * Synchronize API version status, and return -EIO if the Hygon PSP fails - * to respond to the PLATFORM_STATUS API. + * Synchronize API version status. The return value of csv_get_api_version_locked + * will inform the user of any error encountered when attempting to + * communicate with the Hygon PSP after the DOWNLOAD_FIRMWARE API completes + * successfully. */ - if (sev_get_api_version()) - ret = -EIO; + ret = csv_get_api_version_locked(); err_free_page: __free_pages(p, order); diff --git a/drivers/crypto/montage/tsse/Makefile b/drivers/crypto/montage/tsse/Makefile index d67ffde3a5b046c114fb6953bb042265539fb07d..daa61ea213819a6717c5bb213e3d1387a9146e0e 100644 --- a/drivers/crypto/montage/tsse/Makefile +++ b/drivers/crypto/montage/tsse/Makefile @@ -8,6 +8,12 @@ obj-m += tsse.o tsse-objs := tsse_dev_mgr.o \ tsse_ipc.o \ + tsse_ipc_epid.o \ + tsse_ipc_api.o \ + tsse_ipc_setup.o \ + tsse_ipc_drv.o \ + tsse_ipc_service.o \ + tsse_ipc_hash.o \ tsse_fw_service.o \ tsse_service.o \ tsse_irq.o \ diff --git a/drivers/crypto/montage/tsse/tsse_dev.h b/drivers/crypto/montage/tsse/tsse_dev.h index c16d2ae7c414088aa585c698b776f2e9bb4acf03..0a331867ae7eec60c06e47f2c67719398034cedc 100644 --- a/drivers/crypto/montage/tsse/tsse_dev.h +++ b/drivers/crypto/montage/tsse/tsse_dev.h @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. */ #ifndef __TSSE_DEV_H__ @@ -14,7 +14,7 @@ #include #include #include -#include "tsse_ipc.h" +#include "tsse_ipc_setup.h" #define TSSE_PCI_MAX_BARS 4 #define TSSE_FW_VERSION_LEN 32 @@ -37,8 +37,6 @@ enum tsse_dev_status_bit { struct tsse_qpairs_bank { struct tsse_dev *tsse_dev; void __iomem *reg_base; - - u32 num_qparis; u32 irq_vec; }; struct tsse_dev { @@ -57,10 +55,12 @@ struct tsse_dev { struct tsse_ipc *ipc; void *adi; void *mbx_hw; + void *fw_data; const struct firmware *fw; char fw_version[TSSE_FW_VERSION_LEN]; bool fw_version_exist; }; + #define TSSEDEV_TO_DEV(tssedev) (&((tssedev)->tsse_pci_dev.pci_dev->dev)) #define TSSE_DEV_BARS(tssedev) ((tssedev)->tsse_pci_dev.bars) @@ -74,6 +74,10 @@ int tsse_devmgr_add_dev(struct tsse_dev *tsse_dev); void tsse_devmgr_rm_dev(struct tsse_dev *tdev); int tsse_prepare_restart_dev(struct tsse_dev *tdev); int tsse_start_dev(struct tsse_dev *tdev); +struct tsse_dev *tsse_get_dev_by_handle(int handle); + +typedef int (*tsse_dev_process_func)(struct tsse_dev *tdev); +int tsse_process_for_all(tsse_dev_process_func func); static inline struct tsse_dev *pci_to_tsse_dev(struct pci_dev *pci_dev) { @@ -99,4 +103,19 @@ static inline int tsse_dev_in_use(struct tsse_dev *tdev) { return atomic_read(&tdev->ref_count) != 0; } + +static inline void tsse_list_del(struct list_head *entry) +{ + WRITE_ONCE(entry->next->prev, entry->prev); + WRITE_ONCE(entry->prev->next, entry->next); +} +static inline void tsse_list_add(struct list_head *new, struct list_head *prev, + struct list_head *next) +{ + WRITE_ONCE(new->next, next); + WRITE_ONCE(new->prev, prev); + mb(); /* Make sure new node updates first */ + WRITE_ONCE(next->prev, new); + WRITE_ONCE(prev->next, new); +} #endif diff --git a/drivers/crypto/montage/tsse/tsse_dev_drv.c b/drivers/crypto/montage/tsse/tsse_dev_drv.c index 
86c619d64f5ee3e6a76ab05ef607c424c40bd396..9ec036df7955e13cc244c0a710d13d4ccd8d9afd 100644 --- a/drivers/crypto/montage/tsse/tsse_dev_drv.c +++ b/drivers/crypto/montage/tsse/tsse_dev_drv.c @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. */ #include @@ -15,7 +15,7 @@ #include "tsse_dev_drv.h" #include "tsse_vuart.h" -#include "tsse_ipc.h" +#include "tsse_ipc_setup.h" #define CLUSTER_SLOT_CONFIG_OFFSET 0x5780000 @@ -87,22 +87,20 @@ static int tsse_sriov_configure(struct pci_dev *pdev, int num_vfs_param) if (tdev->num_vfs > 0) { tdev->num_irqs = TSSE_SRIOV_PF_MAX_IRQ_NUM; - tdev->qpairs_bank.num_qparis = TSSE_SRIOV_PF_MAX_QPAIR_NUM; } else { tdev->num_irqs = TSSE_PF_MAX_IRQ_NUM; - tdev->qpairs_bank.num_qparis = TSSE_PF_MAX_QPAIR_NUM; } tsse_dev_info( tdev, - "num_irqs:%u num_qparis:%u qpairs' start irq vector index:%u qpairs' reg base:0x%lx\n", - tdev->num_irqs, tdev->qpairs_bank.num_qparis, + "num_irqs:%u, qpair start irq vector index:%u, qpair reg base:0x%lx\n", + tdev->num_irqs, tdev->qpairs_bank.irq_vec, (ulong)tdev->qpairs_bank.reg_base); ret = tsse_start_dev(tdev); if (ret) { - dev_err(&pdev->dev, "%s %d: failed to start the device\n", - __func__, __LINE__); + dev_err(&pdev->dev, "%s %d: failed to start the device: %d\n", + __func__, __LINE__, ret); return ret; } @@ -168,15 +166,31 @@ static int device_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -EINVAL; } + /* Disable ASPM completely as it causes low device performance */ + status = pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1); + if (status) { + dev_info(&pdev->dev, + "%s %d: Disable ASPM failed (%d), may cause low device performance.\n", + __func__, __LINE__, status); + } + tdev = kzalloc_node(sizeof(*tdev), GFP_KERNEL, dev_to_node(&pdev->dev)); if (!tdev) return -ENOMEM; + tdev->fw_data = kzalloc_node(TSSE_FIRMWARE_MAX_LENGTH, GFP_KERNEL, dev_to_node(&pdev->dev)); + + if (!tdev->fw_data) { + kfree(tdev); + return -ENOMEM; + } + status = pcim_enable_device(pdev); if (status) { - dev_err(&pdev->dev, "pcim_enable_device failed\n"); + dev_err(&pdev->dev, "pcim_enable_device failed: %d\n", status); goto out_err; } @@ -200,7 +214,7 @@ static int device_probe(struct pci_dev *pdev, const struct pci_device_id *id) status = pcim_iomap_regions(pdev, BIT(0) | BIT(2), TSSE_DEV_NAME); if (status) { - dev_err(&pdev->dev, "I/O memory remapping failed\n"); + dev_err(&pdev->dev, "I/O memory remapping failed: %d\n", status); goto out_err; } @@ -232,7 +246,6 @@ static int device_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, tdev); tdev->num_irqs = TSSE_PF_MAX_IRQ_NUM; - tdev->qpairs_bank.num_qparis = TSSE_PF_MAX_QPAIR_NUM; tdev->qpairs_bank.irq_vec = TSSE_PF_QPAIR_START_IRQ_VECTOR; tdev->qpairs_bank.reg_base = TSSE_DEV_BARS(tdev)[2].virt_addr + TSSE_PF_QPAIR_REG_BASE; @@ -241,8 +254,8 @@ static int device_probe(struct pci_dev *pdev, const struct pci_device_id *id) tsse_dev_info( tdev, - "num_irqs:%u num_qparis:%u qpairs' start irq vector index:%u qpairs' reg base:0x%lx\n", - tdev->num_irqs, tdev->qpairs_bank.num_qparis, + "num_irqs:%u, qpair start irq vector index:%u, qpair reg base:0x%lx\n", + tdev->num_irqs, tdev->qpairs_bank.irq_vec, (ulong)tdev->qpairs_bank.reg_base); if (tsse_devmgr_add_dev(tdev)) { @@ -268,37 +281,41 @@ static int device_probe(struct pci_dev *pdev, const struct 
pci_device_id *id) tdev->fw_version_exist = true; } - if (tsse_ipc_init(pdev)) { - dev_err(&pdev->dev, - "%s %d: tsse_ipc_init failed to tsse_ipc.\n", __func__, - __LINE__); - status = -EFAULT; - goto out_err_ipc; - } - if (sysfs_create_file(&pdev->dev.kobj, &dev_attr_tsse_image_load.attr)) { dev_err(&pdev->dev, "%s %d: sysfs_create_file failed for tsse image load.\n", __func__, __LINE__); status = -EFAULT; - goto out_err_image_load; + goto out_err_sysfs; } + if (tsse_ipc_init(pdev)) { + dev_err(&pdev->dev, + "%s %d: tsse_ipc_init failed.\n", __func__, + __LINE__); + status = -EFAULT; + goto out_err_ipc; + } tsse_dev_info(tdev, "successful\n"); pci_read_config_dword(pdev, 0x720, &tmp_val); tsse_dev_dbg(tdev, "the value of FILTER_MASK_2_REG is 0x%x\n", tmp_val); return 0; -out_err_image_load: - tsse_ipc_deinit(tdev); out_err_ipc: + if (tdev->fw) { + release_firmware(tdev->fw); + tdev->fw = NULL; + } + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tsse_image_load.attr); +out_err_sysfs: vuart_uninit_port(pdev); out_err_port_init: tsse_devmgr_rm_dev(tdev); out_err_ida_free: ida_free(&tsse_ida, tdev->id); out_err: + kfree(tdev->fw_data); kfree(tdev); return status; } @@ -311,12 +328,13 @@ static void device_remove(struct pci_dev *pdev) (ulong)pdev, (ulong)tdev); tsse_sriov_disable(tdev); + tsse_ipc_deinit(tdev); if (tdev->fw) { release_firmware(tdev->fw); tdev->fw = NULL; } sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tsse_image_load.attr); - tsse_ipc_deinit(tdev); + kfree(tdev->fw_data); vuart_uninit_port(pdev); tsse_devmgr_rm_dev(tdev); ida_free(&tsse_ida, tdev->id); @@ -378,6 +396,6 @@ module_exit(tsse_exit); MODULE_AUTHOR("montage-tech.com"); MODULE_DESCRIPTION("TSSE device driver"); -MODULE_VERSION("1.0.0"); +MODULE_VERSION("1.1.2"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(TSSE_FIRMWARE); diff --git a/drivers/crypto/montage/tsse/tsse_dev_mgr.c b/drivers/crypto/montage/tsse/tsse_dev_mgr.c index 39553eb96832380480a138593941c3c0f8bf26fa..91394b9eabd691f8c70f36f22e3019a7562f0827 100644 --- a/drivers/crypto/montage/tsse/tsse_dev_mgr.c +++ b/drivers/crypto/montage/tsse/tsse_dev_mgr.c @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
*/ #include @@ -14,26 +14,12 @@ #include #include "tsse_dev.h" #include "tsse_irq.h" +#include "tsse_handle.h" static DEFINE_MUTEX(tsse_dev_table_lock); static LIST_HEAD(tsse_dev_table); static DEFINE_MUTEX(algs_lock); -static inline void tsse_list_del(struct list_head *entry) -{ - WRITE_ONCE(entry->next->prev, entry->prev); - WRITE_ONCE(entry->prev->next, entry->next); -} -static inline void tsse_list_add(struct list_head *new, struct list_head *prev, - struct list_head *next) -{ - WRITE_ONCE(new->next, next); - WRITE_ONCE(new->prev, prev); - mb(); /* Make sure new node updates first */ - WRITE_ONCE(next->prev, new); - WRITE_ONCE(prev->next, new); -} - static int tsse_dev_pf_get(struct tsse_dev *vf_tsse_dev) { int ret = 0; @@ -116,12 +102,10 @@ static int tsse_stop_dev(struct tsse_dev *tdev, bool busy_exit) if (busy_exit) return -EBUSY; } - if (tdev->qpairs_bank.num_qparis != 0) { - mutex_lock(&tsse_dev_table_lock); - tsse_list_del(&tdev->list); - mutex_unlock(&tsse_dev_table_lock); - tsse_dev_info(tdev, "removed from active dev table list\n"); - } + mutex_lock(&tsse_dev_table_lock); + tsse_list_del(&tdev->list); + mutex_unlock(&tsse_dev_table_lock); + tsse_dev_info(tdev, "removed from active dev table list\n"); tsse_dev_info(tdev, "device stopped\n"); @@ -134,12 +118,6 @@ int tsse_start_dev(struct tsse_dev *tdev) struct list_head *prev_node = &tsse_dev_table; int ret = 0; - if (tdev->qpairs_bank.num_qparis == 0) { - set_bit(TSSE_DEV_STATUS_STARTED, &tdev->status); - tsse_dev_info(tdev, "device started\n"); - return 0; - } - set_bit(TSSE_DEV_STATUS_STARTING, &tdev->status); mutex_lock(&tsse_dev_table_lock); @@ -148,7 +126,7 @@ int tsse_start_dev(struct tsse_dev *tdev) if (tmp_dev == tdev) { ret = -EEXIST; tsse_dev_err(tdev, - "The device cannot be added repeatedly\n"); + "The device cannot be added repeatedly\n"); goto clear_status; } } @@ -156,9 +134,9 @@ int tsse_start_dev(struct tsse_dev *tdev) set_bit(TSSE_DEV_STATUS_STARTED, &tdev->status); tsse_list_add(&tdev->list, prev_node, prev_node->next); - tsse_dev_info(tdev, "device started\n"); mutex_unlock(&tsse_dev_table_lock); + tsse_dev_info(tdev, "device started\n"); return 0; clear_status: mutex_unlock(&tsse_dev_table_lock); @@ -199,3 +177,94 @@ struct list_head *tsse_devmgr_get_head(void) { return &tsse_dev_table; } + +/** + * tsse_get_dev_by_handle() - Get TSSE device by its handle + * @handle: handle to TSSE device + * Return: pointer to TSSE device structure if found, otherwise NULL + */ +struct tsse_dev *tsse_get_dev_by_handle(int handle) +{ + struct list_head *itr = NULL; + struct tsse_dev *ptr = NULL; + struct tsse_dev *tdev = NULL; + + mutex_lock(&tsse_dev_table_lock); + list_for_each(itr, &tsse_dev_table) { + ptr = list_entry(itr, struct tsse_dev, list); + if (handle == ptr->id) { + tdev = ptr; + break; + } + } + mutex_unlock(&tsse_dev_table_lock); + + if (!tdev) { + pr_err("%s %d: no such device: %d\n", __func__, __LINE__, handle); + return NULL; + } + return tdev; +} + +/** + * tsse_get_available_handle() - get handle from available device. + * Return: -1 if no available device, otherwise the handle id. 
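Taken together with tsse_get_domain_by_handle() further down, these exported helpers give other modules a small device-discovery API. An illustrative sketch of a consumer (not part of the patch; function name invented, error handling abbreviated):

/* Sketch: locate a TSSE device and its IOMMU domain via the handle API. */
#include <linux/errno.h>
#include <linux/iommu.h>
#include "tsse_handle.h"

static int example_find_tsse_domain(void)
{
	struct iommu_domain *domain;
	int handle = tsse_get_available_handle();

	if (handle < 0)
		return -ENODEV;		/* no TSSE device registered yet */

	domain = tsse_get_domain_by_handle(handle);
	if (!domain)
		return -ENXIO;		/* stale handle, or IOMMU disabled */

	/* ... map buffers into the domain, then talk to the device ... */
	return 0;
}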
+ */ +int tsse_get_available_handle(void) +{ + struct list_head *itr = NULL; + struct tsse_dev *tdev = NULL; + + mutex_lock(&tsse_dev_table_lock); + list_for_each(itr, &tsse_dev_table) { + tdev = list_entry(itr, struct tsse_dev, list); + break; + } + mutex_unlock(&tsse_dev_table_lock); + + if (!tdev) { + pr_err("%s(): device not ready\n", __func__); + return -1; + } + return tdev->id; +} +EXPORT_SYMBOL_GPL(tsse_get_available_handle); + +/** + * tsse_get_domain_by_handle() - get IOMMU domain from the handle of device. + * @handle: handle of a TSSE device + * Return: pointer to IOMMU domain of the device if the handle is correct + * and IOMMU enabled, otherwise NULL. + */ +struct iommu_domain *tsse_get_domain_by_handle(int handle) +{ + struct tsse_dev *tdev; + struct pci_dev *pdev; + + if (!iommu_present(&pci_bus_type)) { + pr_err("%s(): IOMMU is not enabled\n", __func__); + return NULL; + } + tdev = tsse_get_dev_by_handle(handle); + if (!tdev) + return NULL; + + pdev = tdev->tsse_pci_dev.pci_dev; + return iommu_get_domain_for_dev(&pdev->dev); +} +EXPORT_SYMBOL_GPL(tsse_get_domain_by_handle); + +int tsse_process_for_all(tsse_dev_process_func func) +{ + struct list_head *itr = NULL; + struct tsse_dev *tdev = NULL; + int rc = 0; + + list_for_each(itr, &tsse_dev_table) { + tdev = list_entry(itr, struct tsse_dev, list); + rc = func(tdev); + if (rc) + break; + } + return rc; +} diff --git a/drivers/crypto/montage/tsse/tsse_fw_service.c b/drivers/crypto/montage/tsse/tsse_fw_service.c index 486352bc8f84af626f36325e008030174e0a8862..4917955ca13df2d8503d4313b5aec7b026ed096f 100644 --- a/drivers/crypto/montage/tsse/tsse_fw_service.c +++ b/drivers/crypto/montage/tsse/tsse_fw_service.c @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. */ #include @@ -18,37 +18,19 @@ #include #include "tsse_dev.h" +#include "tsse_ipc.h" #include "tsse_service.h" +#include "tsse_fw_service.h" #define SEARCH_PATTERN "MT_CFG_BUILD_VERSION_DETAIL" #define SPACE_CH ' ' -static int fw_send_msg(struct tsse_ipc *tsseipc, struct ipc_msg *msg) +static int fw_send_msg(struct tsse_dev *tdev, struct fw_load *fw_task) { - u8 *h2d; - u32 int_reg; + struct tsse_ipc *tsseipc = tdev->ipc; - mutex_lock(&tsseipc->list_lock); - - int_reg = readl(tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); - if ((int_reg & IPC_REGISTER_INT_SET) != 0) { - mutex_unlock(&tsseipc->list_lock); - return -EFAULT; - } - if (msg->header.i_len < sizeof(struct ipc_header) + - sizeof(struct msg_info) + sizeof(struct fw_load)) { - dev_err(tsseipc->dev, "msg format error\n"); - return -EFAULT; - } - h2d = (u8 *)(tsseipc->virt_addr + HOST2MAIN_IPC_OFFSET); - memcpy_toio(h2d, msg, sizeof(struct ipc_header)); - memcpy_toio(h2d + sizeof(struct ipc_header), (u8 *)msg->i_data, - msg->header.i_len - sizeof(struct ipc_header)); - writel(0x1, tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); - - dev_info(tsseipc->dev, "notify device to get firmware\n"); - mutex_unlock(&tsseipc->list_lock); - return 0; + dev_dbg(tsseipc->dev, "notify device\n"); + return ipc_h2d_msg_send_legacy(tdev->id, IPC_MESSAGE_BOOT, fw_task, sizeof(struct fw_load)); } /** @@ -103,42 +85,46 @@ int get_firmware_version(const struct firmware *fw, char *fw_version_out) } /** - * fw_service() - Firmware service to handle IPC message from mainCPU. + * fw_service() - Firmware service to handle IPC message from device. 
* It will write init or manual load firmware to PCIe BAR and send message back. - * @tsseipc_t: pointer to a structure used for IPC - * @msg_t: pointer to IPC message + * @handle: handle to TSSE device + * @msg_payload: pointer to IPC message payload + * @length: length of the msg_payload + * Return: 0 on success, error code otherwise */ -void fw_service(void *tsseipc_t, void *msg_t) +int fw_service(int handle, void *msg_payload, uint32_t length) { void __iomem *fw; - uint32_t size; - uint32_t task_offset; - struct fw_load *fw_task; struct tsse_dev *tdev; - struct tsse_ipc *tsseipc = (struct tsse_ipc *)tsseipc_t; - struct ipc_msg *msg = (struct ipc_msg *)msg_t; + struct tsse_ipc *tsseipc; + struct fw_load *fw_task; - task_offset = sizeof(struct msg_info); - fw_task = (struct fw_load *)((uint8_t *)msg->i_data + task_offset); - tdev = pci_to_tsse_dev(tsseipc->pdev); + if (!msg_payload || !length) { + pr_err("%s %d: invalid input parameter\n", __func__, __LINE__); + return -EINVAL; + } + tdev = tsse_get_dev_by_handle(handle); + if (!tdev) + return -ENODEV; - if (!tdev || !tdev->fw) { + tsseipc = tdev->ipc; + fw_task = (struct fw_load *) msg_payload; + if (!tdev->fw) { fw_task->result = 1; fw_task->size = 0; dev_info(tsseipc->dev, "firmware loading failed\n"); - if (fw_send_msg(tsseipc, msg)) + if (fw_send_msg(tdev, fw_task)) dev_err(tsseipc->dev, "notify device failed\n"); - return; + return -ENOENT; } fw_task->result = 0; fw_task->size = tdev->fw->size; - size = tdev->fw->size; fw = tsseipc->virt_addr + fw_task->offset + FW_BASE; - memcpy_toio((u8 *)fw, tdev->fw->data, size); + memcpy_toio((u8 *)fw, tdev->fw->data, tdev->fw->size); dev_info(tsseipc->dev, "firmware loading done\n"); - if (fw_send_msg(tsseipc, msg)) + if (fw_send_msg(tdev, fw_task)) dev_err(tsseipc->dev, "notify device failed\n"); if (tdev->fw_version_exist) @@ -150,6 +136,7 @@ void fw_service(void *tsseipc_t, void *msg_t) memset(tdev->fw_version, 0, TSSE_FW_VERSION_LEN); tdev->fw_version_exist = false; } + return 0; } /** @@ -162,9 +149,11 @@ void fw_service(void *tsseipc_t, void *msg_t) int tsse_fw_load(struct pci_dev *pdev, const char *name, const struct firmware **fw) { int result; + struct tsse_dev *tdev = pci_to_tsse_dev(pdev); - result = request_firmware(fw, name, &pdev->dev); + result = request_firmware_into_buf(fw, name, &pdev->dev, + tdev->fw_data, TSSE_FIRMWARE_MAX_LENGTH); if (result) - dev_err(&pdev->dev, "%s failed for %s\n", __func__, name); + dev_err(&pdev->dev, "%s failed for %s: %d\n", __func__, name, result); return result; } diff --git a/drivers/crypto/montage/tsse/tsse_fw_service.h b/drivers/crypto/montage/tsse/tsse_fw_service.h index 706ea6d297696cf1e49e51bc1150f63a141af1e0..24a209350710e52219465db532c4f1c623824a7c 100644 --- a/drivers/crypto/montage/tsse/tsse_fw_service.h +++ b/drivers/crypto/montage/tsse/tsse_fw_service.h @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
*/ #ifndef __TSSE_FW_SERVICE_H__ @@ -12,8 +12,9 @@ #define FW_BASE 0x7000000 #define TSSE_FIRMWARE "tsse_firmware.bin" +#define TSSE_FIRMWARE_MAX_LENGTH (1024 * 1024) -void fw_service(void *tsseipc_t, void *msg_t); +int fw_service(int handle, void *msg_payload, uint32_t length); int tsse_fw_load(struct pci_dev *pdev, const char *name, const struct firmware **fw); int get_firmware_version(const struct firmware *fw, char *fw_version_out); #endif diff --git a/drivers/crypto/montage/tsse/tsse_handle.h b/drivers/crypto/montage/tsse/tsse_handle.h new file mode 100644 index 0000000000000000000000000000000000000000..d18cc5bf0e0add4b1f295783698835d5247e90b9 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_handle.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. + */ + +#ifndef __TSSE_HANDLE_H__ +#define __TSSE_HANDLE_H__ + +#include +#include + +int tsse_get_available_handle(void); +struct iommu_domain *tsse_get_domain_by_handle(int handle); +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc.c b/drivers/crypto/montage/tsse/tsse_ipc.c index b75ca97db6b67afa3c3159427a454c865e01a7f7..a34330489237951323118d214315d4a72f51f2be 100644 --- a/drivers/crypto/montage/tsse/tsse_ipc.c +++ b/drivers/crypto/montage/tsse/tsse_ipc.c @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. */ #include @@ -11,6 +11,8 @@ #include #include "tsse_ipc.h" +#include "tsse_ipc_setup.h" +#include "tsse_ipc_drv.h" #include "tsse_dev.h" #include "tsse_service.h" @@ -29,15 +31,15 @@ static struct ipc_msg *get_msginf(void __iomem *d2h) struct ipc_header *ipc_info = (struct ipc_header *)d2h; // The memory layout in d2h should at least contains: - // ipc_header, msg_info and fw_load (message body) + // ipc_header, msg_info if (ipc_info->i_len < sizeof(struct ipc_header) + - sizeof(struct msg_info) + sizeof(struct fw_load)) { + sizeof(struct msg_info)) { pr_info("%s(): msg format error\n", __func__); return NULL; } u_len = ipc_info->i_len - sizeof(struct ipc_header); msg = (struct ipc_msg *)(kzalloc(sizeof(struct ipc_msg) + u_len, - GFP_ATOMIC)); + GFP_ATOMIC)); if (!msg) { pr_info("%s(): ipc_msg kzalloc failed\n", __func__); return NULL; @@ -53,164 +55,96 @@ static struct ipc_msg *get_msginf(void __iomem *d2h) return msg; } -static irqreturn_t tsse_ipc_d2h_irqhandler(int irq, void *dev_id) -{ - struct tsse_ipc *tsseipc = (struct tsse_ipc *)dev_id; - - writel(0x0, tsseipc->virt_addr + MAIN2HOST_INTR_SET_OFFSET); - tasklet_hi_schedule(&tsseipc->ipc_handle); - dev_err(tsseipc->dev, "irq%d\n", irq); - return IRQ_HANDLED; -} - -bool check_send_enbit(struct tsse_ipc *tsseipc) -{ - u32 int_reg; - - int_reg = readl(tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); - if ((int_reg & IPC_REGISTER_INT_SET) == 0) - return true; - else - return false; -} - -void notify_device(struct tsse_ipc *tsseipc) -{ - writel(0x1, tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); - return; - -} - /** - * ipc_hw_init()- Enable main2host interrupt, cleanup interrupt - * set value in host2main and main2host. 
- @hw_ipc: pointer to a structure used for IPC + * tsse_write_msg() - write a message from host to device + @tsseipc: pointer to structure used for IPC in current device + @msg_class: type for the IPC message + @msg_payload: pointer to actual content that caller wants to send + @payload_length: length of msg_payload + Return: 0 on success, error code otherwise */ -static void ipc_hw_init(struct tsse_ipc *hw_ipc) -{ - writel(0x1, hw_ipc->virt_addr + MAIN2HOST_INTR_ENABLE_OFFSET); - writel(0x0, hw_ipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); - writel(0x0, hw_ipc->virt_addr + MAIN2HOST_INTR_SET_OFFSET); -} - -static int ipc_init_msg(struct tsse_ipc *tsseipc) +static int tsse_write_msg(struct tsse_ipc *tsseipc, uint32_t msg_class, + void *msg_payload, uint32_t payload_length) { u8 *h2d; u32 int_reg; - u32 cmd_len; - u32 i_len; + u32 comm_msg_length; struct ipc_msg *msg; struct msg_info *info_msg; - cmd_len = sizeof(uint32_t); - i_len = sizeof(struct ipc_header) + sizeof(struct msg_info) + cmd_len; - msg = (struct ipc_msg *)(kzalloc(i_len, GFP_ATOMIC)); + comm_msg_length = sizeof(struct ipc_header) + sizeof(struct msg_info); + msg = (struct ipc_msg *)(kzalloc(comm_msg_length, GFP_ATOMIC)); if (!msg) { pr_info("%s(): msg kzalloc failed\n", __func__); - return -EFAULT; + return -ENOMEM; } - msg->header.i_len = i_len; + msg->header.i_len = comm_msg_length + payload_length; info_msg = (struct msg_info *)msg->i_data; - info_msg->msg_class = IPC_MESSAGE_BASIC; - *(uint32_t *)((uint8_t *)msg->i_data + sizeof(struct msg_info)) = IPC_BASIC_CMD_HOST_INIT; + info_msg->msg_class = msg_class; mutex_lock(&tsseipc->list_lock); int_reg = readl(tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); if ((int_reg & IPC_REGISTER_INT_SET) != 0) { mutex_unlock(&tsseipc->list_lock); kfree(msg); - return -EFAULT; + return -EAGAIN; } h2d = (u8 *)(tsseipc->virt_addr + HOST2MAIN_IPC_OFFSET); - memcpy_toio(h2d, msg, sizeof(struct ipc_header)); - memcpy_toio(h2d + sizeof(struct ipc_header), (u8 *)msg->i_data, - sizeof(struct msg_info) + sizeof(uint32_t)); + ipc_memcpy_to_io(h2d, (u8 *)msg, comm_msg_length); + ipc_memcpy_to_io(h2d + comm_msg_length, (u8 *)msg_payload, payload_length); writel(0x1, tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); mutex_unlock(&tsseipc->list_lock); kfree(msg); - return 0; } -static void tsse_ipc_bh_handler(unsigned long data) +int ipc_d2h_legacy_msg_process(struct tsse_ipc *tsseipc, void __iomem *d2h_msg) { - struct tsse_ipc *tsseipc = (struct tsse_ipc *)data; - - void __iomem *d2h_payload = tsseipc->virt_addr + MAIN2HOST_IPC_OFFSET; - struct ipc_msg *msg = get_msginf(d2h_payload); + int ret; + struct ipc_msg *msg = get_msginf(d2h_msg); if (!msg) { dev_err(tsseipc->dev, "get_msginf is NULL\n"); - return; - } - if (service_rout(tsseipc, msg)) - dev_err(tsseipc->dev, "illegal message class\n"); - kfree(msg); -} - -int tsse_ipc_init(struct pci_dev *pdev) -{ - struct tsse_dev *tdev = pci_to_tsse_dev(pdev); - struct tsse_ipc *ipc; - int rc; - - ipc = devm_kzalloc(&pdev->dev, sizeof(*ipc), GFP_KERNEL); - if (ipc == NULL) return -ENOMEM; - tdev->ipc = ipc; - ipc->pdev = pdev; - ipc->dev = &pdev->dev; - ipc->virt_addr = TSSE_DEV_BARS(tdev)[2].virt_addr; - - mutex_init(&ipc->list_lock); - tasklet_init(&(ipc->ipc_handle), tsse_ipc_bh_handler, - (ulong)(ipc)); - - rc = request_threaded_irq(pci_irq_vector(pdev, 0), NULL, - tsse_ipc_d2h_irqhandler, IRQF_SHARED, - "pf-ipc", ipc); - if (rc) { - dev_err(&pdev->dev, "request_threaded_irq failed\n"); - return rc; } - ipc_hw_init(ipc); - rc = 
ipc_init_msg(ipc); - if (rc) { - dev_err(&pdev->dev, "ipc_init_msg failed\n"); - tsse_ipc_deinit(tdev); - } - return rc; + ret = service_rout(tsseipc, msg); + kfree(msg); + return ret; } -void tsse_ipc_deinit(void *tdev_t) +/** + * ipc_h2d_msg_send_legacy() - send message from host to device + * @handle: handle to TSSE device + * @msg_class: type for the IPC message + * @msg_payload: pointer to actual content that caller wants to send + * @length: length of msg_payload + * Return: 0 on success, error code otherwise + */ +int ipc_h2d_msg_send_legacy(int handle, uint32_t msg_class, + void *msg_payload, uint32_t length) { - struct tsse_ipc *tsseipc; - struct pci_dev *pdev; struct tsse_dev *tdev; + struct tsse_ipc *tsseipc; + tsse_d2h_ipc_handler ipc_handler; - tdev = tdev_t; - tsseipc = tdev->ipc; - pdev = tsseipc->pdev; - if (tsseipc) { - free_irq(pci_irq_vector(pdev, 0), tdev->ipc); - tdev->ipc = NULL; + tdev = tsse_get_dev_by_handle(handle); + if (!tdev) + return -ENODEV; + + if (!msg_payload || !length) { + pr_err("%s %d: invalid msg payload\n", __func__, __LINE__); + return -EINVAL; } -int tsse_fw_manual_load_ipc(struct pci_dev *pdev) -{ - struct tsse_dev *tdev = pci_to_tsse_dev(pdev); - struct tsse_ipc *ipc = tdev->ipc; - int rc = -EFAULT; - - if (ipc) { - ipc_hw_init(ipc); - rc = ipc_init_msg(ipc); - if (rc) - dev_err(&pdev->dev, "ipc_init_msg failed\n"); + tsseipc = tdev->ipc; + if (msg_class >= IPC_MESSAGE_CLASS_NUM) { + pr_err("%s %d: invalid msg class\n", __func__, __LINE__); + return -EINVAL; } + ipc_handler = tsseipc->d2h_handlers[msg_class]; + if (msg_class != IPC_MESSAGE_BASIC && !ipc_handler) { + pr_err("%s %d: invalid msg class\n", __func__, __LINE__); + return -EINVAL; } - return rc; + return tsse_write_msg(tsseipc, msg_class, msg_payload, length); } diff --git a/drivers/crypto/montage/tsse/tsse_ipc.h b/drivers/crypto/montage/tsse/tsse_ipc.h index 82f8df71c98371de379f57b3cba1b936309fbec7..0f247333cf84c98d8d389bf854903437712ea73c 100644 --- a/drivers/crypto/montage/tsse/tsse_ipc.h +++ b/drivers/crypto/montage/tsse/tsse_ipc.h @@ -2,7 +2,7 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
*/ #ifndef __TM_HOST_IPC_H__ @@ -11,45 +11,7 @@ #include #include #include - -#define TSSE_PASID_SVA - -#define HOST2MAIN_INTR_SET_OFFSET 0x2000 -#define HOST2MAIN_INTR_ENABLE_OFFSET 0x2004 -#define HOST2MAIN_ACK_INTR_CLR_OFFSET 0x2008 -#define HOST2MAIN_ACK_INTR_ENABLE_OFFSET 0x200c -#define HOST2MAIN_VLD_INTR_STATUS_OFFSET 0x2010 -#define HOST2MAIN_ACK_INTR_STATUS_OFFSET 0x2014 -#define MSIX_MASK_EN_REG_OFFSET 0x2020 -#define INTR_MASK_BIT_OFFSET 0x2024 -#define INTR_PENDING_BIT_OFFSET 0x2028 -#define HOST2MAIN_IPC_OFFSET 0x2400 - -#define MAIN2HOST_INTR_SET_OFFSET 0x3000 -#define MAIN2HOST_INTR_ENABLE_OFFSET 0x3004 -#define MAIN2HOST_ACK_INTR_CLR_OFFSET 0x3008 -#define MAIN2HOST_ACK_INTR_ENABLE_OFFSET 0x300c -#define MAIN2HOST_VEN_MSI_FUNC_NUM_OFFSET 0x3010 -#define MAIN2HOST_VEN_MSI_VFUNC_ACTIVE_OFFSET 0x3014 -#define MAIN2HOST_IPC_OFFSET 0x3400 - -#define IPC_REGISTER_INT_SET BIT(0) -#define IPC_REGISTER_INT_MASK BIT(1) - -enum IPC_BASIC_CMD { - IPC_BASIC_CMD_HOST_INIT = 0x1, - IPC_BASIC_CMD_PING = 0x2 -}; - -enum IPC_BOOT_CMD { - IPC_BOOT_CMD_GET_FIRMWARE = 0x1 -}; - -enum IPC_MESSAGE_CLASS { - IPC_MESSAGE_BASIC = 1, - IPC_MESSAGE_BOOT, - IPC_MESSAGE_CLASS_NUM, -}; +#include "tsse_ipc_setup.h" struct ipc_header { uint32_t inst_id; @@ -87,17 +49,7 @@ struct ipc_layout { struct msg_info info; }; -struct tsse_ipc { - struct device *dev; - struct pci_dev *pdev; - void __iomem *virt_addr; - struct mutex list_lock; - struct tasklet_struct ipc_handle; -}; +int ipc_h2d_msg_send_legacy(int handle, uint32_t msg_class, void *msg_payload, uint32_t length); +int ipc_d2h_legacy_msg_process(struct tsse_ipc *tsseipc, void *msg); -int tsse_ipc_init(struct pci_dev *pdev); -void tsse_ipc_deinit(void *tdev); -int tsse_fw_manual_load_ipc(struct pci_dev *pdev); -bool check_send_enbit(struct tsse_ipc *tsseipc); -void notify_device(struct tsse_ipc *tsseipc); #endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_api.c b/drivers/crypto/montage/tsse/tsse_ipc_api.c new file mode 100644 index 0000000000000000000000000000000000000000..1c3eadbbd02939b4246baefc11a2200b933fe036 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_api.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. + */ +#include +#include +#include "tsse_ipc_msg.h" +#include "tsse_dev.h" +#include "tsse_ipc_hash.h" +#include "tsse_ipc_service.h" + +/** + * tsse_im_startup() - Start up TSSE IPC Message handling; devices that are already started are skipped. + * Return: 0 if started successfully, other values for failure. + */ +int tsse_im_startup(void) +{ + return tsse_process_for_all(tsse_im_startup_for_dev); +} +EXPORT_SYMBOL_GPL(tsse_im_startup); + +/** + * tsse_im_service_exist() - Check whether the specified IPC Message service exists. + * @name: IPC Message service name + * Return: 0 if the service exists, otherwise -EINVAL. + */ +int tsse_im_service_exist(const char *name) +{ + struct service_info_entry *entry; + + entry = tsse_service_info_hash_get(name); + if (!entry) { + pr_err("%s(): service: %s does not exist\n", __func__, name); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(tsse_im_service_exist); + +/** + * tsse_im_service_handle_alloc() - Allocate an IPC Message service handle for the specified service. 
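Taken together, the exported tsse_im_* calls combine as in the following usage sketch (illustrative only, not part of the patch: service name, callback, and flow are hypothetical, and error handling is abbreviated); the kernel-doc parameter list for the alloc function continues right after it:

/* Sketch: typical client flow through the tsse_im_* API. */
#include <linux/types.h>
#include "tsse_ipc_msg.h"

static int my_rsp_cb(tsse_im_service_handle handle,
		     void *msg_payload, u32 payload_length)
{
	/* consume the device-to-host response here */
	return 0;
}

static int example_send_request(void *req, u32 req_len)
{
	tsse_im_service_handle h;
	int ret;

	ret = tsse_im_startup();
	if (ret)
		return ret;
	ret = tsse_im_service_exist("examplesvc");	/* hypothetical name */
	if (ret)
		return ret;
	ret = tsse_im_service_handle_alloc("examplesvc", my_rsp_cb, &h);
	if (ret)
		return ret;
	ret = tsse_im_service_msg_h2d(h, req, req_len);
	/* in real use, keep the handle until responses have been processed */
	tsse_im_service_handle_free(h);
	return ret;
}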
+ * @name: IPC Message service name + * @cb: request callback for the service + * @handle: function output for the service handle + * Return: 0 if allocated successfully, other values for failure + */ +int tsse_im_service_handle_alloc( + const char *name, + tsse_im_cb_func cb, + tsse_im_service_handle *handle) +{ + struct tsse_service_instance *service_instance; + int ret; + + service_instance = kzalloc(sizeof(struct tsse_service_instance), GFP_ATOMIC); + if (!service_instance) + return -ENOMEM; + service_instance->service_opened = 0; + service_instance->cb = cb; + strscpy(service_instance->service_name, name, TSSE_IM_SERVICE_NAME_LEN); + + ret = tsse_schedule_device_handle(service_instance); + if (ret) { + kfree(service_instance); + return ret; + } + + ret = tsse_service_open(service_instance); + if (ret) { + pr_err("%s(): open service: %s failed: %d\n", + __func__, service_instance->service_name, ret); + kfree(service_instance); + return ret; + } + *handle = service_instance; + return 0; +} +EXPORT_SYMBOL_GPL(tsse_im_service_handle_alloc); + +/** + * tsse_im_service_handle_free() - Free IPC Message service handle + * @handle: service handle to free + * Return: 0 if freed successfully, other values for failure + */ +int tsse_im_service_handle_free(tsse_im_service_handle handle) +{ + int ret = 0; + + if (handle) { + ret = tsse_service_close(handle); + kfree((void *)handle); + } + return ret; +} +EXPORT_SYMBOL_GPL(tsse_im_service_handle_free); + +/** + * tsse_im_service_msg_h2d() - Send message from host to device + * @handle: service handle + * @msg_payload: the message payload to send + * @payload_length: length of msg_payload + */ +int tsse_im_service_msg_h2d(tsse_im_service_handle handle, void *msg_payload, u32 payload_length) +{ + if (!handle || !msg_payload || !payload_length) + return -EINVAL; + return tsse_service_msg_send(handle, TSSE_SERVICE_CMD_APP_MSG, msg_payload, payload_length); +} +EXPORT_SYMBOL_GPL(tsse_im_service_msg_h2d); diff --git a/drivers/crypto/montage/tsse/tsse_ipc_drv.c b/drivers/crypto/montage/tsse/tsse_ipc_drv.c new file mode 100644 index 0000000000000000000000000000000000000000..dbc91d3894ba7bef1beea15a272ce784c1377e5d --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_drv.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
+ */ + +#include +#include +#include + +#include "tsse_ipc.h" +#include "tsse_ipc_drv.h" +#include "tsse_ipc_service.h" +#include "tsse_ipc_epid.h" +#include "tsse_dev.h" + +#define ALIGN_TO_4_BYTES(length) (((length) + 3) & ~0x3) + +static int ipc_d2h_new_msg_process(struct tsse_ipc *tsseipc, void __iomem *d2h_msg) +{ + struct tsse_ipc_msg *ipc_msg = (struct tsse_ipc_msg *)d2h_msg; + void *payload; + u32 msg_len; + u32 header_len; + u32 payload_len; + int ret; + u64 epid; + + msg_len = ipc_msg->msg_len; + header_len = sizeof(struct tsse_ipc_msg); + payload_len = msg_len - header_len; + epid = ipc_msg->epid; + + if (msg_len < header_len || msg_len > IPC_MAX_DATA_LEN) { + pr_err("%s %d: invalid msg len: %u in resp\n", __func__, __LINE__, msg_len); + return -EINVAL; + } + payload = kzalloc(payload_len, GFP_ATOMIC); + if (!payload) + return -ENOMEM; + memcpy_fromio(payload, (u8 *)d2h_msg + header_len, payload_len); + if (ipc_msg->type == TSSE_IPC_TYPE_RING_SETUP_RSP) + ret = ipc_ring_setup_resp_receive(payload, payload_len); + else + ret = tsse_service_msg_receive(epid, payload, payload_len); + kfree(payload); + return ret; +} + +static struct tsse_ipc_msg *ipc_h2d_msg_header_create(u64 epid, u32 payload_length) +{ + struct tsse_ipc_msg *header = (struct tsse_ipc_msg *)( + kzalloc(sizeof(struct tsse_ipc_msg), GFP_ATOMIC)); + if (header) { + if (GET_SERVICE_ID(epid) == EPID_MANAGE_SERVICE_ID) { + if (GET_APP_SPECIFIC_ID(epid) == TSSE_IPC_SPECIFIC_RING_SETUP_REQ) + header->type = TSSE_IPC_TYPE_RING_SETUP_REQ; + else if (GET_APP_SPECIFIC_ID(epid) == TSSE_IPC_SPECIFIC_RING_SETUP_RSP) + header->type = TSSE_IPC_TYPE_RING_SETUP_RSP; + else + header->type = TSSE_IPC_TYPE_SERVICE; + } else { + header->type = TSSE_IPC_TYPE_SERVICE; + } + header->msg_len = sizeof(struct tsse_ipc_msg) + payload_length; + header->rev = 0; + header->epid = epid; + } + return header; +} + +int ipc_h2d_msg_send(int device_handle, u64 epid, void *msg_payload, u32 length) +{ + struct tsse_dev *tdev; + struct tsse_ipc *tsseipc; + struct tsse_ipc_msg *header; + u8 *h2d; + u32 int_reg; + u32 header_size; + + tdev = tsse_get_dev_by_handle(device_handle); + if (!tdev) + return -ENODEV; + + if (!msg_payload || !length) { + pr_err("%s %d: invalid msg payload\n", __func__, __LINE__); + return -EINVAL; + } + header_size = sizeof(struct tsse_ipc_msg); + if (length + header_size > IPC_MAX_DATA_LEN) { + pr_err("%s %d length too large: %u\n", __func__, __LINE__, length); + return -EINVAL; + } + tsseipc = tdev->ipc; + mutex_lock(&tsseipc->list_lock); + int_reg = readl(tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); + if ((int_reg & IPC_REGISTER_INT_SET) != 0) { + mutex_unlock(&tsseipc->list_lock); + return -EAGAIN; + } + header = ipc_h2d_msg_header_create(epid, length); + if (!header) { + mutex_unlock(&tsseipc->list_lock); + pr_err("%s(): msg header kzalloc failed\n", __func__); + return -ENOMEM; + } + h2d = (u8 *)(tsseipc->virt_addr + HOST2MAIN_IPC_OFFSET); + ipc_memcpy_to_io(h2d, (u8 *)header, header_size); + ipc_memcpy_to_io(h2d + header_size, msg_payload, length); + + writel(0x1, tsseipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); + mutex_unlock(&tsseipc->list_lock); + kfree(header); + return 0; +} + +int ipc_d2h_msg_dispatch(struct tsse_ipc *tsseipc, void __iomem *d2h_msg) +{ + u16 type = (u16) cpu_to_le32(readl(d2h_msg)); + + switch (type) { + case TSSE_IPC_TYPE_LEGACY: + return ipc_d2h_legacy_msg_process(tsseipc, d2h_msg); + case TSSE_IPC_TYPE_SERVICE: + case TSSE_IPC_TYPE_RING_SETUP_RSP: + return ipc_d2h_new_msg_process(tsseipc, 
d2h_msg); + default: + pr_err("%s %d: invalid msg type: %u\n", __func__, __LINE__, type); + return -EINVAL; + } +} + +void ipc_memcpy_to_io(u8 *addr, u8 *src, u32 len) +{ + memcpy_toio(addr, src, len); +} diff --git a/drivers/crypto/montage/tsse/tsse_ipc_drv.h b/drivers/crypto/montage/tsse/tsse_ipc_drv.h new file mode 100644 index 0000000000000000000000000000000000000000..9129d91391325f0511ea98ab3250b2bbb9daa12d --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_drv.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. + */ + +#ifndef __TSSE_IPC_DRV_H__ +#define __TSSE_IPC_DRV_H__ + +#include +#include "tsse_ipc_setup.h" + +#define TSSE_IPC_SPECIFIC_RING_SETUP_REQ 100 +#define TSSE_IPC_SPECIFIC_RING_SETUP_RSP 101 + +#pragma pack(push, 4) +struct tsse_ipc_msg { + u16 type; + u16 msg_len; + u32 rev; + u64 epid; + u8 data[]; +}; +#pragma pack(pop) + +enum tsse_ipc_type { + TSSE_IPC_TYPE_LEGACY = 0, + TSSE_IPC_TYPE_SERVICE, + TSSE_IPC_TYPE_RING_SETUP_REQ, + TSSE_IPC_TYPE_RING_SETUP_RSP +}; + +int ipc_h2d_msg_send(int device_handle, u64 epid, void *msg_payload, u32 length); +int ipc_d2h_msg_dispatch(struct tsse_ipc *tsseipc, void __iomem *d2h_msg); +void ipc_memcpy_to_io(u8 *addr, u8 *src, u32 len); + +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_epid.c b/drivers/crypto/montage/tsse/tsse_ipc_epid.c new file mode 100644 index 0000000000000000000000000000000000000000..9965ba7d6f52dfe8dbc476f6248270b96dda1c96 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_epid.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
+ */ +#include +#include +#include "tsse_ipc_epid.h" +#include "tsse_ipc_hash.h" +#include "tsse_ipc_service.h" + +/* service max ids: 2^20 */ +#define SERVICE_RESERVED_NUM 16 +#define SERVICE_MAX_IDS (1 << 20) +#define SERVICE_BITMAP_SIZE (SERVICE_MAX_IDS / 8) + +#define IS_BIT_SET(bitmap, bit) ((bitmap[(bit) / 8] & (1 << ((bit) % 8))) != 0) +#define SET_BIT(bitmap, bit) (bitmap[(bit) / 8] |= (1 << ((bit) % 8))) +#define CLEAR_BIT(bitmap, bit) (bitmap[(bit) / 8] &= ~(1 << ((bit) % 8))) + +static u8 service_id_bitmap[SERVICE_BITMAP_SIZE] = {0}; +static u32 current_max_service_id = SERVICE_RESERVED_NUM; + +static int tsse_available_service_id(void) +{ + int i = current_max_service_id; + + if (i == SERVICE_MAX_IDS) + i = current_max_service_id = SERVICE_RESERVED_NUM; + for (; i < SERVICE_MAX_IDS; i++) { + if (!IS_BIT_SET(service_id_bitmap, i)) { + SET_BIT(service_id_bitmap, i); + if (i > current_max_service_id) + current_max_service_id = i; + return i; + } + } + return -1; +} + +static void fill_epid(struct tsse_service_instance *service_instance, int service_id) +{ + struct tsse_epid epid_data = {0}; + + epid_data.service_id = service_id; + epid_data.pasid_en = 0; + epid_data.vf_id = 0; + epid_data.is_pf = 1; + epid_data.device_id = service_instance->device_handle; + service_instance->service_epid = EPID_TO_UINT64(&epid_data); +} + +int tsse_alloc_service_epid(tsse_im_service_handle handle) +{ + int service_id; + struct tsse_service_instance *service_instance = (struct tsse_service_instance *)handle; + + if (strcmp(service_instance->service_name, TSSE_MANAGE_SERVICE_NAME) == 0) + service_id = EPID_MANAGE_SERVICE_ID; + else + service_id = tsse_available_service_id(); + if (service_id < 0) + return -EFAULT; + fill_epid(service_instance, service_id); + return 0; +} + +void tsse_free_service_epid(tsse_im_service_handle handle) +{ + struct tsse_service_instance *service_instance = (struct tsse_service_instance *)handle; + u32 service_id = GET_SERVICE_ID(service_instance->service_epid); + + if (service_id < SERVICE_MAX_IDS) + CLEAR_BIT(service_id_bitmap, service_id); +} diff --git a/drivers/crypto/montage/tsse/tsse_ipc_epid.h b/drivers/crypto/montage/tsse/tsse_ipc_epid.h new file mode 100644 index 0000000000000000000000000000000000000000..55886da8e07c6bc2edb7af933857cf5369e20bb8 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_epid.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
+ */ +#ifndef __TSSE_IPC_EPID_H__ +#define __TSSE_IPC_EPID_H__ + +#include +#include "tsse_ipc_msg.h" + +#define EPID_MANAGE_SERVICE_ID 0 +struct tsse_epid { + uint64_t app_id : 8; + uint64_t service_id : 20; + uint64_t pasid : 20; + uint64_t pasid_en : 2; + uint64_t vf_id : 4; + uint64_t is_pf : 2; + uint64_t device_id : 8; +}; + +#define GET_DEVICE_ID(epid) (((epid) >> 56) & 0xFF) +#define GET_SERVICE_ID(epid) (((epid) >> 8) & 0xFFFFF) +#define GET_APP_SPECIFIC_ID(epid) ((epid) & 0xFF) +#define EPID_SET_PF(epid, is_pf) (((epid) & 0xFF3FFFFFFFFFFFFF) | ((uint64_t)(is_pf)) << 54) +#define SERVICE_LEVEL_EPID(epid) ((epid) & 0xFFFFFFFFFFFFFF00) + +#define EPID_TO_UINT64(epid_data) \ + (((uint64_t)(epid_data)->app_id) | \ + ((uint64_t)(epid_data)->service_id << 8) | \ + ((uint64_t)(epid_data)->pasid << 28) | \ + ((uint64_t)(epid_data)->pasid_en << 48) | \ + ((uint64_t)(epid_data)->vf_id << 50) | \ + ((uint64_t)(epid_data)->is_pf << 54) | \ + ((uint64_t)(epid_data)->device_id << 56)) + +/* used to parse from response epid, + * contains device_id, service_id and app_id + */ +#define GET_BASIC_EPID(epid) ((epid) & 0xFF0000000FFFFFFF) + +#define APPEND_APP_ID_TO_EPID(epid, app_id) \ + (((epid) & 0xFFFFFFFFFFFFFF00) | ((app_id) & 0xFF)) + + +int tsse_alloc_service_epid(tsse_im_service_handle handle); +void tsse_free_service_epid(tsse_im_service_handle handle); +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_hash.c b/drivers/crypto/montage/tsse/tsse_ipc_hash.c new file mode 100644 index 0000000000000000000000000000000000000000..417e0e91382cd1a5b060ba4d4579cf26350b79de --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_hash.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
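Returning to the EPID layout defined just above: a worked packing example (illustrative sketch with made-up field values, not part of the patch) shows how EPID_TO_UINT64() and the GET_* accessors round-trip:

/* Sketch: pack an EPID by hand and read fields back. Values are invented. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t epid = 0;

	epid |= (uint64_t)0  << 0;	/* app_id     : bits 0-7   */
	epid |= (uint64_t)17 << 8;	/* service_id : bits 8-27  */
	epid |= (uint64_t)0  << 28;	/* pasid      : bits 28-47 */
	epid |= (uint64_t)0  << 48;	/* pasid_en   : bits 48-49 */
	epid |= (uint64_t)0  << 50;	/* vf_id      : bits 50-53 */
	epid |= (uint64_t)1  << 54;	/* is_pf      : bits 54-55 */
	epid |= (uint64_t)2  << 56;	/* device_id  : bits 56-63 */

	/* epid == 0x0240000000001100; GET_SERVICE_ID() recovers 17 and
	 * GET_DEVICE_ID() recovers 2, matching fill_epid() earlier. */
	printf("epid = 0x%016llx\n", (unsigned long long)epid);
	return 0;
}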
+ */ +#include +#include +#include +#include +#include +#include "tsse_ipc_hash.h" + +DEFINE_HASHTABLE(service_info_table, SERVICE_TABLE_BUCKET_BITS); +DEFINE_HASHTABLE(service_handle_table, SERVICE_TABLE_BUCKET_BITS); + +static u32 hash_string(const char *str) +{ + return full_name_hash(NULL, str, strlen(str)); +} + +int tsse_service_info_hash_set(const char *service, void *service_info) +{ + struct service_info_entry *new_entry; + + new_entry = kzalloc(sizeof(struct service_info_entry), GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->service_name = kstrdup(service, GFP_KERNEL); + new_entry->service_info = service_info; + hash_add(service_info_table, &new_entry->node, + hash_min(hash_string(service), SERVICE_TABLE_BUCKET_BITS)); + return 0; +} + +struct service_info_entry *tsse_service_info_hash_get(const char *service) +{ + struct service_info_entry *entry; + + hash_for_each_possible(service_info_table, entry, node, + hash_min(hash_string(service), SERVICE_TABLE_BUCKET_BITS)) { + if (strcmp(entry->service_name, service) == 0) + return entry; + } + return NULL; +} + +void tsse_service_info_hash_remove_all(void) +{ + int bucket; + struct service_info_entry *entry; + struct hlist_node *tmp; + + hash_for_each_safe(service_info_table, bucket, tmp, entry, node) { + kfree(entry->service_name); + kfree(entry->service_info); + hash_del(&entry->node); + kfree(entry); + } +} + +int tsse_service_handle_hash_set(u64 epid, void *handle) +{ + struct service_handle_entry *new_entry; + + new_entry = kzalloc(sizeof(struct service_handle_entry), GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->epid = epid; + new_entry->handle = handle; + hash_add(service_handle_table, &new_entry->node, hash_min(epid, SERVICE_TABLE_BUCKET_BITS)); + return 0; +} + +struct service_handle_entry *tsse_service_handle_hash_get(u64 epid) +{ + struct service_handle_entry *entry; + + hash_for_each_possible(service_handle_table, entry, node, + hash_min(epid, SERVICE_TABLE_BUCKET_BITS)) { + if (entry->epid == epid) + return entry; + } + return NULL; +} + +void tsse_service_handle_hash_remove(u64 epid) +{ + struct service_handle_entry *entry = tsse_service_handle_hash_get(epid); + + if (entry) { + hash_del(&entry->node); + kfree(entry); + } +} + +void tsse_service_handle_hash_remove_all(void) +{ + int bucket; + struct service_handle_entry *entry; + struct hlist_node *tmp; + + hash_for_each_safe(service_handle_table, bucket, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} diff --git a/drivers/crypto/montage/tsse/tsse_ipc_hash.h b/drivers/crypto/montage/tsse/tsse_ipc_hash.h new file mode 100644 index 0000000000000000000000000000000000000000..963f33a633176f6ef2e824fa8ef3c9ff858c0770 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_hash.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
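Both tables above are the standard <linux/hashtable.h> idiom: a power-of-two bucket array, the key folded down to SERVICE_TABLE_BUCKET_BITS with hash_min(), and a full-key comparison inside the bucket walk because distinct keys can land in the same bucket. Reduced to its essentials, and assuming kernel context, the lookup pattern is:

/* Sketch of the lookup idiom used by both tables; not standalone. */
DEFINE_HASHTABLE(table, SERVICE_TABLE_BUCKET_BITS);

struct entry {
	u64 key;
	struct hlist_node node;
};

static void sketch_insert(struct entry *e, u64 key)
{
	e->key = key;
	hash_add(table, &e->node, hash_min(key, SERVICE_TABLE_BUCKET_BITS));
}

static struct entry *sketch_lookup(u64 key)
{
	struct entry *e;

	/* Walk only the one bucket the key hashes to... */
	hash_for_each_possible(table, e, node,
			       hash_min(key, SERVICE_TABLE_BUCKET_BITS)) {
		/* ...and confirm the full key, since buckets can collide. */
		if (e->key == key)
			return e;
	}
	return NULL;
}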
+ */ + +#ifndef __TSSE_IPC_HASH_H__ +#define __TSSE_IPC_HASH_H__ + +#define SERVICE_TABLE_BUCKET_BITS 8 + +struct service_info_entry { + char *service_name; + void *service_info; + struct hlist_node node; +}; + +struct service_handle_entry { + u64 epid; + void *handle; + struct hlist_node node; +}; + +int tsse_service_info_hash_set(const char *service, void *service_info); +struct service_info_entry *tsse_service_info_hash_get(const char *service); +void tsse_service_info_hash_remove_all(void); +int tsse_service_handle_hash_set(u64 epid, void *handle); +struct service_handle_entry *tsse_service_handle_hash_get(u64 epid); +void tsse_service_handle_hash_remove(u64 epid); +void tsse_service_handle_hash_remove_all(void); + +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_msg.h b/drivers/crypto/montage/tsse/tsse_ipc_msg.h new file mode 100644 index 0000000000000000000000000000000000000000..e8de74d339f973eef6e4c731169287011efdda0f --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_msg.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. + */ + +#ifndef __TSSE_IPC_MSG_H__ +#define __TSSE_IPC_MSG_H__ + +#include + +#define TSSE_IM_DEVICE_NUM_MAX 256 +#define TSSE_IM_SERVICE_NAME_LEN 16 + +enum tsse_im_msg_type { + IM_MSG_TYPE_REQ = 0, + IM_MSG_TYPE_RSP, + IM_MSG_TYPE_NOTIFY +}; + +#pragma pack(push, 4) +struct tsse_im_msg_hdr { + /** @ref enum tsse_im_msg_type */ + uint16_t msg_type; + /** internal command id of the service */ + uint16_t cmd; + uint64_t cookie; +}; + +struct tsse_im_service_info { + char service_name[TSSE_IM_SERVICE_NAME_LEN]; + uint32_t num_devices; + uint8_t device_ids[TSSE_IM_DEVICE_NUM_MAX]; +}; +#pragma pack(pop) + +#define tsse_im_service_handle void * + +/** + * tsse_im_cb_func - callback that processes a device-to-host IPC message + * (also called the response handler). The service layer registers it + * when allocating a service handle via tsse_im_service_handle_alloc. + * @handle: handle to TSSE service + * @msg_payload: actual data related to the specific message class + * @payload_length: length of msg_payload + * Return: 0 on success, error code otherwise + */ +typedef int (*tsse_im_cb_func)(tsse_im_service_handle handle, + void *msg_payload, u32 payload_length); + +int tsse_im_startup(void); + +int tsse_im_service_exist(const char *name); + +int tsse_im_service_handle_alloc(const char *name, + tsse_im_cb_func cb, tsse_im_service_handle *handle); + +int tsse_im_service_handle_free(tsse_im_service_handle handle); + +int tsse_im_service_msg_h2d(tsse_im_service_handle handle, void *msg_payload, u32 payload_length); + +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_service.c b/drivers/crypto/montage/tsse/tsse_ipc_service.c new file mode 100644 index 0000000000000000000000000000000000000000..a9a2e0ba607686531afd114b521c8c6052e6f0ff --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_service.c @@ -0,0 +1,467 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved.
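Taken together, the header above sketches the client contract: allocate a handle for a named service together with a response callback, push host-to-device requests through tsse_im_service_msg_h2d(), and free the handle when done. A hypothetical caller (the service name, the one-word payload and the absent retry logic are illustrative only) would look roughly like:

/* Hypothetical IM client, showing the API surface only. */
static int my_cb(tsse_im_service_handle handle,
		 void *msg_payload, u32 payload_length)
{
	/* Parse the device-to-host response here. */
	return 0;
}

static int my_client_run(void)
{
	tsse_im_service_handle handle;
	u32 req = 0;	/* placeholder request payload */
	int ret;

	ret = tsse_im_service_handle_alloc("my-service", my_cb, &handle);
	if (ret)
		return ret;

	ret = tsse_im_service_msg_h2d(handle, &req, sizeof(req));

	tsse_im_service_handle_free(handle);
	return ret;
}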
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include "tsse_ipc_service.h" +#include "tsse_ipc_drv.h" +#include "tsse_ipc_epid.h" +#include "tsse_ipc_hash.h" +#include "tsse_ipc_msg.h" +#include "tsse_dev.h" + +static DEFINE_MUTEX(tsse_device_table_lock); +static int tsse_device_last_used_index = -1; + +static int service_request_post_process( + int ret, void *req, + struct tsse_service_user_data *user_data, + tsse_im_service_handle handle, + post_process_func func) +{ + if (ret) + goto cleanup; + if (!wait_for_completion_timeout(&user_data->req_completion, + msecs_to_jiffies(TSSE_SERVICE_MAX_WAIT_MILLISECONDS))) { + pr_err("%s(): completion timeout\n", __func__); + ret = -EFAULT; + goto cleanup; + } + ret = user_data->status; + if ((ret == 0) && func) + func(handle); +cleanup: + kfree(user_data); + kfree(req); + return ret; +} + +int tsse_service_msg_send( + tsse_im_service_handle handle, + u32 service_cmd, + void *msg_payload, + u32 payload_length) +{ + struct tsse_service_instance *service_instance; + u64 epid; + + service_instance = (struct tsse_service_instance *)handle; + if ((service_cmd == TSSE_SERVICE_CMD_APP_MSG) && !service_instance->service_opened) { + pr_err("%s(): service handle is not ready to use\n", __func__); + return -EPERM; + } + epid = APPEND_APP_ID_TO_EPID(service_instance->service_epid, service_cmd); + return ipc_h2d_msg_send(service_instance->device_handle, epid, msg_payload, payload_length); +} + +static int tsse_service_open_post_process(tsse_im_service_handle handle) +{ + struct tsse_service_instance *service_instance; + int ret; + + service_instance = (struct tsse_service_instance *)handle; + ret = tsse_service_handle_hash_set(service_instance->service_epid, handle); + if (ret) { + pr_err("%s() failed to save service handle: %d\n", __func__, ret); + return ret; + } + service_instance->service_opened = 1; + return 0; +} + +int tsse_service_open(tsse_im_service_handle handle) +{ + struct tsse_service_open_req *req; + struct tsse_service_user_data *user_data; + struct tsse_service_instance *service_instance; + int ret; + + ret = tsse_alloc_service_epid(handle); + if (ret) + return ret; + + req = kzalloc(sizeof(struct tsse_service_open_req), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + user_data = kzalloc(sizeof(struct tsse_service_user_data), GFP_ATOMIC); + if (!user_data) { + kfree(req); + return -ENOMEM; + } + + init_completion(&user_data->req_completion); + service_instance = (struct tsse_service_instance *)handle; + req->hdr.msg_type = IM_MSG_TYPE_REQ; + req->hdr.cookie = (u64)user_data; + memcpy(req->service_name, service_instance->service_name, TSSE_IM_SERVICE_NAME_LEN); + ret = tsse_service_msg_send(handle, TSSE_SERVICE_CMD_OPEN, + req, sizeof(struct tsse_service_open_req)); + return service_request_post_process(ret, req, user_data, + handle, tsse_service_open_post_process); +} + +static int tsse_service_close_post_process(tsse_im_service_handle handle) +{ + struct tsse_service_instance *service_instance; + + service_instance = (struct tsse_service_instance *)handle; + service_instance->service_opened = 0; + tsse_service_handle_hash_remove(service_instance->service_epid); + tsse_free_service_epid(handle); + return 0; +} + +int tsse_service_close(tsse_im_service_handle handle) +{ + struct tsse_service_close_req *req; + struct tsse_service_user_data *user_data; + int ret; + + req = kzalloc(sizeof(struct tsse_service_close_req), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + user_data = kzalloc(sizeof(struct 
tsse_service_user_data), GFP_ATOMIC); + if (!user_data) { + kfree(req); + return -ENOMEM; + } + + init_completion(&user_data->req_completion); + req->hdr.msg_type = IM_MSG_TYPE_REQ; + req->hdr.cookie = (u64)user_data; + ret = tsse_service_msg_send(handle, TSSE_SERVICE_CMD_CLOSE, + req, sizeof(struct tsse_service_close_req)); + return service_request_post_process(ret, req, user_data, + handle, tsse_service_close_post_process); +} + +int tsse_services_query_request(tsse_im_service_handle handle) +{ + struct tsse_services_query_req *req; + struct tsse_service_user_data *user_data; + int ret; + + req = kzalloc(sizeof(struct tsse_services_query_req), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + user_data = kzalloc(sizeof(struct tsse_service_user_data), GFP_ATOMIC); + if (!user_data) { + kfree(req); + return -ENOMEM; + } + + init_completion(&user_data->req_completion); + req->hdr.msg_type = IM_MSG_TYPE_REQ; + req->hdr.cmd = TSSE_SERVICES_QUERY_CMD; + req->hdr.cookie = (u64)user_data; + ret = tsse_service_msg_send(handle, TSSE_SERVICE_CMD_APP_MSG, + req, sizeof(struct tsse_services_query_req)); + return service_request_post_process(ret, req, user_data, handle, NULL); +} + +int tsse_services_query_response(tsse_im_service_handle handle, void *payload, u32 length) +{ + struct tsse_service_instance *instance; + struct tsse_service_comm_resp *resp; + struct tsse_service_user_data *user_data; + struct tsse_services_query_resp *resp_payload; + struct tsse_service_info *service_info; + struct service_info_entry *entry; + char service_name[TSSE_IM_SERVICE_NAME_LEN] = {0}; + u32 buffer_len; + u32 data_offset; + u32 index; + u32 device_exists = 0; + + instance = (struct tsse_service_instance *) handle; + if (length < sizeof(struct tsse_service_comm_resp)) { + pr_err("%s() invalid length: %u\n", __func__, length); + return -EFAULT; + } + resp = (struct tsse_service_comm_resp *)payload; + user_data = (struct tsse_service_user_data *)resp->hdr.cookie; + if (resp->hdr.msg_type != IM_MSG_TYPE_RSP) { + pr_err("%s() invalid msg_type: %u\n", __func__, resp->hdr.msg_type); + return -EFAULT; + } + if (!user_data) { + pr_err("%s() empty cookie in resp header\n", __func__); + return -EFAULT; + } + length -= sizeof(struct tsse_service_comm_resp); + data_offset = 0; + while (data_offset < length) { + resp_payload = (struct tsse_services_query_resp *)( + resp->data + data_offset); + buffer_len = resp_payload->len + 1; + if (buffer_len > TSSE_IM_SERVICE_NAME_LEN) + buffer_len = TSSE_IM_SERVICE_NAME_LEN; + strscpy(service_name, resp_payload->data, buffer_len); + entry = tsse_service_info_hash_get(service_name); + if (entry) { + service_info = (struct tsse_service_info *)entry->service_info; + for (index = 0; index < service_info->num_devices; index++) + device_exists |= (service_info->device_handles[index] + == instance->device_handle); + if (!device_exists) { + service_info->device_handles[service_info->num_devices] + = instance->device_handle; + service_info->num_devices++; + } + } else { + service_info = kzalloc(sizeof(struct tsse_service_info), GFP_ATOMIC); + if (!service_info) + return -ENOMEM; + memcpy(service_info->name, service_name, TSSE_IM_SERVICE_NAME_LEN); + service_info->num_devices = 1; + service_info->device_handles[0] = instance->device_handle; + tsse_service_info_hash_set(service_name, service_info); + } + data_offset += (sizeof(struct tsse_services_query_resp) + + resp_payload->len); + } + user_data->status = resp->ret_code; + complete(&user_data->req_completion); + return 0; +} + +static int 
tsse_service_open_close_resp(void *msg, u32 msg_len) +{ + struct tsse_service_comm_resp *resp; + struct tsse_service_user_data *user_data; + + if (msg_len < sizeof(struct tsse_service_comm_resp)) { + pr_err("%s() invalid msg_len: %u\n", __func__, msg_len); + return -EFAULT; + } + resp = (struct tsse_service_comm_resp *)msg; + user_data = (struct tsse_service_user_data *)resp->hdr.cookie; + if (resp->hdr.msg_type != IM_MSG_TYPE_RSP) { + pr_err("%s() invalid msg_type: %u\n", __func__, resp->hdr.msg_type); + return -EFAULT; + } + if (!user_data) { + pr_err("%s() empty cookie in resp header\n", __func__); + return -EFAULT; + } + user_data->status = resp->ret_code; + complete(&user_data->req_completion); + return 0; +} + +static int tsse_service_app_resp(u64 epid, void *msg, u32 msg_len) +{ + struct service_handle_entry *entry; + struct tsse_service_instance *instance; + + entry = tsse_service_handle_hash_get(SERVICE_LEVEL_EPID(epid)); + if (!entry || !entry->handle) { + pr_err("%s() cannot find service handle for epid: 0x%llx\n", __func__, epid); + return -EFAULT; + } + instance = (struct tsse_service_instance *)entry->handle; + return instance->cb(instance, msg, msg_len); +} + +int tsse_service_msg_receive(u64 epid, void *msg, u32 msg_len) +{ + u32 service_cmd; + + if (!msg || !msg_len) { + pr_err("%s() service resp msg should not be empty\n", __func__); + return -EFAULT; + } + service_cmd = GET_APP_SPECIFIC_ID(epid); + switch (service_cmd) { + case TSSE_SERVICE_CMD_OPEN: + case TSSE_SERVICE_CMD_CLOSE: + return tsse_service_open_close_resp(msg, msg_len); + case TSSE_SERVICE_CMD_APP_MSG: + return tsse_service_app_resp(epid, msg, msg_len); + default: + return -EFAULT; + } + return 0; +} + +int tsse_schedule_device_handle(tsse_im_service_handle handle) +{ + struct tsse_service_info *service_info; + struct tsse_service_instance *service_instance; + struct service_info_entry *entry; + u32 device_handle_index; + + service_instance = (struct tsse_service_instance *)handle; + entry = tsse_service_info_hash_get(service_instance->service_name); + if (!entry || !entry->service_info) { + pr_err("%s(): service %s not exist\n", __func__, service_instance->service_name); + return -EFAULT; + } + service_info = (struct tsse_service_info *)entry->service_info; + if (service_info->num_devices == 0) { + pr_err("%s(): no available device for service: %s\n", + __func__, service_instance->service_name); + return -EFAULT; + } + mutex_lock(&tsse_device_table_lock); + if (tsse_device_last_used_index < 0) + device_handle_index = 0; + else + device_handle_index = (tsse_device_last_used_index + 1) % service_info->num_devices; + tsse_device_last_used_index = device_handle_index; + mutex_unlock(&tsse_device_table_lock); + service_instance->device_handle = service_info->device_handles[device_handle_index]; + return 0; +} + +static u64 get_init_ring_epid(int device_handle) +{ + struct tsse_epid epid_data = {0}; + + epid_data.service_id = EPID_MANAGE_SERVICE_ID; + epid_data.pasid_en = 0; + epid_data.vf_id = 0; + epid_data.is_pf = 1; + epid_data.device_id = device_handle; + epid_data.app_id = TSSE_IPC_SPECIFIC_RING_SETUP_REQ; + return EPID_TO_UINT64(&epid_data); +} + +int tsse_ipc_setup_ring(int device_handle, u32 is_create) +{ + int ret; + u64 epid; + struct tsse_ipc_ring_setup_req *setup_req; + struct tsse_service_user_data *user_data; + + setup_req = kzalloc(sizeof(struct tsse_ipc_ring_setup_req), GFP_ATOMIC); + user_data = kzalloc(sizeof(struct tsse_service_user_data), GFP_ATOMIC); + if (!setup_req || !user_data) + return 
-ENOMEM; + + setup_req->cookie = (u64) user_data; + setup_req->is_create = is_create > 0 ? 1 : 0; + epid = get_init_ring_epid(device_handle); + if (is_create) + init_completion(&user_data->req_completion); + + ret = ipc_h2d_msg_send(device_handle, epid, setup_req, + sizeof(struct tsse_ipc_ring_setup_req)); + if (ret) + goto cleanup; + if (is_create) { + if (!wait_for_completion_timeout(&user_data->req_completion, + msecs_to_jiffies(TSSE_SERVICE_MAX_WAIT_MILLISECONDS))) { + pr_err("%s(): completion timeout\n", __func__); + ret = -EFAULT; + goto cleanup; + } + ret = user_data->status; + } +cleanup: + kfree(user_data); + kfree(setup_req); + return ret; +} + +int ipc_ring_setup_resp_receive(void *msg, u32 length) +{ + struct tsse_ipc_ring_setup_resp *resp; + struct tsse_service_user_data *user_data; + + if (length < sizeof(struct tsse_ipc_ring_setup_resp)) { + pr_err("%s %d: invalid resp len: %u\n", __func__, __LINE__, length); + return -EINVAL; + } + resp = (struct tsse_ipc_ring_setup_resp *)msg; + user_data = (struct tsse_service_user_data *)resp->cookie; + user_data->status = resp->ret; + complete(&user_data->req_completion); + return 0; +} + +static int tsse_im_services_init(struct pci_dev *pdev) +{ + struct tsse_dev *tdev = pci_to_tsse_dev(pdev); + struct tsse_service_instance *service_instance; + int ret; + + service_instance = kzalloc(sizeof(struct tsse_service_instance), GFP_ATOMIC); + if (!service_instance) + return -ENOMEM; + service_instance->service_opened = 0; + service_instance->device_handle = tdev->id; + service_instance->cb = tsse_services_query_response; + strscpy(service_instance->service_name, TSSE_MANAGE_SERVICE_NAME, TSSE_IM_SERVICE_NAME_LEN); + + ret = tsse_service_open(service_instance); + if (ret) { + pr_err("%s(): open service: %s failed: %d\n", + __func__, service_instance->service_name, ret); + goto cleanup; + } + ret = tsse_services_query_request(service_instance); + if (ret) { + pr_err("%s(): services query failed: %d\n", __func__, ret); + goto cleanup; + } + ret = tsse_service_close(service_instance); + if (ret) { + pr_err("%s(): close service: %s failed: %d\n", + __func__, service_instance->service_name, ret); + goto cleanup; + } +cleanup: + kfree(service_instance); + return ret; +} + +int tsse_im_startup_for_dev(struct tsse_dev *tdev) +{ + int ret; + + if (!tdev || !tdev->ipc) { + pr_err("failed to startup im, the device is not ready\n"); + return -EPERM; + } + if (tdev->ipc->im_inited) + return 0; + ret = tsse_ipc_setup_ring(tdev->id, 1); + if (ret == 0) + ret = tsse_im_services_init(tdev->tsse_pci_dev.pci_dev); + if (ret == 0) { + tdev->ipc->im_inited = 1; + return ret; + } + tsse_im_shutdown_for_dev(tdev); + return ret; +} + +int tsse_im_shutdown_for_dev(struct tsse_dev *tdev) +{ + struct tsse_ipc *tsseipc; + int ret = 0; + + if (!tdev) + return 0; + + tsseipc = tdev->ipc; + if (tsseipc && tsseipc->im_inited) { + ret = tsse_ipc_setup_ring(tdev->id, 0); + if (ret == 0) + tsseipc->im_inited = 0; + } + return ret; +} diff --git a/drivers/crypto/montage/tsse/tsse_ipc_service.h b/drivers/crypto/montage/tsse/tsse_ipc_service.h new file mode 100644 index 0000000000000000000000000000000000000000..24db2f5a174c0dc872c6605e128aa32ab9e06f6a --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_service.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
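Every request in this file (service open/close, the services query, and the ring setup just above) correlates its response the same way: a pointer to a per-request tsse_service_user_data rides in the 64-bit cookie field, the sender sleeps on its completion, and the response path casts the cookie back and calls complete(). The bare mechanism, kernel context assumed and message plumbing elided:

/* Requester side: park a context pointer in the wire cookie. */
struct req_ctx {
	struct completion done;
	int status;
};

static int send_and_wait(struct req_ctx *ctx, u64 *cookie_field)
{
	init_completion(&ctx->done);
	*cookie_field = (u64)ctx;	/* travels in the message header */
	/* ... hand the message to the device here ... */
	if (!wait_for_completion_timeout(&ctx->done,
			msecs_to_jiffies(TSSE_SERVICE_MAX_WAIT_MILLISECONDS)))
		return -EFAULT;
	return ctx->status;
}

/* Response side: recover the context from the echoed cookie. */
static void on_response(u64 cookie, int ret_code)
{
	struct req_ctx *ctx = (struct req_ctx *)cookie;

	ctx->status = ret_code;
	complete(&ctx->done);
}

Note that service_request_post_process() and tsse_ipc_setup_ring() free the context after a timeout, so the scheme relies on the device never delivering a response later than the 5-second TSSE_SERVICE_MAX_WAIT_MILLISECONDS window.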
+ */ + +#ifndef __TSSE_IPC_SERVICE_H__ +#define __TSSE_IPC_SERVICE_H__ + +#include +#include +#include "tsse_ipc_msg.h" +#include "tsse_dev.h" + +#define TSSE_SERVICES_QUERY_CMD 0 +#define TSSE_MANAGE_SERVICE_NAME "manage service" +#define TSSE_SERVICE_MAX_WAIT_MILLISECONDS 5000 + +enum tsse_service_cmd { + TSSE_SERVICE_CMD_OPEN = 50, + TSSE_SERVICE_CMD_CLOSE, + TSSE_SERVICE_CMD_APP_MSG, + TSSE_SERVICE_CMD_DELIMITER +}; + +#pragma pack(push, 4) +struct tsse_service_instance { + u8 service_opened; + u8 service_name[TSSE_IM_SERVICE_NAME_LEN]; + int device_handle; + tsse_im_cb_func cb; + u64 service_epid; +}; + +struct tsse_service_user_data { + struct completion req_completion; + int status; +}; + +struct tsse_service_open_req { + struct tsse_im_msg_hdr hdr; + uint8_t service_name[TSSE_IM_SERVICE_NAME_LEN]; +}; + +struct tsse_service_close_req { + struct tsse_im_msg_hdr hdr; +}; + +struct tsse_services_query_req { + struct tsse_im_msg_hdr hdr; +}; + +struct tsse_services_query_resp { + u16 type; + u16 len; + u8 data[]; +}; + +struct tsse_service_comm_resp { + struct tsse_im_msg_hdr hdr; + int ret_code; + u8 data[]; +}; + +struct tsse_service_info { + char name[TSSE_IM_SERVICE_NAME_LEN]; + u32 num_devices; + u8 device_handles[TSSE_IM_DEVICE_NUM_MAX]; + struct list_head list; +}; + +struct tsse_ipc_ring_setup_req { + u64 cookie; + u32 is_create; + u32 reserved[13]; +}; + +struct tsse_ipc_ring_setup_resp { + uint64_t cookie; + int32_t ret; +}; +#pragma pack(pop) + +int tsse_service_msg_send( + tsse_im_service_handle handle, + u32 service_cmd, + void *msg_payload, + u32 payload_length); + +int tsse_service_msg_receive(u64 epid, void *msg, u32 msg_len); + +int tsse_service_open(tsse_im_service_handle handle); +int tsse_service_close(tsse_im_service_handle handle); +int tsse_services_query_request(tsse_im_service_handle handle); +int tsse_services_query_response(tsse_im_service_handle handle, void *payload, u32 length); +int tsse_schedule_device_handle(tsse_im_service_handle handle); +int tsse_ipc_setup_ring(int device_handle, u32 is_create); +int ipc_ring_setup_resp_receive(void *msg, u32 length); + +int tsse_im_shutdown_for_dev(struct tsse_dev *tdev); +int tsse_im_startup_for_dev(struct tsse_dev *tdev); + +typedef int (*post_process_func)(tsse_im_service_handle handle); +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_setup.c b/drivers/crypto/montage/tsse/tsse_ipc_setup.c new file mode 100644 index 0000000000000000000000000000000000000000..d1d0bfc98b36ddaad5f0957002648b96c98d9480 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_setup.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include "tsse_ipc.h" +#include "tsse_ipc_drv.h" +#include "tsse_ipc_setup.h" +#include "tsse_ipc_msg.h" +#include "tsse_ipc_service.h" +#include "tsse_ipc_hash.h" +#include "tsse_ipc_epid.h" +#ifndef DISABLE_FW +#include "tsse_fw_service.h" +#endif +#include "tsse_dev.h" + +/** + * ipc_hw_init()- Enable main2host interrupt, cleanup interrupt + * set value in host2main and main2host. 
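The #pragma pack(push, 4) bracketing these structs pins the wire layout: 64-bit members align to 4 bytes instead of 8, so host and device agree on offsets regardless of compiler defaults. A compile-time illustration of what the directive changes, assuming a typical 64-bit ABI (the struct names here are stand-ins):

#include <stdint.h>

#pragma pack(push, 4)
struct hdr {			/* mirrors struct tsse_im_msg_hdr */
	uint16_t msg_type;
	uint16_t cmd;
	uint64_t cookie;	/* lands at offset 4, not 8 */
};
#pragma pack(pop)

struct hdr_natural {		/* same fields, default alignment */
	uint16_t msg_type;
	uint16_t cmd;
	uint64_t cookie;	/* padded out to offset 8 */
};

_Static_assert(sizeof(struct hdr) == 12, "packed wire header");
_Static_assert(sizeof(struct hdr_natural) == 16, "padded otherwise");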
+ * @hw_ipc: pointer to a structure used for IPC + */ +static void ipc_hw_init(struct tsse_ipc *hw_ipc) +{ + writel(0x1, hw_ipc->virt_addr + MAIN2HOST_INTR_ENABLE_OFFSET); + writel(0x0, hw_ipc->virt_addr + HOST2MAIN_INTR_SET_OFFSET); + writel(0x0, hw_ipc->virt_addr + MAIN2HOST_INTR_SET_OFFSET); +} + +static void tsse_ipc_bh_handler(unsigned long data) +{ + struct tsse_ipc *tsseipc = (struct tsse_ipc *)data; + void __iomem *d2h_msg = tsseipc->virt_addr + MAIN2HOST_IPC_OFFSET; + int ret = ipc_d2h_msg_dispatch(tsseipc, d2h_msg); + + if (ret) + dev_err(tsseipc->dev, "%s: device message callback result: %d\n", + __func__, ret); +} + +static irqreturn_t tsse_ipc_d2h_irqhandler(int irq, void *dev_id) +{ + struct tsse_ipc *tsseipc = (struct tsse_ipc *)dev_id; + + writel(0x0, tsseipc->virt_addr + MAIN2HOST_INTR_SET_OFFSET); + tasklet_schedule(&tsseipc->ipc_handle); + dev_err(tsseipc->dev, "irq%d\n", irq); + return IRQ_HANDLED; +} + +#ifndef DISABLE_FW +static int host_init_msg(int handle) +{ + uint32_t cmd = IPC_BASIC_CMD_HOST_INIT; + + return ipc_h2d_msg_send_legacy(handle, IPC_MESSAGE_BASIC, &cmd, sizeof(uint32_t)); +} +#endif + +int tsse_ipc_init(struct pci_dev *pdev) +{ + struct tsse_dev *tdev = pci_to_tsse_dev(pdev); + struct tsse_ipc *ipc; + int rc; + + ipc = devm_kzalloc(&pdev->dev, sizeof(*ipc), GFP_KERNEL); + if (ipc == NULL) + return -ENOMEM; + tdev->ipc = ipc; + ipc->pdev = pdev; + ipc->dev = &pdev->dev; + ipc->virt_addr = TSSE_DEV_BARS(tdev)[2].virt_addr; + ipc->im_inited = 0; + + mutex_init(&ipc->list_lock); + tasklet_init(&(ipc->ipc_handle), tsse_ipc_bh_handler, (ulong)(ipc)); + + rc = request_threaded_irq(pci_irq_vector(pdev, 0), NULL, + tsse_ipc_d2h_irqhandler, IRQF_SHARED, + "pf-ipc", ipc); + if (rc) { + dev_err(&pdev->dev, "request_threaded_irq failed: %d\n", rc); + return rc; + } + ipc_hw_init(ipc); +#ifndef DISABLE_FW + ipc->d2h_handlers[IPC_MESSAGE_BOOT] = fw_service; + rc = host_init_msg(tdev->id); + if (rc) { + dev_err(&pdev->dev, "host_init_msg failed: %d\n", rc); + tsse_ipc_deinit(tdev); + return rc; + } +#endif + return rc; +} + +void tsse_ipc_deinit(void *tdev_t) +{ + struct tsse_ipc *tsseipc; + struct pci_dev *pdev; + struct tsse_dev *tdev; + + tdev = tdev_t; + tsseipc = tdev->ipc; + pdev = tsseipc->pdev; + if (tsseipc) { + tsse_im_shutdown_for_dev(tdev); + free_irq(pci_irq_vector(pdev, 0), tdev->ipc); + tdev->ipc = NULL; + } + tsse_service_info_hash_remove_all(); + tsse_service_handle_hash_remove_all(); +} + +#ifndef DISABLE_FW +int tsse_fw_manual_load_ipc(struct pci_dev *pdev) +{ + struct tsse_dev *tdev = pci_to_tsse_dev(pdev); + struct tsse_ipc *ipc = tdev->ipc; + int rc = -EFAULT; + + if (ipc) { + rc = host_init_msg(tdev->id); + if (rc) + dev_err(&pdev->dev, "host_init_msg failed: %d\n", rc); + } + return rc; +} +#endif diff --git a/drivers/crypto/montage/tsse/tsse_ipc_setup.h b/drivers/crypto/montage/tsse/tsse_ipc_setup.h new file mode 100644 index 0000000000000000000000000000000000000000..610b2851f9900684ba4ff8324e41c62ee70a9153 --- /dev/null +++ b/drivers/crypto/montage/tsse/tsse_ipc_setup.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file is part of tsse driver for Linux + * + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
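tsse_ipc_init() above wires the interrupt in the usual two halves: the hard-IRQ handler only clears the doorbell register and schedules a tasklet, and the tasklet bottom half does the actual message dispatch outside hard-interrupt context. The skeleton of that split, kernel context assumed:

/* Sketch of the top-half / bottom-half split used above. */
static struct tasklet_struct my_tasklet;

static void my_bh(unsigned long data)
{
	/* Softirq context: safe to do the heavier message parsing. */
}

static irqreturn_t my_top_half(int irq, void *dev_id)
{
	/* Hard-IRQ context: just ack the doorbell and defer. */
	tasklet_schedule(&my_tasklet);
	return IRQ_HANDLED;
}

/* At probe: tasklet_init(&my_tasklet, my_bh, (unsigned long)ctx); */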
+ */ + +#ifndef __TSSE_IPC_SETUP_H__ +#define __TSSE_IPC_SETUP_H__ + +#include +#include +#include +#include + +#define HOST2MAIN_INTR_SET_OFFSET 0x2000 +#define HOST2MAIN_INTR_ENABLE_OFFSET 0x2004 +#define HOST2MAIN_ACK_INTR_CLR_OFFSET 0x2008 +#define HOST2MAIN_ACK_INTR_ENABLE_OFFSET 0x200c +#define HOST2MAIN_VLD_INTR_STATUS_OFFSET 0x2010 +#define HOST2MAIN_ACK_INTR_STATUS_OFFSET 0x2014 +#define MSIX_MASK_EN_REG_OFFSET 0x2020 +#define INTR_MASK_BIT_OFFSET 0x2024 +#define INTR_PENDING_BIT_OFFSET 0x2028 +#define HOST2MAIN_IPC_OFFSET 0x2400 + +#define MAIN2HOST_INTR_SET_OFFSET 0x3000 +#define MAIN2HOST_INTR_ENABLE_OFFSET 0x3004 +#define MAIN2HOST_ACK_INTR_CLR_OFFSET 0x3008 +#define MAIN2HOST_ACK_INTR_ENABLE_OFFSET 0x300c +#define MAIN2HOST_VEN_MSI_FUNC_NUM_OFFSET 0x3010 +#define MAIN2HOST_VEN_MSI_VFUNC_ACTIVE_OFFSET 0x3014 +#define MAIN2HOST_IPC_OFFSET 0x3400 + +#define IPC_REGISTER_INT_SET BIT(0) +#define IPC_REGISTER_INT_MASK BIT(1) + +#define IPC_MAX_DATA_LEN 1024 + +typedef int (*tsse_d2h_ipc_handler)(int handle, void *msg_payload, uint32_t payload_length); + +enum IPC_BASIC_CMD { + IPC_BASIC_CMD_HOST_INIT = 0x1, + IPC_BASIC_CMD_PING = 0x2 +}; + +enum IPC_BOOT_CMD { + IPC_BOOT_CMD_GET_FIRMWARE = 0x1 +}; + +enum IPC_MESSAGE_CLASS { + IPC_MESSAGE_BASIC = 1, + IPC_MESSAGE_BOOT, + IPC_MESSAGE_CLASS_NUM, +}; + +struct tsse_ipc { + struct device *dev; + struct pci_dev *pdev; + void __iomem *virt_addr; + struct mutex list_lock; + struct tasklet_struct ipc_handle; + tsse_d2h_ipc_handler d2h_handlers[IPC_MESSAGE_CLASS_NUM]; + u32 im_inited; +}; + +int tsse_ipc_init(struct pci_dev *pdev); +void tsse_ipc_deinit(void *tdev_t); +int tsse_fw_manual_load_ipc(struct pci_dev *pdev); +int tsse_ipc_services_init(struct pci_dev *pdev); + +#endif diff --git a/drivers/crypto/montage/tsse/tsse_service.c b/drivers/crypto/montage/tsse/tsse_service.c index e4be85535b7765f6dc3db73642b7d519950bd715..65d3fee9f212d32fac3a93cc900fc4c55bb3b55f 100644 --- a/drivers/crypto/montage/tsse/tsse_service.c +++ b/drivers/crypto/montage/tsse/tsse_service.c @@ -2,27 +2,42 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. 
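The d2h_handlers array in struct tsse_ipc above turns device-to-host dispatch into a table lookup indexed by enum IPC_MESSAGE_CLASS; tsse_ipc_init() installs fw_service for IPC_MESSAGE_BOOT, and unregistered classes are caught as NULL slots. The shape of the pattern, stripped of the driver specifics:

/* Dispatch-table sketch; the handler signature mirrors
 * tsse_d2h_ipc_handler from the header above.
 */
typedef int (*d2h_handler)(int handle, void *payload, uint32_t len);

static d2h_handler handlers[IPC_MESSAGE_CLASS_NUM];

static int dispatch(uint32_t msg_class, int handle,
		    void *payload, uint32_t len)
{
	/* Reject out-of-range classes and empty slots alike. */
	if (msg_class >= IPC_MESSAGE_CLASS_NUM || !handlers[msg_class])
		return -EFAULT;
	return handlers[msg_class](handle, payload, len);
}

Since the class index ultimately comes from the device, an explicit upper-bound check like the one in this sketch is cheap insurance alongside the NULL-slot test.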
*/ #include #include "tsse_service.h" +#include "tsse_dev.h" int service_rout(struct tsse_ipc *tsseipc, struct ipc_msg *msg) { struct msg_info *info; + struct tsse_dev *tdev; + tsse_d2h_ipc_handler d2h_handler; + void *payload; uint32_t msg_class; - int ret = 0; + uint32_t comm_msg_length; + uint32_t payload_length; + int ret; info = (struct msg_info *)msg->i_data; msg_class = info->msg_class; - switch (msg_class) { - case IPC_MESSAGE_BOOT: - fw_service(tsseipc, msg); - break; + d2h_handler = tsseipc->d2h_handlers[msg_class]; - default: - ret = -EINVAL; - break; + if (!d2h_handler) { + dev_err(tsseipc->dev, "%s %d: no d2h handler for msg [%u]\n", + __func__, __LINE__, msg_class); + return -EFAULT; } + tdev = pci_to_tsse_dev(tsseipc->pdev); + if (!tdev) { + dev_err(tsseipc->dev, "%s %d: no related dev info for ipc\n", + __func__, __LINE__); + return -EFAULT; + } + comm_msg_length = sizeof(struct ipc_header) + sizeof(struct msg_info); + payload = (void *) ((uint8_t *)msg + comm_msg_length); + payload_length = msg->header.i_len - comm_msg_length; + + ret = d2h_handler(tdev->id, payload, payload_length); return ret; } diff --git a/drivers/crypto/montage/tsse/tsse_service.h b/drivers/crypto/montage/tsse/tsse_service.h index d5fd87ee7dce430146e0560973c8a405a0ef0947..8672f9e75f126cf4a40ec5c3403215f697616fd5 100644 --- a/drivers/crypto/montage/tsse/tsse_service.h +++ b/drivers/crypto/montage/tsse/tsse_service.h @@ -2,14 +2,13 @@ /* * This file is part of tsse driver for Linux * - * Copyright © 2023 Montage Technology. All rights reserved. + * Copyright © 2023-2024 Montage Technology. All rights reserved. */ #ifndef __TSSE_SERVICE_H__ #define __TSSE_SERVICE_H__ #include "tsse_ipc.h" -#include "tsse_fw_service.h" int service_rout(struct tsse_ipc *tsseipc, struct ipc_msg *msg); diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 3a849168780dd1680a6e9e55e3500d276be87f2d..eec69c885c1dec514ea32229143f2e7ff923d0ec 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -58,11 +58,13 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o -skx_edac-y := skx_common.o skx_base.o -obj-$(CONFIG_EDAC_SKX) += skx_edac.o +skx_edac_common-y := skx_common.o -i10nm_edac-y := skx_common.o i10nm_base.o -obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o +skx_edac-y := skx_base.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o + +i10nm_edac-y := i10nm_base.o +obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o obj-$(CONFIG_EDAC_CELL) += cell_edac.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index f841bc44de4e3babcbbdbca338ff260f8d40ae25..2dfd963a81aed8184c5ac01c7f7efb2a26ba785d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -782,7 +782,12 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr { u64 dram_base_addr, dram_limit_addr, dram_hole_base; - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 die_id_shift, socket_id_shift; +#ifdef CONFIG_CPU_SUP_HYGON + u16 die_id_mask, socket_id_mask; +#else + u8 die_id_mask, socket_id_mask; +#endif u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; u8 intlv_addr_sel, intlv_addr_bit; u8 num_intlv_bits, hashed_bit; @@ -801,7 +806,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr ctx.inst_id = umc; /* Read DramOffset, check if base 1 is used. 
*/ - if (hygon_f18h_m4h() && + if ((hygon_f18h_m4h() || hygon_f18h_m10h()) && df_indirect_read_instance(nid, 0, 0x214, umc, &ctx.tmp)) goto out_err; else if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) @@ -829,7 +834,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr } intlv_num_sockets = 0; - if (hygon_f18h_m4h()) + if (hygon_f18h_m4h() || hygon_f18h_m10h()) intlv_num_sockets = (ctx.tmp >> 2) & 0x3; lgcy_mmio_hole_en = ctx.tmp & BIT(1); intlv_num_chan = (ctx.tmp >> 4) & 0xF; @@ -847,13 +852,20 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) goto out_err; - if (!hygon_f18h_m4h()) + if (!hygon_f18h_m4h() && !hygon_f18h_m10h()) intlv_num_sockets = (ctx.tmp >> 8) & 0x1; intlv_num_dies = (ctx.tmp >> 10) & 0x3; dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); intlv_addr_bit = intlv_addr_sel + 8; + if ((hygon_f18h_m4h() && boot_cpu_data.x86_model >= 0x6) || + hygon_f18h_m10h()) { + if (df_indirect_read_instance(nid, 0, 0x60, umc, &ctx.tmp)) + goto out_err; + intlv_num_dies = ctx.tmp & 0x3; + } + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ switch (intlv_num_chan) { case 0: intlv_num_chan = 0; break; @@ -897,7 +909,12 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr if (num_intlv_bits > 0) { u64 temp_addr_x, temp_addr_i, temp_addr_y; - u8 die_id_bit, sock_id_bit, cs_fabric_id; + u8 die_id_bit, sock_id_bit; +#ifdef CONFIG_CPU_SUP_HYGON + u16 cs_fabric_id; +#else + u8 cs_fabric_id; +#endif /* * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. @@ -908,7 +925,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) goto out_err; - if (hygon_f18h_m4h()) + if (hygon_f18h_m4h() || hygon_f18h_m10h()) cs_fabric_id = (ctx.tmp >> 8) & 0x7FF; else cs_fabric_id = (ctx.tmp >> 8) & 0xFF; @@ -934,12 +951,14 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr if (hygon_f18h_m4h()) { die_id_shift = (ctx.tmp >> 12) & 0xF; die_id_mask = ctx.tmp & 0x7FF; + cs_id |= (((cs_fabric_id & die_id_mask) >> die_id_shift) - 4) << + die_id_bit; } else { die_id_shift = (ctx.tmp >> 24) & 0xF; die_id_mask = (ctx.tmp >> 8) & 0xFF; + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << + die_id_bit; } - - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; } /* If interleaved over more than 1 socket. 
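The Hygon-specific branch above differs from the stock decode in two ways: the die-ID mask read from the fabric register is 11 bits wide (which is why die_id_mask and cs_fabric_id were widened to u16 earlier in this function) and the extracted die index is rebased by 4 before being folded into cs_id. A worked example of that arithmetic; all register values below are made up for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tmp          = (8 << 12) | 0x700;	/* shift=8, mask=0x700 */
	uint16_t cs_fabric_id = 0x512;
	uint8_t  die_id_bit   = 0;
	uint8_t  die_id_shift = (tmp >> 12) & 0xF;	/* 8 */
	uint16_t die_id_mask  = tmp & 0x7FF;		/* 0x700 */
	uint64_t cs_id = 0;

	/* Hygon f18h_m4h variant: extract the die index, rebase by 4. */
	cs_id |= (uint64_t)(((cs_fabric_id & die_id_mask) >> die_id_shift) - 4)
		 << die_id_bit;
	printf("cs_id = %llu\n", (unsigned long long)cs_id);	/* 1 */
	return 0;
}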
*/ @@ -1280,12 +1299,29 @@ static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) } } +static bool hygon_umc_channel_enabled(struct amd64_pvt *pvt, int channel) +{ + u32 enable; + + if (hygon_f18h_m10h()) { + df_indirect_read_instance(pvt->mc_node_id, 1, 0x32c, 0xc, &enable); + if ((enable & BIT(channel))) + return true; + return false; + } + + return true; +} + static void umc_dump_misc_regs(struct amd64_pvt *pvt) { struct amd64_umc *umc; u32 i, tmp, umc_base; for_each_umc(i) { + if (!hygon_umc_channel_enabled(pvt, i)) + continue; + if (hygon_f18h_m4h()) umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, i); else @@ -1402,6 +1438,9 @@ static void umc_read_base_mask(struct amd64_pvt *pvt) int cs, umc; for_each_umc(umc) { + if (!hygon_umc_channel_enabled(pvt, umc)) + continue; + if (hygon_f18h_m4h()) umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, umc); else @@ -1510,7 +1549,9 @@ static void umc_determine_memory_type(struct amd64_pvt *pvt) * Check if the system supports the "DDR Type" field in UMC Config * and has DDR5 DIMMs in use. */ - if ((pvt->flags.zn_regs_v2 || hygon_f18h_m4h()) && + if ((pvt->flags.zn_regs_v2 || + hygon_f18h_m4h() || + hygon_f18h_m10h()) && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { if (umc->dimm_cfg & BIT(5)) umc->dram_type = MEM_LRDDR5; @@ -2799,9 +2840,9 @@ static void decode_umc_error(int node_id, struct mce *m) pvt->ops->get_err_info(m, &err); - if (hygon_f18h_m4h()) { - if (boot_cpu_data.x86_model == 0x6) - umc = err.channel << 1; + if (hygon_f18h_m4h() || hygon_f18h_m10h()) { + if (boot_cpu_data.x86_model >= 0x6) + umc = (err.channel << 1) + ((m->ipid & BIT(13)) >> 13); else umc = err.channel; @@ -2889,6 +2930,9 @@ static void umc_read_mc_regs(struct amd64_pvt *pvt) /* Read registers from each UMC */ for_each_umc(i) { + if (!hygon_umc_channel_enabled(pvt, i)) + continue; + if (hygon_f18h_m4h()) umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, i); else @@ -3609,6 +3653,9 @@ static int per_family_init(struct amd64_pvt *pvt) } else if (pvt->model == 0x7) { pvt->ctl_name = "F18h_M07h"; break; + } else if (pvt->model == 0x8) { + pvt->ctl_name = "F18h_M08h"; + break; } else if (pvt->model == 0x10) { pvt->ctl_name = "F18h_M10h"; break; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index b200335432329b0de5a69a249c7515bb327d56ba..7a9cdd13c8895b505a09c0a1cf2a931ff357ae50 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1012,54 +1012,6 @@ static struct notifier_block i10nm_mce_dec = { .priority = MCE_PRIO_EDAC, }; -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/i10nm_test/addr. 
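In the decode_umc_error() change above, models 0x6 and later report the UMC pair in err.channel, and bit 13 of the MCA IPID appears to select the instance within the pair. A small check of that index mapping (the sample channel and IPID values are arbitrary):

#include <assert.h>
#include <stdint.h>

#define BIT13 (1ULL << 13)

static unsigned int umc_index(unsigned int channel, uint64_t ipid)
{
	/* channel selects the UMC pair, IPID bit 13 picks within it */
	return (channel << 1) + ((ipid & BIT13) >> 13);
}

int main(void)
{
	assert(umc_index(0, 0)     == 0);
	assert(umc_index(0, BIT13) == 1);
	assert(umc_index(2, BIT13) == 5);
	return 0;
}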
- */ -static struct dentry *i10nm_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -static void setup_i10nm_debug(void) -{ - i10nm_test = edac_debugfs_create_dir("i10nm_test"); - if (!i10nm_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, i10nm_test, - NULL, &fops_u64_wo)) { - debugfs_remove(i10nm_test); - i10nm_test = NULL; - } -} - -static void teardown_i10nm_debug(void) -{ - debugfs_remove_recursive(i10nm_test); -} -#else -static inline void setup_i10nm_debug(void) {} -static inline void teardown_i10nm_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - static int __init i10nm_init(void) { u8 mc = 0, src_id = 0, node_id = 0; @@ -1085,6 +1037,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); @@ -1155,7 +1108,7 @@ static int __init i10nm_init(void) opstate_init(); mce_register_decode_chain(&i10nm_mce_dec); - setup_i10nm_debug(); + skx_setup_debug("i10nm_test"); if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); @@ -1183,7 +1136,7 @@ static void __exit i10nm_exit(void) enable_retry_rd_err_log(false); } - teardown_i10nm_debug(); + skx_teardown_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ba9da97f16f70cb8ed028757c334e5991e189672..5e2c0b396b64fc4fd55f73928e6acf2ad519fad3 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -168,6 +168,33 @@ static const char * const smca_ls_mce_desc[] = { "L2 Fill Data error", }; +/* Hygon Model7h Scalable MCA LS error strings */ +static const char * const smca_ls_mce_hygon_desc[] = { + "Load queue parity error", + "Store queue parity error", + "Miss address buffer payload parity error", + "Level 1 TLB parity error", + "DC Tag error type 5", + "DC Tag error type 6", + "DC Tag error type 1", + "Internal error type 1", + "Internal error type 2", + "System Read Data Error 0", + "System Read Data Error 1", + "System Read Data Error 2", + "System Read Data Error 3", + "DC Tag error type 2", + "DC Data error type 1 and poison consumption", + "DC Data error type 2", + "DC Data error type 3", + "DC Tag error type 4", + "Level 2 TLB parity error", + "PDC parity error", + "DC Tag error type 3", + "DC Tag error type 5", + "L2 Fill Data error", +}; + static const char * const smca_ls2_mce_desc[] = { "An ECC error was detected on a data cache read by a probe or victimization", "An ECC error or L2 poison was detected on a data cache read by a load", @@ -217,6 +244,31 @@ static const char * const smca_if_mce_desc[] = { "CT MCE", }; +/* Hygon Model7h Scalable MCA IF error strings */ +static const char * const smca_if_mce_hygon_desc[] = { + "Op Cache Microtag Probe Port Parity Error", + "IC Microtag or Full Tag Multi-hit Error", + "IC Full Tag Parity Error", + "IC Data Array Parity Error", + "Decoupling Queue PhysAddr Parity Error", + "L0 ITLB Parity Error", + "L1 ITLB Parity Error", + "L2 ITLB Parity Error", + "BPQ 0 
Snoop Parity Error", + "BPQ 1 Snoop Parity Error", + "BPQ 2 Snoop Parity Error", + "BPQ 3 Snoop Parity Error", + "L1 BTB Multi-Match Error", + "L2 BTB Multi-Match Error", + "L2 Cache Response Poison Error", + "System Read Data Error", + "Hardware Assertion Error", + "L1-TLB Multi-Hit", + "L2-TLB Multi-Hit", + "BSR Parity Error", + "CT MCE", +}; + static const char * const smca_l2_mce_desc[] = { "L2M Tag Multiple-Way-Hit error", "L2M Tag or State Array ECC Error", @@ -1249,6 +1301,16 @@ static int __init mce_amd_init(void) out: pr_info("MCE: In-kernel MCE decoding enabled.\n"); + if (c->x86_vendor == X86_VENDOR_HYGON && + c->x86_model >= 0x7 && c->x86_model <= 0xf) { + smca_mce_descs[SMCA_LS].descs = smca_ls_mce_hygon_desc; + smca_mce_descs[SMCA_LS].num_descs = ARRAY_SIZE(smca_ls_mce_hygon_desc); + smca_mce_descs[SMCA_IF].descs = smca_if_mce_hygon_desc; + smca_mce_descs[SMCA_IF].num_descs = ARRAY_SIZE(smca_if_mce_hygon_desc); + pr_info("MCE: Hygon Fam%xh Model%xh smca mce descs setup.\n", + c->x86, c->x86_model); + } + mce_register_decode_chain(&amd_mce_dec_nb); return 0; diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index bc399469e9594535705c15d9ba88e9bd4fc71f3a..0280518a51a9034f73048952b28973efd257641d 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -587,54 +587,6 @@ static struct notifier_block skx_mce_dec = { .priority = MCE_PRIO_EDAC, }; -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/skx_test/addr. - */ -static struct dentry *skx_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -static void setup_skx_debug(void) -{ - skx_test = edac_debugfs_create_dir("skx_test"); - if (!skx_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, skx_test, - NULL, &fops_u64_wo)) { - debugfs_remove(skx_test); - skx_test = NULL; - } -} - -static void teardown_skx_debug(void) -{ - debugfs_remove_recursive(skx_test); -} -#else -static inline void setup_skx_debug(void) {} -static inline void teardown_skx_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - /* * skx_init: * make sure we are running on the correct cpu model @@ -725,7 +677,7 @@ static int __init skx_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - setup_skx_debug(); + skx_setup_debug("skx_test"); mce_register_decode_chain(&skx_mce_dec); @@ -739,7 +691,7 @@ static void __exit skx_exit(void) { edac_dbg(2, "\n"); mce_unregister_decode_chain(&skx_mce_dec); - teardown_skx_debug(); + skx_teardown_debug(); if (nvdimm_count) skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index fd8186fe005c8a6132a8ef4a4f06a2c7b6564a92..2be331c4826c6f1556bfafbc4c0f913c5033326f 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -47,8 +47,9 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); static bool skx_mem_cfg_2lm; +static struct res_config *skx_res_cfg; -int __init skx_adxl_get(void) +int skx_adxl_get(void) { const char * const *names; int 
i, j; @@ -110,14 +111,16 @@ int __init skx_adxl_get(void) return -ENODEV; } +EXPORT_SYMBOL_GPL(skx_adxl_get); -void __exit skx_adxl_put(void) +void skx_adxl_put(void) { kfree(adxl_values); kfree(adxl_msg); } +EXPORT_SYMBOL_GPL(skx_adxl_put); -static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem) +static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) { struct skx_dev *d; int i, len = 0; @@ -133,8 +136,24 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me return false; } + /* + * GNR with a Flat2LM memory configuration may mistakenly classify + * a near-memory error(DDR5) as a far-memory error(CXL), resulting + * in the incorrect selection of decoded ADXL components. + * To address this, prefetch the decoded far-memory controller ID + * and adjust the error source to near-memory if the far-memory + * controller ID is invalid. + */ + if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + if (res->imc == -1) { + err_src = ERR_SRC_2LM_NM; + edac_dbg(0, "Adjust the error source to near-memory.\n"); + } + } + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; - if (error_in_1st_level_mem) { + if (err_src == ERR_SRC_2LM_NM) { res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1; res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ? @@ -187,12 +206,20 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) { skx_mem_cfg_2lm = mem_cfg_2lm; } +EXPORT_SYMBOL_GPL(skx_set_mem_cfg); + +void skx_set_res_cfg(struct res_config *cfg) +{ + skx_res_cfg = cfg; +} +EXPORT_SYMBOL_GPL(skx_set_res_cfg); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { driver_decode = decode; skx_show_retry_rd_err_log = show_retry_log; } +EXPORT_SYMBOL_GPL(skx_set_decode); int skx_get_src_id(struct skx_dev *d, int off, u8 *id) { @@ -206,6 +233,7 @@ int skx_get_src_id(struct skx_dev *d, int off, u8 *id) *id = GET_BITFIELD(reg, 12, 14); return 0; } +EXPORT_SYMBOL_GPL(skx_get_src_id); int skx_get_node_id(struct skx_dev *d, u8 *id) { @@ -219,6 +247,7 @@ int skx_get_node_id(struct skx_dev *d, u8 *id) *id = GET_BITFIELD(reg, 0, 2); return 0; } +EXPORT_SYMBOL_GPL(skx_get_node_id); static int get_width(u32 mtr) { @@ -284,6 +313,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) *list = &dev_edac_list; return ndev; } +EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings); int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) { @@ -323,6 +353,7 @@ int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) pci_dev_put(pdev); return -ENODEV; } +EXPORT_SYMBOL_GPL(skx_get_hi_lo); static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, int maxval, const char *name) @@ -394,6 +425,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, return 1; } +EXPORT_SYMBOL_GPL(skx_get_dimm_info); int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno, const char *mod_str) @@ -442,6 +474,7 @@ int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, return (size == 0 || size == ~0ull) ? 
0 : 1; } +EXPORT_SYMBOL_GPL(skx_get_nvdimm_info); int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, const char *ctl_name, const char *mod_str, @@ -512,6 +545,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, imc->mci = NULL; return rc; } +EXPORT_SYMBOL_GPL(skx_register_mci); static void skx_unregister_mci(struct skx_imc *imc) { @@ -609,31 +643,27 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, optype, skx_msg); } -static bool skx_error_in_1st_level_mem(const struct mce *m) +static enum error_source skx_error_source(const struct mce *m) { - u32 errcode; - - if (!skx_mem_cfg_2lm) - return false; + u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR) + return ERR_SRC_NOT_MEMORY; - return errcode == MCACOD_EXT_MEM_ERR; -} - -static bool skx_error_in_mem(const struct mce *m) -{ - u32 errcode; + if (!skx_mem_cfg_2lm) + return ERR_SRC_1LM; - errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + if (errcode == MCACOD_EXT_MEM_ERR) + return ERR_SRC_2LM_NM; - return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); + return ERR_SRC_2LM_FM; } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; + enum error_source err_src; struct decoded_addr res; struct mem_ctl_info *mci; char *type; @@ -641,8 +671,10 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; + err_src = skx_error_source(mce); + /* Ignore unless this is memory related with an address */ - if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) + if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); @@ -652,7 +684,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, /* Try driver decoder first */ if (!(driver_decode && driver_decode(&res))) { /* Then try firmware decoder (ACPI DSM methods) */ - if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) + if (!(adxl_component_count && skx_adxl_decode(&res, err_src))) return NOTIFY_DONE; } @@ -684,6 +716,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, mce->kflags |= MCE_HANDLED_EDAC; return NOTIFY_DONE; } +EXPORT_SYMBOL_GPL(skx_mce_check_error); void skx_remove(void) { @@ -721,3 +754,55 @@ void skx_remove(void) kfree(d); } } +EXPORT_SYMBOL_GPL(skx_remove); + +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/{skx,i10nm}_test/addr. 
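The debug hook being consolidated here keeps its old behavior: writing an address to the shared debugfs node fabricates a corrected memory-read MCE at that address and pushes it through skx_mce_check_error(), so the decode path can be exercised without real DIMM errors. From user space this is just a file write; a minimal sketch, assuming CONFIG_EDAC_DEBUG is enabled, debugfs is mounted at the usual place, and the skx driver created the node (the sample address is arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/edac/skx_test/addr", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Inject a fake corrected error at an arbitrary address. */
	dprintf(fd, "0x12345000\n");
	close(fd);
	return 0;
}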
+ */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +void skx_setup_debug(const char *name) +{ + skx_test = edac_debugfs_create_dir(name); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} +EXPORT_SYMBOL_GPL(skx_setup_debug); + +void skx_teardown_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +EXPORT_SYMBOL_GPL(skx_teardown_debug); +#endif /*CONFIG_EDAC_DEBUG*/ + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel server processors"); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b6d3607dffe27b3cc85308dbda91f67be0ddbc3e..85c978b612362b1e6a15fa9720497458864e446f 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -147,6 +147,13 @@ enum { INDEX_MAX }; +enum error_source { + ERR_SRC_1LM, + ERR_SRC_2LM_NM, + ERR_SRC_2LM_FM, + ERR_SRC_NOT_MEMORY, +}; + #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) @@ -231,10 +238,11 @@ typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, typedef bool (*skx_decode_f)(struct decoded_addr *res); typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); -int __init skx_adxl_get(void); -void __exit skx_adxl_put(void); +int skx_adxl_get(void); +void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); +void skx_set_res_cfg(struct res_config *cfg); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); @@ -260,4 +268,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void skx_remove(void); +#ifdef CONFIG_EDAC_DEBUG +void skx_setup_debug(const char *name); +void skx_teardown_debug(void); +#else +static inline void skx_setup_debug(const char *name) {} +static inline void skx_teardown_debug(void) {} +#endif + #endif /* _SKX_COMM_EDAC_H */ diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index def8a84d1611b563975f931ff15aaf21fdedea57..14e253f50f3b9e5f0c8d12091b02d7dfee04e450 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -137,7 +137,10 @@ EXPORT_SYMBOL_GPL(scmi_driver_unregister); static void scmi_device_release(struct device *dev) { - kfree(to_scmi_dev(dev)); + struct scmi_device *scmi_dev = to_scmi_dev(dev); + + kfree_const(scmi_dev->name); + kfree(scmi_dev); } struct scmi_device * @@ -178,7 +181,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, return scmi_dev; put_dev: - kfree_const(scmi_dev->name); put_device(&scmi_dev->dev); ida_simple_remove(&scmi_bus_id, id); return NULL; @@ -186,7 +188,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, void scmi_device_destroy(struct scmi_device *scmi_dev) { - kfree_const(scmi_dev->name); scmi_handle_put(scmi_dev->handle); 
ida_simple_remove(&scmi_bus_id, scmi_dev->id); device_unregister(&scmi_dev->dev); diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index b8f33c5becac6c7a786f7b73afe553085ec1ee9b..2b6f210dabe9acc633bb567d1309a9c7d048cf56 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -17,14 +17,14 @@ #include #include #include -#include +#include #include "intel_th.h" #include "debug.h" static bool host_mode __read_mostly; module_param(host_mode, bool, 0444); -static DEFINE_SPINLOCK(intel_th_lock); +static DEFINE_MUTEX(intel_th_lock); static DEFINE_IDA(intel_th_ida); @@ -718,7 +718,7 @@ int intel_th_output_enable(struct intel_th *th, unsigned int otype) { struct intel_th_device *thdev; int src = 0, dst = 0; - spin_lock(&intel_th_lock); + mutex_lock(&intel_th_lock); for (src = 0, dst = 0; dst <= th->num_thdevs; src++, dst++) { for (; src < ARRAY_SIZE(intel_th_subdevices); src++) { if (intel_th_subdevices[src].type != INTEL_TH_OUTPUT) @@ -732,7 +732,7 @@ int intel_th_output_enable(struct intel_th *th, unsigned int otype) /* no unallocated matching subdevices */ if (src == ARRAY_SIZE(intel_th_subdevices)) { - spin_unlock(&intel_th_lock); + mutex_unlock(&intel_th_lock); return -ENODEV; } @@ -753,18 +753,18 @@ int intel_th_output_enable(struct intel_th *th, unsigned int otype) if (dst == th->num_thdevs) goto found; } - spin_unlock(&intel_th_lock); + mutex_unlock(&intel_th_lock); return -ENODEV; found: thdev = intel_th_subdevice_alloc(th, &intel_th_subdevices[src]); if (IS_ERR(thdev)) { - spin_unlock(&intel_th_lock); + mutex_unlock(&intel_th_lock); return PTR_ERR(thdev); } th->thdev[th->num_thdevs++] = thdev; - spin_unlock(&intel_th_lock); + mutex_unlock(&intel_th_lock); return 0; } EXPORT_SYMBOL_GPL(intel_th_output_enable); @@ -773,7 +773,7 @@ static int intel_th_populate(struct intel_th *th) { int src; - spin_lock(&intel_th_lock); + mutex_lock(&intel_th_lock); /* create devices for each intel_th_subdevice */ for (src = 0; src < ARRAY_SIZE(intel_th_subdevices); src++) { const struct intel_th_subdevice *subdev = @@ -799,14 +799,14 @@ static int intel_th_populate(struct intel_th *th) /* ENODEV for individual subdevices is allowed */ if (PTR_ERR(thdev) == -ENODEV) continue; - spin_unlock(&intel_th_lock); + mutex_unlock(&intel_th_lock); return PTR_ERR(thdev); } th->thdev[th->num_thdevs++] = thdev; } - spin_unlock(&intel_th_lock); + mutex_unlock(&intel_th_lock); return 0; } diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 5b0c89b08f2571e9b80f0a77aa88e3f3b1cd6b56..d0a332494d5929aad911dba4b841b88bfa72919b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2879,7 +2879,7 @@ static bool __init check_ioapic_information(void) (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 == 0x18 && boot_cpu_data.x86_model >= 0x4 && - boot_cpu_data.x86_model <= 0xf && + boot_cpu_data.x86_model <= 0x10 && devid == IOAPIC_SB_DEVID_FAM18H_M4H)) { has_sb_ioapic = true; ret = true; diff --git a/drivers/iommu/sw64/iommu.c b/drivers/iommu/sw64/iommu.c index 0c76e73a6bf17e8638773ba5500e835d0d8df593..0e7881baf96b64aba7c3add0e1436a99e2bf19b8 100644 --- a/drivers/iommu/sw64/iommu.c +++ b/drivers/iommu/sw64/iommu.c @@ -255,6 +255,7 @@ static void dma_domain_free(struct dma_domain *dma_dom) if (dma_dom->sdomain.id) domain_id_free(dma_dom->sdomain.id); + iommu_put_dma_cookie(&dma_dom->sdomain.domain); kfree(dma_dom); } @@ -1037,6 +1038,10 @@ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned type) } 
sdomain = &dma_dom->sdomain; + sdomain->domain.geometry.aperture_start = SW64_DMA_START; + sdomain->domain.geometry.aperture_end = SW64_DMA_LIMIT; + sdomain->domain.geometry.force_aperture = true; + if (iommu_get_dma_cookie(&sdomain->domain) == -ENOMEM) return NULL; break; @@ -1194,6 +1199,11 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, * and pci device BAR, check should be introduced manually * to avoid VFIO trying to map pci config space. */ + if (iova > IO_BASE) { + pr_err("iova %#lx is out of memory!\n", iova); + return -ENOMEM; + } + if (iova >= SW64_BAR_ADDRESS) return 0; @@ -1347,11 +1357,26 @@ static void sunway_iommu_probe_finalize(struct device *dev) domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, SW64_DMA_START, SW64_DMA_LIMIT); + iommu_setup_dma_ops(dev, + SW64_DMA_START, SW64_DMA_LIMIT - SW64_DMA_START); else set_dma_ops(dev, get_arch_dma_ops(dev->bus)); } +static void sunway_iommu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *region; + int prot = IOMMU_NOEXEC | IOMMU_MMIO; + + region = iommu_alloc_resv_region(SW64_DMA_LIMIT, + (DMA_BIT_MASK(32) - SW64_DMA_LIMIT), + prot, IOMMU_RESV_RESERVED); + if (!region) + return; + list_add_tail(®ion->list, head); +} + const struct iommu_ops sunway_iommu_ops = { .capable = sunway_iommu_capable, .domain_alloc = sunway_iommu_domain_alloc, @@ -1365,6 +1390,8 @@ const struct iommu_ops sunway_iommu_ops = { .unmap = sunway_iommu_unmap, .iova_to_phys = sunway_iommu_iova_to_phys, .device_group = sunway_iommu_device_group, + .get_resv_regions = sunway_iommu_get_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SW64_IOMMU_PGSIZES, .def_domain_type = sunway_iommu_def_domain_type, }; diff --git a/drivers/iommu/sw64/iommu_v2.c b/drivers/iommu/sw64/iommu_v2.c index a4e5153f6b2a06bb98ef1dda6ff5d8222012bda4..f41e74b3cef0213798bb6af0bab0d7e0d962e962 100644 --- a/drivers/iommu/sw64/iommu_v2.c +++ b/drivers/iommu/sw64/iommu_v2.c @@ -102,8 +102,8 @@ struct acpi_table_header *dmar_tbl; struct dma_domain { struct sunway_iommu_domain sdomain; - struct iova_domain iovad; }; + const struct iommu_ops sunway_iommu_ops; static const struct dma_map_ops sunway_dma_ops; @@ -282,11 +282,11 @@ static void dma_domain_free(struct dma_domain *dma_dom) return; del_domain_from_list(&dma_dom->sdomain); - put_iova_domain(&dma_dom->iovad); free_pagetable(&dma_dom->sdomain); if (dma_dom->sdomain.id) domain_id_free(dma_dom->sdomain.id); + iommu_put_dma_cookie(&dma_dom->sdomain.domain); kfree(dma_dom); } @@ -359,8 +359,6 @@ static struct dma_domain *dma_domain_alloc(void) sunway_domain_init(&dma_dom->sdomain); dma_dom->sdomain.type = IOMMU_DOMAIN_DMA; - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_PFN(SW64_DMA_START)); - reserve_iova(&dma_dom->iovad, (0xe0000000UL >> PAGE_SHIFT), (0x100000000UL >> PAGE_SHIFT)); add_domain_to_list(&dma_dom->sdomain); @@ -1283,6 +1281,9 @@ static struct iommu_domain *sunway_iommu_domain_alloc(unsigned int type) } sdomain = &dma_dom->sdomain; + sdomain->domain.geometry.aperture_start = SW64_DMA_START; + sdomain->domain.geometry.aperture_end = SW64_64BIT_DMA_LIMIT; + sdomain->domain.geometry.force_aperture = true; if (iommu_get_dma_cookie(&sdomain->domain) == -ENOMEM) return NULL; break; @@ -1463,6 +1464,11 @@ sunway_iommu_map(struct iommu_domain *dom, unsigned long iova, * and pci device BAR, check should be introduced manually * to avoid VFIO trying to map pci config space. 
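Both sunway variants now pair iommu_get_dma_cookie() in domain_alloc() with iommu_put_dma_cookie() in the free path, and publish the legal IOVA window through the domain geometry so the shared iommu-dma code can run its allocator instead of a private iova_domain. The shape of that contract, as a reduced sketch (example_domain and the EXAMPLE_DMA_* constants stand in for the driver's own types and the SW64_DMA_* limits):

#define EXAMPLE_DMA_START	0x80000000UL	/* placeholder aperture */
#define EXAMPLE_DMA_END		0xdfffffffUL

struct example_domain {
	struct iommu_domain domain;
};

static struct iommu_domain *example_domain_alloc(unsigned int type)
{
	struct example_domain *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return NULL;
	if (type == IOMMU_DOMAIN_DMA) {
		d->domain.geometry.aperture_start = EXAMPLE_DMA_START;
		d->domain.geometry.aperture_end   = EXAMPLE_DMA_END;
		d->domain.geometry.force_aperture = true;
		if (iommu_get_dma_cookie(&d->domain)) {	/* -ENOMEM on failure */
			kfree(d);
			return NULL;
		}
	}
	return &d->domain;
}

static void example_domain_free(struct iommu_domain *dom)
{
	iommu_put_dma_cookie(dom);	/* must mirror the _get above */
	kfree(container_of(dom, struct example_domain, domain));
}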
*/ + if (iova > IO_BASE) { + pr_err("iova %#lx is out of memory!\n", iova); + return -ENOMEM; + } + if (iova >= SW64_BAR_ADDRESS) return 0; @@ -1622,14 +1628,27 @@ static void sunway_iommu_probe_finalize(struct device *dev) domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_DMA) { - if (min(dev->coherent_dma_mask, *dev->dma_mask) == DMA_BIT_MASK(32)) - iommu_setup_dma_ops(dev, SW64_DMA_START, SW64_32BIT_DMA_LIMIT); - else - iommu_setup_dma_ops(dev, SW64_DMA_START, SW64_64BIT_DMA_LIMIT); + iommu_setup_dma_ops(dev, SW64_DMA_START, (SW64_64BIT_DMA_LIMIT - SW64_DMA_START)); } else set_dma_ops(dev, get_arch_dma_ops(dev->bus)); } +static void sunway_iommu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *region; + int prot = IOMMU_NOEXEC | IOMMU_MMIO; + + /* Reserve 3.5~4G for device */ + region = iommu_alloc_resv_region(SW64_32BIT_DMA_LIMIT, + (DMA_BIT_MASK(32) - SW64_32BIT_DMA_LIMIT), + prot, IOMMU_RESV_RESERVED); + if (!region) + return; + + list_add_tail(®ion->list, head); +} + const struct iommu_ops sunway_iommu_ops = { .capable = sunway_iommu_capable, .domain_alloc = sunway_iommu_domain_alloc, @@ -1643,6 +1662,8 @@ const struct iommu_ops sunway_iommu_ops = { .unmap = sunway_iommu_unmap, .iova_to_phys = sunway_iommu_iova_to_phys, .device_group = sunway_iommu_device_group, + .get_resv_regions = sunway_iommu_get_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SW64_IOMMU_PGSIZES, .def_domain_type = sunway_iommu_def_domain_type, }; diff --git a/drivers/irqchip/irq-sunway-cpu.c b/drivers/irqchip/irq-sunway-cpu.c index 1709d6a811461dd07b0f2d2063b219f5e2698fb8..325408be9a561810d1a26c8e1f7a4194cf5f9206 100644 --- a/drivers/irqchip/irq-sunway-cpu.c +++ b/drivers/irqchip/irq-sunway-cpu.c @@ -76,7 +76,7 @@ static void handle_nmi_int(void) int pme_state; -asmlinkage void do_entInt(unsigned long type, unsigned long vector, +asmlinkage void noinstr do_entInt(unsigned long type, unsigned long vector, unsigned long irq_arg, struct pt_regs *regs) { struct pt_regs *old_regs; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index da73c637e0907f1c253a86193c102b4752c88675..18cf05d13f5a12bd82cbf3dc561e2a907194d2bd 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2317,10 +2317,9 @@ static struct thin_c *get_first_thin(struct pool *pool) struct thin_c *tc = NULL; rcu_read_lock(); - if (!list_empty(&pool->active_thins)) { - tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list); + tc = list_first_or_null_rcu(&pool->active_thins, struct thin_c, list); + if (tc) thin_get(tc); - } rcu_read_unlock(); return tc; diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 282f3d2388cc20612d612fa39622d99f208f4095..1c1cffb19ed79418f73f2b953d13284c03e8c30a 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -661,7 +661,7 @@ static int uvc_parse_format(struct uvc_device *dev, /* Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. 
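The dm-thin hunk above closes a classic RCU race: checking list_empty() and then dereferencing the first entry are two separate reads, and the list may be emptied in between. list_first_or_null_rcu() folds both into a single READ_ONCE of the head pointer. Side by side, as a sketch:

/* Racy: the entry seen by list_empty() may be unlinked before the read. */
rcu_read_lock();
if (!list_empty(&pool->active_thins))
	tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
rcu_read_unlock();

/* Safe: one read decides both "is it empty?" and "which entry?". */
rcu_read_lock();
tc = list_first_or_null_rcu(&pool->active_thins, struct thin_c, list);
if (tc)
	thin_get(tc);
rcu_read_unlock();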
*/ - while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && + while (ftype && buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { frame = &format->frame[format->nframes]; if (ftype != UVC_VS_FRAME_FRAME_BASED) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e74100732cd0375cf93eeacfb031f9eef6480a48..4d87f2d84b271439ee9b4b12adeb992ce9b73b0f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1460,6 +1460,32 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, return error; } +static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_id_ns_cs_indep **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + .identify.cns = NVME_ID_CNS_NS_CS_INDEP, + }; + int ret; + + *id = kmalloc(sizeof(**id), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); + if (ret) { + dev_warn(ctrl->device, + "Identify namespace (CS independent) failed (%d)\n", + ret); + kfree(*id); + return ret; + } + + return 0; +} + static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result) { @@ -2100,10 +2126,9 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; -static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) +static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) { - unsigned long timeout = - ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; + unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies; u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; int ret; @@ -2116,7 +2141,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) usleep_range(1000, 2000); if (fatal_signal_pending(current)) return -EINTR; - if (time_after(jiffies, timeout)) { + if (time_after(jiffies, timeout_jiffies)) { dev_err(ctrl->device, "Device not ready; aborting %s, CSTS=0x%x\n", enabled ? 
"initialisation" : "reset", csts); @@ -2147,13 +2172,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); - return nvme_wait_ready(ctrl, ctrl->cap, false); + return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false); } EXPORT_SYMBOL_GPL(nvme_disable_ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl) { unsigned dev_page_min; + u32 timeout; int ret; ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); @@ -2174,6 +2200,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ctrl->ctrl_config = NVME_CC_CSS_CSI; else ctrl->ctrl_config = NVME_CC_CSS_NVM; + + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { + u32 crto; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); + if (ret) { + dev_err(ctrl->device, "Reading CRTO failed (%d)\n", + ret); + return ret; + } + + if (ctrl->cap & NVME_CAP_CRMS_CRIMS) { + ctrl->ctrl_config |= NVME_CC_CRIME; + timeout = NVME_CRTO_CRIMT(crto); + } else { + timeout = NVME_CRTO_CRWMT(crto); + } + } else { + timeout = NVME_CAP_TIMEOUT(ctrl->cap); + } + ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; @@ -2182,7 +2229,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - return nvme_wait_ready(ctrl, ctrl->cap, true); + return nvme_wait_ready(ctrl, timeout, true); } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); @@ -3829,11 +3876,26 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_ids ids = { }; + struct nvme_id_ns_cs_indep *id; struct nvme_ns *ns; + bool ready = true; if (nvme_identify_ns_descs(ctrl, nsid, &ids)) return; + /* + * Check if the namespace is ready. If not ignore it, we will get an + * AEN once it becomes ready and restart the scan. + */ + if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) && + !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) { + ready = id->nstat & NVME_NSTAT_NRDY; + kfree(id); + } + + if (!ready) + return; + ns = nvme_find_get_ns(ctrl, nsid); if (ns) { nvme_validate_ns(ns, &ids); @@ -4539,6 +4601,8 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) != + NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c2b061bfe48146f1d4a5537b9bfec0035cde0d2e..06a44d4e1569c6b1d3252bbd424ec9128feb4639 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1103,18 +1103,14 @@ static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; irqreturn_t ret = IRQ_NONE; - DEFINE_IO_COMP_BATCH(iob); /* * The rmb/wmb pair ensures we see all updates from a previous run of * the irq handler, even if that was on another CPU. 
*/ rmb(); - if (nvme_poll_cq(nvmeq, &iob)) { - if (!rq_list_empty(iob.req_list)) - nvme_pci_complete_batch(&iob); + if (nvme_poll_cq(nvmeq, NULL)) ret = IRQ_HANDLED; - } wmb(); return ret; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 90295d22667aff3f89f559df86720111ce9eb76d..ff8213ad988940a45075b301e538d125fbed2538 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -223,7 +223,7 @@ static void pcie_port_disable_hp_interrupt(struct pci_dev *dev) return; } - usleep_range(1000, 1000); + usleep_range(10000, 10000); timeout -= 10; } while (timeout >= 0); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a5e6897b254984668b2b63ffed9631c1446d3a56..f2d299dc6f35ac98d838baf5a35adbeda29ed851 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1263,7 +1263,7 @@ static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev, if ((secondary || subordinate) && !pcibios_assign_all_busses() && !is_cardbus && !broken) { - unsigned int cmax; + unsigned int cmax, buses; /* * Bus already configured by firmware, process it in the @@ -1288,7 +1288,8 @@ static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev, child->bridge_ctl = bctl; } - cmax = pci_scan_child_bus(child); + buses = subordinate - secondary; + cmax = pci_scan_child_bus_extend(child, buses); if (cmax > subordinate) pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n", subordinate, cmax); @@ -2882,8 +2883,8 @@ static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus, * hotplug bridges too much during the second scan below. */ used_buses++; - if (cmax - max > 1) - used_buses += cmax - max - 1; + if (max - cmax > 1) + used_buses += max - cmax - 1; } /* Scan bridges that need to be reconfigured */ @@ -2891,7 +2892,6 @@ static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus, unsigned int buses = 0; if (!hotplug_bridges && normal_bridges == 1) { - /* * There is only one bridge on the bus (upstream * port) so it gets all available buses which it @@ -2900,7 +2900,6 @@ static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus, */ buses = available_buses; } else if (dev->is_hotplug_bridge) { - /* * Distribute the extra buses between hotplug * bridges if any. @@ -2919,7 +2918,7 @@ static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus, /* * Make sure a hotplug bridge has at least the minimum requested * number of buses but allow it to grow up to the maximum available - * bus number of there is room. + * bus number if there is room. */ if (bus->self && bus->self->is_hotplug_bridge) { used_buses = max_t(unsigned int, available_buses, diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 2ce636937c6eaf5b2c23045a1de00d368fb8cb01..b2d446adbe3bc716730b4ff1f61bbe97d143631d 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1745,119 +1745,6 @@ static enum enable_type pci_realloc_detect(struct pci_bus *bus, } #endif -/* - * First try will not touch PCI bridge res. - * Second and later try will clear small leaf bridge res. - * Will stop till to the max depth if can not find good one. 
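The probe.c hunk above stops a firmware-configured bridge from being rescanned with an unbounded bus budget: the child scan is now capped by the range firmware programmed. With hypothetical numbers:

/* Firmware left secondary = 0x02, subordinate = 0x0a on the bridge, so
 * the whole subtree may only occupy bus numbers 0x02..0x0a: */
unsigned int secondary = 0x02, subordinate = 0x0a;
unsigned int buses = subordinate - secondary;	/* 8 spare bus numbers */
unsigned int cmax = pci_scan_child_bus_extend(child, buses);
/* cmax > subordinate would mean the devices underneath need more bus
 * numbers than firmware reserved, which is exactly the condition the
 * pci_warn() above reports. */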
- */ -void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) -{ - LIST_HEAD(realloc_head); - /* List of resources that want additional resources */ - struct list_head *add_list = NULL; - int tried_times = 0; - enum release_type rel_type = leaf_only; - LIST_HEAD(fail_head); - struct pci_dev_resource *fail_res; - int pci_try_num = 1; - enum enable_type enable_local; - - /* Don't realloc if asked to do so */ - enable_local = pci_realloc_detect(bus, pci_realloc_enable); - if (pci_realloc_enabled(enable_local)) { - int max_depth = pci_bus_get_depth(bus); - - pci_try_num = max_depth + 1; - dev_info(&bus->dev, "max bus depth: %d pci_try_num: %d\n", - max_depth, pci_try_num); - } - -again: - /* - * Last try will use add_list, otherwise will try good to have as must - * have, so can realloc parent bridge resource - */ - if (tried_times + 1 == pci_try_num) - add_list = &realloc_head; - /* - * Depth first, calculate sizes and alignments of all subordinate buses. - */ - __pci_bus_size_bridges(bus, add_list); - - /* Depth last, allocate resources and update the hardware. */ - __pci_bus_assign_resources(bus, add_list, &fail_head); - if (add_list) - BUG_ON(!list_empty(add_list)); - tried_times++; - - /* Any device complain? */ - if (list_empty(&fail_head)) - goto dump; - - if (tried_times >= pci_try_num) { - if (enable_local == undefined) - dev_info(&bus->dev, "Some PCI device resources are unassigned, try booting with pci=realloc\n"); - else if (enable_local == auto_enabled) - dev_info(&bus->dev, "Automatically enabled pci realloc, if you have problem, try booting with pci=realloc=off\n"); - - free_list(&fail_head); - goto dump; - } - - dev_info(&bus->dev, "No. %d try to assign unassigned res\n", - tried_times + 1); - - /* Third times and later will not check if it is leaf */ - if ((tried_times + 1) > 2) - rel_type = whole_subtree; - - /* - * Try to release leaf bridge's resources that doesn't fit resource of - * child device under that bridge. 
- */ - list_for_each_entry(fail_res, &fail_head, list) - pci_bus_release_bridge_resources(fail_res->dev->bus, - fail_res->flags & PCI_RES_TYPE_MASK, - rel_type); - - /* Restore size and flags */ - list_for_each_entry(fail_res, &fail_head, list) { - struct resource *res = fail_res->res; - int idx; - - res->start = fail_res->start; - res->end = fail_res->end; - res->flags = fail_res->flags; - - if (pci_is_bridge(fail_res->dev)) { - idx = res - &fail_res->dev->resource[0]; - if (idx >= PCI_BRIDGE_RESOURCES && - idx <= PCI_BRIDGE_RESOURCE_END) - res->flags = 0; - } - } - free_list(&fail_head); - - goto again; - -dump: - /* Dump the resource on buses */ - pci_bus_dump_resources(bus); -} - -void __init pci_assign_unassigned_resources(void) -{ - struct pci_bus *root_bus; - - list_for_each_entry(root_bus, &pci_root_buses, node) { - pci_assign_unassigned_root_bus_resources(root_bus); - - /* Make sure the root bridge has a companion ACPI device */ - if (ACPI_HANDLE(root_bus->bridge)) - acpi_ioapic_add(ACPI_HANDLE(root_bus->bridge)); - } -} - static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res, struct list_head *add_list, resource_size_t new_size) @@ -1878,12 +1765,68 @@ static void adjust_bridge_window(struct pci_dev *bridge, struct resource *res, add_size = size - new_size; pci_dbg(bridge, "bridge window %pR shrunken by %pa\n", res, &add_size); + } else { + return; } res->end = res->start + new_size - 1; - remove_from_list(add_list, res); + + /* If the resource is part of the add_list remove it now */ + if (add_list) + remove_from_list(add_list, res); +} + +static void remove_dev_resource(struct resource *avail, struct pci_dev *dev, + struct resource *res) +{ + resource_size_t size, align, tmp; + + size = resource_size(res); + if (!size) + return; + + align = pci_resource_alignment(dev, res); + align = align ? ALIGN(avail->start, align) - avail->start : 0; + tmp = align + size; + avail->start = min(avail->start + tmp, avail->end + 1); +} + +static void remove_dev_resources(struct pci_dev *dev, struct resource *io, + struct resource *mmio, struct resource *mmio_pref) +{ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (resource_type(res) == IORESOURCE_IO) { + remove_dev_resource(io, dev, res); + } else if (resource_type(res) == IORESOURCE_MEM) { + /* + * Make sure prefetchable memory is reduced from + * the correct resource. Specifically we put + * 32-bit prefetchable memory in non-prefetchable + * window if there is a 64-bit prefetchable window. + * + * See comments in __pci_bus_size_bridges() for + * more information. + */ + if ((res->flags & IORESOURCE_PREFETCH) && + ((res->flags & IORESOURCE_MEM_64) == + (mmio_pref->flags & IORESOURCE_MEM_64))) + remove_dev_resource(mmio_pref, dev, res); + else + remove_dev_resource(mmio, dev, res); + } + } } +/* + * io, mmio and mmio_pref contain the total amount of bridge window + * space available. This includes the minimal space needed to cover all + * the existing devices on the bus and the possible extra space that can + * be shared with the bridges.
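(remove_dev_resource() above deducts both the BAR itself and the padding its alignment forces; a worked example with made-up numbers:

/* avail = [0x1100, 0x1fff], BAR size 0x200, BAR alignment 0x400:
 *   pad          = ALIGN(0x1100, 0x400) - 0x1100 = 0x300
 *   tmp          = pad + size = 0x300 + 0x200 = 0x500
 *   avail->start = min(0x1100 + 0x500, 0x1fff + 1) = 0x1600
 * i.e. the window shrinks by the alignment hole plus the BAR. */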
+ */ static void pci_bus_distribute_available_resources(struct pci_bus *bus, struct list_head *add_list, struct resource io, @@ -1893,7 +1836,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, unsigned int normal_bridges = 0, hotplug_bridges = 0; struct resource *io_res, *mmio_res, *mmio_pref_res; struct pci_dev *dev, *bridge = bus->self; - resource_size_t io_per_hp, mmio_per_hp, mmio_pref_per_hp, align; + resource_size_t io_per_b, mmio_per_b, mmio_pref_per_b, align; io_res = &bridge->resource[PCI_BRIDGE_IO_WINDOW]; mmio_res = &bridge->resource[PCI_BRIDGE_MEM_WINDOW]; @@ -1937,105 +1880,99 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus, normal_bridges++; } + if (!(hotplug_bridges + normal_bridges)) + return; + /* - * There is only one bridge on the bus so it gets all available - * resources which it can then distribute to the possible hotplug - * bridges below. + * Calculate the amount of space we can forward from "bus" to + * any downstream buses, i.e., the space left over after + * assigning the BARs and windows on "bus". */ - if (hotplug_bridges + normal_bridges == 1) { - dev = list_first_entry(&bus->devices, struct pci_dev, bus_list); - if (dev->subordinate) - pci_bus_distribute_available_resources(dev->subordinate, - add_list, io, mmio, mmio_pref); - return; + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!dev->is_virtfn) + remove_dev_resources(dev, &io, &mmio, &mmio_pref); } - if (hotplug_bridges == 0) - return; - /* - * Calculate the total amount of extra resource space we can - * pass to bridges below this one. This is basically the - * extra space reduced by the minimal required space for the - * non-hotplug bridges. + * If there is at least one hotplug bridge on this bus it gets + * all the extra resource space that was left after the + * reductions above. + * + * If there are no hotplug bridges the extra resource space is + * split between non-hotplug bridges. This is to allow possible + * hotplug bridges below them to get the extra space as well. */ + if (hotplug_bridges) { + io_per_b = div64_ul(resource_size(&io), hotplug_bridges); + mmio_per_b = div64_ul(resource_size(&mmio), hotplug_bridges); + mmio_pref_per_b = div64_ul(resource_size(&mmio_pref), + hotplug_bridges); + } else { + io_per_b = div64_ul(resource_size(&io), normal_bridges); + mmio_per_b = div64_ul(resource_size(&mmio), normal_bridges); + mmio_pref_per_b = div64_ul(resource_size(&mmio_pref), + normal_bridges); + } + for_each_pci_bridge(dev, bus) { - resource_size_t used_size; struct resource *res; + struct pci_bus *b; - if (dev->is_hotplug_bridge) + b = dev->subordinate; + if (!b) + continue; + if (hotplug_bridges && !dev->is_hotplug_bridge) continue; + res = &dev->resource[PCI_BRIDGE_IO_WINDOW]; /* - * Reduce the available resource space by what the - * bridge and devices below it occupy. + * Make sure the split resource space is properly + * aligned for bridge windows (align it down to avoid + * going above what is available). */ - res = &dev->resource[PCI_BRIDGE_IO_WINDOW]; align = pci_resource_alignment(dev, res); - align = align ? ALIGN(io.start, align) - io.start : 0; - used_size = align + resource_size(res); - if (!res->parent) - io.start = min(io.start + used_size, io.end + 1); + io.end = align ? 
io.start + ALIGN_DOWN(io_per_b, align) - 1 + : io.start + io_per_b - 1; + /* + * The x_per_b holds the extra resource space that can + * be added for each bridge, but the minimal space is + * already reserved as well, so adjust x.start down + * accordingly to cover the whole space. + */ + io.start -= resource_size(res); res = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; align = pci_resource_alignment(dev, res); - align = align ? ALIGN(mmio.start, align) - mmio.start : 0; - used_size = align + resource_size(res); - if (!res->parent) - mmio.start = min(mmio.start + used_size, mmio.end + 1); + mmio.end = align ? mmio.start + ALIGN_DOWN(mmio_per_b, align) - 1 + : mmio.start + mmio_per_b - 1; + mmio.start -= resource_size(res); res = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; align = pci_resource_alignment(dev, res); - align = align ? ALIGN(mmio_pref.start, align) - - mmio_pref.start : 0; - used_size = align + resource_size(res); - if (!res->parent) - mmio_pref.start = min(mmio_pref.start + used_size, - mmio_pref.end + 1); - } - - io_per_hp = div64_ul(resource_size(&io), hotplug_bridges); - mmio_per_hp = div64_ul(resource_size(&mmio), hotplug_bridges); - mmio_pref_per_hp = div64_ul(resource_size(&mmio_pref), - hotplug_bridges); - - /* - * Go over devices on this bus and distribute the remaining - * resource space between hotplug bridges. - */ - for_each_pci_bridge(dev, bus) { - struct pci_bus *b; - - b = dev->subordinate; - if (!b || !dev->is_hotplug_bridge) - continue; - - /* - * Distribute available extra resources equally between - * hotplug-capable downstream ports taking alignment into - * account. - */ - io.end = io.start + io_per_hp - 1; - mmio.end = mmio.start + mmio_per_hp - 1; - mmio_pref.end = mmio_pref.start + mmio_pref_per_hp - 1; + mmio_pref.end = align ? mmio_pref.start + + ALIGN_DOWN(mmio_pref_per_b, align) - 1 + : mmio_pref.start + mmio_pref_per_b - 1; + mmio_pref.start -= resource_size(res); pci_bus_distribute_available_resources(b, add_list, io, mmio, mmio_pref); - io.start += io_per_hp; - mmio.start += mmio_per_hp; - mmio_pref.start += mmio_pref_per_hp; + io.start = io.end + 1; + mmio.start = mmio.end + 1; + mmio_pref.start = mmio_pref.end + 1; } } static void pci_bridge_distribute_available_resources(struct pci_dev *bridge, - struct list_head *add_list) + struct list_head *add_list) { struct resource available_io, available_mmio, available_mmio_pref; if (!bridge->is_hotplug_bridge) return; + pci_dbg(bridge, "distributing available resources\n"); + /* Take the initial extra resources from the hotplug port */ available_io = bridge->resource[PCI_BRIDGE_IO_WINDOW]; available_mmio = bridge->resource[PCI_BRIDGE_MEM_WINDOW]; @@ -2047,6 +1984,169 @@ static void pci_bridge_distribute_available_resources(struct pci_dev *bridge, available_mmio_pref); } +static bool pci_bridge_resources_not_assigned(struct pci_dev *dev) +{ + const struct resource *r; + + /* + * Check the child device's resources and if they are not yet assigned + * it means we are configuring them (not the boot firmware) so we + * should be able to extend the upstream bridge resources in the same + * way we do with the normal hotplug case.
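To make the even split above concrete, a worked example (numbers invented): three hotplug bridges under one port sharing 32 MB of spare MMIO, with 1 MB bridge-window alignment:

resource_size_t spare = SZ_32M;				/* 33554432 bytes */
resource_size_t mmio_per_b = div64_ul(spare, 3);	/* 11184810 bytes */
resource_size_t share = ALIGN_DOWN(mmio_per_b, SZ_1M);	/* 10 MB per bridge */
/* 3 x 10 MB are handed down; the ~2 MB remainder stays in the parent
 * window, which is why the aligned-down chunks can never overrun it. */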
+ */ + r = &dev->resource[PCI_BRIDGE_IO_WINDOW]; + if (r->flags && !(r->flags & IORESOURCE_STARTALIGN)) + return false; + r = &dev->resource[PCI_BRIDGE_MEM_WINDOW]; + if (r->flags && !(r->flags & IORESOURCE_STARTALIGN)) + return false; + r = &dev->resource[PCI_BRIDGE_PREF_MEM_WINDOW]; + if (r->flags && !(r->flags & IORESOURCE_STARTALIGN)) + return false; + + return true; +} + +static void +pci_root_bus_distribute_available_resources(struct pci_bus *bus, + struct list_head *add_list) +{ + struct pci_dev *dev, *bridge = bus->self; + + for_each_pci_bridge(dev, bus) { + struct pci_bus *b; + + b = dev->subordinate; + if (!b) + continue; + + /* + * Need to check "bridge" here too because it is NULL + * in case of root bus. + */ + if (bridge && pci_bridge_resources_not_assigned(dev)) + pci_bridge_distribute_available_resources(bridge, + add_list); + else + pci_root_bus_distribute_available_resources(b, add_list); + } +} + +/* + * First try will not touch PCI bridge res. + * Second and later try will clear small leaf bridge res. + * Will stop till to the max depth if can not find good one. + */ +void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) +{ + LIST_HEAD(realloc_head); + /* List of resources that want additional resources */ + struct list_head *add_list = NULL; + int tried_times = 0; + enum release_type rel_type = leaf_only; + LIST_HEAD(fail_head); + struct pci_dev_resource *fail_res; + int pci_try_num = 1; + enum enable_type enable_local; + + /* Don't realloc if asked to do so */ + enable_local = pci_realloc_detect(bus, pci_realloc_enable); + if (pci_realloc_enabled(enable_local)) { + int max_depth = pci_bus_get_depth(bus); + + pci_try_num = max_depth + 1; + dev_info(&bus->dev, "max bus depth: %d pci_try_num: %d\n", + max_depth, pci_try_num); + } + +again: + /* + * Last try will use add_list, otherwise will try good to have as must + * have, so can realloc parent bridge resource + */ + if (tried_times + 1 == pci_try_num) + add_list = &realloc_head; + /* + * Depth first, calculate sizes and alignments of all subordinate buses. + */ + __pci_bus_size_bridges(bus, add_list); + + pci_root_bus_distribute_available_resources(bus, add_list); + + /* Depth last, allocate resources and update the hardware. */ + __pci_bus_assign_resources(bus, add_list, &fail_head); + if (add_list) + BUG_ON(!list_empty(add_list)); + tried_times++; + + /* Any device complain? */ + if (list_empty(&fail_head)) + goto dump; + + if (tried_times >= pci_try_num) { + if (enable_local == undefined) + dev_info(&bus->dev, "Some PCI device resources are unassigned, try booting with pci=realloc\n"); + else if (enable_local == auto_enabled) + dev_info(&bus->dev, "Automatically enabled pci realloc, if you have problem, try booting with pci=realloc=off\n"); + + free_list(&fail_head); + goto dump; + } + + dev_info(&bus->dev, "No. %d try to assign unassigned res\n", + tried_times + 1); + + /* Third times and later will not check if it is leaf */ + if ((tried_times + 1) > 2) + rel_type = whole_subtree; + + /* + * Try to release leaf bridge's resources that doesn't fit resource of + * child device under that bridge. 
+ */ + list_for_each_entry(fail_res, &fail_head, list) + pci_bus_release_bridge_resources(fail_res->dev->bus, + fail_res->flags & PCI_RES_TYPE_MASK, + rel_type); + + /* Restore size and flags */ + list_for_each_entry(fail_res, &fail_head, list) { + struct resource *res = fail_res->res; + int idx; + + res->start = fail_res->start; + res->end = fail_res->end; + res->flags = fail_res->flags; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } + } + free_list(&fail_head); + + goto again; + +dump: + /* Dump the resource on buses */ + pci_bus_dump_resources(bus); +} + +void __init pci_assign_unassigned_resources(void) +{ + struct pci_bus *root_bus; + + list_for_each_entry(root_bus, &pci_root_buses, node) { + pci_assign_unassigned_root_bus_resources(root_bus); + + /* Make sure the root bridge has a companion ACPI device */ + if (ACPI_HANDLE(root_bus->bridge)) + acpi_ioapic_add(ACPI_HANDLE(root_bus->bridge)); + } +} + void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) { struct pci_bus *parent = bridge->subordinate; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index e20bcc835d6a84f627084f3e61410910f0509b8c..4251f5c3a10f49647c47f67b82a1c73c0204ab03 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -984,6 +984,7 @@ static const struct acpi_device_id amd_gpio_acpi_match[] = { { "AMD0030", 0 }, { "AMDI0030", 0}, { "AMDI0031", 0}, + { "HYGO0030", 0}, { }, }; MODULE_DEVICE_TABLE(acpi, amd_gpio_acpi_match); diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 06701bd1462d7adf6457c6246d8b5709a51f179b..17deeb7e0ce5452715cf95f026a9a74da7c65db8 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -193,7 +193,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) } tl_cmd->sc = sc; - tl_cmd->sc_cmd_tag = sc->request->tag; + tl_cmd->sc_cmd_tag = blk_mq_unique_tag(sc->request); INIT_WORK(&tl_cmd->work, tcm_loop_submission_work); queue_work(tcm_loop_workqueue, &tl_cmd->work); return 0; @@ -259,7 +259,7 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc) tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, - sc->request->tag, TMR_ABORT_TASK); + blk_mq_unique_tag(sc->request), TMR_ABORT_TASK); return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 86c381ceb9a1e95e95643dcce8a20fc74c5b1559..0c1429eb7535c436fadfe9f55dc6ccce92489213 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -2,7 +2,7 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API - select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64 || SW64) select INTERVAL_TREE help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b68e8531bc6463ed0b3a2ead9bcad9c6c67d51c8..4fb8b3e255d832dd08bd1b7ce4c9c7d257076205 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -177,6 +177,14 @@ struct vring_virtqueue { /* Host publishes avail event idx */ bool event; + /* Do DMA mapping by driver */ + bool premapped; + + /* Do unmap or not for desc. 
Just when premapped is False and + * use_dma_api is true, this is true. + */ + bool do_unmap; + /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -378,21 +386,32 @@ static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) } /* Map one sg entry. */ -static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, - struct scatterlist *sg, - enum dma_data_direction direction) +static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, + enum dma_data_direction direction, dma_addr_t *addr) { - if (!vq->use_dma_api) - return (dma_addr_t)sg_phys(sg); + if (vq->premapped) { + *addr = sg_dma_address(sg); + return 0; + } + + if (!vq->use_dma_api) { + *addr = (dma_addr_t)sg_phys(sg); + return 0; + } /* * We can't use dma_map_sg, because we don't use scatterlists in * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ - return dma_map_page(vring_dma_dev(vq), + *addr = dma_map_page(vring_dma_dev(vq), sg_page(sg), sg->offset, sg->length, direction); + + if (dma_mapping_error(vring_dma_dev(vq), *addr)) + return -ENOMEM; + + return 0; } static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, @@ -439,7 +458,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) + if (!vq->do_unmap) return; flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); @@ -457,18 +476,21 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; - if (!vq->use_dma_api) - goto out; - flags = extra[i].flags; if (flags & VRING_DESC_F_INDIRECT) { + if (!vq->use_dma_api) + goto out; + dma_unmap_single(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!vq->do_unmap) + goto out; + dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, @@ -600,8 +622,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) goto unmap_release; prev = i; @@ -615,8 +638,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) goto unmap_release; prev = i; @@ -632,7 +656,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, } /* Last one doesn't continue. 
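The reason the unmap helpers test two different flags is that ownership differs by descriptor type: indirect descriptor tables are always allocated and mapped by the virtio core itself, so they follow use_dma_api, while data buffers belong to the driver when the ring is premapped, which is what do_unmap (use_dma_api && !premapped) encodes. Summarized:

/*                          data buffer desc     indirect table desc
 * !use_dma_api             nothing to unmap     nothing to unmap
 * use_dma_api, !premapped  dma_unmap_page()     dma_unmap_single()
 * use_dma_api,  premapped  driver unmaps it     dma_unmap_single()
 */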
*/ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); - if (!indirect && vq->use_dma_api) + if (!indirect && vq->do_unmap) vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; @@ -641,8 +665,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, dma_addr_t addr = vring_map_single( vq, desc, total_sg * sizeof(struct vring_desc), DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_indirect; + goto unmap_release; + } virtqueue_add_desc_split(_vq, vq->split.vring.desc, head, addr, @@ -708,6 +736,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, i = vring_unmap_one_split(vq, i); } +free_indirect: if (indirect) kfree(desc); @@ -786,8 +815,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split_indirect(vq, &indir_desc[j]); + if (vq->do_unmap) { + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one_split_indirect(vq, &indir_desc[j]); + } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; @@ -1181,17 +1212,20 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) - return; - flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { + if (!vq->use_dma_api) + return; + dma_unmap_single(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!vq->do_unmap) + return; + dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? @@ -1204,7 +1238,7 @@ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, { u16 flags; - if (!vq->use_dma_api) + if (!vq->do_unmap) return; flags = le16_to_cpu(desc->flags); @@ -1265,9 +1299,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; desc[i].flags = cpu_to_le16(n < out_sgs ? @@ -1282,15 +1315,19 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, addr = vring_map_single(vq, desc, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_desc; + goto unmap_release; + } vq->packed.vring.desc[head].addr = cpu_to_le64(addr); vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); - if (vq->use_dma_api) { + if (vq->do_unmap) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); @@ -1341,6 +1378,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, for (i = 0; i < err_idx; i++) vring_unmap_desc_packed(vq, &desc[i]); +free_desc: kfree(desc); END_USE(vq); @@ -1412,9 +1450,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; flags = cpu_to_le16(vq->packed.avail_used_flags | @@ -1429,7 +1468,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->do_unmap)) { vq->packed.desc_extra[curr].addr = addr; vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags = @@ -1563,7 +1602,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->do_unmap)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, @@ -1580,7 +1619,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->do_unmap) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) @@ -2009,6 +2048,8 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->broken = false; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); + vq->premapped = false; + vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2069,6 +2110,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) return -ENOMEM; } +static int virtqueue_disable_and_recycle(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + void *buf; + int err; + + if (!vq->we_own_ring) + return -EPERM; + + if (!vdev->config->disable_vq_and_reset) + return -ENOENT; + + if (!vdev->config->enable_vq_after_reset) + return -ENOENT; + + err = vdev->config->disable_vq_and_reset(_vq); + if (err) + return err; + + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) + recycle(_vq, buf); + + return 0; +} + +static int virtqueue_enable_after_reset(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + + if (vdev->config->enable_vq_after_reset(_vq)) + return -EBUSY; + + return 0; +} /* * Generic functions and exported symbols. @@ -2194,6 +2272,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); +/** + * virtqueue_dma_dev - get the dma dev + * @_vq: the struct virtqueue we're talking about. + * + * Returns the dma dev. That can be used for the dma api. + */ +struct device *virtqueue_dma_dev(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->use_dma_api) + return vring_dma_dev(vq); + else + return NULL; +} +EXPORT_SYMBOL_GPL(virtqueue_dma_dev); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @_vq: the struct virtqueue @@ -2467,6 +2562,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->weak_barriers = weak_barriers; vq->broken = false; vq->use_dma_api = vring_use_dma_api(vdev); + vq->premapped = false; + vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2520,7 +2617,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about.
* @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer @@ -2544,13 +2641,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); - struct virtio_device *vdev = vq->vq.vdev; - void *buf; int err; - if (!vq->we_own_ring) - return -EPERM; - if (num > vq->vq.num_max) return -E2BIG; @@ -2560,31 +2652,101 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) return 0; - if (!vdev->config->disable_vq_and_reset) - return -ENOENT; - - if (!vdev->config->enable_vq_after_reset) - return -ENOENT; - - err = vdev->config->disable_vq_and_reset(_vq); + err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; - while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) - recycle(_vq, buf); - if (vq->packed_ring) err = virtqueue_resize_packed(_vq, num); else err = virtqueue_resize_split(_vq, num); - if (vdev->config->enable_vq_after_reset(_vq)) - return -EBUSY; - - return err; + return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_resize); +/** + * virtqueue_set_dma_premapped - set the vring premapped mode + * @_vq: the struct virtqueue we're talking about. + * + * Enable the premapped mode of the vq. + * + * The vring in premapped mode does not do dma internally, so the driver must + * do dma mapping in advance. The driver must pass the dma_address through + * dma_address of scatterlist. When the driver got a used buffer from + * the vring, it has to unmap the dma address. + * + * This function must be called immediately after creating the vq, or after vq + * reset, and before adding any buffers to it. + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -EINVAL: vring does not use the dma api, so we can not enable premapped mode. + */ +int virtqueue_set_dma_premapped(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u32 num; + + START_USE(vq); + + num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; + + if (num != vq->vq.num_free) { + END_USE(vq); + return -EINVAL; + } + + if (!vq->use_dma_api) { + END_USE(vq); + return -EINVAL; + } + + vq->premapped = true; + vq->do_unmap = false; + + END_USE(vq); + + return 0; +} +EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped); + +/** + * virtqueue_reset - detach and recycle all unused buffers + * @_vq: the struct virtqueue we're talking about. + * @recycle: callback to recycle unused buffers + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. 
+ * -EBUSY: Failed to sync with device, vq may not work properly + * -ENOENT: Transport or device not supported + * -EPERM: Operation not permitted + */ +int virtqueue_reset(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + int err; + + err = virtqueue_disable_and_recycle(_vq, recycle); + if (err) + return err; + + if (vq->packed_ring) + virtqueue_reinit_packed(vq); + else + virtqueue_reinit_split(vq); + + return virtqueue_enable_after_reset(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_reset); + /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, @@ -2776,4 +2938,149 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_get_vring); +/** + * virtqueue_dma_map_single_attrs - map DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @ptr: the pointer of the buffer to do dma + * @size: the size of the buffer to do dma + * @dir: DMA direction + * @attrs: DMA Attrs + * + * The caller calls this to do dma mapping in advance. The DMA address can be + * passed to this _vq when it is in pre-mapped mode. + * + * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). + */ +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return (dma_addr_t)virt_to_phys(ptr); + + return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); + +/** + * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @addr: the dma address to unmap + * @size: the size of the buffer + * @dir: DMA direction + * @attrs: DMA Attrs + * + * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. + * + */ +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return; + + dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); + +/** + * virtqueue_dma_mapping_error - check dma address + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * + * Returns 0 means dma valid. Other means invalid dma address. + */ +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return 0; + + return dma_mapping_error(vring_dma_dev(vq), addr); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); + +/** + * virtqueue_dma_need_sync - check a dma address needs sync + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * + * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be + * synchronized + * + * return bool + */ +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return false; + + return dma_need_sync(vring_dma_dev(vq), addr); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); + +/** + * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu + * @_vq: the struct virtqueue we're talking about. 
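Taken together, virtqueue_set_dma_premapped() plus the virtqueue_dma_* helpers hand the driver the whole mapping lifetime. A plausible transmit path, sketched under the assumption that the vq was already switched to premapped mode and that buf/len are the driver's buffer (neither is taken from this patch):

struct scatterlist sg;
dma_addr_t addr;
int err;

addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
if (virtqueue_dma_mapping_error(vq, addr))
	return -ENOMEM;

sg_init_one(&sg, buf, len);	/* fills page/offset/length */
sg.dma_address = addr;		/* a premapped vq consumes this, not the page */

err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);

/* ... and once virtqueue_get_buf() hands buf back, the driver unmaps: */
virtqueue_dma_unmap_single_attrs(vq, addr, len, DMA_TO_DEVICE, 0);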
+ * @addr: DMA address + * @offset: DMA address offset + * @size: buf size for sync + * @dir: DMA direction + * + * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * the DMA address really needs to be synchronized + * + */ +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct device *dev = vring_dma_dev(vq); + + if (!vq->use_dma_api) + return; + + dma_sync_single_range_for_cpu(dev, addr, offset, size, + dir); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); + +/** + * virtqueue_dma_sync_single_range_for_device - dma sync for device + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * @offset: DMA address offset + * @size: buf size for sync + * @dir: DMA direction + * + * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * the DMA address really needs to be synchronized + */ +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct device *dev = vring_dma_dev(vq); + + if (!vq->use_dma_api) + return; + + dma_sync_single_range_for_device(dev, addr, offset, size, + dir); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); + MODULE_LICENSE("GPL"); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c6fab956a20519c46910d2e55196dcc54ada8b8b..8104d01834640e3a7472c3374ef68393c3f5a6ab 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -73,6 +73,8 @@ static int load_elf_library(struct file *); #define load_elf_library NULL #endif +extern bool enable_brk_thp_aligned; + /* * If we don't support core dumping, then supply a NULL so we * don't even try. @@ -1377,6 +1379,9 @@ static int load_elf_binary(struct linux_binprm *bprm) #endif } + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && enable_brk_thp_aligned) + mm->brk = mm->start_brk = ALIGN(mm->start_brk, HPAGE_SIZE); + if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, and some applications "depend" upon this behavior. diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 69f4db05191a3218bebfc65574e7e6d784eb1110..c29c8e7f95d362547a6a24a742799ef260bbba5b 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -534,7 +534,7 @@ static int load_flat_file(struct linux_binprm *bprm, * 28 bits (256 MB) is way more than reasonable in this case. * If some top bits are set we have probable binary corruption.
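Aligning start_brk up to HPAGE_SIZE means the very first heap pages can be backed by a huge page instead of straddling a huge-page boundary. Worked numbers for x86-64, where HPAGE_SIZE is 2 MB (the addresses are purely illustrative):

/* Unaligned: start_brk = 0x55d2c13f6000. The first 2 MB-aligned address
 * inside the heap is 0x55d2c1400000, so the 40 kB in front of it can
 * never be part of a THP. With the knob enabled:
 *   ALIGN(0x55d2c13f6000, 0x200000) = 0x55d2c1400000
 * and brk == start_brk sit on a huge-page boundary from the start. */
unsigned long start_brk = ALIGN(0x55d2c13f6000UL, HPAGE_SIZE);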
*/ - if ((text_len | data_len | bss_len | stack_len | full_data) >> 28) { + if ((text_len | data_len | bss_len | stack_len | relocs | full_data) >> 28) { pr_err("bad header\n"); ret = -ENOEXEC; goto err; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b2a7238a342219bfab534fa7218cc60b3c57d4ed..616a7770f703d0f95557338d4b3f5437c6489cfe 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4932,6 +4932,10 @@ receive_encrypted_standard(struct TCP_Server_Info *server, next_buffer = (char *)cifs_buf_get(); else next_buffer = (char *)cifs_small_buf_get(); + if (!next_buffer) { + cifs_server_dbg(VFS, "No memory for (large) SMB response\n"); + return -1; + } memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd); } diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 5123e786d56099bb5b7978d5c249ec67d19396ad..ac69e6d40fd9b2b5345d84b98e4739dc30462548 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -123,38 +123,28 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, int erofs_map_blocks_flatmode(struct inode *inode, struct erofs_map_blocks *map) { - erofs_blk_t nblocks, lastblk; - u64 offset = map->m_la; struct erofs_inode *vi = EROFS_I(inode); struct super_block *sb = inode->i_sb; bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); + erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking; - nblocks = erofs_iblks(inode); - lastblk = nblocks - tailendpacking; - - /* there is no hole in flatmode */ - map->m_flags = EROFS_MAP_MAPPED; - if (offset < erofs_pos(sb, lastblk)) { + map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */ + if (map->m_la < erofs_pos(sb, lastblk)) { map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; - map->m_plen = erofs_pos(sb, lastblk) - offset; - } else if (tailendpacking) { + map->m_plen = erofs_pos(sb, lastblk) - map->m_la; + } else { + DBG_BUGON(!tailendpacking); map->m_pa = erofs_iloc(inode) + vi->inode_isize + - vi->xattr_isize + erofs_blkoff(sb, offset); - map->m_plen = inode->i_size - offset; + vi->xattr_isize + erofs_blkoff(sb, map->m_la); + map->m_plen = inode->i_size - map->m_la; /* inline data should be located in the same meta block */ if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { - erofs_err(sb, "inline data cross block boundary @ nid %llu", - vi->nid); + erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } map->m_flags |= EROFS_MAP_META; - } else { - erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx", - vi->nid, inode->i_size, map->m_la); - DBG_BUGON(1); - return -EIO; } return 0; } diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index f20048951e5cb7e9dae808d1100fd0894dd5c358..60b4c18942d29f277370ebfe29fd0dba96023f80 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -108,14 +108,14 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) } /* bit definitions of inode i_format */ -#define EROFS_I_VERSION_BITS 1 -#define EROFS_I_DATALAYOUT_BITS 3 +#define EROFS_I_VERSION_MASK 0x01 +#define EROFS_I_DATALAYOUT_MASK 0x07 #define EROFS_I_VERSION_BIT 0 #define EROFS_I_DATALAYOUT_BIT 1 +#define EROFS_I_ALL_BIT 4 -#define EROFS_I_ALL \ - ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) +#define EROFS_I_ALL ((1 << EROFS_I_ALL_BIT) - 1) /* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ #define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 
f878fd39b13fc8b13e759b156c92fbbe05c6625a..ebe954ad379091f103abfc828bc805407a91fbbe 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -84,7 +84,6 @@ static int erofs_fscache_meta_readpage(struct file *data, struct page *page) GFP_KERNEL); switch (ret) { case 0: /* page found in fscache, read submitted */ - erofs_dbg("%s: submitted", __func__); return ret; case -ENOBUFS: /* page won't be cached */ case -ENODATA: /* page not in cache */ @@ -186,7 +185,6 @@ static int erofs_fscache_readpage(struct file *file, struct page *page) GFP_KERNEL, pstart); switch (ret) { case 0: /* page found in fscache, read submitted */ - erofs_dbg("%s: submitted", __func__); return ret; case -ENOBUFS: /* page won't be cached */ case -ENODATA: /* page not in cache */ diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index f540caf0a24ea67ba8d0f4f5ecb945834c4e180a..8df3356710a4a6fa2a3ac548870484a15cdc02f6 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -8,39 +8,52 @@ #include #include -static void *erofs_read_inode(struct erofs_buf *buf, - struct inode *inode, unsigned int *ofs) +static int erofs_fill_symlink(struct inode *inode, void *kaddr, + unsigned int m_pofs) +{ + struct erofs_inode *vi = EROFS_I(inode); + loff_t off; + + m_pofs += vi->xattr_isize; + /* check if it cannot be handled with fast symlink scheme */ + if (vi->datalayout != EROFS_INODE_FLAT_INLINE || inode->i_size < 0 || + check_add_overflow((loff_t)m_pofs, inode->i_size, &off) || + off > i_blocksize(inode)) + return 0; + + inode->i_link = kmemdup_nul(kaddr + m_pofs, inode->i_size, GFP_KERNEL); + return inode->i_link ? 0 : -ENOMEM; +} + +static int erofs_read_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_inode *vi = EROFS_I(inode); const erofs_off_t inode_loc = erofs_iloc(inode); erofs_blk_t blkaddr, nblks = 0; void *kaddr; struct erofs_inode_compact *dic; struct erofs_inode_extended *die, *copied = NULL; - unsigned int ifmt; - int err; + union erofs_inode_i_u iu; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + unsigned int ifmt, ofs; + int err = 0; blkaddr = erofs_blknr(sb, inode_loc); - *ofs = erofs_blkoff(sb, inode_loc); - - erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", - __func__, vi->nid, *ofs, blkaddr); + ofs = erofs_blkoff(sb, inode_loc); - kaddr = erofs_read_metabuf(buf, sb, blkaddr, EROFS_KMAP); + kaddr = erofs_read_metabuf(&buf, sb, blkaddr, EROFS_KMAP); if (IS_ERR(kaddr)) { erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", vi->nid, PTR_ERR(kaddr)); - return kaddr; + return PTR_ERR(kaddr); } - dic = kaddr + *ofs; + dic = kaddr + ofs; ifmt = le16_to_cpu(dic->i_format); - if (ifmt & ~EROFS_I_ALL) { - erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu", + erofs_err(sb, "unsupported i_format %u of nid %llu", ifmt, vi->nid); err = -EOPNOTSUPP; goto err_out; @@ -48,7 +61,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, vi->datalayout = erofs_inode_datalayout(ifmt); if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { - erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu", + erofs_err(sb, "unsupported datalayout %u of nid %llu", vi->datalayout, vi->nid); err = -EOPNOTSUPP; goto err_out; @@ -58,11 +71,11 @@ static void *erofs_read_inode(struct erofs_buf *buf, case EROFS_INODE_LAYOUT_EXTENDED: vi->inode_isize = sizeof(struct erofs_inode_extended); /* check if the extended inode acrosses block boundary */ - if (*ofs + vi->inode_isize <= sb->s_blocksize) { - *ofs += vi->inode_isize; + if (ofs +
vi->inode_isize <= sb->s_blocksize) { + ofs += vi->inode_isize; die = (struct erofs_inode_extended *)dic; } else { - const unsigned int gotten = sb->s_blocksize - *ofs; + const unsigned int gotten = sb->s_blocksize - ofs; copied = kmalloc(vi->inode_isize, GFP_KERNEL); if (!copied) { @@ -70,108 +83,90 @@ static void *erofs_read_inode(struct erofs_buf *buf, goto err_out; } memcpy(copied, dic, gotten); - kaddr = erofs_read_metabuf(buf, sb, blkaddr + 1, + kaddr = erofs_read_metabuf(&buf, sb, blkaddr + 1, EROFS_KMAP); if (IS_ERR(kaddr)) { erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld", vi->nid, PTR_ERR(kaddr)); kfree(copied); - return kaddr; + return PTR_ERR(kaddr); } - *ofs = vi->inode_isize - gotten; - memcpy((u8 *)copied + gotten, kaddr, *ofs); + ofs = vi->inode_isize - gotten; + memcpy((u8 *)copied + gotten, kaddr, ofs); die = copied; } vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); inode->i_mode = le16_to_cpu(die->i_mode); - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr); - break; - case S_IFCHR: - case S_IFBLK: - inode->i_rdev = - new_decode_dev(le32_to_cpu(die->i_u.rdev)); - break; - case S_IFIFO: - case S_IFSOCK: - inode->i_rdev = 0; - break; - default: - goto bogusimode; - } + iu = die->i_u; i_uid_write(inode, le32_to_cpu(die->i_uid)); i_gid_write(inode, le32_to_cpu(die->i_gid)); set_nlink(inode, le32_to_cpu(die->i_nlink)); - /* extended inode has its own timestamp */ + /* each extended inode has its own timestamp */ inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime); inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec); inode->i_size = le64_to_cpu(die->i_size); - - /* total blocks for compressed files */ - if (erofs_inode_is_data_compressed(vi->datalayout)) - nblks = le32_to_cpu(die->i_u.compressed_blocks); - else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) - /* fill chunked inode summary info */ - vi->chunkformat = le16_to_cpu(die->i_u.c.format); kfree(copied); - copied = NULL; break; case EROFS_INODE_LAYOUT_COMPACT: vi->inode_isize = sizeof(struct erofs_inode_compact); - *ofs += vi->inode_isize; + ofs += vi->inode_isize; vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); inode->i_mode = le16_to_cpu(dic->i_mode); - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr); - break; - case S_IFCHR: - case S_IFBLK: - inode->i_rdev = - new_decode_dev(le32_to_cpu(dic->i_u.rdev)); - break; - case S_IFIFO: - case S_IFSOCK: - inode->i_rdev = 0; - break; - default: - goto bogusimode; - } + iu = dic->i_u; i_uid_write(inode, le16_to_cpu(dic->i_uid)); i_gid_write(inode, le16_to_cpu(dic->i_gid)); set_nlink(inode, le16_to_cpu(dic->i_nlink)); - /* use build time for compact inodes */ inode->i_ctime.tv_sec = sbi->build_time; inode->i_ctime.tv_nsec = sbi->build_time_nsec; inode->i_size = le32_to_cpu(dic->i_size); - if (erofs_inode_is_data_compressed(vi->datalayout)) - nblks = le32_to_cpu(dic->i_u.compressed_blocks); - else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) - vi->chunkformat = le16_to_cpu(dic->i_u.c.format); break; default: - erofs_err(inode->i_sb, - "unsupported on-disk inode version %u of nid %llu", + erofs_err(sb, "unsupported on-disk inode version %u of nid %llu", erofs_inode_version(ifmt), vi->nid); err = -EOPNOTSUPP; goto err_out; } - if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case 
S_IFLNK: + vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr); + if (S_ISLNK(inode->i_mode)) { + err = erofs_fill_symlink(inode, kaddr, ofs); + if (err) + goto err_out; + } + break; + case S_IFCHR: + case S_IFBLK: + inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev)); + break; + case S_IFIFO: + case S_IFSOCK: + inode->i_rdev = 0; + break; + default: + erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode, + vi->nid); + err = -EFSCORRUPTED; + goto err_out; + } + + /* total blocks for compressed files */ + if (erofs_inode_is_data_compressed(vi->datalayout)) { + nblks = le32_to_cpu(iu.compressed_blocks); + } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + /* fill chunked inode summary info */ + vi->chunkformat = le16_to_cpu(iu.c.format); if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { - erofs_err(inode->i_sb, - "unsupported chunk format %x of nid %llu", + erofs_err(sb, "unsupported chunk format %x of nid %llu", vi->chunkformat, vi->nid); err = -EOPNOTSUPP; goto err_out; @@ -194,62 +189,24 @@ static void *erofs_read_inode(struct erofs_buf *buf, inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; else inode->i_blocks = nblks << (sb->s_blocksize_bits - 9); - return kaddr; -bogusimode: - erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", - inode->i_mode, vi->nid); - err = -EFSCORRUPTED; err_out: - DBG_BUGON(1); - kfree(copied); - erofs_put_metabuf(buf); - return ERR_PTR(err); -} - -static int erofs_fill_symlink(struct inode *inode, void *kaddr, - unsigned int m_pofs) -{ - struct erofs_inode *vi = EROFS_I(inode); - loff_t off; - char *lnk; - - m_pofs += vi->xattr_isize; - /* check if it cannot be handled with fast symlink scheme */ - if (vi->datalayout != EROFS_INODE_FLAT_INLINE || inode->i_size < 0 || - check_add_overflow((loff_t)m_pofs, inode->i_size, &off) || - off > i_blocksize(inode)) { - inode->i_op = &erofs_symlink_iops; - return 0; - } - - lnk = kmalloc(inode->i_size + 1, GFP_KERNEL); - if (!lnk) - return -ENOMEM; - - memcpy(lnk, kaddr + m_pofs, inode->i_size); - lnk[inode->i_size] = '\0'; - - inode->i_link = lnk; - inode->i_op = &erofs_fast_symlink_iops; - return 0; + DBG_BUGON(err); + erofs_put_metabuf(&buf); + return err; } static int erofs_fill_inode(struct inode *inode) { struct erofs_inode *vi = EROFS_I(inode); - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - struct super_block *sb = inode->i_sb; - void *kaddr; - unsigned int ofs; - int err = 0; + int err; trace_erofs_fill_inode(inode); /* read inode base data from disk */ - kaddr = erofs_read_inode(&buf, inode, &ofs); - if (IS_ERR(kaddr)) - return PTR_ERR(kaddr); + err = erofs_read_inode(inode); + if (err) + return err; /* setup the new inode */ switch (inode->i_mode & S_IFMT) { @@ -257,7 +214,7 @@ static int erofs_fill_inode(struct inode *inode) inode->i_op = &erofs_generic_iops; if (erofs_inode_is_data_compressed(vi->datalayout)) inode->i_fop = &generic_ro_fops; - else if (erofs_is_rafsv6_mode(sb)) + else if (erofs_is_rafsv6_mode(inode->i_sb)) erofs_rafsv6_set_fops(inode); else inode->i_fop = &erofs_file_fops; @@ -267,9 +224,10 @@ static int erofs_fill_inode(struct inode *inode) inode->i_fop = &erofs_dir_fops; break; case S_IFLNK: - err = erofs_fill_symlink(inode, kaddr, ofs); - if (err) - goto out_unlock; + if (inode->i_link) + inode->i_op = &erofs_fast_symlink_iops; + else + inode->i_op = &erofs_symlink_iops; inode_nohighmem(inode); break; case S_IFCHR: @@ -278,26 +236,21 @@ static int erofs_fill_inode(struct inode *inode) case S_IFSOCK: inode->i_op = &erofs_generic_iops; init_special_inode(inode,
inode->i_mode, inode->i_rdev); - goto out_unlock; + return 0; default: - err = -EFSCORRUPTED; - goto out_unlock; + return -EFSCORRUPTED; } if (erofs_inode_is_data_compressed(vi->datalayout)) { + err = -EOPNOTSUPP; #ifdef CONFIG_EROFS_FS_ZIP if (!erofs_is_fscache_mode(inode->i_sb) && inode->i_sb->s_blocksize_bits == PAGE_SHIFT) { inode->i_mapping->a_ops = &z_erofs_aops; err = 0; - goto out_unlock; } #endif - err = -EOPNOTSUPP; - goto out_unlock; - } - - if (erofs_is_rafsv6_mode(sb)) { + } else if (erofs_is_rafsv6_mode(inode->i_sb)) { erofs_rafsv6_set_aops(inode); #ifdef CONFIG_EROFS_FS_ONDEMAND } else if (erofs_is_fscache_mode(inode->i_sb)) { @@ -306,9 +259,6 @@ static int erofs_fill_inode(struct inode *inode) } else { inode->i_mapping->a_ops = &erofs_raw_access_aops; } - -out_unlock: - erofs_put_metabuf(&buf); return err; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 8f9c70aa761448e5710947d8a26b24532d9b6c11..c411e84fe6cf3f8e876a393f58001f5589c9c28f 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -33,10 +33,8 @@ __printf(3, 4) void _erofs_info(struct super_block *sb, #define erofs_info(sb, fmt, ...) \ _erofs_info(sb, __func__, fmt "\n", ##__VA_ARGS__) #ifdef CONFIG_EROFS_FS_DEBUG -#define erofs_dbg(x, ...) pr_debug(x "\n", ##__VA_ARGS__) #define DBG_BUGON BUG_ON #else -#define erofs_dbg(x, ...) ((void)0) #define DBG_BUGON(x) ((void)(x)) #endif /* !CONFIG_EROFS_FS_DEBUG */ @@ -348,24 +346,14 @@ static inline erofs_off_t erofs_iloc(struct inode *inode) (EROFS_I(inode)->nid << sbi->islotbits); } -static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit, - unsigned int bits) +static inline unsigned int erofs_inode_version(unsigned int ifmt) { - - return (value >> bit) & ((1 << bits) - 1); -} - - -static inline unsigned int erofs_inode_version(unsigned int value) -{ - return erofs_bitrange(value, EROFS_I_VERSION_BIT, - EROFS_I_VERSION_BITS); + return (ifmt >> EROFS_I_VERSION_BIT) & EROFS_I_VERSION_MASK; } -static inline unsigned int erofs_inode_datalayout(unsigned int value) +static inline unsigned int erofs_inode_datalayout(unsigned int ifmt) { - return erofs_bitrange(value, EROFS_I_DATALAYOUT_BIT, - EROFS_I_DATALAYOUT_BITS); + return (ifmt >> EROFS_I_DATALAYOUT_BIT) & EROFS_I_DATALAYOUT_MASK; } /* diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 02b06053c38c66324efd7557ea3c06e33945f2d0..fdd6a78b74ec9385f1023d455dc714ad01daed2f 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -203,16 +203,13 @@ static struct dentry *erofs_lookup(struct inode *dir, struct dentry *dentry, err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); - if (err == -ENOENT) { + if (err == -ENOENT) /* negative dentry */ inode = NULL; - } else if (err) { + else if (err) inode = ERR_PTR(err); - } else { - erofs_dbg("%s, %s (nid %llu) found, d_type %u", __func__, - dentry->d_name.name, nid, d_type); + else inode = erofs_iget(dir->i_sb, nid); - } return d_splice_alias(inode, dentry); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 6f9f933057d5d8c40f16b0ea6531ffd1981e2484..0b74f70558b32d65f3bb8247ab9393ae1444c92d 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -23,8 +23,7 @@ static struct kmem_cache *erofs_inode_cachep __read_mostly; -void _erofs_err(struct super_block *sb, const char *function, - const char *fmt, ...) +void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...) 
{ struct va_format vaf; va_list args; @@ -34,12 +33,11 @@ void _erofs_err(struct super_block *sb, const char *function, vaf.fmt = fmt; vaf.va = &args; - pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf); + pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf); va_end(args); } -void _erofs_info(struct super_block *sb, const char *function, - const char *fmt, ...) +void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...) { struct va_format vaf; va_list args; @@ -104,11 +102,9 @@ static void erofs_free_inode(struct inode *inode) { struct erofs_inode *vi = EROFS_I(inode); - /* be careful of RCU symlink path */ if (inode->i_op == &erofs_fast_symlink_iops) kfree(inode->i_link); kfree(vi->xattr_shared_xattrs); - kmem_cache_free(erofs_inode_cachep, vi); } @@ -121,8 +117,7 @@ static bool check_layout_compatibility(struct super_block *sb, /* check if current kernel meets all mandatory requirements */ if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, - "unidentified incompatible feature %x, please upgrade kernel version", + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", feature & ~EROFS_ALL_FEATURE_INCOMPAT); return false; } @@ -380,7 +375,6 @@ static int erofs_read_superblock(struct super_block *sb) return ret; } -/* set up default EROFS parameters */ static void erofs_default_options(struct erofs_fs_context *ctx) { #ifdef CONFIG_EROFS_FS_ZIP @@ -813,7 +807,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) xa_init(&sbi->managed_pslots); #endif - /* get the root inode */ inode = erofs_iget(sb, ROOT_NID(sbi)); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -830,7 +823,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return -ENOMEM; erofs_shrinker_register(sb); - /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */ #ifdef CONFIG_EROFS_FS_ZIP if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) { sbi->packed_inode = erofs_iget(sb, sbi->packed_nid); @@ -975,10 +967,6 @@ static int erofs_init_fs_context(struct fs_context *fc) return 0; } -/* - * could be triggered after deactivate_locked_super() - * is called, thus including umount and failed to initialize. 
- */ static void erofs_kill_sb(struct super_block *sb) { struct erofs_sb_info *sbi; @@ -1016,7 +1004,6 @@ static void erofs_kill_sb(struct super_block *sb) sb->s_fs_info = NULL; } -/* called when ->s_root is non-NULL */ static void erofs_put_super(struct super_block *sb) { struct erofs_sb_info *const sbi = EROFS_SB(sb); @@ -1050,13 +1037,11 @@ static int __init erofs_module_init(void) erofs_check_ondisk_layout_definitions(); erofs_inode_cachep = kmem_cache_create("erofs_inode", - sizeof(struct erofs_inode), 0, - SLAB_RECLAIM_ACCOUNT, - erofs_inode_init_once); - if (!erofs_inode_cachep) { - err = -ENOMEM; - goto icache_err; - } + sizeof(struct erofs_inode), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT, + erofs_inode_init_once); + if (!erofs_inode_cachep) + return -ENOMEM; err = erofs_init_shrinker(); if (err) @@ -1097,7 +1082,6 @@ static int __init erofs_module_init(void) erofs_exit_shrinker(); shrinker_err: kmem_cache_destroy(erofs_inode_cachep); -icache_err: return err; } @@ -1117,7 +1101,6 @@ static void __exit erofs_module_exit(void) erofs_pcpubuf_exit(); } -/* get filesystem statistics */ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 7c09ad1beb839492553bdc3934394134d6001537..9a6035d7cb030c69ae644740fca329b8655ec081 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -7,6 +7,19 @@ #include #include "xattr.h" +static inline erofs_blk_t erofs_xattr_blkaddr(struct super_block *sb, + unsigned int xattr_id) +{ + return EROFS_SB(sb)->xattr_blkaddr + + erofs_blknr(sb, xattr_id * sizeof(__u32)); +} + +static inline unsigned int erofs_xattr_blkoff(struct super_block *sb, + unsigned int xattr_id) +{ + return erofs_blkoff(sb, xattr_id * sizeof(__u32)); +} + struct xattr_iter { struct super_block *sb; struct erofs_buf buf; @@ -16,7 +29,7 @@ struct xattr_iter { unsigned int ofs; }; -static int init_inode_xattrs(struct inode *inode) +static int erofs_init_inode_xattrs(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); struct xattr_iter it; @@ -157,7 +170,8 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, struct erofs_inode *const vi = EROFS_I(inode); unsigned int xattr_header_sz, inline_xattr_ofs; - xattr_header_sz = inlinexattr_header_size(inode); + xattr_header_sz = sizeof(struct erofs_xattr_ibody_header) + + sizeof(u32) * vi->xattr_shared_count; if (xattr_header_sz >= vi->xattr_isize) { DBG_BUGON(xattr_header_sz > vi->xattr_isize); return -ENOATTR; @@ -351,20 +365,18 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) { struct erofs_inode *const vi = EROFS_I(inode); - struct super_block *const sb = inode->i_sb; - unsigned int i; + struct super_block *const sb = it->it.sb; + unsigned int i, xsid; int ret = -ENOATTR; for (i = 0; i < vi->xattr_shared_count; ++i) { - erofs_blk_t blkaddr = - xattrblock_addr(sb, vi->xattr_shared_xattrs[i]); - - it->it.ofs = xattrblock_offset(sb, vi->xattr_shared_xattrs[i]); - it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, - EROFS_KMAP_ATOMIC); + xsid = vi->xattr_shared_xattrs[i]; + it->it.blkaddr = erofs_xattr_blkaddr(sb, xsid); + it->it.ofs = erofs_xattr_blkoff(sb, xsid); + it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, + it->it.blkaddr, EROFS_KMAP_ATOMIC); if (IS_ERR(it->it.kaddr)) return PTR_ERR(it->it.kaddr); - it->it.blkaddr = blkaddr; ret = xattr_foreach(&it->it, &find_xattr_handlers, 
NULL); if (ret != -ENOATTR) @@ -393,7 +405,7 @@ int erofs_getxattr(struct inode *inode, int index, if (!name) return -EINVAL; - ret = init_inode_xattrs(inode); + ret = erofs_init_inode_xattrs(inode); if (ret) return ret; @@ -420,20 +432,9 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { - struct erofs_sb_info *const sbi = EROFS_I_SB(inode); - - switch (handler->flags) { - case EROFS_XATTR_INDEX_USER: - if (!test_opt(&sbi->opt, XATTR_USER)) - return -EOPNOTSUPP; - break; - case EROFS_XATTR_INDEX_TRUSTED: - break; - case EROFS_XATTR_INDEX_SECURITY: - break; - default: - return -EINVAL; - } + if (handler->flags == EROFS_XATTR_INDEX_USER && + !test_opt(&EROFS_I_SB(inode)->opt, XATTR_USER)) + return -EOPNOTSUPP; return erofs_getxattr(inode, handler->flags, name, buffer, size); } @@ -562,20 +563,18 @@ static int shared_listxattr(struct listxattr_iter *it) { struct inode *const inode = d_inode(it->dentry); struct erofs_inode *const vi = EROFS_I(inode); - struct super_block *const sb = inode->i_sb; - unsigned int i; + struct super_block *const sb = it->it.sb; + unsigned int i, xsid; int ret = 0; for (i = 0; i < vi->xattr_shared_count; ++i) { - erofs_blk_t blkaddr = - xattrblock_addr(sb, vi->xattr_shared_xattrs[i]); - - it->it.ofs = xattrblock_offset(sb, vi->xattr_shared_xattrs[i]); - it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, - EROFS_KMAP_ATOMIC); + xsid = vi->xattr_shared_xattrs[i]; + it->it.blkaddr = erofs_xattr_blkaddr(sb, xsid); + it->it.ofs = erofs_xattr_blkoff(sb, xsid); + it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, + it->it.blkaddr, EROFS_KMAP_ATOMIC); if (IS_ERR(it->it.kaddr)) return PTR_ERR(it->it.kaddr); - it->it.blkaddr = blkaddr; ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); if (ret) @@ -590,7 +589,7 @@ ssize_t erofs_listxattr(struct dentry *dentry, int ret; struct listxattr_iter it; - ret = init_inode_xattrs(d_inode(dentry)); + ret = erofs_init_inode_xattrs(d_inode(dentry)); if (ret == -ENOATTR) return 0; if (ret) diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index f7a21aaa975575c9a5f7da9be42d800842a7aed6..a65158cba14f6afdb7345a87c0cfff3c6387b904 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -13,29 +13,6 @@ /* Attribute not found */ #define ENOATTR ENODATA -static inline unsigned int inlinexattr_header_size(struct inode *inode) -{ - return sizeof(struct erofs_xattr_ibody_header) + - sizeof(u32) * EROFS_I(inode)->xattr_shared_count; -} - -static inline erofs_blk_t xattrblock_addr(struct super_block *sb, - unsigned int xattr_id) -{ -#ifdef CONFIG_EROFS_FS_XATTR - return EROFS_SB(sb)->xattr_blkaddr + - xattr_id * sizeof(__u32) / sb->s_blocksize; -#else - return 0; -#endif -} - -static inline unsigned int xattrblock_offset(struct super_block *sb, - unsigned int xattr_id) -{ - return (xattr_id * sizeof(__u32)) % sb->s_blocksize; -} - #ifdef CONFIG_EROFS_FS_XATTR extern const struct xattr_handler erofs_xattr_user_handler; extern const struct xattr_handler erofs_xattr_trusted_handler; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 6deddaef26c27e82efe3a97155aa24575a3313c6..58ef8a2840512892fc49d36c6b5e8753c1e3d064 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -817,8 +817,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, if (offset + cur < map->m_la || offset + cur >= map->m_la + map->m_llen) { - erofs_dbg("out-of-range map @ pos %llu", offset + cur); - if (z_erofs_collector_end(fe)) 
fe->backmost = false; map->m_la = offset + cur; @@ -933,9 +931,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, if (err) z_erofs_page_mark_eio(page); z_erofs_onlinepage_endio(page); - - erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu", - __func__, page, spiltted, map->m_llen); return err; } @@ -1631,15 +1626,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, page = erofs_grab_cache_page_nowait(inode->i_mapping, index); if (page) { - if (PageUptodate(page)) { + if (PageUptodate(page)) unlock_page(page); - } else { - err = z_erofs_do_read_page(f, page); - if (err) - erofs_err(inode->i_sb, - "readmore error at page %lu @ nid %llu", - index, EROFS_I(inode)->nid); - } + else + (void)z_erofs_do_read_page(f, page); put_page(page); } @@ -1667,8 +1657,9 @@ static int z_erofs_readpage(struct file *file, struct page *page) /* if some compressed cluster ready, need submit them anyway */ z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false); - if (err) - erofs_err(inode->i_sb, "failed to read, err [%d]", err); + if (err && err != -EINTR) + erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu", + err, page->index, EROFS_I(inode)->nid); erofs_put_metabuf(&f.map.buf); erofs_release_pages(&f.pagepool); @@ -1702,10 +1693,9 @@ static void z_erofs_readahead(struct readahead_control *rac) head = (void *)page_private(page); err = z_erofs_do_read_page(&f, page); - if (err) - erofs_err(inode->i_sb, - "readahead error at page %lu @ nid %llu", - page->index, EROFS_I(inode)->nid); + if (err && err != -EINTR) + erofs_err(inode->i_sb, "readahead error %d @ %lu of nid %llu", + err, page->index, EROFS_I(inode)->nid); put_page(page); } z_erofs_pcluster_readmore(&f, rac, false); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 69ea1eb5af4c92134cb072f3458a5f42621f801a..989a0f226b3dc79e45198318d0c6257184e7d1c9 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -22,8 +22,8 @@ struct z_erofs_maprecorder { bool partialref; }; -static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned long lcn) +static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, + unsigned long lcn) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); @@ -227,8 +227,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, return 0; } -static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned long lcn, bool lookahead) +static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, + unsigned long lcn, bool lookahead) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); @@ -278,23 +278,23 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, return unpack_compacted_index(m, amortizedshift, pos, lookahead); } -static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned int lcn, bool lookahead) +static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, + unsigned int lcn, bool lookahead) { - const unsigned int datamode = EROFS_I(m->inode)->datalayout; - - if (datamode == EROFS_INODE_COMPRESSED_FULL) - return legacy_load_cluster_from_disk(m, lcn); - - if (datamode == EROFS_INODE_COMPRESSED_COMPACT) - return compacted_load_cluster_from_disk(m, lcn, lookahead); - - return -EINVAL; + switch (EROFS_I(m->inode)->datalayout) { + case EROFS_INODE_COMPRESSED_FULL: + return z_erofs_load_full_lcluster(m, lcn); + case EROFS_INODE_COMPRESSED_COMPACT: + return 
z_erofs_load_compact_lcluster(m, lcn, lookahead); + default: + return -EINVAL; + } } static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int lookback_distance) { + struct super_block *sb = m->inode->i_sb; struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; @@ -302,21 +302,15 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned long lcn = m->lcn - lookback_distance; int err; - /* load extent head logical cluster if needed */ - err = z_erofs_load_cluster_from_disk(m, lcn, false); + err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; switch (m->type) { case Z_EROFS_LCLUSTER_TYPE_NONHEAD: - if (!m->delta[0]) { - erofs_err(m->inode->i_sb, - "invalid lookback distance 0 @ nid %llu", - vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } lookback_distance = m->delta[0]; + if (!lookback_distance) + goto err_bogus; continue; case Z_EROFS_LCLUSTER_TYPE_PLAIN: case Z_EROFS_LCLUSTER_TYPE_HEAD1: @@ -325,16 +319,15 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, m->map->m_la = (lcn << lclusterbits) | m->clusterofs; return 0; default: - erofs_err(m->inode->i_sb, - "unknown type %u @ lcn %lu of nid %llu", + erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", m->type, lcn, vi->nid); DBG_BUGON(1); return -EOPNOTSUPP; } } - - erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu", - vi->nid); +err_bogus: + erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", + lookback_distance, m->lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } @@ -366,7 +359,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, if (m->compressedblks) goto out; - err = z_erofs_load_cluster_from_disk(m, lcn, false); + err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; @@ -398,8 +391,8 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, break; fallthrough; default: - erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", - lcn, vi->nid); + erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, + vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } @@ -407,9 +400,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, map->m_plen = erofs_pos(sb, m->compressedblks); return 0; err_bonus_cblkcnt: - erofs_err(m->inode->i_sb, - "bogus CBLKCNT @ lcn %lu of nid %llu", - lcn, vi->nid); + erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } @@ -430,7 +421,7 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) return 0; } - err = z_erofs_load_cluster_from_disk(m, lcn, true); + err = z_erofs_load_lcluster_from_disk(m, lcn, true); if (err) return err; @@ -478,7 +469,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); - err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false); + err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false); if (err) goto unmap_out; @@ -536,8 +527,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, if (flags & EROFS_GET_BLOCKS_FINDTAIL) { vi->z_tailextent_headlcn = m.lcn; /* for non-compact indexes, fragmentoff is 64 bits */ - if (fragment && - vi->datalayout == EROFS_INODE_COMPRESSED_FULL) + if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL) vi->z_fragmentoff |= (u64)m.pblk << 32; } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { @@ -586,9 
+576,6 @@ static int z_erofs_do_map_blocks(struct inode *inode, unmap_out: erofs_unmap_metabuf(&m.map->buf); - erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", - __func__, map->m_la, map->m_pa, - map->m_llen, map->m_plen, map->m_flags); return err; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index c1391cb2015212a4f6f1d182907b4f149db9519f..e11470da52a6beaaf6ef52a19c62282c538be7f6 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -271,6 +271,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask, error = filemap_fdatawait(inode->i_mapping); if (error) goto out; + truncate_inode_pages(inode->i_mapping, 0); if (new_flags & GFS2_DIF_JDATA) gfs2_ordered_del_inode(ip); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ddda5ccea39f496de68be8b8a7919d7df5241b5b..06e8962771b02b191ba7b7cc08e51439dd34ead8 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -553,8 +553,24 @@ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { - if (!iob || req->q->elevator || req->end_io || ioerror) + /* + * Check various conditions that exclude batch processing: + * 1) No batch container + * 2) Has scheduler data attached + * 3) Not a passthrough request and end_io set + * 4) Not a passthrough request and an ioerror + */ + if (!iob) return false; + if (req->q->elevator) + return false; + if (!blk_rq_is_passthrough(req)) { + if (req->end_io) + return false; + if (ioerror < 0) + return false; + } + if (!iob->complete) iob->complete = complete; else if (iob->complete != complete) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e1915e3fe2db1476e987ad01bbb68e13321613d1..b2d08b9f90a9a2ceb71816996d6ee40ab37160d0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -538,6 +538,7 @@ extern void __init hrtimers_init(void); extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); +int hrtimers_cpu_starting(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int hrtimers_cpu_dying(unsigned int cpu); #else diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d1f978c0f850ccbf7cb18e3190ef15bc945da650..dcb18eaff901b6d62927111b7d19811bd41c31d4 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -120,6 +120,7 @@ enum { NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory * Space Control */ + NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ @@ -144,6 +145,9 @@ enum { #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) +#define NVME_CRTO_CRIMT(crto) ((crto) >> 16) +#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff) + enum { NVME_CMBSZ_SQS = 1 << 0, NVME_CMBSZ_CQS = 1 << 1, @@ -187,6 +191,7 @@ enum { NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + NVME_CC_CRIME = 1 << 24, NVME_CAP_CSS_NVM = 1 << 0, NVME_CAP_CSS_CSI = 1 << 6, NVME_CSTS_RDY = 1 << 0, @@ -201,6 +206,11 @@ enum { NVME_CMBMSC_CMSE = 1 << 1, }; +enum { + NVME_CAP_CRMS_CRIMS = 1ULL << 59, + NVME_CAP_CRMS_CRWMS = 1ULL << 60, +}; + struct nvme_id_power_state { __le16 max_power; /* centiwatts */ __u8 rsvd2; @@ -382,6 +392,21 @@ struct nvme_id_ns { __u8 vs[3712]; }; 
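For context on the NVME_REG_CRTO, NVME_CRTO_CRIMT/CRWMT, NVME_CC_CRIME and NVME_CAP_CRMS_* additions above: a minimal sketch, assuming only existing <linux/nvme.h> definitions (NVME_CAP_TIMEOUT() is the stock CAP.TO accessor; the helper name is hypothetical), of how a host driver is expected to pick the controller-ready timeout from these fields:

/*
 * Hedged sketch, not part of this patch: select the ready timeout per
 * NVMe TP4084. Both CRTO fields and CAP.TO count in 500ms units.
 */
static unsigned int nvme_pick_ready_timeout_ms(u64 cap, u32 crto, u32 *cc)
{
	u32 timeout;

	if (cap & NVME_CAP_CRMS_CRWMS) {
		if (cap & NVME_CAP_CRMS_CRIMS) {
			*cc |= NVME_CC_CRIME;	/* request immediate mode */
			timeout = NVME_CRTO_CRIMT(crto);
		} else {
			timeout = NVME_CRTO_CRWMT(crto);
		}
	} else {
		timeout = NVME_CAP_TIMEOUT(cap);	/* legacy CAP.TO path */
	}
	return timeout * 500;
}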
+/* I/O Command Set Independent Identify Namespace Data Structure */ +struct nvme_id_ns_cs_indep { + __u8 nsfeat; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __le32 anagrpid; + __u8 nsattr; + __u8 rsvd9; + __le16 nvmsetid; + __le16 endgid; + __u8 nstat; + __u8 rsvd15[4081]; +}; + struct nvme_zns_lbafe { __le64 zsze; __u8 zdes; @@ -413,6 +438,7 @@ enum { NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_CS_NS = 0x05, NVME_ID_CNS_CS_CTRL = 0x06, + NVME_ID_CNS_NS_CS_INDEP = 0x08, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, @@ -481,6 +507,10 @@ struct nvme_ns_id_desc { #define NVME_NIDT_UUID_LEN 16 #define NVME_NIDT_CSI_LEN 1 +enum { + NVME_NSTAT_NRDY = 1 << 0, +}; + enum { NVME_NIDT_EUI64 = 0x01, NVME_NIDT_NGUID = 0x02, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4f62945c352e0dedc56d5afeaae994df352f9ea8..e1e40e2768b2955d4522622922c799eec2f529c7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1325,6 +1325,18 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } +extern int close_fd(unsigned int fd); + +/* + * In contrast to sys_close(), this stub does not check whether the syscall + * should or should not be restarted, but returns the raw error codes from + * close_fd(). + */ +static inline int ksys_close(unsigned int fd) +{ + return close_fd(fd); +} + extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 76bad413a9c5510d3d2b5e0354791cd580693cbd..e098b68971dee0d3a5c35b6e7d8f2918be0e2476 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,6 +9,7 @@ #include #include #include +#include /** * virtqueue - a queue to register buffers for sending or receiving. 
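The virtio.h hunks here and below export a premapped DMA mode for virtqueues (virtqueue_set_dma_premapped() plus the virtqueue_dma_* mapping helpers). A minimal usage sketch under stated assumptions: the example_* names are hypothetical and not from this patch, the opt-in must happen while the queue is still empty, and in premapped mode the caller fills the scatterlist with DMA addresses it mapped itself:

/* Hedged sketch: opt in once, right after the queue is created. */
static int example_enable_premapped(struct virtqueue *vq)
{
	return virtqueue_set_dma_premapped(vq);
}

static int example_add_premapped_buf(struct virtqueue *vq, void *buf, size_t len)
{
	struct scatterlist sg;
	dma_addr_t addr;

	/* Map through the virtqueue so the right DMA device is used. */
	addr = virtqueue_dma_map_single_attrs(vq, buf, len, DMA_FROM_DEVICE, 0);
	if (virtqueue_dma_mapping_error(vq, addr))
		return -ENOMEM;

	/* In premapped mode, the sg entry carries a DMA address, not a page. */
	sg_init_table(&sg, 1);
	sg.dma_address = addr;
	sg.length = len;

	return virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC);
}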
@@ -61,6 +62,8 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +struct device *virtqueue_dma_dev(struct virtqueue *vq); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); @@ -78,6 +81,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); +int virtqueue_set_dma_premapped(struct virtqueue *_vq); + bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); @@ -95,6 +100,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); +int virtqueue_reset(struct virtqueue *vq, + void (*recycle)(struct virtqueue *vq, void *buf)); /** * virtio_device - representation of a device using virtio @@ -203,4 +210,19 @@ void unregister_virtio_driver(struct virtio_driver *drv); #define module_virtio_driver(__virtio_driver) \ module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); +int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); + +bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); +void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 27cc1d4643219a44c01a2404124cd45ef46f7f3d..a0941d85e64448b65ecdcc2cd56cbe8eba2b3a42 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -53,6 +53,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; + u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -94,6 +95,16 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); + if (hdr->gso_size) { + gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + ret = div64_u64_rem(skb->len, gso_size, &remainder); + if (!(ret && (hdr->gso_size > needed) && + ((remainder > needed) || (remainder == 0)))) { + return -EINVAL; + } + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + } + if (!pskb_may_pull(skb, needed)) return -EINVAL; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9569ae84cac45e5212a3221e0d18184d6ebd1c3f..a253c42a528ab41efbbbdd11141e31ea9682d2c1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -32,6 +32,12 @@ #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 +/* + * Hash table: for virtual service lookups + */ +#define IP_VS_SVC_TAB_BITS 8 +#define IP_VS_SVC_TAB_SIZE BIT(IP_VS_SVC_TAB_BITS) +#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) @@ -963,6 +969,13 @@ struct netns_ipvs { */ unsigned int 
mixed_address_family_dests; unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ + + /* the service mutex that protects svc_table and svc_fwm_table */ + struct mutex service_mutex; + /* the service table hashed by <protocol, addr, port> */ + struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; + /* the service table hashed by fwmark */ + struct hlist_head svc_fwm_table[IP_VS_SVC_TAB_SIZE]; }; #define DEFAULT_SYNC_THRESHOLD 3 diff --git a/kernel/cpu.c b/kernel/cpu.c index 8e7d6eaffab2e7b3c52f29dfe9ce3b4631b636d3..5dcef68c99c6e5572a54977c1373d0d4ea142faf 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1671,7 +1671,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { }, [CPUHP_AP_HRTIMERS_DYING] = { .name = "hrtimers:dying", - .startup.single = NULL, + .startup.single = hrtimers_cpu_starting, .teardown.single = hrtimers_cpu_dying, }, diff --git a/kernel/fork.c b/kernel/fork.c index 1c7430eb65346ddb62ecc745a40fbb969d2e9f7e..0419b5de4ea8c1ca2fab72dea58fd96970788914 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2435,10 +2435,9 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cleanup_mm: if (p->mm) { #ifdef CONFIG_ASYNC_FORK - if (p->mm->async_fork_mm) { - WARN_ON_ONCE(clone_flags & CLONE_VM); + if (p->mm->async_fork_mm && + !WARN_ON_ONCE(clone_flags & CLONE_VM)) async_fork_cpr_done(p->mm, true, false); - } #endif mm_clear_owner(p->mm, p); mmput(p->mm); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f1ad968e0dea0f4f9bf5df809c2bb786dc3b0732..4dbcdf1ce1075fdab1a5ef8b335cdfb9a3d892f6 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1495,6 +1495,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) } else if (flags & ENQUEUE_REPLENISH) { replenish_dl_entity(dl_se); } else if ((flags & ENQUEUE_RESTORE) && + !is_dl_boosted(dl_se) && dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { setup_new_dl_entity(dl_se); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index a25ccb3199a5a7b526bbc56ace5572023789f84b..47ca9cdbb106424cab774abd1b11abcb41d4da30 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2072,6 +2072,15 @@ int hrtimers_prepare_cpu(unsigned int cpu) } cpu_base->cpu = cpu; + hrtimer_cpu_base_init_expiry_lock(cpu_base); + return 0; +} + +int hrtimers_cpu_starting(unsigned int cpu) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + + /* Clear out any left over state from a CPU down operation */ cpu_base->active_bases = 0; cpu_base->hres_active = 0; cpu_base->hang_detected = 0; @@ -2080,7 +2089,6 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->online = 1; - hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2158,6 +2166,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); + hrtimers_cpu_starting(smp_processor_id()); open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88a130db0c17ced942876cb3dba3899ecb6a8506..7a4ecf12d813a5e01714bcaa48f1c09f9d2bf98c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -786,7 +786,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) if (unlikely(is_global_init(current))) return -EPERM; - if (irqs_disabled()) { + if (!preemptible()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work.
*/ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 37d01e44d48377a00becfc697352a2ce554335fe..bf62be91251fb4744017a620b8a31964c6ab8ab9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2596,23 +2596,27 @@ static int rescuer_thread(void *__rescuer) * check_flush_dependency - check for flush dependency sanity * @target_wq: workqueue being flushed * @target_work: work item being flushed (NULL for workqueue flushes) + * @from_cancel: are we called from the work cancel path * * %current is trying to flush the whole @target_wq or @target_work on it. - * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not - * reclaiming memory or running on a workqueue which doesn't have - * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to - * a deadlock. + * If this is not the cancel path (which implies work being flushed is either + * already running, or will not be at all), check if @target_wq doesn't have + * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running + * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward- + * progress guarantee leading to a deadlock. */ static void check_flush_dependency(struct workqueue_struct *target_wq, - struct work_struct *target_work) + struct work_struct *target_work, + bool from_cancel) { - work_func_t target_func = target_work ? target_work->func : NULL; + work_func_t target_func; struct worker *worker; - if (target_wq->flags & WQ_MEM_RECLAIM) + if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM) return; worker = current_wq_worker(); + target_func = target_work ? target_work->func : NULL; WARN_ONCE(current->flags & PF_MEMALLOC, "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps", @@ -2838,7 +2842,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_add_tail(&this_flusher.list, &wq->flusher_overflow); } - check_flush_dependency(wq, NULL); + check_flush_dependency(wq, NULL, false); mutex_unlock(&wq->mutex); @@ -3013,7 +3017,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, pwq = worker->current_pwq; } - check_flush_dependency(pwq->wq, work); + check_flush_dependency(pwq->wq, work, from_cancel); insert_wq_barrier(pwq, barr, work, worker); raw_spin_unlock_irq(&pool->lock); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 227f790c12ad7a40b80762af0247553e7924749b..79a25d560e4ecb79008badd65042417fa0eb5cc9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4885,6 +4885,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, struct address_space *mapping; int need_wait_lock = 0; unsigned long haddr = address & huge_page_mask(h); + bool userfault = userfaultfd_missing(vma); /* * Acquire i_mmap_rwsem before calling huge_pte_alloc and hold @@ -5029,7 +5030,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, wait_on_page_locked(page); out_unlock: - if (ret == VM_FAULT_RETRY) { + if (!userfault && (ret == VM_FAULT_RETRY)) { page = find_get_page(mapping, idx); mmap_read_unlock(mm); if (page) diff --git a/mm/mmap.c b/mm/mmap.c index 4eafb97c9861310acfedd114d19d85ac90815804..a418319b763453160ee0edbac0e328263c30de28 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -59,6 +59,17 @@ #include "internal.h" +bool __maybe_unused enable_brk_thp_aligned; + +static int __init parse_enable_brk_thp_aligned(char *str) +{ + enable_brk_thp_aligned = true; + pr_info("Enabling brk thp aligned\n"); + + return 0; +} +__setup("brk_thp_aligned", parse_enable_brk_thp_aligned); + #ifndef 
arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) #endif @@ -198,6 +209,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool populate; bool downgraded = false; LIST_HEAD(uf); + unsigned long __maybe_unused newbrk_aligned, oldbrk_aligned; if (mmap_write_lock_killable(mm)) return -EINTR; @@ -232,6 +244,17 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && enable_brk_thp_aligned) { + newbrk_aligned = ALIGN(brk, HPAGE_SIZE); + + next = find_vma(mm, oldbrk); + if (next && next->vm_start <= oldbrk) + oldbrk_aligned = next->vm_end; + else + oldbrk_aligned = oldbrk; + } + if (oldbrk == newbrk) { mm->brk = brk; goto success; @@ -250,6 +273,9 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) * mm->brk will be restored from origbrk. */ mm->brk = brk; + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && enable_brk_thp_aligned) + oldbrk = oldbrk_aligned; + ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true); if (ret < 0) { mm->brk = origbrk; @@ -260,6 +286,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto success; } + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && enable_brk_thp_aligned) { + if (newbrk <= oldbrk_aligned) { + mm->brk = brk; + goto success; + } + newbrk = newbrk_aligned; + oldbrk = oldbrk_aligned; + } /* Check against existing mmap mappings. */ next = find_vma(mm, oldbrk); if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 63bfbb5918fef848f664cbc0490043461f67d471..e1fc6e808dcc13aca607e1a64d01ebb501182e26 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1867,6 +1867,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, chan = l2cap_chan_create(); if (!chan) { sk_free(sk); + sock->sk = NULL; return NULL; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 10a2c7bca7199d52570c845b54b52e9d918d6187..df0e7e412b41c259520f5e9a10abe4e2e3998f5a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -751,6 +752,10 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff return br_dev_queue_push_xmit(net, sk, skb); } + /* Fragmentation on metadata/template dst is not supported */ + if (unlikely(!skb_valid_dst(skb))) + goto drop; + /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. 
*/ diff --git a/net/core/dev.c b/net/core/dev.c index 52a6c7a22dce18c0e9cfc5f37c6353b27deaf4a6..674889f29b884540b6aa411293e6d7f76506775d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3772,7 +3772,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); - } else { + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { struct udphdr _udphdr; if (skb_header_pointer(skb, skb_transport_offset(skb), diff --git a/net/core/dst.c b/net/core/dst.c index fb3bcba87744d3f48fcfbdd229f3313c7520f196..fe7158b8a43c178accb342c6c438ebd6775fb653 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -113,9 +113,6 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) child = xdst->child; } #endif - if (!(dst->flags & DST_NOCOUNT)) - dst_entries_add(dst->ops, -1); - if (dst->ops->destroy) dst->ops->destroy(dst); if (dst->dev) @@ -166,6 +163,12 @@ void dst_dev_put(struct dst_entry *dst) } EXPORT_SYMBOL(dst_dev_put); +static void dst_count_dec(struct dst_entry *dst) +{ + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -175,8 +178,10 @@ void dst_release(struct dst_entry *dst) if (WARN_ONCE(newrefcnt < 0, "dst_release underflow")) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (!newrefcnt) + if (!newrefcnt) { + dst_count_dec(dst); call_rcu(&dst->rcu_head, dst_destroy_rcu); + } } } EXPORT_SYMBOL(dst_release); @@ -190,8 +195,10 @@ void dst_release_immediate(struct dst_entry *dst) if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow")) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (!newrefcnt) + if (!newrefcnt) { + dst_count_dec(dst); dst_destroy(dst); + } } } EXPORT_SYMBOL(dst_release_immediate); diff --git a/net/core/filter.c b/net/core/filter.c index f0c7b53877db3e5237b83f1710ecc2aea0148acf..68eabcba341605e97656fcc3404294b6f10400a3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10045,6 +10045,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; + int err; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) @@ -10052,10 +10053,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { - /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ - if (sk_is_refcounted(selected_sk)) - sock_put(selected_sk); - /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. @@ -10063,24 +10060,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ - return is_sockarray ? -ENOENT : -EINVAL; + err = is_sockarray ? -ENOENT : -EINVAL; + goto error; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; - if (sk->sk_protocol != selected_sk->sk_protocol) - return -EPROTOTYPE; - else if (sk->sk_family != selected_sk->sk_family) - return -EAFNOSUPPORT; - - /* Catch all. Likely bound to a different sockaddr.
*/ - return -EBADFD; + if (sk->sk_protocol != selected_sk->sk_protocol) { + err = -EPROTOTYPE; + } else if (sk->sk_family != selected_sk->sk_family) { + err = -EAFNOSUPPORT; + } else { + /* Catch all. Likely bound to a different sockaddr. */ + err = -EBADFD; + } + goto error; } reuse_kern->selected_sk = selected_sk; return 0; +error: + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + + return err; } static const struct bpf_func_proto sk_select_reuseport_proto = { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab0acb726146c2b5389c9e901fc22949a9499fec..50fe8d470a24c80a93cc8635b0c094e89f20dbc5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1858,7 +1858,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 }, - [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GSO_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1), [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f9ef6a00fdf4b71601037734e18c7ffa9b2f3efb..ab204b35dbfefa5deadee2b6e13d3a80b36f1786 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5323,11 +5323,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif + ipvs_reset(skb); if (!xnet) return; - ipvs_reset(skb); skb->mark = 0; skb->tstamp = 0; } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 206a3b5904ec228a544ba3a93b07486dacf0dad6..3881871ee0886ab24d44e3815a9cbfbffc12a7d1 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -941,9 +941,9 @@ static void sk_psock_strp_data_ready(struct sock *sk) if (tls_sw_has_ctx_rx(sk)) { psock->parser.saved_data_ready(sk); } else { - write_lock_bh(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); strp_data_ready(&psock->parser.strp); - write_unlock_bh(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 35469745ad796298d84708025c816cacae8a0ce4..09ddda3706673ec17b9f2e54e557277a0934559b 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -421,12 +421,11 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key) static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock **psk) { - struct sock *sk; + struct sock *sk = NULL; int err = 0; raw_spin_lock_bh(&stab->lock); - sk = *psk; - if (!sk_test || sk_test == sk) + if (!sk_test || sk_test == *psk) sk = xchg(psk, NULL); if (likely(sk)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2be5c69824f94423e2f90b97938dd623a4fea9c7..4517175c992f8575eec9fa2cff6aaf5c316135dc 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -604,7 +604,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. 
--ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != DCCP_LISTEN) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index c25f7617770c8e0e9693c6116cdec0573422a1b3..eebbb0daad9095995eddecb5a0d128fcce5a8521 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1037,19 +1037,21 @@ static int ieee802154_create(struct net *net, struct socket *sock, if (sk->sk_prot->hash) { rc = sk->sk_prot->hash(sk); - if (rc) { - sk_common_release(sk); - goto out; - } + if (rc) + goto out_sk_release; } if (sk->sk_prot->init) { rc = sk->sk_prot->init(sk); if (rc) - sk_common_release(sk); + goto out_sk_release; } out: return rc; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static const struct net_proto_family ieee802154_family_ops = { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index e5f69b0bf3df551e28344ddd1c5068069c823cd1..0ee742d1379ec0bc19d093811a573ea210d6df94 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -48,7 +48,7 @@ struct fou_net { static inline struct fou *fou_from_sock(struct sock *sk) { - return sk->sk_user_data; + return rcu_dereference_sk_user_data(sk); } static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) @@ -230,10 +230,16 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { - u8 proto = fou_from_sock(sk)->protocol; - const struct net_offload **offloads; + const struct net_offload __rcu **offloads; + struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; struct sk_buff *pp = NULL; + u8 proto; + + if (!fou) + goto out_unlock; + + proto = fou->protocol; /* We can clear the encap_mark for FOU as we are essentially doing * one of two possible things. We are either adding an L4 tunnel @@ -263,16 +269,26 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { + const struct net_offload __rcu **offloads; + struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; - u8 proto = fou_from_sock(sk)->protocol; - int err = -ENOSYS; - const struct net_offload **offloads; + u8 proto; + int err; + + if (!fou) { + err = -ENOENT; + goto out_unlock; + } + + proto = fou->protocol; rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? 
inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); - if (WARN_ON(!ops || !ops->callbacks.gro_complete)) + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) { + err = -ENOSYS; goto out_unlock; + } err = ops->callbacks.gro_complete(skb, nhoff); @@ -324,6 +340,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, struct gro_remcsum grc; u8 proto; + if (!fou) + goto out; + skb_gro_remcsum_init(&grc); off = skb_gro_offset(skb); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 542362fc8407a3fa76fe75324fa551fd5f6e87bb..43d82e8310b1369a4ca0eb0e55170d9fae3e8676 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -218,7 +218,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); - hlist_for_each_entry_rcu(t, head, hash_node) { + hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && link == t->parms.link && diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index 6cc5743c553a02fd82ee97fab94f36019b6a1f7c..9a21175693db58906db43a8d1393a4da936aa9fe 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -52,8 +52,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, { struct iphdr *iph; + local_bh_disable(); if (this_cpu_read(nf_skb_duplicated)) - return; + goto out; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for * the original skb, which should continue on its way as if nothing has @@ -61,7 +62,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, */ skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) - return; + goto out; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. 
*/ @@ -90,6 +91,8 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, } else { kfree_skb(skb); } +out: + local_bh_enable(); } EXPORT_SYMBOL_GPL(nf_dup_ipv4); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 2a17dc9413ae95e8310371d2ba3b26d102220c89..67829ff8f92106fee00878550d89584d811c8d32 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -210,9 +210,10 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) p = nla_data(nla); for (i = 0; i < nhg->num_nh; ++i) { - p->id = nhg->nh_entries[i].nh->id; - p->weight = nhg->nh_entries[i].weight - 1; - p += 1; + *p++ = (struct nexthop_grp) { + .id = nhg->nh_entries[i].nh->id, + .weight = nhg->nh_entries[i].weight - 1, + }; } return 0; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 79f705450c162832e73f22a00a57916e6eddf8cd..be2c97e907ae20e58132b2e223cc3eabfa3f1219 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -55,7 +55,18 @@ struct dctcp { }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ -module_param(dctcp_shift_g, uint, 0644); + +static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 0, 10); +} + +static const struct kernel_param_ops dctcp_shift_g_ops = { + .set = dctcp_shift_g_set, + .get = param_get_uint, +}; + +module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644); MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha"); static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1910f4aca3f8a4e55c57b4d7bc872b3d93c9b1b7..a4ca7bb012d64e1ed9dca968cca7309e1d7dbbc8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2308,7 +2308,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) + if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next)) return false; len -= skb->len; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index be1bdc33089dc92b86f3f10961b8053e019b0170..16c12a8a608151be2f5cdf9f218e4fc1eef37933 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -10,6 +10,7 @@ #include #include #include +#include static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, @@ -269,13 +270,38 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, __sum16 check; __be16 newlen; - if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features, is_ipv6); - mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), + mss); + return NULL; + } + + if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) { + /* Detect modified geometry and pass those to skb_segment. */ + if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) + return __udp_gso_segment_list(gso_skb, features, is_ipv6); + + /* Setup csum, as fraglist skips this in udp4_gro_receive. 
*/ + gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; + gso_skb->csum_offset = offsetof(struct udphdr, check); + gso_skb->ip_summed = CHECKSUM_PARTIAL; + + uh = udp_hdr(gso_skb); + if (is_ipv6) + uh->check = ~udp_v6_check(gso_skb->len, + &ipv6_hdr(gso_skb)->saddr, + &ipv6_hdr(gso_skb)->daddr, 0); + else + uh->check = ~udp_v4_check(gso_skb->len, + ip_hdr(gso_skb)->saddr, + ip_hdr(gso_skb)->daddr, 0); + } + skb_pull(gso_skb, sizeof(*uh)); /* clear destructor to avoid skb_segment assigning it to tail */ @@ -383,8 +409,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) return __udp_gso_segment(skb, features, false); mss = skb_shinfo(skb)->gso_size; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 18773a0c9aca1a7ee432466fd4aec995f547c7a8..00f337bd74ebeb6e62c51618baf32730c60867f1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -257,31 +257,29 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3a2741569b84764797021a975345db6a687e1fc7..ac963ad946449fb45bebeb53e53a727a3e16a819 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -830,8 +830,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, bool log_ecn_err) { struct pcpu_sw_netstats *tstats; - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -863,14 +863,29 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + /* Get the outer header. 
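The af_inet6.c hunk above folds three copies of the failure cleanup in inet6_create() into one out_sk_release label, and additionally clears sock->sk so the caller can never see a half-destroyed socket. A sketch of the same goto-unwind shape with placeholder types (not the kernel source):

#include <stdio.h>
#include <stdlib.h>

struct conn { void *buf; FILE *log; };

/* One label per acquired resource; each failure jumps to the label
 * that releases everything acquired so far, mirroring out_sk_release. */
static int conn_create(struct conn **out)
{
	struct conn *c;

	*out = NULL;
	c = calloc(1, sizeof(*c));
	if (!c)
		return -1;
	c->buf = malloc(4096);
	if (!c->buf)
		goto out_free_conn;
	c->log = tmpfile();
	if (!c->log)
		goto out_free_buf;
	*out = c;
	return 0;

out_free_buf:
	free(c->buf);
out_free_conn:
	free(c);	/* caller sees *out == NULL: no dangling handle */
	return -1;
}

int main(void)
{
	struct conn *c;
	printf("conn_create: %d\n", conn_create(&c));
	return 0;
}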
*/ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index a0a2de30be3e7b6fa9aa34dcc6a918e566713e07..0c39c77fe8a8a4c7589cdd9e6b7fb78e6f0ef88b 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -47,11 +47,12 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif) { + local_bh_disable(); if (this_cpu_read(nf_skb_duplicated)) - return; + goto out; skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) - return; + goto out; #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset_ct(skb); @@ -69,6 +70,8 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, } else { kfree_skb(skb); } +out: + local_bh_enable(); } EXPORT_SYMBOL_GPL(nf_dup_ipv6); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 16ea7b58d6ae19e2d9fd77656e9faefcfadd097f..1d53ed30b8a51b349aec370ef90f5df941d7f69a 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -134,13 +134,13 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) { struct net_device *br_indev __maybe_unused; - struct sk_buff *nskb; - struct tcphdr _otcph; - const struct tcphdr *otcph; - unsigned int otcplen, hh_len; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; + const struct tcphdr *otcph; + struct sk_buff *nskb; + struct tcphdr _otcph; + unsigned int otcplen; struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || @@ -179,9 +179,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) if (IS_ERR(dst)) return; - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, + nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, GFP_ATOMIC); if (!nskb) { @@ -194,7 +193,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) nskb->mark = fl6.flowi6_mark; - skb_reserve(nskb, hh_len + dst->header_len); + skb_reserve(nskb, LL_MAX_HEADER); ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1ef8d1ec0024af8cdfd56437ec75f4c6a69fadb..71dd9ef797591422d41fcfbc8e68b25ae2822d74 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -378,6 +378,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct inet6_dev *idev = rt->rt6i_idev; struct net_device *loopback_dev = dev_net(dev)->loopback_dev; + struct fib6_info *from; if (idev && idev->dev != loopback_dev) { struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); @@ -386,6 +387,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(idev); } } + from = xchg((__force struct fib6_info **)&rt->from, NULL); + fib6_info_release(from); } static bool __rt6_check_expired(const struct rt6_info *rt) @@ -1444,7 +1447,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception 
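The __ip6_tnl_rcv() change above is about pointer stability: pskb_inet_may_pull() may reallocate skb->head, so the code records the outer header as an offset (nh) and rebuilds ipv6h from skb->head + nh afterwards. The same discipline applies to any buffer that can move; a small illustrative sketch with realloc() standing in for the skb head reallocation:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *buf = malloc(16);
	size_t nh;
	char *grown, *outer;

	if (!buf)
		return 1;
	strcpy(buf, "outer-hdr");

	/* Save a position as an offset, not as a pointer... */
	nh = 0;		/* like: nh = skb_network_header(skb) - skb->head */

	/* ...because growing the buffer may move it, invalidating any
	 * pointer taken earlier (as pskb_inet_may_pull() may do). */
	grown = realloc(buf, 4096);
	if (!grown) {
		free(buf);
		return 1;
	}
	buf = grown;

	/* Rebuild the pointer from the stable base + saved offset. */
	outer = buf + nh;	/* like: ipv6h = (...)(skb->head + nh) */
	printf("%s\n", outer);
	free(buf);
	return 0;
}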
*rt6_ex) { - struct fib6_info *from; struct net *net; if (!bucket || !rt6_ex) @@ -1456,8 +1458,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); - fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); hlist_del_rcu(&rt6_ex->hlist); @@ -2647,10 +2647,10 @@ static void ip6_negative_advice(struct sock *sk, if (rt->rt6i_flags & RTF_CACHE) { rcu_read_lock(); if (rt6_check_expired(rt)) { - /* counteract the dst_release() in sk_dst_reset() */ - dst_hold(dst); + /* rt/dst can not be destroyed yet, + * because of rcu_read_lock() + */ sk_dst_reset(sk); - rt6_remove_exception_rt(rt); } rcu_read_unlock(); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ebe07c09658728c82a9f3b3b95e1c8674d4fc4ce..258c5ca39edaa9f19912a61a880dbba5e7ac214f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1481,7 +1481,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != TCP_LISTEN) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1515,8 +1515,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (nsk != sk) { if (tcp_child_process(sk, nsk, skb)) goto reset; - if (opt_skb) - __kfree_skb(opt_skb); return 0; } } else diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index fc4337b41eceea785116a0a8f8d966ead61da281..7752e1e921f8f84e0ae26659bab710fcaa6fea5e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -45,8 +45,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && - !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) return __udp_gso_segment(skb, features, true); /* Do software UFO. Complete and fill in the UDP checksum as HW cannot diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 31edba8d49a7d186a6dc2e484ade0455d56c9977..cdd61b82eec9501e145d56efe6ca64c4f0d44012 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -390,6 +390,12 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) mptcp_sk(sk)->timer_ival = tout > 0 ? 
tout : TCP_RTO_MIN; } +static inline bool tcp_can_send_ack(const struct sock *ssk) +{ + return !((1 << inet_sk_state_load(ssk)) & + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); +} + static void mptcp_check_data_fin(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1459,8 +1465,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); - tcp_sk(ssk)->window_clamp = window_clamp; - tcp_cleanup_rbuf(ssk, 1); + WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp); + if (tcp_can_send_ack(ssk)) + tcp_cleanup_rbuf(ssk, 1); unlock_sock_fast(ssk, slow); } } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index e4fa00abde6a2ab9941c8cd7d0b8a12f537503e0..5988b9bb9029dc1ac6e3c88d79c9d74442e00ac4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -163,11 +163,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) { + if (ip > ip_to) swap(ip, ip_to); - if (ip < map->first_ip) - return -IPSET_ERR_BITMAP_RANGE; - } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -178,7 +175,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ip_to = ip; } - if (ip_to > map->last_ip) + if (ip < map->first_ip || ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; for (; !before(ip_to, ip); ip += map->hosts) { diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 2b19189a930fd552cc859aed02eade8698bd9061..e15432c3fb2a86e3ffac1a7c0358e86e40812eb9 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -101,14 +101,19 @@ find_set_type(const char *name, u8 family, u8 revision) static bool load_settype(const char *name) { + if (!try_module_get(THIS_MODULE)) + return false; + nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warn("Can't find ip_set type %s\n", name); nfnl_lock(NFNL_SUBSYS_IPSET); + module_put(THIS_MODULE); return false; } nfnl_lock(NFNL_SUBSYS_IPSET); + module_put(THIS_MODULE); return true; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e62b40bd349e22910e19a2241afeb97aa5ef97c7..bd1ac540deebcc7f6709cab6f065297b274de519 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -49,7 +49,7 @@ #include /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ -static DEFINE_MUTEX(__ip_vs_mutex); +static struct lock_class_key __ipvs_service_key; /* sysctl variables */ @@ -253,17 +253,6 @@ ip_vs_use_count_dec(void) } -/* - * Hash table: for virtual service lookups - */ -#define IP_VS_SVC_TAB_BITS 8 -#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) -#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) - -/* the service table hashed by */ -static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; -/* the service table hashed by fwmark */ -static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -298,8 +287,8 @@ static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 } /* - * Hashes a service in the ip_vs_svc_table by - * or in the ip_vs_svc_fwm_table by fwmark. + * Hashes a service in the svc_table by + * or in the svc_fwm_table by fwmark. 
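The tcp_can_send_ack() helper added above uses the classic TCPF_* trick: each TCP state is a small integer, so (1 << state) turns the state into a bit and a single AND against a mask tests membership in a whole set of states. A compact standalone illustration of the idiom (the enum values are made up, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

enum state { ST_SYN_SENT, ST_SYN_RECV, ST_ESTABLISHED, ST_TIME_WAIT,
	     ST_CLOSE, ST_LISTEN };

/* States in which sending an ACK makes no sense, as one bitmask. */
#define NO_ACK_MASK ((1u << ST_SYN_SENT) | (1u << ST_SYN_RECV) | \
		     (1u << ST_TIME_WAIT) | (1u << ST_CLOSE) | (1u << ST_LISTEN))

static bool can_send_ack(enum state s)
{
	return !((1u << s) & NO_ACK_MASK); /* one AND, not five compares */
}

int main(void)
{
	printf("ESTABLISHED: %d\n", can_send_ack(ST_ESTABLISHED)); /* 1 */
	printf("LISTEN:      %d\n", can_send_ack(ST_LISTEN));      /* 0 */
	return 0;
}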
* Should be called with locked tables. */ static int ip_vs_svc_hash(struct ip_vs_service *svc) @@ -314,17 +303,17 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by in ip_vs_svc_table + * Hash it by in svc_table */ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, &svc->addr, svc->port); - hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); - hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &svc->ipvs->svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -373,12 +362,9 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); - hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { - if ((svc->af == af) - && ip_vs_addr_equal(af, &svc->addr, vaddr) - && (svc->port == vport) - && (svc->protocol == protocol) - && (svc->ipvs == ipvs)) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) { + if (svc->af == af && ip_vs_addr_equal(af, &svc->addr, vaddr) && + svc->port == vport && svc->protocol == protocol) { /* HIT */ return svc; } @@ -400,9 +386,8 @@ __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); - hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af - && (svc->ipvs == ipvs)) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[hash], f_list) { + if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ return svc; } @@ -1621,10 +1606,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + hlist_for_each_entry_safe(svc, n, &ipvs->svc_table[idx], s_list) { - if (svc->ipvs == ipvs) - ip_vs_unlink_service(svc, cleanup); + ip_vs_unlink_service(svc, cleanup); } } @@ -1632,10 +1616,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + hlist_for_each_entry_safe(svc, n, &ipvs->svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) - ip_vs_unlink_service(svc, cleanup); + ip_vs_unlink_service(svc, cleanup); } } @@ -1653,12 +1636,12 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list) EnterFunction(2); /* Check for "full" addressed entries */ - mutex_lock(&__ip_vs_mutex); list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); + mutex_lock(&ipvs->service_mutex); ip_vs_flush(ipvs, true); + mutex_unlock(&ipvs->service_mutex); } - mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } @@ -1698,25 +1681,20 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (svc->ipvs == ipvs) { - list_for_each_entry(dest, &svc->destinations, - n_list) { - ip_vs_forget_dev(dest, dev); - } + 
hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + ip_vs_forget_dev(dest, dev); } } - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) { - list_for_each_entry(dest, &svc->destinations, - n_list) { - ip_vs_forget_dev(dest, dev); - } + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + ip_vs_forget_dev(dest, dev); } - } } @@ -1725,7 +1703,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, ip_vs_forget_dev(dest, dev); } spin_unlock_bh(&ipvs->dest_trash_lock); - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); LeaveFunction(2); return NOTIFY_DONE; } @@ -1750,16 +1728,14 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (svc->ipvs == ipvs) - ip_vs_zero_service(svc); + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { + ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) - ip_vs_zero_service(svc); + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { + ip_vs_zero_service(svc); } } @@ -2075,9 +2051,9 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { - if ((svc->ipvs == ipvs) && pos-- == 0) { - iter->table = ip_vs_svc_table; + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) { + if (pos-- == 0) { + iter->table = ipvs->svc_table; iter->bucket = idx; return svc; } @@ -2086,10 +2062,10 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], + hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list) { - if ((svc->ipvs == ipvs) && pos-- == 0) { - iter->table = ip_vs_svc_fwm_table; + if (pos-- == 0) { + iter->table = ipvs->svc_fwm_table; iter->bucket = idx; return svc; } @@ -2112,6 +2088,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) @@ -2120,7 +2098,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) svc = v; iter = seq->private; - if (iter->table == ip_vs_svc_table) { + if (iter->table == ipvs->svc_table) { /* next service in table hashed by protocol */ e = rcu_dereference(hlist_next_rcu(&svc->s_list)); if (e) @@ -2128,13 +2106,13 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { hlist_for_each_entry_rcu(svc, - &ip_vs_svc_table[iter->bucket], + &ipvs->svc_table[iter->bucket], s_list) { return svc; } } - iter->table = ip_vs_svc_fwm_table; + iter->table = ipvs->svc_fwm_table; iter->bucket = -1; goto scan_fwmark; } @@ -2147,7 +2125,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { hlist_for_each_entry_rcu(svc, - 
&ip_vs_svc_fwm_table[iter->bucket], + &ipvs->svc_fwm_table[iter->bucket], f_list) return svc; } @@ -2183,7 +2161,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (svc->ipvs != ipvs) return 0; - if (iter->table == ip_vs_svc_table) { + if (iter->table == ipvs->svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) seq_printf(seq, "%s [%pI6]:%04X %s ", @@ -2505,7 +2483,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) return ret; } - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ ret = ip_vs_flush(ipvs, false); @@ -2597,7 +2575,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) } out_unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -2635,9 +2613,9 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || (svc->ipvs != ipvs)) + if (svc->af != AF_INET) continue; if (count >= get->num_services) @@ -2654,9 +2632,9 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || (svc->ipvs != ipvs)) + if (svc->af != AF_INET) continue; if (count >= get->num_services) @@ -2825,7 +2803,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); switch (cmd) { case IP_VS_SO_GET_VERSION: { @@ -2924,7 +2902,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -3159,10 +3137,10 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start || (svc->ipvs != ipvs)) + hlist_for_each_entry(svc, &ipvs->svc_table[i], s_list) { + if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3172,8 +3150,8 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start || (svc->ipvs != ipvs)) + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[i], f_list) { + if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3183,7 +3161,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); cb->args[0] = idx; return skb->len; @@ -3372,7 +3350,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); /* Try to find the service for which to dump destinations */ if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, 
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) @@ -3397,7 +3375,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, cb->args[0] = idx; out_err: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return skb->len; } @@ -3683,7 +3661,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) cmd = info->genlhdr->cmd; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); if (cmd == IPVS_CMD_FLUSH) { ret = ip_vs_flush(ipvs, false); @@ -3795,7 +3773,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -3825,7 +3803,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); if (reply == NULL) @@ -3893,7 +3871,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) out_err: nlmsg_free(msg); out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -4009,6 +3987,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = ip_vs_genl_ops, .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), + .parallel_ops = 1, }; static int __init ip_vs_genl_register(void) @@ -4156,6 +4135,13 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { int i, idx; + /* Initialize service_mutex, svc_table, svc_fwm_table per netns */ + __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + INIT_HLIST_HEAD(&ipvs->svc_table[idx]); + INIT_HLIST_HEAD(&ipvs->svc_fwm_table[idx]); + } + /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_HLIST_HEAD(&ipvs->rs_table[idx]); @@ -4257,19 +4243,10 @@ void ip_vs_unregister_nl_ioctl(void) int __init ip_vs_control_init(void) { - int idx; int ret; EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table */ - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); - INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - - smp_wmb(); /* Do we really need it now ? 
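The ip_vs_ctl.c series above moves the service hash tables and the old global __ip_vs_mutex into struct netns_ipvs, so every network namespace owns private state and the lookups can drop their svc->ipvs == ipvs checks; the single static lock_class_key keeps lockdep treating every per-netns service_mutex as one class. A userspace sketch of the per-instance-state shape (pthread mutex instead of a kernel mutex, hypothetical names):

#include <pthread.h>
#include <stdio.h>

#define TAB_SIZE 256

struct entry { struct entry *next; unsigned int key; };

/* Per-"namespace" state: each instance owns its table and its lock,
 * instead of all instances sharing one global table + global mutex. */
struct ns_state {
	pthread_mutex_t service_mutex;
	struct entry *svc_table[TAB_SIZE];
};

static void ns_state_init(struct ns_state *ns)
{
	int i;

	pthread_mutex_init(&ns->service_mutex, NULL);
	for (i = 0; i < TAB_SIZE; i++)
		ns->svc_table[i] = NULL; /* like INIT_HLIST_HEAD() per bucket */
}

static struct entry *ns_lookup(struct ns_state *ns, unsigned int key)
{
	struct entry *e;

	/* No "does this entry belong to my namespace?" check needed:
	 * the table itself is namespace-private. */
	for (e = ns->svc_table[key % TAB_SIZE]; e; e = e->next)
		if (e->key == key)
			return e;
	return NULL;
}

int main(void)
{
	struct ns_state ns;

	ns_state_init(&ns);
	printf("lookup(7): %p\n", (void *)ns_lookup(&ns, 7));
	return 0;
}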
*/ - ret = register_netdevice_notifier(&ip_vs_dst_notifier); if (ret < 0) return ret; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bd98783f0265c751cc2bf6615860458526765a3b..a71ceedb2c0bee71d80ff320e3093a767629c46c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2549,7 +2549,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family, { const struct nft_expr_type *type, *candidate = NULL; - list_for_each_entry(type, &nf_tables_expressions, list) { + list_for_each_entry_rcu(type, &nf_tables_expressions, list) { if (!nla_strcmp(nla, type->name)) { if (!type->family && !candidate) candidate = type; @@ -2581,9 +2581,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); + rcu_read_lock(); type = __nft_expr_type_get(family, nla); - if (type != NULL && try_module_get(type->owner)) + if (type != NULL && try_module_get(type->owner)) { + rcu_read_unlock(); return type; + } + rcu_read_unlock(); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES @@ -4251,6 +4255,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } dtype = 0; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index da113a881322d2c7f61337687d955c8b41e962e2..7192ca57230e03275e2aa8f001c5ca80f308e874 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -627,6 +627,9 @@ static void nft_payload_set_eval(const struct nft_expr *expr, if ((priv->csum_type == NFT_PAYLOAD_CSUM_INET || priv->csum_flags) && (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || skb->ip_summed != CHECKSUM_PARTIAL)) { + if (offset + priv->len > skb->len) + goto err; + fsum = skb_checksum(skb, offset, priv->len, 0); tsum = csum_partial(src, priv->len, 0); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 69bebb33f86c681a8aea5997cbde42c38bbf9f82..4092d0c11ef19cadfdc8475eea8174fbc3722741 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -385,15 +385,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); - kfree_skb(nlk->cb.skb); - } - skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -406,14 +397,6 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } -static void netlink_sock_destruct_work(struct work_struct *work) -{ - struct netlink_sock *nlk = container_of(work, struct netlink_sock, - work); - - sk_free(&nlk->sk); -} - /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. 
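The nft_expr_type_get() hunk above follows the standard "look up under RCU, pin before unlock" order: the type is found inside rcu_read_lock(), and try_module_get() must succeed before the read side is left, otherwise the owning module could be unloaded between lookup and use. A userspace approximation using a reader lock and a reference count (names invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct expr_type { const char *name; int refcnt; };

static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct expr_type payload_type = { "payload", 0 };

/* Stand-in for walking nf_tables_expressions under RCU. */
static struct expr_type *type_lookup(const char *name)
{
	return strcmp(name, payload_type.name) == 0 ? &payload_type : NULL;
}

static struct expr_type *type_get(const char *name)
{
	struct expr_type *t;

	pthread_rwlock_rdlock(&list_lock);	/* like rcu_read_lock() */
	t = type_lookup(name);
	if (t)
		t->refcnt++;	/* pin it *before* dropping the read side,
				 * like try_module_get() in the patch */
	pthread_rwlock_unlock(&list_lock);	/* like rcu_read_unlock() */
	return t;
}

int main(void)
{
	struct expr_type *t = type_get("payload");

	printf("%s refcnt=%d\n", t ? t->name : "(none)", t ? t->refcnt : 0);
	return 0;
}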
Exclusive sleep solves @@ -728,12 +711,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head) if (!refcount_dec_and_test(&sk->sk_refcnt)) return; - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - sk_free(sk); } @@ -783,6 +760,14 @@ static int netlink_release(struct socket *sock) NETLINK_URELEASE, &n); } + /* Terminate any outstanding dump */ + if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + } + module_put(nlk->module); if (netlink_is_kernel(sk)) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 5f454c8de6a4de07996578538d98bfd8ad45b950..fca9556848885e1bfb16eb4a72c4e3737f7285b2 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -4,7 +4,6 @@ #include #include -#include #include /* flags */ @@ -46,7 +45,6 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; - struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index e594b4d6b58a9b931cdd942e8dfb235b873ab26c..0cf3dda5319fe3bc3b24b59a068be03497ee819d 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -450,7 +450,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) err = attach_meter(meter_tbl, meter); if (err) - goto exit_unlock; + goto exit_free_old_meter; ovs_unlock(); @@ -473,6 +473,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); +exit_free_old_meter: + ovs_meter_free(old_meter); exit_unlock: ovs_unlock(); nlmsg_free(reply); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 83e208472022f75365a65326ebdce621b394e2d2..89b6b1f5ed71e89c451ccab224e21f261698071e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3316,18 +3316,18 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; + po = pkt_sk(sk); + err = packet_alloc_pending(po); + if (err) + goto out_sk_free; + sock_init_data(sock, sk); - po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; - err = packet_alloc_pending(po); - if (err) - goto out2; - packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; @@ -3362,7 +3362,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, preempt_enable(); return 0; -out2: +out_sk_free: sk_free(sk); out: return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6f1ac68c0fd5a10cb7009c49c2c98c863856d2ba..0e8e9a89553befd2cd10f35bc27f16ce161acd00 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2127,6 +2127,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, } if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); err = -EINVAL; goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 87398af2715a3efc53ffa901ca828f4ab810d0c8..117c7b038591e3a2a2310d571f04f05f6ba19469 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -354,7 +354,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, 
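The af_packet.c hunk above reorders packet_create() so packet_alloc_pending() runs before sock_init_data(): if the allocation fails, the socket has not been wired up yet and a plain sk_free() suffices, with no half-initialized state to unwind. A sketch of the "do every fallible step first, then publish" ordering (illustrative types only):

#include <stdio.h>
#include <stdlib.h>

struct sock { int *pending; int published; };

static int sock_create(struct sock **out)
{
	struct sock *sk = calloc(1, sizeof(*sk));

	*out = NULL;
	if (!sk)
		return -1;

	/* Perform the fallible allocation *before* initializing and
	 * publishing, so the failure path is a trivial free. */
	sk->pending = calloc(16, sizeof(int));
	if (!sk->pending) {
		free(sk);	/* like: goto out_sk_free -> sk_free(sk) */
		return -1;
	}

	sk->published = 1;	/* like sock_init_data(): now visible */
	*out = sk;
	return 0;
}

int main(void)
{
	struct sock *sk;

	printf("sock_create: %d\n", sock_create(&sk));
	return 0;
}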
[TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, - [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, + [TCA_FLOW_RSHIFT] = NLA_POLICY_MAX(NLA_U32, + 31 /* BITS_PER_U32 - 1 */), [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, [TCA_FLOW_MASK] = { .type = NLA_U32 }, [TCA_FLOW_XOR] = { .type = NLA_U32 }, diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 9c224872ef0355e2b4d128c1924cb242106e31c0..bda114034bad37626924e032d6bd397256d2c1ca 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) { struct ets_sched *q = qdisc_priv(sch); + if (arg == 0 || arg > q->nbands) + return NULL; return &q->classes[arg - 1]; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index ac8de6fc06595eb7269a568badcf8c4ca9170f4c..b8f06b6c255b64f2c5eafec8f0d829c9ddce3fa2 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -65,6 +65,7 @@ struct taprio_sched { u32 flags; enum tk_offsets tk_offset; int clockid; + bool offloaded; atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ * speeds it's sub-nanoseconds per byte */ @@ -1264,6 +1265,8 @@ static int taprio_enable_offload(struct net_device *dev, goto done; } + q->offloaded = true; + done: taprio_offload_free(offload); @@ -1278,12 +1281,9 @@ static int taprio_disable_offload(struct net_device *dev, struct tc_taprio_qopt_offload *offload; int err; - if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) + if (!q->offloaded) return 0; - if (!ops->ndo_setup_tc) - return -EOPNOTSUPP; - offload = taprio_offload_alloc(0); if (!offload) { NL_SET_ERR_MSG(extack, @@ -1299,6 +1299,8 @@ static int taprio_disable_offload(struct net_device *dev, goto out; } + q->offloaded = false; + out: taprio_offload_free(offload); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20c93b68505e6d21481fe8affa6adfbd2ad7bf49..522e43f66ecd05c855ac97c36ffb0916adf1d858 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1434,7 +1434,9 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); - cache_get(cp); + if (!cache_get_rcu(cp)) + return 0; + if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_puts(m, "# "); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0666f981618a287bb541bed525a42fb17b3881ef..94d1816fe978a3eeb93215aa3c87575bd843cfad 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1134,6 +1134,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); } static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) @@ -2314,6 +2315,13 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EALREADY: xprt_unlock_connect(xprt, transport); return; + case -EPERM: + /* Happens, for instance, if a BPF program is preventing + * the connect. Remap the error so upper layers can better + * deal with it. + */ + status = -ECONNREFUSED; + fallthrough; case -EINVAL: /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. 
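The sch_ets.c guard above rejects arg == 0 and arg > q->nbands before indexing classes[arg - 1]; class ids handed in from userspace are 1-based, so both ends must be checked or the subtraction turns 0 into a wrapped, out-of-bounds index. A minimal sketch of the same 1-based bounds check (standalone, invented names):

#include <stdio.h>

#define NBANDS 8
static int classes[NBANDS];

/* arg is a 1-based class id from an untrusted source. */
static int *class_from_arg(unsigned long arg)
{
	if (arg == 0 || arg > NBANDS)
		return NULL;	/* arg - 1 would wrap or overrun the array */
	return &classes[arg - 1];
}

int main(void)
{
	printf("arg=0 -> %p\n", (void *)class_from_arg(0));
	printf("arg=1 -> %p\n", (void *)class_from_arg(1));
	printf("arg=9 -> %p\n", (void *)class_from_arg(9));
	return 0;
}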
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 3c1591f64bd59cc8e655a9632c8de6748a921253..df9b93bcf93f22b1b2495856dc09764d15cdba62 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -804,6 +804,7 @@ static void cleanup_bearer(struct work_struct *work) { struct udp_bearer *ub = container_of(work, struct udp_bearer, work); struct udp_replicast *rcast, *tmp; + struct tipc_net *tn; list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { dst_cache_destroy(&rcast->dst_cache); @@ -809,10 +810,11 @@ static void cleanup_bearer(struct work_struct *work) kfree_rcu(rcast, rcu); } - atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); + tn = tipc_net(sock_net(ub->ubsock->sk)); dst_cache_destroy(&ub->rcast.dst_cache); udp_tunnel_sock_release(ub->ubsock); synchronize_net(); + atomic_dec(&tn->wq_count); kfree(ub); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b52a3e64b3c90a1efe3931918b820ac550fb41fc..7e2220410c7c04ccac54c8d7e0e2694a76aabd82 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -724,9 +724,17 @@ struct tls_context *tls_ctx_create(struct sock *sk) return NULL; mutex_init(&ctx->tx_lock); - rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); ctx->sk = sk; + /* Release semantic of rcu_assign_pointer() ensures that + * ctx->sk_proto is visible before changing sk->sk_prot in + * update_sk_prot(), and prevents reading uninitialized value in + * tls_{getsockopt, setsockopt}. Note that we do not need a + * read barrier in tls_{getsockopt,setsockopt} as there is an + * address dependency between sk->sk_proto->{getsockopt,setsockopt} + * and ctx->sk_proto. + */ + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); return ctx; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index cc3bae2659e7916fcda89d2af2637894c49a59f4..b4871cc1fd393570b057302effc96bf141eb1faa 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -538,6 +538,7 @@ static void hvs_destruct(struct vsock_sock *vsk) vmbus_hvsock_device_unregister(chan); kfree(hvs); + vsk->trans = NULL; } static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index fe231a9144c9ccdfc2fc40ad98644590bc19b4fa..d0b90843aeb9a7fdd7e4b278f4973ece574a6870 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -687,6 +687,7 @@ void virtio_transport_destruct(struct vsock_sock *vsk) struct virtio_vsock_sock *vvs = vsk->trans; kfree(vvs); + vsk->trans = NULL; } EXPORT_SYMBOL_GPL(virtio_transport_destruct); @@ -961,6 +962,11 @@ virtio_transport_recv_connected(struct sock *sk, virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk, + VIRTIO_VSOCK_TYPE_STREAM, + NULL); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; @@ -1061,6 +1067,14 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, return -ENOMEM; } + /* __vsock_release() might have already flushed accept_queue. * Subsequent enqueues would lead to a memory leak.
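The tls_main.c comment above spells out why rcu_assign_pointer() must come last in tls_ctx_create(): its release semantics guarantee that a reader which observes the published pointer also observes every field initialized before the store. The C11 equivalent is a release store paired with acquiring readers; a compact sketch (illustrative, not the kernel implementation):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { int sk_proto; };

static _Atomic(struct ctx *) ulp_data;

static void publish_ctx(void)
{
	struct ctx *c = malloc(sizeof(*c));

	if (!c)
		return;
	c->sk_proto = 42;	/* initialize every field first... */
	/* ...then publish with release semantics, like rcu_assign_pointer():
	 * a reader that sees the pointer also sees sk_proto == 42. */
	atomic_store_explicit(&ulp_data, c, memory_order_release);
}

int main(void)
{
	struct ctx *c;

	publish_ctx();
	c = atomic_load_explicit(&ulp_data, memory_order_acquire);
	if (c)
		printf("sk_proto=%d\n", c->sk_proto);
	free(c);
	return 0;
}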
+ */ + if (sk->sk_shutdown == SHUTDOWN_MASK) { + virtio_transport_reset_no_sock(t, pkt); + return -ESHUTDOWN; + } + child = vsock_create_connected(sk); if (!child) { virtio_transport_reset_no_sock(t, pkt); diff --git a/net/vtoa/vtoa.h b/net/vtoa/vtoa.h index 6b16ba863df299aa7a72df83c8b8441c12759c09..a597ac5ecb51e505ac3891720b216c46432c4bbc 100644 --- a/net/vtoa/vtoa.h +++ b/net/vtoa/vtoa.h @@ -52,6 +52,17 @@ #define TCPOLEN_TOA_V6 20 #endif +/* can be read from ipv4 or ipv6 */ +#define TCPOPT_TOA_V6_EX 254 +#define TCPOLEN_TOA_V6_EX 20 + +struct toa_data_v6 { + __u8 optcode; + __u8 optsize; + __be16 port; + struct in6_addr in6; +}; + /* MUST be 4 bytes alignment */ struct toa_data { __u8 optcode; diff --git a/net/vtoa/vtoa_main.c b/net/vtoa/vtoa_main.c index d6e5476cd9a39825a535eda0cc0034d31b8287d5..edaeccb06cf08dd7666327e611d6833ba8c2c6af 100644 --- a/net/vtoa/vtoa_main.c +++ b/net/vtoa/vtoa_main.c @@ -229,6 +229,13 @@ static int get_toa_data(struct sk_buff *skb, void *sk_toa_data, int sk_toa_datal ntohl(dbg_vid), &dbg_v6vip); return 1; + } else if (opcode == TCPOPT_TOA_V6_EX && opsize == TCPOLEN_TOA_V6_EX) { + struct toa_data_v6 *tdata; + + memset(sk_toa_data, 0, sizeof(struct toa_data_v6)); + memcpy(sk_toa_data, ptr - 2, TCPOLEN_TOA_V6_EX); + tdata = (struct toa_data_v6 *)sk_toa_data; + return 1; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (opcode == TCPOPT_TOA_V6 && opsize == TCPOLEN_TOA_V6) { @@ -308,6 +315,19 @@ static int inet_getname_toa(struct socket *sock, struct sockaddr *uaddr, TOA_DBG("%s: af: %d, cip [%pI6]:%u\n", sin6->sin6_family, __func__, &sin6->sin6_addr, ntohs(sin6->sin6_port)); + } else if (TCPOPT_TOA_V6_EX == option[0] && TCPOLEN_TOA_V6_EX == option[1]) { + struct toa_data_v6 *tdata = SK_TOA_DATA(sk); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr; + + /* hack to AF_INET6 */ + *p_retval = sizeof(*sin6); + retval = *p_retval; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = tdata->port; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + sin6->sin6_addr = tdata->in6; + } else { /* doesn't belong to us */ #ifdef TOA_DEBUG struct toa_data *tdata = SK_TOA_DATA(sk); @@ -347,6 +367,12 @@ static int inet6_getname_toa(struct socket *sock, struct sockaddr *uaddr, TOA_DBG("%s: ipv6 = %pI6, port = %u\n", __func__, &sin->sin6_addr, ntohs(sin->sin6_port)); + } else if (TCPOPT_TOA_V6_EX == option[0] && TCPOLEN_TOA_V6_EX == option[1]) { + struct toa_data_v6 *tdata = SK_TOA_DATA(sk); + + sin->sin6_port = tdata->port; + sin->sin6_addr = tdata->in6; + } else if (TCPOPT_TOA == option[0] && TCPOLEN_TOA == option[1]) { struct toa_data *tdata = SK_TOA_DATA(sk); diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 49da2b8ace8b7d7673df3abcd5365ed1b2f196b6..bb356e892128c80f4d169ba1701f038a4538efa8 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -223,7 +223,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock *old_xs, **map_entry; - int k = *(u32 *)key; + u32 k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index 7e31329cac3cdba8bc5606ccc26efacfd31d3b99..4908506381c6393ac6175acccfe5d3923e0a4c5c 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -22,7 +22,7 @@ * XXX: This is wrong on {mips, parisc, powerpc, sparc}.
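The vtoa changes above add a 20-byte TCP option (TCPOPT_TOA_V6_EX, kind 254): 1 byte kind + 1 byte length + 2 bytes port + 16 bytes IPv6 address, matching struct toa_data_v6. A standalone sketch of decoding such an option from a raw byte buffer (fabricated sample bytes, host-side tooling only, not the kernel parser):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TCPOPT_TOA_V6_EX  254
#define TCPOLEN_TOA_V6_EX 20

struct toa_v6 {			/* mirrors struct toa_data_v6 in the patch */
	uint8_t  optcode;
	uint8_t  optsize;
	uint16_t port;		/* network byte order */
	struct in6_addr in6;
};

int main(void)
{
	/* Fabricated option bytes: kind, len, port 443, address ::1. */
	uint8_t opt[TCPOLEN_TOA_V6_EX] = {
		TCPOPT_TOA_V6_EX, TCPOLEN_TOA_V6_EX, 0x01, 0xbb
	};
	opt[19] = 1;	/* last byte of ::1 */

	if (opt[0] == TCPOPT_TOA_V6_EX && opt[1] == TCPOLEN_TOA_V6_EX) {
		struct toa_v6 td;
		char addr[INET6_ADDRSTRLEN];

		memcpy(&td, opt, sizeof(td)); /* like the memcpy from ptr - 2 */
		inet_ntop(AF_INET6, &td.in6, addr, sizeof(addr));
		printf("client [%s]:%u\n", addr, ntohs(td.port));
	}
	return 0;
}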
*/ #undef O_LARGEFILE -#ifdef __aarch64__ +#if defined (__aarch64__) || defined (__sw_64__) #define O_LARGEFILE 0x20000 #else #define O_LARGEFILE 0x8000 diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index b52f87ca0ebd9e6aca15133ae75016b34f1fac33..6a91824ad8679563b59cab0d66f812f0f4b7f837 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1813,8 +1813,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__sw_64__) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM(_regs) (_regs).regs[0] -# define SYSCALL_RET(_regs) (_regs).regs[0] -# define SYSCALL_NUM_RET_SHARE_REG +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__xtensa__) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM(_regs) (_regs).syscall diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index 50c5ab1aa6fa1a3918546e84df23682c9f846153..a07896a463643d3bd8e6e29c1dfc7f6b44e5f49e 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -17,6 +17,12 @@ #include "defines.h" #include "main.h" +/* + * FIXME: OpenSSL 3.0 has deprecated some functions. For now just ignore + * the warnings. + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + struct q1q2_ctx { BN_CTX *bn_ctx; BIGNUM *m;
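The sigstruct.c hunk silences OpenSSL 3.0 deprecation warnings for the whole file. When only a few call sites are affected, the same pragma can instead be scoped with push/pop, keeping the warning live for the rest of the file; a sketch of the narrower variant (self-contained example with a locally declared deprecated function, not the selftest's OpenSSL calls):

#include <stdio.h>

static void legacy_helper(void) __attribute__((deprecated));
static void legacy_helper(void) { puts("legacy"); }

int main(void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
	legacy_helper();   /* only this call is exempted from the warning */
#pragma GCC diagnostic pop
	return 0;
}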