diff --git a/.github/workflows/build-cachelib-centos.yml b/.github/workflows/build-cachelib-centos-long.yml
similarity index 86%
rename from .github/workflows/build-cachelib-centos.yml
rename to .github/workflows/build-cachelib-centos-long.yml
index 3b071a186a..92165f603b 100644
--- a/.github/workflows/build-cachelib-centos.yml
+++ b/.github/workflows/build-cachelib-centos-long.yml
@@ -1,7 +1,8 @@
name: build-cachelib-centos-latest
on:
schedule:
- - cron: '30 5 * * 1,4'
+ - cron: '0 7 * * *'
+
jobs:
build-cachelib-centos8-latest:
name: "CentOS/latest - Build CacheLib with all dependencies"
@@ -33,3 +34,6 @@ jobs:
uses: actions/checkout@v2
- name: "build CacheLib using build script"
run: ./contrib/build.sh -j -v -T
+ - name: "run tests"
+ timeout-minutes: 60
+ run: cd opt/cachelib/tests && ../../../run_tests.sh long
diff --git a/.github/workflows/build-cachelib-debian.yml b/.github/workflows/build-cachelib-debian.yml
index a2ae44a569..5bc3ad3c70 100644
--- a/.github/workflows/build-cachelib-debian.yml
+++ b/.github/workflows/build-cachelib-debian.yml
@@ -1,7 +1,8 @@
name: build-cachelib-debian-10
on:
schedule:
- - cron: '30 5 * * 2,6'
+ - cron: '30 5 * * 0,3'
+
jobs:
build-cachelib-debian-10:
name: "Debian/Buster - Build CacheLib with all dependencies"
@@ -37,3 +38,6 @@ jobs:
uses: actions/checkout@v2
- name: "build CacheLib using build script"
run: ./contrib/build.sh -j -v -T
+ - name: "run tests"
+ timeout-minutes: 60
+ run: cd opt/cachelib/tests && ../../../run_tests.sh
diff --git a/.github/workflows/build-cachelib-docker.yml b/.github/workflows/build-cachelib-docker.yml
new file mode 100644
index 0000000000..be28bc233c
--- /dev/null
+++ b/.github/workflows/build-cachelib-docker.yml
@@ -0,0 +1,49 @@
+name: build-cachelib-docker
+on:
+ push:
+ pull_request:
+
+jobs:
+ build-cachelib-docker:
+ name: "CentOS/latest - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ env:
+ REPO: cachelib
+ GITHUB_REPO: intel/CacheLib
+ CONTAINER_REG: ghcr.io/pmem/cachelib
+ CONTAINER_REG_USER: ${{ secrets.GH_CR_USER }}
+ CONTAINER_REG_PASS: ${{ secrets.GH_CR_PAT }}
+ FORCE_IMAGE_ACTION: ${{ secrets.FORCE_IMAGE_ACTION }}
+ HOST_WORKDIR: ${{ github.workspace }}
+ WORKDIR: docker
+ IMG_VER: devel
+ strategy:
+ matrix:
+ CONFIG: ["OS=centos OS_VER=8streams PUSH_IMAGE=1"]
+ steps:
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1
+ echo === env ===
+ env
+ echo === gcc -v ===
+ gcc -v
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: Pull the image or rebuild and push it
+ run: cd $WORKDIR && ${{ matrix.CONFIG }} ./pull-or-rebuild-image.sh $FORCE_IMAGE_ACTION
+
+ - name: Run the build
+ run: cd $WORKDIR && ${{ matrix.CONFIG }} ./build.sh
diff --git a/.github/workflows/build-cachelib.yml b/.github/workflows/build-cachelib.yml
deleted file mode 100644
index 15161c40e0..0000000000
--- a/.github/workflows/build-cachelib.yml
+++ /dev/null
@@ -1,147 +0,0 @@
-# NOTES:
-# 1. While Github-Actions enables cache of dependencies,
-# Facebook's projects (folly,fizz,wangle,fbthrift)
-# are fast-moving targets - so we always checkout the latest version
-# (as opposed to using gitactions cache, which is recommended in the
-# documentation).
-#
-# 2. Using docker containers to build on CentOS and Debian,
-# Specifically CentOS v8.1.1911 as that
-# version is closest to Facebook's internal dev machines.
-#
-# 3. When using docker containers we install 'sudo',
-# as the docker images are typically very minimal and without
-# 'sudo', while the ./contrib/ scripts use sudo.
-#
-# 4. When using the docker containers we install 'git'
-# BEFORE getting the CacheLib source code (with the 'checkout' action).
-# Otherwise, the 'checkout@v2' action script falls back to downloading
-# the git repository files only, without the ".git" directory.
-# We need the ".git" directory to updating the git-submodules
-# (folly/wangle/fizz/fbthrift). See:
-# https://github.com/actions/checkout/issues/126#issuecomment-570288731
-#
-# 5. To reduce less-critical (and yet frequent) rebuilds, the jobs
-# check the author of the commit, and SKIP the build if
-# the author is "svcscm". These commits are automatic updates
-# for the folly/fbthrift git-submodules, and can happen several times a day.
-# While there is a possiblity that updating the git-submodules breaks
-# CacheLib, it is less likely, and will be detected once an actual
-# code change commit triggers a full build.
-# e.g. https://github.com/facebookincubator/CacheLib/commit/9372a82190dd71a6e2bcb668828cfed9d1bd25c1
-#
-# 6. The 'if' condition checking the author name of the commit (see #5 above)
-# uses github actions metadata variable:
-# 'github.event.head_commit.author.name'
-# GitHub have changed in the past the metadata structure and broke
-# such conditions. If you need to debug the metadata values,
-# see the "dummy-show-github-event" job below.
-# E.g. https://github.blog/changelog/2019-10-16-changes-in-github-actions-push-event-payload/
-# As of Jan-2021, the output is:
-# {
-# "author": {
-# "email": "mimi@moo.moo",
-# "name": "mimi"
-# },
-# "committer": {
-# "email": "assafgordon@gmail.com",
-# "name": "Assaf Gordon",
-# "username": "agordon"
-# },
-# "distinct": true,
-# "id": "6c3aab0970f4a07cc2af7658756a6ef9d82f3276",
-# "message": "gitactions: test",
-# "timestamp": "2021-01-26T11:11:57-07:00",
-# "tree_id": "741cd1cb802df84362a51e5d01f28788845d08b7",
-# "url": "https://github.com/agordon/CacheLib/commit/6c3aab0970f4a07cc2af7658756a6ef9d82f3276"
-# }
-#
-# 7. When checking the commit's author name, we use '...author.name',
-# NOT '...author.username' - because the 'svcscm' author does not
-# have a github username (see the 'mimi' example above).
-#
-
-name: build-cachelib
-on: [push]
-jobs:
- dummy-show-github-event:
- name: "Show GitHub Action event.head_commit variable"
- runs-on: ubuntu-latest
- steps:
- - name: "GitHub Variable Content"
- env:
- CONTENT: ${{ toJSON(github.event.head_commit) }}
- run: echo "$CONTENT"
-
-
- build-cachelib-centos8-1-1911:
- if: "!contains(github.event.head_commit.author.name, 'svcscm')"
- name: "CentOS/8.1.1911 - Build CacheLib with all dependencies"
- runs-on: ubuntu-latest
- # Docker container image name
- container: "centos:8.1.1911"
- steps:
- - name: "update packages"
- # stock centos has a problem with CMAKE, fails with:
- # "cmake: symbol lookup error: cmake: undefined symbol: archive_write_add_filter_zstd"
- # updating solves it
- run: dnf update -y
- - name: "install sudo,git"
- run: dnf install -y sudo git cmake gcc
- - name: "System Information"
- run: |
- echo === uname ===
- uname -a
- echo === /etc/os-release ===
- cat /etc/os-release
- echo === df -hl ===
- df -hl
- echo === free -h ===
- free -h
- echo === top ===
- top -b -n1 -1 -Eg || timeout 1 top -b -n1
- echo === env ===
- env
- echo === gcc -v ===
- gcc -v
- - name: "checkout sources"
- uses: actions/checkout@v2
- - name: "Install Prerequisites"
- run: ./contrib/build.sh -S -B
- - name: "Test: update-submodules"
- run: ./contrib/update-submodules.sh
- - name: "Install dependency: zstd"
- run: ./contrib/build-package.sh -j -v -i zstd
- - name: "Install dependency: googleflags"
- run: ./contrib/build-package.sh -j -v -i googleflags
- - name: "Install dependency: googlelog"
- run: ./contrib/build-package.sh -j -v -i googlelog
- - name: "Install dependency: googletest"
- run: ./contrib/build-package.sh -j -v -i googletest
- - name: "Install dependency: sparsemap"
- run: ./contrib/build-package.sh -j -v -i sparsemap
- - name: "Install dependency: fmt"
- run: ./contrib/build-package.sh -j -v -i fmt
- - name: "Install dependency: folly"
- run: ./contrib/build-package.sh -j -v -i folly
- - name: "Install dependency: fizz"
- run: ./contrib/build-package.sh -j -v -i fizz
- - name: "Install dependency: wangle"
- run: ./contrib/build-package.sh -j -v -i wangle
- - name: "Install dependency: fbthrift"
- run: ./contrib/build-package.sh -j -v -i fbthrift
- - name: "build CacheLib"
- # Build cachelib in debug mode (-d) and with all tests (-t)
- run: ./contrib/build-package.sh -j -v -i -d -t cachelib
- - uses: actions/upload-artifact@v2
- if: failure()
- with:
- name: cachelib-cmake-logs
- path: |
- build-cachelib/CMakeFiles/*.log
- build-cachelib/CMakeCache.txt
- build-cachelib/Makefile
- build-cachelib/**/Makefile
- if-no-files-found: warn
- retention-days: 1
-
diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml
index 99370135ff..90c8d739c6 100644
--- a/.github/workflows/clang-format-check.yml
+++ b/.github/workflows/clang-format-check.yml
@@ -1,6 +1,6 @@
# From: https://github.com/marketplace/actions/clang-format-check#multiple-paths
name: clang-format Check
-on: [pull_request]
+on: []
jobs:
formatting-check:
name: Formatting Check
@@ -13,7 +13,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Run clang-format style check for C/C++ programs.
- uses: jidicula/clang-format-action@v3.4.0
+ uses: jidicula/clang-format-action@v4.6.2
with:
- clang-format-version: '11'
+ clang-format-version: '13'
check-path: ${{ matrix.path }}
diff --git a/.packit.yaml b/.packit.yaml
new file mode 100644
index 0000000000..bea307d9d0
--- /dev/null
+++ b/.packit.yaml
@@ -0,0 +1,25 @@
+# See the documentation for more information:
+# https://packit.dev/docs/configuration
+
+specfile_path: cachelib.spec
+
+upstream_package_name: CacheLib
+downstream_package_name: cachelib
+
+actions:
+ fix-spec-file:
+ - bash -c "sed -i cachelib.spec -e \"s/%global commit.*/%global commit $(git rev-parse HEAD)/\""
+ - bash -c "sed -i cachelib.spec -e \"s/%global date.*/%global date $(git show -s --date=format:'%Y%m%d' --format=%cd)/\""
+ create-archive:
+ - bash -c "COMMIT=$(git rev-parse HEAD); curl -ORL https://github.com/facebook/CacheLib/archive/${COMMIT}/cachelib-${COMMIT}.tar.gz; echo cachelib-${COMMIT}.tar.gz"
+ post-upstream-clone: "bash -c \"rm -rf cachelib-dist-git; git clone -b packit https://pagure.io/meta/cachelib.git cachelib-dist-git && mv cachelib-dist-git/cachelib*.{spec,patch} .\""
+
+jobs:
+- job: copr_build
+ trigger: pull_request
+ metadata:
+ targets:
+ - fedora-rawhide-aarch64
+ - fedora-rawhide-x86_64
+ - fedora-35-aarch64
+ - fedora-35-x86_64
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebe779f258..2cf2803a42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog
+## V17
+In this version, `CacheAllocator::ItemHandle` is removed. Updating to this version will cause a compilation error if `ItemHandle` is still used.
+
## V16
This version is incompatible with versions below 15. Downgrading from this version directly to a version below 15 will require the cache to be dropped. If you need to downgrade from this version, please make sure you downgrade to version 15 first to avoid dropping the cache.
@@ -8,7 +11,7 @@ This version is incompatible with versions below 15. Downgrading from this versi
This version is incompatible with any previous versions.
-Updating to this version may cause compliation error because:
+Updating to this version may cause compilation error because:
- The following APIs are removed:
1. CacheAllocator::allocatePermanent_deprecated.
diff --git a/MultiTierDataMovement.md b/MultiTierDataMovement.md
new file mode 100644
index 0000000000..7fb10d01e6
--- /dev/null
+++ b/MultiTierDataMovement.md
@@ -0,0 +1,95 @@
+# Background Data Movement
+
+In order to reduce the number of online evictions and support asynchronous
+promotion - we have added two periodic workers to handle eviction and promotion.
+
+The diagram below shows a simplified version of how the background evictor
+thread (green) is integrated to the CacheLib architecture.
+
+
+
+
+
+## Synchronous Eviction and Promotion
+
+- `disableEviction`: Disables eviction to memory (item is always evicted to NVMe or removed
+on eviction)
+
+## Background Evictors
+
+The background evictors scan each class to see if there are objects to move to the next (lower)
+tier using a given strategy. Here we document the parameters for the different
+strategies and general parameters.
+
+- `backgroundEvictorIntervalMilSec`: The interval that this thread runs for - by default
+the background evictor threads will wake up every 10 ms to scan the AllocationClasses. Also,
+the background evictor thread will be woken up every time there is a failed allocation (from
+a request handling thread) and the current percentage of free memory for the
+AllocationClass is lower than `lowEvictionAcWatermark`. This may render the interval parameter
+not as important when there are many allocations occurring from request handling threads.
+
+- `evictorThreads`: The number of background evictors to run - each thread is assigned
+a set of AllocationClasses to scan and evict objects from. Currently, each thread gets
+an equal number of classes to scan - but as object size distribution may be unequal - future
+versions will attempt to balance the classes among threads. The range is 1 to number of AllocationClasses.
+The default is 1.
+
+- `maxEvictionBatch`: The number of objects to remove in a given eviction call. The
+default is 40. Lower range is 10 and the upper range is 1000. Too low and we might not
+remove objects at a reasonable rate, too high and it might increase contention with user threads.
+
+- `minEvictionBatch`: Minimum number of items to evict at any time (if there are any
+candidates)
+
+- `maxEvictionPromotionHotness`: Maximum candidates to consider for eviction. This is similar to `maxEvictionBatch`
+but it specifies how many candidates will be taken into consideration, not the actual number of items to evict.
+This option can be used to configure duration of critical section on LRU lock.
+
+
+### FreeThresholdStrategy (default)
+
+- `lowEvictionAcWatermark`: Triggers background eviction thread to run
+when this percentage of the AllocationClass is free.
+The default is `2.0`, to avoid wasting capacity we don't set this above `10.0`.
+
+- `highEvictionAcWatermark`: Stop the evictions from an AllocationClass when this
+percentage of the AllocationClass is free. The default is `5.0`, to avoid wasting capacity we
+don't set this above `10`.
+
+
+## Background Promoters
+
+The background promoters scan each class to see if there are objects to move to an upper
+tier using a given strategy. Here we document the parameters for the different
+strategies and general parameters.
+
+- `backgroundPromoterIntervalMilSec`: The interval that this thread runs for - by default
+the background promoter threads will wake up every 10 ms to scan the AllocationClasses for
+objects to promote.
+
+- `promoterThreads`: The number of background promoters to run - each thread is assigned
+a set of AllocationClasses to scan and promote objects from. Currently, each thread gets
+an equal number of classes to scan - but as object size distribution may be unequal - future
+versions will attempt to balance the classes among threads. The range is `1` to number of AllocationClasses. The default is `1`.
+
+- `maxPromotionBatch`: The number of objects to promote in a given promotion call. The
+default is 40. Lower range is 10 and the upper range is 1000. Too low and we might not
+remove objects at a reasonable rate, too high and it might increase contention with user threads.
+
+- `minPromotionBatch`: Minimum number of items to promote at any time (if there are any
+candidates)
+
+- `numDuplicateElements`: This allows us to promote items that have existing handles (read-only) since
+we won't need to modify the data when a user is done with the data. Therefore, for a short time
+the data could reside in both tiers until it is evicted from its current tier. The default is to
+not allow this (0). Setting the value to 100 will enable duplicate elements in tiers.
+
+### Background Promotion Strategy (only one currently)
+
+- `promotionAcWatermark`: Promote items if there is at least this
+percent of free AllocationClasses. Promotion thread will attempt to move `maxPromotionBatch` number of objects
+to that tier. The objects are chosen from the head of the LRU. The default is `4.0`.
+This value should correlate with `lowEvictionAcWatermark`, `highEvictionAcWatermark`, `minAcAllocationWatermark`, `maxAcAllocationWatermark`.
+- `maxPromotionBatch`: The number of objects to promote in batch during BG promotion. Analogous to
+`maxEvictionBatch`. Its value should be lower to decrease contention on hot items.
+
diff --git a/cachelib/CMakeLists.txt b/cachelib/CMakeLists.txt
index 917e164e3b..f666025093 100644
--- a/cachelib/CMakeLists.txt
+++ b/cachelib/CMakeLists.txt
@@ -17,7 +17,7 @@
# refer to the root source directory of the project as ${HELLO_SOURCE_DIR} and
# to the root binary directory of the project as ${HELLO_BINARY_DIR}.
-cmake_minimum_required (VERSION 3.19)
+cmake_minimum_required (VERSION 3.12)
## TODO: get version from variable
project (CacheLib VERSION 0.1)
diff --git a/cachelib/allocator/BackgroundMover-inl.h b/cachelib/allocator/BackgroundMover-inl.h
new file mode 100644
index 0000000000..d65e732202
--- /dev/null
+++ b/cachelib/allocator/BackgroundMover-inl.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) Intel and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace facebook {
+namespace cachelib {
+
+template
+BackgroundMover::BackgroundMover(
+ Cache& cache,
+ std::shared_ptr strategy,
+ MoverDir direction)
+ : cache_(cache), strategy_(strategy), direction_(direction) {
+ if (direction_ == MoverDir::Evict) {
+ moverFunc = BackgroundMoverAPIWrapper::traverseAndEvictItems;
+
+ } else if (direction_ == MoverDir::Promote) {
+ moverFunc = BackgroundMoverAPIWrapper::traverseAndPromoteItems;
+ }
+}
+
+template
+BackgroundMover::~BackgroundMover() {
+ stop(std::chrono::seconds(0));
+}
+
+template
+void BackgroundMover::work() {
+ try {
+ checkAndRun();
+ } catch (const std::exception& ex) {
+ XLOGF(ERR, "BackgroundMover interrupted due to exception: {}", ex.what());
+ }
+}
+
+template
+void BackgroundMover::setAssignedMemory(
+ std::vector>&& assignedMemory) {
+ XLOG(INFO, "Class assigned to background worker:");
+ for (auto [tid, pid, cid] : assignedMemory) {
+ XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid);
+ }
+
+ mutex.lock_combine([this, &assignedMemory] {
+ this->assignedMemory_ = std::move(assignedMemory);
+ });
+}
+
+// Look for classes that exceed the target memory capacity
+// and return those for eviction
+template
+void BackgroundMover::checkAndRun() {
+ auto assignedMemory = mutex.lock_combine([this] { return assignedMemory_; });
+
+ unsigned int moves = 0;
+ std::set classes{};
+ auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory);
+
+ for (size_t i = 0; i < batches.size(); i++) {
+ const auto [tid, pid, cid] = assignedMemory[i];
+ const auto batch = batches[i];
+
+ classes.insert(cid);
+ const auto& mpStats = cache_.getPoolByTid(pid, tid).getStats();
+
+ if (!batch) {
+ continue;
+ }
+
+ // try moving BATCH items from the class in order to reach free target
+ auto moved = moverFunc(cache_, tid, pid, cid, batch);
+ moves += moved;
+ moves_per_class_[tid][pid][cid] += moved;
+ totalBytesMoved.add(moved * mpStats.acStats.at(cid).allocSize);
+ }
+
+ numTraversals.inc();
+ numMovedItems.add(moves);
+ totalClasses.add(classes.size());
+}
+
+template
+BackgroundMoverStats BackgroundMover::getStats() const noexcept {
+ BackgroundMoverStats stats;
+ stats.numMovedItems = numMovedItems.get();
+ stats.runCount = numTraversals.get();
+ stats.totalBytesMoved = totalBytesMoved.get();
+ stats.totalClasses = totalClasses.get();
+ stats.strategyStats = strategy_->getStats();
+
+ return stats;
+}
+
+template
+std::map>>
+BackgroundMover::getClassStats() const noexcept {
+ return moves_per_class_;
+}
+
+} // namespace cachelib
+} // namespace facebook
diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h
new file mode 100644
index 0000000000..5538561e11
--- /dev/null
+++ b/cachelib/allocator/BackgroundMover.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Intel and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "cachelib/allocator/BackgroundMoverStrategy.h"
+#include "cachelib/allocator/CacheStats.h"
+#include "cachelib/common/AtomicCounter.h"
+#include "cachelib/common/PeriodicWorker.h"
+
+namespace facebook {
+namespace cachelib {
+
+// wrapper that exposes the private APIs of CacheType that are specifically
+// needed for the cache api
+template
+struct BackgroundMoverAPIWrapper {
+ static size_t traverseAndEvictItems(C& cache,
+ unsigned int tid,
+ unsigned int pid,
+ unsigned int cid,
+ size_t batch) {
+ return cache.traverseAndEvictItems(tid, pid, cid, batch);
+ }
+
+ static size_t traverseAndPromoteItems(C& cache,
+ unsigned int tid,
+ unsigned int pid,
+ unsigned int cid,
+ size_t batch) {
+ return cache.traverseAndPromoteItems(tid, pid, cid, batch);
+ }
+};
+
+enum class MoverDir { Evict = 0, Promote };
+
+// Periodic worker that evicts items from tiers in batches
+// The primary aim is to reduce insertion times for new items in the
+// cache
+template
+class BackgroundMover : public PeriodicWorker {
+ public:
+ using Cache = CacheT;
+ // @param cache the cache interface
+  // @param strategy the strategy class that defines how objects are
+ // moved,
+ // (promoted vs. evicted and how much)
+ BackgroundMover(Cache& cache,
+ std::shared_ptr strategy,
+ MoverDir direction_);
+
+ ~BackgroundMover() override;
+
+ BackgroundMoverStats getStats() const noexcept;
+ std::map>>
+ getClassStats() const noexcept;
+
+ void setAssignedMemory(
+ std::vector>&& assignedMemory);
+
+ private:
+ std::map>>
+ moves_per_class_;
+ // cache allocator's interface for evicting
+ using Item = typename Cache::Item;
+
+ Cache& cache_;
+ std::shared_ptr strategy_;
+ MoverDir direction_;
+
+ std::function
+ moverFunc;
+
+ // implements the actual logic of running the background evictor
+ void work() override final;
+ void checkAndRun();
+
+ AtomicCounter numMovedItems{0};
+ AtomicCounter numTraversals{0};
+ AtomicCounter totalClasses{0};
+ AtomicCounter totalBytesMoved{0};
+
+ std::vector> assignedMemory_;
+ folly::DistributedMutex mutex;
+};
+} // namespace cachelib
+} // namespace facebook
+
+#include "cachelib/allocator/BackgroundMover-inl.h"
diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h
new file mode 100644
index 0000000000..1d586277ab
--- /dev/null
+++ b/cachelib/allocator/BackgroundMoverStrategy.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "cachelib/allocator/Cache.h"
+
+namespace facebook {
+namespace cachelib {
+
+// Base class for background eviction strategy.
+class BackgroundMoverStrategy {
+ public:
+ virtual std::vector calculateBatchSizes(
+ const CacheBase& cache,
+ std::vector> acVec) = 0;
+ virtual BackgroundStrategyStats getStats() = 0;
+};
+
+} // namespace cachelib
+} // namespace facebook
diff --git a/cachelib/allocator/CCacheAllocator.cpp b/cachelib/allocator/CCacheAllocator.cpp
index 6f0bab6727..cff4bded4b 100644
--- a/cachelib/allocator/CCacheAllocator.cpp
+++ b/cachelib/allocator/CCacheAllocator.cpp
@@ -30,12 +30,12 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator, PoolId poolId)
CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
PoolId poolId,
const SerializationType& object)
- : CCacheAllocatorBase(*object.ccMetadata_ref()),
+ : CCacheAllocatorBase(*object.ccMetadata()),
allocator_(allocator),
poolId_(poolId),
currentChunksIndex_(0) {
auto& currentChunks = chunks_[currentChunksIndex_];
- for (auto chunk : *object.chunks_ref()) {
+ for (auto chunk : *object.chunks()) {
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
}
}
@@ -93,11 +93,11 @@ size_t CCacheAllocator::resize() {
CCacheAllocator::SerializationType CCacheAllocator::saveState() {
CCacheAllocator::SerializationType object;
- *object.ccMetadata_ref() = ccType_.saveState();
+ *object.ccMetadata() = ccType_.saveState();
std::lock_guard guard(resizeLock_);
for (auto chunk : getCurrentChunks()) {
- object.chunks_ref()->push_back(allocator_.compress(chunk).saveState());
+ object.chunks()->push_back(allocator_.compress(chunk).saveState());
}
return object;
}
diff --git a/cachelib/allocator/CCacheManager.cpp b/cachelib/allocator/CCacheManager.cpp
index d9d1d6db7a..6750139ff0 100644
--- a/cachelib/allocator/CCacheManager.cpp
+++ b/cachelib/allocator/CCacheManager.cpp
@@ -24,7 +24,7 @@ CCacheManager::CCacheManager(const SerializationType& object,
: memoryAllocator_(memoryAllocator) {
std::lock_guard guard(lock_);
- for (const auto& allocator : *object.allocators_ref()) {
+ for (const auto& allocator : *object.allocators()) {
auto id = memoryAllocator_.getPoolId(allocator.first);
allocators_.emplace(
std::piecewise_construct,
@@ -81,8 +81,7 @@ CCacheManager::SerializationType CCacheManager::saveState() {
SerializationType object;
for (auto& allocator : allocators_) {
- object.allocators_ref()->emplace(allocator.first,
- allocator.second.saveState());
+ object.allocators()->emplace(allocator.first, allocator.second.saveState());
}
return object;
}
diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt
index 0c19c720d8..87643ff006 100644
--- a/cachelib/allocator/CMakeLists.txt
+++ b/cachelib/allocator/CMakeLists.txt
@@ -35,6 +35,7 @@ add_library (cachelib_allocator
CCacheManager.cpp
ContainerTypes.cpp
FreeMemStrategy.cpp
+ FreeThresholdStrategy.cpp
HitsPerSlabStrategy.cpp
LruTailAgeStrategy.cpp
MarginalHitsOptimizeStrategy.cpp
@@ -77,7 +78,7 @@ install(TARGETS cachelib_allocator
DESTINATION ${LIB_INSTALL_DIR} )
if (BUILD_TESTS)
- add_library (allocator_test_support
+ add_library (allocator_test_support OBJECT
${DATASTRUCT_TESTS_THRIFT_FILES}
./nvmcache/tests/NvmTestBase.cpp
./memory/tests/TestBase.cpp
@@ -85,6 +86,7 @@ if (BUILD_TESTS)
add_dependencies(allocator_test_support thrift_generated_files)
target_link_libraries (allocator_test_support PUBLIC
cachelib_allocator
+ common_test_utils
glog::glog
gflags
GTest::gtest
@@ -116,8 +118,11 @@ if (BUILD_TESTS)
add_test (tests/ChainedHashTest.cpp)
add_test (tests/AllocatorResizeTypeTest.cpp)
add_test (tests/AllocatorHitStatsTypeTest.cpp)
+ add_test (tests/AllocatorMemoryTiersTest.cpp)
+ add_test (tests/MemoryTiersTest.cpp)
add_test (tests/MultiAllocatorTest.cpp)
add_test (tests/NvmAdmissionPolicyTest.cpp)
+ add_test (tests/CacheAllocatorConfigTest.cpp)
add_test (nvmcache/tests/NvmItemTests.cpp)
add_test (nvmcache/tests/InFlightPutsTest.cpp)
add_test (nvmcache/tests/TombStoneTests.cpp)
diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index a737074ac6..589614ee3b 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -83,13 +83,25 @@ class CacheBase {
CacheBase(CacheBase&&) = default;
CacheBase& operator=(CacheBase&&) = default;
+ // TODO: come up with some reasonable number
+ static constexpr unsigned kMaxTiers = 2;
+
// Get a string referring to the cache name for this cache
virtual const std::string getCacheName() const = 0;
+ // Returns true for ObjectCacheBase, false for CacheAllocator.
+ virtual bool isObjectCache() const = 0;
+
// Get the reference to a memory pool, for stats purposes
//
// @param poolId The pool id to query
virtual const MemoryPool& getPool(PoolId poolId) const = 0;
+
+ // Get the reference to a memory pool using a tier id, for stats purposes
+ //
+ // @param poolId The pool id to query
+ // @param tierId The tier of the pool id
+ virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0;
// Get Pool specific stats (regular pools). This includes stats from the
// Memory Pool and also the cache.
@@ -97,6 +109,9 @@ class CacheBase {
// @param poolId the pool id
virtual PoolStats getPoolStats(PoolId poolId) const = 0;
+ virtual AllocationClassBaseStat getAllocationClassStats(
+ TierId, PoolId pid, ClassId cid) const = 0;
+
// @param poolId the pool id
virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
@@ -187,6 +202,10 @@ class CacheBase {
// pool id
virtual const ICompactCache& getCompactCache(PoolId pid) const = 0;
+ // return object cache stats
+ virtual void getObjectCacheCounters(
+ std::function) const {}
+
protected:
// move bytes from one pool to another. The source pool should be at least
// _bytes_ in size.
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index a512ed4b6b..52f8a9412d 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -16,109 +16,37 @@
#pragma once
+#include
+
namespace facebook {
namespace cachelib {
template
CacheAllocator::CacheAllocator(Config config)
- : isOnShm_{config.memMonitoringEnabled()},
- config_(config.validate()),
- tempShm_(isOnShm_ ? std::make_unique(config_.size)
- : nullptr),
- allocator_(isOnShm_ ? std::make_unique(
- getAllocatorConfig(config_),
- tempShm_->getAddr(),
- config_.size)
- : std::make_unique(
- getAllocatorConfig(config_), config_.size)),
- compactCacheManager_(std::make_unique(*allocator_)),
- compressor_(createPtrCompressor()),
- accessContainer_(std::make_unique(
- config_.accessConfig,
- compressor_,
- [this](Item* it) -> ItemHandle { return acquire(it); })),
- chainedItemAccessContainer_(std::make_unique(
- config_.chainedItemAccessConfig,
- compressor_,
- [this](Item* it) -> ItemHandle { return acquire(it); })),
- chainedItemLocks_(config_.chainedItemsLockPower,
- std::make_shared()),
- cacheCreationTime_{util::getCurrentTimeSec()},
- nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(),
- config_.isNvmCacheTruncateAllocSizeEnabled()} {
+ : CacheAllocator(InitMemType::kNone, config) {
+ // TODO(MEMORY_TIER)
+ if (getNumTiers() > 1 || std::holds_alternative(
+ memoryTierConfigs[0].getShmTypeOpts())) {
+ throw std::runtime_error(
+ "Using custom memory tier or using more than one tier is only "
+ "supported for Shared Memory.");
+ }
initCommon(false);
}
template
CacheAllocator::CacheAllocator(SharedMemNewT, Config config)
- : isOnShm_{true},
- config_(config.validate()),
- shmManager_(
- std::make_unique(config_.cacheDir, config_.usePosixShm)),
- allocator_(createNewMemoryAllocator()),
- compactCacheManager_(std::make_unique(*allocator_)),
- compressor_(createPtrCompressor()),
- accessContainer_(std::make_unique(
- config_.accessConfig,
- shmManager_
- ->createShm(detail::kShmHashTableName,
- AccessContainer::getRequiredSize(
- config_.accessConfig.getNumBuckets()),
- nullptr,
- ShmSegmentOpts(config_.accessConfig.getPageSize()))
- .addr,
- compressor_,
- [this](Item* it) -> ItemHandle { return acquire(it); })),
- chainedItemAccessContainer_(std::make_unique(
- config_.chainedItemAccessConfig,
- shmManager_
- ->createShm(detail::kShmChainedItemHashTableName,
- AccessContainer::getRequiredSize(
- config_.chainedItemAccessConfig.getNumBuckets()),
- nullptr,
- ShmSegmentOpts(config_.accessConfig.getPageSize()))
- .addr,
- compressor_,
- [this](Item* it) -> ItemHandle { return acquire(it); })),
- chainedItemLocks_(config_.chainedItemsLockPower,
- std::make_shared()),
- cacheCreationTime_{util::getCurrentTimeSec()},
- nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(),
- config_.isNvmCacheTruncateAllocSizeEnabled()} {
+ : CacheAllocator(InitMemType::kMemNew, config) {
initCommon(false);
- shmManager_->removeShm(detail::kShmInfoName);
+ shmManager_->removeShm(detail::kShmInfoName,
+ PosixSysVSegmentOpts(config_.isUsingPosixShm()));
}
template
CacheAllocator::CacheAllocator(SharedMemAttachT, Config config)
- : isOnShm_{true},
- config_(config.validate()),
- shmManager_(
- std::make_unique(config_.cacheDir, config_.usePosixShm)),
- deserializer_(createDeserializer()),
- metadata_{deserializeCacheAllocatorMetadata(*deserializer_)},
- allocator_(restoreMemoryAllocator()),
- compactCacheManager_(restoreCCacheManager()),
- compressor_(createPtrCompressor()),
- mmContainers_(deserializeMMContainers(*deserializer_, compressor_)),
- accessContainer_(std::make_unique(
- deserializer_->deserialize(),
- config_.accessConfig,
- shmManager_->attachShm(detail::kShmHashTableName),
- compressor_,
- [this](Item* it) -> ItemHandle { return acquire(it); })),
- chainedItemAccessContainer_(std::make_unique(
- deserializer_->deserialize(),
- config_.chainedItemAccessConfig,
- shmManager_->attachShm(detail::kShmChainedItemHashTableName),
- compressor_,
- [this](Item* it) -> ItemHandle { return acquire(it); })),
- chainedItemLocks_(config_.chainedItemsLockPower,
- std::make_shared()),
- cacheCreationTime_{*metadata_.cacheCreationTime_ref()},
- nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(),
- config_.isNvmCacheTruncateAllocSizeEnabled()} {
- for (auto pid : *metadata_.compactCachePools_ref()) {
+ : CacheAllocator(InitMemType::kMemAttach, config) {
+ /* TODO - per tier? */
+ for (auto pid : *metadata_.compactCachePools()) {
isCompactCachePool_[pid] = true;
}
@@ -127,9 +55,61 @@ CacheAllocator::CacheAllocator(SharedMemAttachT, Config config)
// We will create a new info shm segment on shutDown(). If we don't remove
// this info shm segment here and the new info shm segment's size is larger
// than this one, creating new one will fail.
- shmManager_->removeShm(detail::kShmInfoName);
+ shmManager_->removeShm(detail::kShmInfoName,
+ PosixSysVSegmentOpts(config_.isUsingPosixShm()));
}
+template
+CacheAllocator::CacheAllocator(
+ typename CacheAllocator::InitMemType type, Config config)
+ : isOnShm_{type != InitMemType::kNone ? true
+ : config.memMonitoringEnabled()},
+ config_(config.validate()),
+ memoryTierConfigs(config.getMemoryTierConfigs()),
+ tempShm_(type == InitMemType::kNone && isOnShm_
+ ? std::make_unique(config_.getCacheSize())
+ : nullptr),
+ shmManager_(type != InitMemType::kNone
+ ? std::make_unique(config_.cacheDir,
+ config_.isUsingPosixShm())
+ : nullptr),
+ deserializer_(type == InitMemType::kMemAttach ? createDeserializer()
+ : nullptr),
+ metadata_{type == InitMemType::kMemAttach
+ ? deserializeCacheAllocatorMetadata(*deserializer_)
+ : serialization::CacheAllocatorMetadata{}},
+ allocator_(initAllocator(type)),
+ compactCacheManager_(type != InitMemType::kMemAttach
+ ? std::make_unique(*allocator_[0] /* TODO: per tier */)
+ : restoreCCacheManager(0/* TODO: per tier */)),
+ compressor_(createPtrCompressor()),
+ mmContainers_(type == InitMemType::kMemAttach
+ ? deserializeMMContainers(*deserializer_, compressor_)
+ : MMContainers{getNumTiers()}),
+ accessContainer_(initAccessContainer(
+ type, detail::kShmHashTableName, config.accessConfig, config_.isUsingPosixShm())),
+ chainedItemAccessContainer_(
+ initAccessContainer(type,
+ detail::kShmChainedItemHashTableName,
+ config.chainedItemAccessConfig,
+ config_.isUsingPosixShm())),
+ chainedItemLocks_(config_.chainedItemsLockPower,
+ std::make_shared()),
+ movesMap_(kShards),
+ moveLock_(kShards),
+ cacheCreationTime_{
+ type != InitMemType::kMemAttach
+ ? util::getCurrentTimeSec()
+ : static_cast(*metadata_.cacheCreationTime())},
+ cacheInstanceCreationTime_{type != InitMemType::kMemAttach
+ ? cacheCreationTime_
+ : util::getCurrentTimeSec()},
+ // Pass in cacheInstnaceCreationTime_ as the current time to keep
+ // nvmCacheState's current time in sync
+ nvmCacheState_{cacheInstanceCreationTime_, config_.cacheDir,
+ config_.isNvmCacheEncryptionEnabled(),
+ config_.isNvmCacheTruncateAllocSizeEnabled()} {}
+
template
CacheAllocator::~CacheAllocator() {
XLOG(DBG, "destructing CacheAllocator");
@@ -141,39 +121,99 @@ CacheAllocator::~CacheAllocator() {
}
template
-std::unique_ptr
-CacheAllocator::createNewMemoryAllocator() {
+ShmSegmentOpts CacheAllocator::createShmCacheOpts(TierId tid) {
ShmSegmentOpts opts;
opts.alignment = sizeof(Slab);
+ opts.typeOpts = memoryTierConfigs[tid].getShmTypeOpts();
+ opts.memBindNumaNodes = memoryTierConfigs[tid].getMemBind();
+ if (auto *v = std::get_if(&opts.typeOpts)) {
+ v->usePosix = config_.usePosixShm;
+ }
+
+ return opts;
+}
+
+template
+size_t CacheAllocator::memoryTierSize(TierId tid) const
+{
+ auto partitions = std::accumulate(memoryTierConfigs.begin(), memoryTierConfigs.end(), 0UL,
+ [](const size_t i, const MemoryTierCacheConfig& config){
+ return i + config.getRatio();
+ });
+
+ return memoryTierConfigs[tid].calculateTierSize(config_.getCacheSize(), partitions);
+}
+
+template
+std::vector>
+CacheAllocator::createPrivateAllocator() {
+ std::vector> allocators;
+
+ if (isOnShm_)
+ allocators.emplace_back(std::make_unique(
+ getAllocatorConfig(config_),
+ tempShm_->getAddr(),
+ config_.getCacheSize()));
+ else
+ allocators.emplace_back(std::make_unique(
+ getAllocatorConfig(config_),
+ config_.getCacheSize()));
+
+ return allocators;
+}
+
+template
+std::unique_ptr
+CacheAllocator::createNewMemoryAllocator(TierId tid) {
+ size_t tierSize = memoryTierSize(tid);
return std::make_unique(
getAllocatorConfig(config_),
shmManager_
- ->createShm(detail::kShmCacheName, config_.size,
- config_.slabMemoryBaseAddr, opts)
+ ->createShm(detail::kShmCacheName + std::to_string(tid),
+ tierSize, config_.slabMemoryBaseAddr,
+ createShmCacheOpts(tid))
.addr,
- config_.size);
+ tierSize);
}
template
std::unique_ptr
-CacheAllocator::restoreMemoryAllocator() {
- ShmSegmentOpts opts;
- opts.alignment = sizeof(Slab);
+CacheAllocator::restoreMemoryAllocator(TierId tid) {
return std::make_unique(
deserializer_->deserialize(),
shmManager_
- ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr, opts)
- .addr,
- config_.size,
+ ->attachShm(detail::kShmCacheName + std::to_string(tid),
+ config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr,
+ memoryTierSize(tid),
config_.disableFullCoredump);
}
+template
+std::vector>
+CacheAllocator::createAllocators() {
+ std::vector> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(createNewMemoryAllocator(tid));
+ }
+ return allocators;
+}
+
+template
+std::vector>
+CacheAllocator::restoreAllocators() {
+ std::vector> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(restoreMemoryAllocator(tid));
+ }
+ return allocators;
+}
+
template
std::unique_ptr
-CacheAllocator::restoreCCacheManager() {
+CacheAllocator::restoreCCacheManager(TierId tid) {
return std::make_unique(
deserializer_->deserialize(),
- *allocator_);
+ *allocator_[tid]);
}
template
@@ -196,7 +236,10 @@ void CacheAllocator::initCommon(bool dramCacheAttached) {
}
initStats();
initNvmCache(dramCacheAttached);
- initWorkers();
+
+ if (!config_.delayCacheWorkersStart) {
+ initWorkers();
+ }
}
template
@@ -225,19 +268,19 @@ void CacheAllocator::initNvmCache(bool dramCacheAttached) {
template
void CacheAllocator::initWorkers() {
- if (config_.poolResizingEnabled()) {
+ if (config_.poolResizingEnabled() && !poolResizer_) {
startNewPoolResizer(config_.poolResizeInterval,
config_.poolResizeSlabsPerIter,
config_.poolResizeStrategy);
}
- if (config_.poolRebalancingEnabled()) {
+ if (config_.poolRebalancingEnabled() && !poolRebalancer_) {
startNewPoolRebalancer(config_.poolRebalanceInterval,
config_.defaultPoolRebalanceStrategy,
config_.poolRebalancerFreeAllocThreshold);
}
- if (config_.memMonitoringEnabled()) {
+ if (config_.memMonitoringEnabled() && !memMonitor_) {
if (!isOnShm_) {
throw std::invalid_argument(
"Memory monitoring is not supported for cache on heap. It is "
@@ -249,28 +292,97 @@ void CacheAllocator::initWorkers() {
config_.poolAdviseStrategy);
}
- if (config_.itemsReaperEnabled()) {
+ if (config_.itemsReaperEnabled() && !reaper_) {
startNewReaper(config_.reaperInterval, config_.reaperConfig);
}
- if (config_.poolOptimizerEnabled()) {
+ if (config_.poolOptimizerEnabled() && !poolOptimizer_) {
startNewPoolOptimizer(config_.regularPoolOptimizeInterval,
config_.compactCacheOptimizeInterval,
config_.poolOptimizeStrategy,
config_.ccacheOptimizeStepSizePercent);
}
+
+ if (config_.backgroundEvictorEnabled()) {
+ startNewBackgroundEvictor(config_.backgroundEvictorInterval,
+ config_.backgroundEvictorStrategy,
+ config_.backgroundEvictorThreads);
+ }
+
+ if (config_.backgroundPromoterEnabled()) {
+ startNewBackgroundPromoter(config_.backgroundPromoterInterval,
+ config_.backgroundPromoterStrategy,
+ config_.backgroundPromoterThreads);
+ }
+}
+
+template
+std::vector>
+CacheAllocator::initAllocator(
+ InitMemType type) {
+ if (type == InitMemType::kNone) {
+ return createPrivateAllocator();
+ } else if (type == InitMemType::kMemNew) {
+ return createAllocators();
+ } else if (type == InitMemType::kMemAttach) {
+ return restoreAllocators();
+ }
+
+ // Invalid type
+ throw std::runtime_error(folly::sformat(
+ "Cannot initialize memory allocator, unknown InitMemType: {}.",
+ static_cast(type)));
+}
+
+template
+std::unique_ptr::AccessContainer>
+CacheAllocator::initAccessContainer(InitMemType type,
+ const std::string name,
+ AccessConfig config,
+ bool usePosixShm) {
+ if (type == InitMemType::kNone) {
+ return std::make_unique(
+ config, compressor_,
+ [this](Item* it) -> WriteHandle { return acquire(it); });
+ } else if (type == InitMemType::kMemNew) {
+ return std::make_unique(
+ config,
+ shmManager_
+ ->createShm(
+ name,
+ AccessContainer::getRequiredSize(config.getNumBuckets()),
+ nullptr,
+ ShmSegmentOpts(config.getPageSize(), false, usePosixShm))
+ .addr,
+ compressor_,
+ [this](Item* it) -> WriteHandle { return acquire(it); });
+ } else if (type == InitMemType::kMemAttach) {
+ return std::make_unique(
+ deserializer_->deserialize(),
+ config,
+ shmManager_->attachShm(name, nullptr,
+ ShmSegmentOpts(config.getPageSize(), false, usePosixShm)),
+ compressor_,
+ [this](Item* it) -> WriteHandle { return acquire(it); });
+ }
+
+ // Invalid type
+ throw std::runtime_error(folly::sformat(
+ "Cannot initialize access container, unknown InitMemType: {}.",
+ static_cast(type)));
}
template
std::unique_ptr CacheAllocator::createDeserializer() {
- auto infoAddr = shmManager_->attachShm(detail::kShmInfoName);
+ auto infoAddr = shmManager_->attachShm(detail::kShmInfoName, nullptr,
+ ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm()));
return std::make_unique(
reinterpret_cast(infoAddr.addr),
reinterpret_cast(infoAddr.addr) + infoAddr.size);
}
template
-typename CacheAllocator::ItemHandle
+typename CacheAllocator::WriteHandle
CacheAllocator::allocate(PoolId poolId,
typename Item::Key key,
uint32_t size,
@@ -284,12 +396,30 @@ CacheAllocator::allocate(PoolId poolId,
}
template
-typename CacheAllocator::ItemHandle
-CacheAllocator::allocateInternal(PoolId pid,
- typename Item::Key key,
- uint32_t size,
- uint32_t creationTime,
- uint32_t expiryTime) {
+bool CacheAllocator::shouldWakeupBgEvictor(TierId tid, PoolId pid, ClassId cid) {
+ // TODO: should we also work on lower tiers? should we have separate set of params?
+ if (tid == 1) return false;
+ return getAllocationClassStats(tid, pid, cid).approxFreePercent <= config_.lowEvictionAcWatermark;
+}
+
+template
+size_t CacheAllocator::backgroundWorkerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers) {
+ XDCHECK(numWorkers);
+
+ // TODO: came up with some better sharding (use some hashing)
+ return (tid + pid + cid) % numWorkers;
+}
+
+
+template
+typename CacheAllocator::WriteHandle
+CacheAllocator::allocateInternalTier(TierId tid,
+ PoolId pid,
+ typename Item::Key key,
+ uint32_t size,
+ uint32_t creationTime,
+ uint32_t expiryTime,
+ bool fromBgThread) {
util::LatencyTracker tracker{stats().allocateLatency_};
SCOPE_FAIL { stats_.invalidAllocs.inc(); };
@@ -298,16 +428,24 @@ CacheAllocator::allocateInternal(PoolId pid,
const auto requiredSize = Item::getRequiredSize(key, size);
// the allocation class in our memory allocator.
- const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+ const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
+ util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]};
+ // TODO: per-tier
(*stats_.allocAttempts)[pid][cid].inc();
-
- void* memory = allocator_->allocate(pid, requiredSize);
- if (memory == nullptr && !config_.disableEviction) {
- memory = findEviction(pid, cid);
+
+ void* memory = allocator_[tid]->allocate(pid, requiredSize);
+
+ if (backgroundEvictor_.size() && !fromBgThread && (memory == nullptr || shouldWakeupBgEvictor(tid, pid, cid))) {
+ backgroundEvictor_[backgroundWorkerId(tid, pid, cid, backgroundEvictor_.size())]->wakeUp();
+ }
+ // TODO: Today isEvictionDisabled means do not evict from memory (DRAM).
+ // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)?
+ if (memory == nullptr && !config_.isEvictionDisabled()) {
+ memory = findEviction(tid, pid, cid);
}
- ItemHandle handle;
+ WriteHandle handle;
if (memory != nullptr) {
// At this point, we have a valid memory allocation that is ready for use.
// Ensure that when we abort from here under any circumstances, we free up
@@ -315,7 +453,7 @@ CacheAllocator::allocateInternal(PoolId pid,
// for example.
SCOPE_FAIL {
// free back the memory to the allocator since we failed.
- allocator_->free(memory);
+ allocator_[tid]->free(memory);
};
handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
@@ -326,7 +464,7 @@ CacheAllocator::allocateInternal(PoolId pid,
}
} else { // failed to allocate memory.
- (*stats_.allocFailures)[pid][cid].inc();
+ (*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier
// wake up rebalancer
if (poolRebalancer_) {
poolRebalancer_->wakeUp();
@@ -343,6 +481,22 @@ CacheAllocator::allocateInternal(PoolId pid,
return handle;
}
+template
+typename CacheAllocator::WriteHandle
+CacheAllocator::allocateInternal(PoolId pid,
+ typename Item::Key key,
+ uint32_t size,
+ uint32_t creationTime,
+ uint32_t expiryTime,
+ bool fromBgThread) {
+ auto tid = 0; /* TODO: consult admission policy */
+ for(TierId tid = 0; tid < getNumTiers(); ++tid) {
+ auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime, fromBgThread);
+ if (handle) return handle;
+ }
+ return {};
+}
+
template
typename CacheAllocator::WriteHandle
CacheAllocator::allocateChainedItem(const ReadHandle& parent,
@@ -373,21 +527,28 @@ CacheAllocator::allocateChainedItemInternal(
// number of bytes required for this item
const auto requiredSize = ChainedItem::getRequiredSize(size);
- const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId;
- const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+ // TODO: is this correct?
+ auto tid = getTierId(*parent);
+ const auto pid = allocator_[tid]->getAllocInfo(parent->getMemory()).poolId;
+ const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
+
+ util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]};
+
+ // TODO: per-tier? Right now stats_ are not used in any public periodic
+ // worker
(*stats_.allocAttempts)[pid][cid].inc();
- void* memory = allocator_->allocate(pid, requiredSize);
+ void* memory = allocator_[tid]->allocate(pid, requiredSize);
if (memory == nullptr) {
- memory = findEviction(pid, cid);
+ memory = findEviction(tid, pid, cid);
}
if (memory == nullptr) {
(*stats_.allocFailures)[pid][cid].inc();
- return ItemHandle{};
+ return WriteHandle{};
}
- SCOPE_FAIL { allocator_->free(memory); };
+ SCOPE_FAIL { allocator_[tid]->free(memory); };
auto child = acquire(
new (memory) ChainedItem(compressor_.compress(parent.getInternal()), size,
@@ -403,8 +564,8 @@ CacheAllocator::allocateChainedItemInternal(
}
template
-void CacheAllocator::addChainedItem(ItemHandle& parent,
- ItemHandle child) {
+void CacheAllocator::addChainedItem(WriteHandle& parent,
+ WriteHandle child) {
if (!parent || !child || !child->isChainedItem()) {
throw std::invalid_argument(
folly::sformat("Invalid parent or child. parent: {}, child: {}",
@@ -448,14 +609,14 @@ void CacheAllocator::addChainedItem(ItemHandle& parent,
}
template
-typename CacheAllocator::ItemHandle
-CacheAllocator::popChainedItem(ItemHandle& parent) {
+typename CacheAllocator::WriteHandle
+CacheAllocator::popChainedItem(WriteHandle& parent) {
if (!parent || !parent->hasChainedItem()) {
throw std::invalid_argument(folly::sformat(
"Invalid parent {}", parent ? parent->toString() : nullptr));
}
- ItemHandle head;
+ WriteHandle head;
{ // scope of chained item lock.
auto l = chainedItemLocks_.lockExclusive(parent->getKey());
@@ -502,8 +663,8 @@ CacheAllocator::getParentKey(const Item& chainedItem) {
}
template
-void CacheAllocator::transferChainLocked(ItemHandle& parent,
- ItemHandle& newParent) {
+void CacheAllocator::transferChainLocked(WriteHandle& parent,
+ WriteHandle& newParent) {
// parent must be in a state to not have concurrent readers. Eviction code
// paths rely on holding the last item handle. Since we hold on to an item
// handle here, the chain will not be touched by any eviction code path.
@@ -545,7 +706,7 @@ void CacheAllocator::transferChainLocked(ItemHandle& parent,
template
void CacheAllocator::transferChainAndReplace(
- ItemHandle& parent, ItemHandle& newParent) {
+ WriteHandle& parent, WriteHandle& newParent) {
if (!parent || !newParent) {
throw std::invalid_argument("invalid parent or new parent");
}
@@ -591,9 +752,9 @@ bool CacheAllocator::replaceIfAccessible(Item& oldItem,
}
template
-typename CacheAllocator::ItemHandle
+typename CacheAllocator::WriteHandle
CacheAllocator::replaceChainedItem(Item& oldItem,
- ItemHandle newItemHandle,
+ WriteHandle newItemHandle,
Item& parent) {
if (!newItemHandle) {
throw std::invalid_argument("Empty handle for newItem");
@@ -618,9 +779,9 @@ CacheAllocator::replaceChainedItem(Item& oldItem,
}
template
-typename CacheAllocator::ItemHandle
+typename CacheAllocator::WriteHandle
CacheAllocator::replaceChainedItemLocked(Item& oldItem,
- ItemHandle newItemHdl,
+ WriteHandle newItemHdl,
const Item& parent) {
XDCHECK(newItemHdl != nullptr);
XDCHECK_GE(1u, oldItem.getRefCount());
@@ -696,8 +857,8 @@ CacheAllocator::releaseBackToAllocator(Item& it,
throw std::runtime_error(
folly::sformat("cannot release this item: {}", it.toString()));
}
-
- const auto allocInfo = allocator_->getAllocInfo(it.getMemory());
+ const auto tid = getTierId(it);
+ const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory());
if (ctx == RemoveContext::kEviction) {
const auto timeNow = util::getCurrentTimeSec();
@@ -721,8 +882,7 @@ CacheAllocator::releaseBackToAllocator(Item& it,
folly::sformat("Can not recycle a chained item {}, toRecyle",
it.toString(), toRecycle->toString()));
}
-
- allocator_->free(&it);
+ allocator_[tid]->free(&it);
return ReleaseRes::kReleased;
}
@@ -781,7 +941,7 @@ CacheAllocator::releaseBackToAllocator(Item& it,
auto next = head->getNext(compressor_);
const auto childInfo =
- allocator_->getAllocInfo(static_cast(head));
+ allocator_[tid]->getAllocInfo(static_cast(head));
(*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
util::getFragmentation(*this, *head));
@@ -814,7 +974,7 @@ CacheAllocator::releaseBackToAllocator(Item& it,
XDCHECK(ReleaseRes::kReleased != res);
res = ReleaseRes::kRecycled;
} else {
- allocator_->free(head);
+ allocator_[tid]->free(head);
}
}
@@ -829,7 +989,7 @@ CacheAllocator::releaseBackToAllocator(Item& it,
res = ReleaseRes::kRecycled;
} else {
XDCHECK(it.isDrained());
- allocator_->free(&it);
+ allocator_[tid]->free(&it);
}
return res;
@@ -850,16 +1010,16 @@ RefcountWithFlags::Value CacheAllocator::decRef(Item& it) {
}
template
-typename CacheAllocator::ItemHandle
+typename CacheAllocator::WriteHandle
CacheAllocator::acquire(Item* it) {
if (UNLIKELY(!it)) {
- return ItemHandle{};
+ return WriteHandle{};
}
SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
incRef(*it);
- return ItemHandle{it, *this};
+ return WriteHandle{it, *this};
}
template
@@ -901,6 +1061,25 @@ bool CacheAllocator::replaceInMMContainer(Item& oldItem,
}
}
+template
+bool CacheAllocator::replaceInMMContainer(Item* oldItem,
+ Item& newItem) {
+ return replaceInMMContainer(*oldItem, newItem);
+}
+
+template
+bool CacheAllocator::replaceInMMContainer(EvictionIterator& oldItemIt,
+ Item& newItem) {
+ auto& oldContainer = getMMContainer(*oldItemIt);
+ auto& newContainer = getMMContainer(newItem);
+
+ // This function is used for eviction across tiers
+ XDCHECK(&oldContainer != &newContainer);
+ oldContainer.remove(oldItemIt);
+
+ return newContainer.add(newItem);
+}
+
template
bool CacheAllocator::replaceChainedItemInMMContainer(
Item& oldItem, Item& newItem) {
@@ -943,12 +1122,12 @@ void CacheAllocator::insertInMMContainer(Item& item) {
*/
template
-bool CacheAllocator::insert(const ItemHandle& handle) {
+bool CacheAllocator::insert(const WriteHandle& handle) {
return insertImpl(handle, AllocatorApiEvent::INSERT);
}
template
-bool CacheAllocator::insertImpl(const ItemHandle& handle,
+bool CacheAllocator::insertImpl(const WriteHandle& handle,
AllocatorApiEvent event) {
XDCHECK(handle);
XDCHECK(event == AllocatorApiEvent::INSERT ||
@@ -984,17 +1163,19 @@ bool CacheAllocator::insertImpl(const ItemHandle& handle,
}
template
-typename CacheAllocator::ItemHandle
-CacheAllocator::insertOrReplace(const ItemHandle& handle) {
+typename CacheAllocator::WriteHandle
+CacheAllocator::insertOrReplace(const WriteHandle& handle) {
XDCHECK(handle);
if (handle->isAccessible()) {
throw std::invalid_argument("Handle is already accessible");
}
+ HashedKey hk{handle->getKey()};
+
insertInMMContainer(*(handle.getInternal()));
- ItemHandle replaced;
+ WriteHandle replaced;
try {
- auto lock = nvmCache_ ? nvmCache_->getItemDestructorLock(handle->getKey())
+ auto lock = nvmCache_ ? nvmCache_->getItemDestructorLock(hk)
: std::unique_lock();
replaced = accessContainer_->insertOrReplace(*(handle.getInternal()));
@@ -1003,7 +1184,7 @@ CacheAllocator::insertOrReplace(const ItemHandle& handle) {
// item is to be replaced and the destructor will be executed
// upon memory released, mark it in nvm to avoid destructor
// executed from nvm
- nvmCache_->markNvmItemRemovedLocked(handle->getKey());
+ nvmCache_->markNvmItemRemovedLocked(hk);
}
} catch (const std::exception&) {
removeFromMMContainer(*(handle.getInternal()));
@@ -1026,8 +1207,7 @@ CacheAllocator::insertOrReplace(const ItemHandle& handle) {
// We can avoid nvm delete only if we have non nvm clean item in cache.
// In all other cases we must enqueue delete.
if (!replaced || replaced->isNvmClean()) {
- nvmCache_->remove(handle->getKey(),
- nvmCache_->createDeleteTombStone(handle->getKey()));
+ nvmCache_->remove(hk, nvmCache_->createDeleteTombStone(hk));
}
}
@@ -1045,9 +1225,160 @@ CacheAllocator::insertOrReplace(const ItemHandle& handle) {
return replaced;
}
+/* Next two methods are used to asynchronously move Item between memory tiers.
+ *
+ * The thread, which moves Item, allocates new Item in the tier we are moving to
+ * and calls moveRegularItemWithSync() method. This method does the following:
+ * 1. Create MoveCtx and put it to the movesMap.
+ * 2. Update the access container with the new item from the tier we are
+ * moving to. This Item has kIncomplete flag set.
+ * 3. Copy data from the old Item to the new one.
+ * 4. Unset the kIncomplete flag and Notify MoveCtx
+ *
+ * Concurrent threads which are getting handle to the same key:
+ * 1. When a handle is created it checks if the kIncomplete flag is set
+ * 2. If so, Handle implementation creates waitContext and adds it to the
+ * MoveCtx by calling addWaitContextForMovingItem() method.
+ * 3. Wait until the moving thread will complete its job.
+ */
+template
+bool CacheAllocator::addWaitContextForMovingItem(
+ folly::StringPiece key, std::shared_ptr> waiter) {
+ auto shard = getShardForKey(key);
+ auto& movesMap = getMoveMapForShard(shard);
+ auto lock = getMoveLockForShard(shard);
+ auto it = movesMap.find(key);
+ if (it == movesMap.end()) {
+ return false;
+ }
+ auto ctx = it->second.get();
+ ctx->addWaiter(std::move(waiter));
+ return true;
+}
+
+template
+template
+typename CacheAllocator::WriteHandle
+CacheAllocator::moveRegularItemWithSync(
+ Item& oldItem, WriteHandle& newItemHdl, P&& predicate) {
+ XDCHECK(oldItem.isMoving());
+ // TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
+ // ??? util::LatencyTracker tracker{stats_.evictRegularLatency_};
+
+ if (!oldItem.isAccessible() || oldItem.isExpired()) {
+ return {};
+ }
+
+ XDCHECK_EQ(newItemHdl->getSize(), oldItem.getSize());
+ XDCHECK_NE(getTierId(oldItem), getTierId(*newItemHdl));
+
+ // take care of the flags before we expose the item to be accessed. this
+ // will ensure that when another thread removes the item from RAM, we issue
+ // a delete accordingly. See D7859775 for an example
+ if (oldItem.isNvmClean()) {
+ newItemHdl->markNvmClean();
+ }
+
+ folly::StringPiece key(oldItem.getKey());
+ auto shard = getShardForKey(key);
+ auto& movesMap = getMoveMapForShard(shard);
+ MoveCtx* ctx(nullptr);
+ {
+ auto lock = getMoveLockForShard(shard);
+ auto res = movesMap.try_emplace(key, std::make_unique());
+ if (!res.second) {
+ return {};
+ }
+ ctx = res.first->second.get();
+ }
+
+ auto resHdl = WriteHandle{};
+ auto guard = folly::makeGuard([key, this, ctx, shard, &resHdl]() {
+ auto& movesMap = getMoveMapForShard(shard);
+ if (resHdl)
+ resHdl->unmarkIncomplete();
+ auto lock = getMoveLockForShard(shard);
+ ctx->setItemHandle(std::move(resHdl));
+ movesMap.erase(key);
+ });
+
+ // TODO: Possibly we can use markMoving() instead. But today
+ // moveOnSlabRelease logic assume that we mark as moving old Item
+ // and than do copy and replace old Item with the new one in access
+ // container. Furthermore, Item can be marked as Moving only
+ // if it is linked to MM container. In our case we mark the new Item
+ // and update access container before the new Item is ready (content is
+ // copied).
+ newItemHdl->markIncomplete();
+
+ // Inside the access container's lock, this checks if the old item is
+ // accessible and its refcount is zero. If the item is not accessible,
+ // there is no point to replace it since it had already been removed
+ // or in the process of being removed. If the item is in cache but the
+ // refcount is non-zero, it means user could be attempting to remove
+ // this item through an API such as remove(ItemHandle). In this case,
+ // it is unsafe to replace the old item with a new one, so we should
+ // also abort.
+ if (!accessContainer_->replaceIf(oldItem, *newItemHdl,
+ predicate)) {
+ return {};
+ }
+
+ if (config_.moveCb) {
+ // Execute the move callback. We cannot make any guarantees about the
+ // consistency of the old item beyond this point, because the callback can
+ // do more than a simple memcpy() e.g. update external references. If there
+ // are any remaining handles to the old item, it is the caller's
+ // responsibility to invalidate them. The move can only fail after this
+ // statement if the old item has been removed or replaced, in which case it
+ // should be fine for it to be left in an inconsistent state.
+ config_.moveCb(oldItem, *newItemHdl, nullptr);
+ } else {
+ std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
+ oldItem.getSize());
+ }
+
+ // Inside the MM container's lock, this checks if the old item exists to
+ // make sure that no other thread removed it, and only then replaces it.
+ if (!replaceInMMContainer(oldItem, *newItemHdl)) {
+ accessContainer_->remove(*newItemHdl);
+ return {};
+ }
+
+ // Replacing into the MM container was successful, but someone could have
+ // called insertOrReplace() or remove() before or after the
+ // replaceInMMContainer() operation, which would invalidate newItemHdl.
+ if (!newItemHdl->isAccessible()) {
+ removeFromMMContainer(*newItemHdl);
+ return {};
+ }
+
+ // no one can add or remove chained items at this point
+ if (oldItem.hasChainedItem()) {
+ // safe to acquire handle for a moving Item
+ auto oldHandle = acquire(&oldItem);
+ XDCHECK_EQ(1u, oldHandle->getRefCount()) << oldHandle->toString();
+ XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString();
+ try {
+ auto l = chainedItemLocks_.lockExclusive(oldItem.getKey());
+ transferChainLocked(oldHandle, newItemHdl);
+ } catch (const std::exception& e) {
+ // this should never happen because we drained all the handles.
+ XLOGF(DFATAL, "{}", e.what());
+ throw;
+ }
+
+ XDCHECK(!oldItem.hasChainedItem());
+ XDCHECK(newItemHdl->hasChainedItem());
+ }
+ newItemHdl.unmarkNascent();
+ resHdl = std::move(newItemHdl); // guard will assign it to ctx under lock
+ return acquire(&oldItem);
+}
+
template
bool CacheAllocator::moveRegularItem(Item& oldItem,
- ItemHandle& newItemHdl) {
+ WriteHandle& newItemHdl) {
XDCHECK(config_.moveCb);
util::LatencyTracker tracker{stats_.moveRegularLatency_};
@@ -1080,7 +1411,7 @@ bool CacheAllocator::moveRegularItem(Item& oldItem,
// there is no point to replace it since it had already been removed
// or in the process of being removed. If the item is in cache but the
// refcount is non-zero, it means user could be attempting to remove
- // this item through an API such as remove(ItemHandle). In this case,
+ // this item through an API such as remove(itemHandle). In this case,
// it is unsafe to replace the old item with a new one, so we should
// also abort.
if (!accessContainer_->replaceIf(oldItem, *newItemHdl, itemMovingPredicate)) {
@@ -1126,7 +1457,7 @@ bool CacheAllocator::moveRegularItem(Item& oldItem,
template
bool CacheAllocator::moveChainedItem(ChainedItem& oldItem,
- ItemHandle& newItemHdl) {
+ WriteHandle& newItemHdl) {
XDCHECK(config_.moveCb);
util::LatencyTracker tracker{stats_.moveChainedLatency_};
@@ -1167,7 +1498,7 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem,
return false;
}
- auto parentPtr = parentHandle.get();
+ auto parentPtr = parentHandle.getInternal();
XDCHECK_EQ(reinterpret_cast(parentPtr),
reinterpret_cast(&oldItem.getParentItem(compressor_)));
@@ -1187,41 +1518,78 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem,
template
typename CacheAllocator::Item*
-CacheAllocator::findEviction(PoolId pid, ClassId cid) {
- auto& mmContainer = getMMContainer(pid, cid);
+CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) {
+ auto& mmContainer = getMMContainer(tid, pid, cid);
// Keep searching for a candidate until we were able to evict it
// or until the search limit has been exhausted
unsigned int searchTries = 0;
- auto itr = mmContainer.getEvictionIterator();
while ((config_.evictionSearchTries == 0 ||
- config_.evictionSearchTries > searchTries) &&
- itr) {
+ config_.evictionSearchTries > searchTries)) {
++searchTries;
+ (*stats_.evictionAttempts)[pid][cid].inc();
+
+ Item* toRecycle = nullptr;
+ Item* candidate = nullptr;
+
+ mmContainer.withEvictionIterator([this, &candidate, &toRecycle, &searchTries](auto &&itr){
+ while ((config_.evictionSearchTries == 0 ||
+ config_.evictionSearchTries > searchTries) && itr) {
+ ++searchTries;
+
+ auto *toRecycle_ = itr.get();
+ auto *candidate_ = toRecycle_->isChainedItem()
+ ? &toRecycle_->asChainedItem().getParentItem(compressor_)
+ : toRecycle_;
+
+ // make sure no other thread is evicting the item
+ if (candidate_->getRefCount() == 0 && candidate_->markMoving()) {
+ toRecycle = toRecycle_;
+ candidate = candidate_;
+ return;
+ }
+
+ ++itr;
+ }
+ });
+
+ if (!toRecycle)
+ continue;
+
+ XDCHECK(toRecycle);
+ XDCHECK(candidate);
- Item* candidate = itr.get();
// for chained items, the ownership of the parent can change. We try to
// evict what we think as parent and see if the eviction of parent
// recycles the child we intend to.
auto toReleaseHandle =
- itr->isChainedItem()
- ? advanceIteratorAndTryEvictChainedItem(itr)
- : advanceIteratorAndTryEvictRegularItem(mmContainer, itr);
+ evictNormalItem(*candidate, true /* skipIfTokenInvalid */);
+ auto ref = candidate->unmarkMoving();
- if (toReleaseHandle) {
- if (toReleaseHandle->hasChainedItem()) {
+ if (toReleaseHandle || ref == 0u) {
+ if (candidate->hasChainedItem()) {
(*stats_.chainedItemEvictions)[pid][cid].inc();
} else {
(*stats_.regularItemEvictions)[pid][cid].inc();
}
+ } else {
+ if (candidate->hasChainedItem()) {
+ stats_.evictFailParentAC.inc();
+ } else {
+ stats_.evictFailAC.inc();
+ }
+ }
- // Invalidate iterator since later on we may use this mmContainer
- // again, which cannot be done unless we drop this iterator
- itr.destroy();
+ if (toReleaseHandle) {
+ if (auto eventTracker = getEventTracker()) {
+ eventTracker->record(
+ AllocatorApiEvent::DRAM_EVICT, toReleaseHandle->getKey(),
+ AllocatorApiResult::EVICTED, toReleaseHandle->getSize(),
+ toReleaseHandle->getConfiguredTTL().count());
+ }
- // we must be the last handle and for chained items, this will be
- // the parent.
- XDCHECK(toReleaseHandle.get() == candidate || candidate->isChainedItem());
+ XDCHECK(toReleaseHandle.get() == candidate);
+ XDCHECK(toRecycle == candidate || toRecycle->isChainedItem());
XDCHECK_EQ(1u, toReleaseHandle->getRefCount());
// We manually release the item here because we don't want to
@@ -1237,15 +1605,18 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) {
// recycle the candidate.
if (ReleaseRes::kRecycled ==
releaseBackToAllocator(itemToRelease, RemoveContext::kEviction,
- /* isNascent */ false, candidate)) {
- return candidate;
+ /* isNascent */ false, toRecycle)) {
+ return toRecycle;
+ }
+ } else if (ref == 0u) {
+ // it's safe to recycle the item here as there are no more
+ // references and the item could not have been marked as moving
+ // by another thread since it's detached from MMContainer.
+ if (ReleaseRes::kRecycled ==
+ releaseBackToAllocator(*candidate, RemoveContext::kEviction,
+ /* isNascent */ false, toRecycle)) {
+ return toRecycle;
}
- }
-
- // If we destroyed the itr to possibly evict and failed, we restart
- // from the beginning again
- if (!itr) {
- itr.resetToBegin();
}
}
return nullptr;
@@ -1300,142 +1671,79 @@ bool CacheAllocator::shouldWriteToNvmCacheExclusive(
}
template
-typename CacheAllocator::ItemHandle
-CacheAllocator::advanceIteratorAndTryEvictRegularItem(
- MMContainer& mmContainer, EvictionIterator& itr) {
- // we should flush this to nvmcache if it is not already present in nvmcache
- // and the item is not expired.
- Item& item = *itr;
- const bool evictToNvmCache = shouldWriteToNvmCache(item);
-
- auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
- : typename NvmCacheT::PutToken{};
- // record the in-flight eviciton. If not, we move on to next item to avoid
- // stalling eviction.
- if (evictToNvmCache && !token.isValid()) {
- ++itr;
- stats_.evictFailConcurrentFill.inc();
- return ItemHandle{};
- }
-
- // If there are other accessors, we should abort. Acquire a handle here since
- // if we remove the item from both access containers and mm containers
- // below, we will need a handle to ensure proper cleanup in case we end up
- // not evicting this item
- auto evictHandle = accessContainer_->removeIf(item, &itemEvictionPredicate);
+typename CacheAllocator::WriteHandle
+CacheAllocator::tryEvictToNextMemoryTier(
+ TierId tid, PoolId pid, Item& item, bool fromBgThread) {
+ if(item.isChainedItem()) return {}; // TODO: We do not support ChainedItem yet
+ if(item.isExpired()) return acquire(&item);
+
+ TierId nextTier = tid; // TODO - calculate this based on some admission policy
+ while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
+ // allocateInternal might trigger another eviction
+ auto newItemHdl = allocateInternalTier(nextTier, pid,
+ item.getKey(),
+ item.getSize(),
+ item.getCreationTime(),
+ item.getExpiryTime(),
+ fromBgThread);
- if (!evictHandle) {
- ++itr;
- stats_.evictFailAC.inc();
- return evictHandle;
+ if (newItemHdl) {
+ XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
+ return moveRegularItemWithSync(item, newItemHdl, itemMovingPredicate);
+ }
}
- mmContainer.remove(itr);
- XDCHECK_EQ(reinterpret_cast(evictHandle.get()),
- reinterpret_cast