Initial commit - Tutus Bolt database

This commit is contained in:
Admin 2025-12-21 05:51:30 -05:00
commit 721b9ee0a8
130 changed files with 25545 additions and 0 deletions

4
.gitattributes vendored Normal file
View File

@ -0,0 +1,4 @@
# ensure that line endings for Windows builds are properly formatted
# see https://github.com/golangci/golangci-lint-action?tab=readme-ov-file#how-to-use
# at "Multiple OS Example" section
*.go text eol=lf

11
.github/dependabot.yml vendored Normal file
View File

@ -0,0 +1,11 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: weekly
- package-ecosystem: gomod
directory: /
schedule:
interval: weekly

9
.github/workflows/benchmark-pr.yaml vendored Normal file
View File

@ -0,0 +1,9 @@
---
name: Benchmarks on PRs (AMD64)
permissions: read-all
on: [pull_request]
jobs:
amd64:
uses: ./.github/workflows/benchmark-template.yaml
with:
benchGitRef: ${{ github.event.pull_request.base.sha }}

View File

@ -0,0 +1,13 @@
---
name: Nightly Benchmarks against last release (AMD64)
permissions: read-all
on:
schedule:
- cron: '10 5 * * *' # runs every day at 05:10 UTC
# workflow_dispatch enables manual testing of this job by maintainers
workflow_dispatch:
jobs:
amd64:
uses: ./.github/workflows/benchmark-template.yaml
with:
benchGitRef: release-1.3

View File

@ -0,0 +1,57 @@
---
name: Reusable Benchmark Template
on:
workflow_call:
inputs:
# which git reference to benchmark against
benchGitRef:
required: true
type: string
maxAcceptableDifferencePercent:
required: false
type: number
default: 5
runs-on:
required: false
type: string
default: "['ubuntu-latest']"
permissions: read-all
jobs:
benchmark:
runs-on: ${{ fromJson(inputs.runs-on) }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- name: Run Benchmarks
run: |
BENCHSTAT_OUTPUT_FILE=result.txt make test-benchmark-compare REF=${{ inputs.benchGitRef }}
- run: |
echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
cat result.txt >> "$GITHUB_STEP_SUMMARY"
echo "\`\`\`" >> "$GITHUB_STEP_SUMMARY"
cat <<EOL >> "$GITHUB_STEP_SUMMARY"
<hr />
The table shows the median and 90% confidence interval (CI) summaries for each benchmark comparing the HEAD and the BASE, and an A/B comparison under "vs base". The last column shows the statistical p-value with ten runs (n=10).
The last row has the Geometric Mean (geomean) for the given rows in the table.
Refer to [benchstat's documentation](https://pkg.go.dev/golang.org/x/perf/cmd/benchstat) for more help.
EOL
- name: Validate results under acceptable limit
run: |
export MAX_ACCEPTABLE_DIFFERENCE=${{ inputs.maxAcceptableDifferencePercent }}
while IFS= read -r line; do
# Get fourth value, which is the comparison with the base.
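# e.g. a benchstat geomean row looks like "geomean  310.6µ  314.1µ  +1.11%" (illustrative),
# so the fourth field is the percentage difference vs the base.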
value="$(echo "$line" | awk '{print $4}')"
if [[ "$value" = +* ]] || [[ "$value" = -* ]]; then
if (( $(echo "${value//[^0-9.]/}"'>'"$MAX_ACCEPTABLE_DIFFERENCE" | bc -l) )); then
echo "::error::$value is above the maximum acceptable difference ($MAX_ACCEPTABLE_DIFFERENCE)"
exit 1
fi
fi
done < <(grep geomean result.txt)

20
.github/workflows/failpoint_test.yaml vendored Normal file
View File

@ -0,0 +1,20 @@
---
name: Failpoint test
on: [push, pull_request]
permissions: read-all
jobs:
test:
strategy:
matrix:
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: |
make gofail-enable
make test-failpoint

View File

@ -0,0 +1,42 @@
---
name: Approve GitHub Workflows
permissions: read-all
on:
pull_request_target:
types:
- labeled
- synchronize
branches:
- main
- release-1.3
jobs:
approve:
name: Approve ok-to-test
if: contains(github.event.pull_request.labels.*.name, 'ok-to-test')
runs-on: ubuntu-latest
permissions:
actions: write
steps:
- name: Update PR
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
continue-on-error: true
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
debug: ${{ secrets.ACTIONS_RUNNER_DEBUG == 'true' }}
script: |
const result = await github.rest.actions.listWorkflowRunsForRepo({
owner: context.repo.owner,
repo: context.repo.repo,
event: "pull_request",
status: "action_required",
head_sha: context.payload.pull_request.head.sha,
per_page: 100
});
for (var run of result.data.workflow_runs) {
await github.rest.actions.approveWorkflowRun({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: run.id
});
}

View File

@ -0,0 +1,17 @@
---
name: Robustness Nightly
permissions: read-all
on:
schedule:
- cron: '25 9 * * *' # runs every day at 09:25 UTC
# workflow_dispatch enables manual testing of this job by maintainers
workflow_dispatch:
jobs:
amd64:
# GHA caps a job at 6h of execution time; we try to finish within 3h
uses: ./.github/workflows/robustness_template.yaml
with:
count: 100
testTimeout: 200m
runs-on: "['ubuntu-latest']"

View File

@ -0,0 +1,50 @@
---
name: Reusable Robustness Workflow
on:
workflow_call:
inputs:
count:
required: true
type: number
testTimeout:
required: false
type: string
default: '30m'
runs-on:
required: false
type: string
default: "['ubuntu-latest']"
permissions: read-all
jobs:
test:
# this is to prevent the job from running on forked projects
if: github.repository == 'etcd-io/bbolt'
timeout-minutes: 210
runs-on: ${{ fromJson(inputs.runs-on) }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- name: test-robustness
run: |
set -euo pipefail
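# dmsetup (device-mapper) and xfsprogs are used by the dmflakey-based robustness tests, which also cover XFS.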
sudo apt-get install -y dmsetup xfsprogs
ROBUSTNESS_TESTFLAGS="--count ${{ inputs.count }} --timeout ${{ inputs.testTimeout }} -failfast" make test-robustness
- name: Host Status
if: always()
run: |
set -x
mount
df
losetup -l
- name: Kernel Message
if: failure()
run: |
sudo lsmod
sudo dmesg -T -f kern

16
.github/workflows/robustness_test.yaml vendored Normal file
View File

@ -0,0 +1,16 @@
name: Robustness Test
on: [push, pull_request]
permissions: read-all
jobs:
amd64:
uses: ./.github/workflows/robustness_template.yaml
with:
count: 10
testTimeout: 30m
runs-on: "['ubuntu-latest']"
arm64:
uses: ./.github/workflows/robustness_template.yaml
with:
count: 10
testTimeout: 30m
runs-on: "['ubuntu-24.04-arm']"

19
.github/workflows/stale.yaml vendored Normal file
View File

@ -0,0 +1,19 @@
name: 'Close stale issues and PRs'
on:
schedule:
- cron: '0 0 * * *' # every day at 00:00 UTC
permissions:
issues: write
pull-requests: write
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
days-before-stale: 90
days-before-close: 21
stale-issue-label: stale
stale-pr-label: stale

55
.github/workflows/tests-template.yml vendored Normal file
View File

@ -0,0 +1,55 @@
---
name: Reusable unit test Workflow
on:
workflow_call:
inputs:
runs-on:
required: false
type: string
default: ubuntu-latest
targets:
required: false
type: string
default: "['linux-unit-test-1-cpu','linux-unit-test-2-cpu','linux-unit-test-4-cpu']"
permissions: read-all
jobs:
test-linux:
strategy:
fail-fast: false
matrix:
target: ${{ fromJSON(inputs.targets) }}
runs-on: ${{ inputs.runs-on }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: make fmt
- env:
TARGET: ${{ matrix.target }}
run: |
case "${TARGET}" in
linux-unit-test-1-cpu)
CPU=1 make test
;;
linux-unit-test-2-cpu)
CPU=2 make test
;;
linux-unit-test-4-cpu)
CPU=4 make test
;;
linux-unit-test-4-cpu-race)
CPU=4 ENABLE_RACE=true make test
;;
*)
echo "Failed to find target"
exit 1
;;
esac
- name: golangci-lint
uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
with:
version: v2.1.6

26
.github/workflows/tests_amd64.yaml vendored Normal file
View File

@ -0,0 +1,26 @@
---
name: Tests AMD64
permissions: read-all
on: [push, pull_request]
jobs:
test-linux-amd64:
uses: ./.github/workflows/tests-template.yml
test-linux-amd64-race:
uses: ./.github/workflows/tests-template.yml
with:
runs-on: ubuntu-latest
targets: "['linux-unit-test-4-cpu-race']"
coverage:
needs:
- test-linux-amd64
- test-linux-amd64-race
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: make coverage

26
.github/workflows/tests_arm64.yaml vendored Normal file
View File

@ -0,0 +1,26 @@
---
name: Tests ARM64
permissions: read-all
on: [push, pull_request]
jobs:
test-linux-arm64:
uses: ./.github/workflows/tests-template.yml
test-linux-arm64-race:
uses: ./.github/workflows/tests-template.yml
with:
runs-on: ubuntu-24.04-arm
targets: "['linux-unit-test-4-cpu-race']"
coverage:
needs:
- test-linux-arm64
- test-linux-arm64-race
runs-on: ubuntu-24.04-arm
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: make coverage

57
.github/workflows/tests_windows.yml vendored Normal file
View File

@ -0,0 +1,57 @@
---
name: Tests
on: [push, pull_request]
permissions: read-all
jobs:
test-windows:
strategy:
fail-fast: false
matrix:
target:
- windows-amd64-unit-test-4-cpu
# FIXME(fuweid):
#
# Windows throws the following error when race detection is enabled.
# We skip it until we have a solution.
#
# ThreadSanitizer failed to allocate 0x000200000000 (8589934592) bytes at 0x0400c0000000 (error code: 1455)
#
# - windows-amd64-unit-test-4-cpu-race
runs-on: windows-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: make fmt
- env:
TARGET: ${{ matrix.target }}
run: |
case "${TARGET}" in
windows-amd64-unit-test-4-cpu)
CPU=4 make test
;;
*)
echo "Failed to find target"
exit 1
;;
esac
shell: bash
- name: golangci-lint
uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
with:
version: v2.1.6
coverage:
needs: ["test-windows"]
runs-on: windows-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: make coverage

12
.gitignore vendored Normal file
View File

@ -0,0 +1,12 @@
*.prof
*.test
*.swp
/bin/
cover.out
cover-*.out
/.idea
*.iml
/bbolt
/cmd/bbolt/bbolt
.DS_Store

1
.go-version Normal file
View File

@ -0,0 +1 @@
1.24.3

34
.golangci.yaml Normal file
View File

@ -0,0 +1,34 @@
formatters:
enable:
- gofmt
- goimports
settings: # please keep this alphabetized
goimports:
local-prefixes:
- go.etcd.io # Put imports beginning with prefix after 3rd-party packages.
issues:
max-same-issues: 0
linters:
default: none
enable: # please keep this alphabetized
- errcheck
- govet
- ineffassign
- staticcheck
- unused
exclusions:
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
settings: # please keep this alphabetized
staticcheck:
checks:
- all
- -QF1003 # Convert if/else-if chain to tagged switch
- -QF1010 # Convert slice of bytes to string when printing it
- -ST1003 # Poorly chosen identifier
- -ST1005 # Incorrectly formatted error string
- -ST1012 # Poorly chosen name for error variable
version: "2"

View File

@ -0,0 +1,90 @@
Note that we started tracking changes from v1.3.7.
<hr>
## v1.3.11(2024-08-21)
### BoltDB
- Fix [the `freelist.allocs` isn't rolled back when a tx is rolled back](https://github.com/etcd-io/bbolt/pull/823).
### CMD
- Add [`-gobench-output` option for bench command to adapt to benchstat](https://github.com/etcd-io/bbolt/pull/802).
### Other
- [Bump go version to 1.22.x](https://github.com/etcd-io/bbolt/pull/822).
- This patch also added `dmflakey` package, which can be reused by other projects. See https://github.com/etcd-io/bbolt/pull/812.
<hr>
## v1.3.10(2024-05-06)
### BoltDB
- [Remove deprecated `UnsafeSlice` and use `unsafe.Slice`](https://github.com/etcd-io/bbolt/pull/717)
- [Stabilize the behaviour of Prev when the cursor already points to the first element](https://github.com/etcd-io/bbolt/pull/744)
### Other
- [Bump go version to 1.21.9](https://github.com/etcd-io/bbolt/pull/713)
<hr>
## v1.3.9(2024-02-24)
### BoltDB
- [Clone the key before operating data in bucket against the key](https://github.com/etcd-io/bbolt/pull/639)
### CMD
- [Fix `bbolt keys` and `bbolt get` to prevent them from panicking when no parameter provided](https://github.com/etcd-io/bbolt/pull/683)
<hr>
## v1.3.8(2023-10-26)
### BoltDB
- Fix [db.close() doesn't unlock the db file if db.munmap() fails](https://github.com/etcd-io/bbolt/pull/439).
- [Avoid syscall.Syscall use on OpenBSD](https://github.com/etcd-io/bbolt/pull/406).
- Fix [rollback panicking after mlock failed or both meta pages corrupted](https://github.com/etcd-io/bbolt/pull/444).
- Fix [bbolt panicking due to 64bit unaligned on arm32](https://github.com/etcd-io/bbolt/pull/584).
### CMD
- [Update the usage of surgery command](https://github.com/etcd-io/bbolt/pull/411).
<hr>
## v1.3.7(2023-01-31)
### BoltDB
- Add [recursive checker to confirm database consistency](https://github.com/etcd-io/bbolt/pull/225).
- Add [support to get the page size from the second meta page if the first one is invalid](https://github.com/etcd-io/bbolt/pull/294).
- Add [support for loong64 arch](https://github.com/etcd-io/bbolt/pull/303).
- Add [internal iterator to Bucket that goes over buckets](https://github.com/etcd-io/bbolt/pull/356).
- Add [validation on page read and write](https://github.com/etcd-io/bbolt/pull/358).
- Add [PreLoadFreelist option to support loading free pages in readonly mode](https://github.com/etcd-io/bbolt/pull/381).
- Add [(*Tx) CheckWithOption to support generating human-readable diagnostic messages](https://github.com/etcd-io/bbolt/pull/395).
- Fix [Use `golang.org/x/sys/windows` for `FileLockEx`/`UnlockFileEx`](https://github.com/etcd-io/bbolt/pull/283).
- Fix [readonly file mapping on windows](https://github.com/etcd-io/bbolt/pull/307).
- Fix [the "Last" method might return no data due to not skipping the empty pages](https://github.com/etcd-io/bbolt/pull/341).
- Fix [panic on db.meta when rollback](https://github.com/etcd-io/bbolt/pull/362).
### CMD
- Add [support for get keys in sub buckets in `bbolt get` command](https://github.com/etcd-io/bbolt/pull/295).
- Add [support for `--format` flag for `bbolt keys` command](https://github.com/etcd-io/bbolt/pull/306).
- Add [safeguards to bbolt CLI commands](https://github.com/etcd-io/bbolt/pull/354).
- Add [`bbolt page` supports --all and --value-format=redacted formats](https://github.com/etcd-io/bbolt/pull/359).
- Add [`bbolt surgery` commands](https://github.com/etcd-io/bbolt/issues/370).
- Fix [open db file readonly mode for commands which shouldn't update the db file](https://github.com/etcd-io/bbolt/pull/365), see also [pull/292](https://github.com/etcd-io/bbolt/pull/292).
### Other
- [Build bbolt CLI tool, test and format the source code using golang 1.17.13](https://github.com/etcd-io/bbolt/pull/297).
- [Bump golang.org/x/sys to v0.4.0](https://github.com/etcd-io/bbolt/pull/397).
### Summary
Release v1.3.7 contains the following critical fixes:
- fix for the problem that the `Last` method might return an incorrect value ([#341](https://github.com/etcd-io/bbolt/pull/341))
- fix for a potential panic when performing a transaction's rollback ([#362](https://github.com/etcd-io/bbolt/pull/362))
Other changes focused on defense-in-depth ([#358](https://github.com/etcd-io/bbolt/pull/358), [#294](https://github.com/etcd-io/bbolt/pull/294), [#225](https://github.com/etcd-io/bbolt/pull/225), [#395](https://github.com/etcd-io/bbolt/pull/395))
The `bbolt` command line tool was expanded to:
- allow fixing simple corruptions by `bbolt surgery` ([#370](https://github.com/etcd-io/bbolt/pull/370))
- be flexible about output formatting ([#306](https://github.com/etcd-io/bbolt/pull/306), [#359](https://github.com/etcd-io/bbolt/pull/359))
- allow accessing data in subbuckets ([#295](https://github.com/etcd-io/bbolt/pull/295))

View File

@ -0,0 +1,76 @@
<hr>
## v1.4.0(2025-02-05)
There aren't any production code changes since v1.4.0-beta.0. Only some dependencies
were bumped, some typos in comments and the readme were fixed, and the legacy
build tag `// +build` was removed in https://github.com/etcd-io/bbolt/pull/879.
<hr>
## v1.4.0-beta.0(2024-11-04)
### BoltDB
- Reorganized the directory structure of freelist source code
- [Move array related freelist source code into a separate file](https://github.com/etcd-io/bbolt/pull/777)
- [Move method `freePages` into freelist.go](https://github.com/etcd-io/bbolt/pull/783)
- [Add an interface for freelist](https://github.com/etcd-io/bbolt/pull/775)
- [Roll back the alloc map when a transaction is rolled back](https://github.com/etcd-io/bbolt/pull/819)
- [Stop handling the freelist as a special case when freeing a page](https://github.com/etcd-io/bbolt/pull/788)
- [Ensure hashmap init method clears the data structures](https://github.com/etcd-io/bbolt/pull/794)
- [Panic when a write transaction tries to free a page allocated by itself](https://github.com/etcd-io/bbolt/pull/792)
### CMD
- [Add `-gobench-output` flag for `bbolt bench` command](https://github.com/etcd-io/bbolt/pull/765)
### Other
- [Bump go version to 1.23.x](https://github.com/etcd-io/bbolt/pull/821)
<hr>
## v1.4.0-alpha.1(2024-05-06)
### BoltDB
- [Enhance check functionality to support checking starting from a pageId](https://github.com/etcd-io/bbolt/pull/659)
- [Optimize the logger performance for frequent called methods](https://github.com/etcd-io/bbolt/pull/741)
- [Stabilize the behaviour of Prev when the cursor already points to the first element](https://github.com/etcd-io/bbolt/pull/734)
### CMD
- [Fix `bbolt keys` and `bbolt get` to prevent them from panicking when no parameter provided](https://github.com/etcd-io/bbolt/pull/682)
- [Fix surgery freelist command in info logs](https://github.com/etcd-io/bbolt/pull/700)
- [Remove txid references in surgery meta command's comment and description](https://github.com/etcd-io/bbolt/pull/703)
- [Add rnd read capabilities to bbolt bench](https://github.com/etcd-io/bbolt/pull/711)
- [Use `cobra.ExactArgs` to simplify the argument number check](https://github.com/etcd-io/bbolt/pull/728)
- [Migrate `bbolt check` command to cobra style](https://github.com/etcd-io/bbolt/pull/723)
- [Simplify the naming of cobra commands](https://github.com/etcd-io/bbolt/pull/732)
- [Aggregate adding completed ops for read test of the `bbolt bench` command](https://github.com/etcd-io/bbolt/pull/721)
- [Add `--from-page` flag to `bbolt check` command](https://github.com/etcd-io/bbolt/pull/737)
### Document
- [Add document for a known issue on the writing a value with a length of 0](https://github.com/etcd-io/bbolt/pull/730)
### Test
- [Enhance robustness test to cover XFS](https://github.com/etcd-io/bbolt/pull/707)
### Other
- [Bump go toolchain version to 1.22.2](https://github.com/etcd-io/bbolt/pull/712)
<hr>
## v1.4.0-alpha.0(2024-01-12)
### BoltDB
- [Improve the performance of hashmapGetFreePageIDs](https://github.com/etcd-io/bbolt/pull/419)
- [Improve CreateBucketIfNotExists to avoid double searching the same key](https://github.com/etcd-io/bbolt/pull/532)
- [Support Android platform](https://github.com/etcd-io/bbolt/pull/571)
- [Record the count of free page to improve the performance of hashmapFreeCount](https://github.com/etcd-io/bbolt/pull/585)
- [Add logger to bbolt](https://github.com/etcd-io/bbolt/issues/509)
- [Support moving bucket inside the same db](https://github.com/etcd-io/bbolt/pull/635)
- [Support inspecting database structure](https://github.com/etcd-io/bbolt/pull/674)
### CMD
- [Add `surgery clear-page-elements` command](https://github.com/etcd-io/bbolt/pull/417)
- [Add `surgery abandon-freelist` command](https://github.com/etcd-io/bbolt/pull/443)
- [Add `bbolt version` command](https://github.com/etcd-io/bbolt/pull/552)
- [Add `bbolt inspect` command](https://github.com/etcd-io/bbolt/pull/674)
- [Add `--no-sync` option to `bbolt compact` command](https://github.com/etcd-io/bbolt/pull/290)

20
LICENSE Normal file
View File

@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2013 Ben Johnson
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

108
Makefile Normal file
View File

@ -0,0 +1,108 @@
BRANCH=`git rev-parse --abbrev-ref HEAD`
COMMIT=`git rev-parse --short HEAD`
GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
GOFILES = $(shell find . -name \*.go)
TESTFLAGS_RACE=-race=false
ifdef ENABLE_RACE
TESTFLAGS_RACE=-race=true
endif
TESTFLAGS_CPU=
ifdef CPU
TESTFLAGS_CPU=-cpu=$(CPU)
endif
TESTFLAGS = $(TESTFLAGS_RACE) $(TESTFLAGS_CPU) $(EXTRA_TESTFLAGS)
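# e.g. "CPU=4 ENABLE_RACE=true make test" runs the tests with "-race=true -cpu=4" (illustrative).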
TESTFLAGS_TIMEOUT=30m
ifdef TIMEOUT
TESTFLAGS_TIMEOUT=$(TIMEOUT)
endif
TESTFLAGS_ENABLE_STRICT_MODE=false
ifdef ENABLE_STRICT_MODE
TESTFLAGS_ENABLE_STRICT_MODE=$(ENABLE_STRICT_MODE)
endif
.EXPORT_ALL_VARIABLES:
TEST_ENABLE_STRICT_MODE=${TESTFLAGS_ENABLE_STRICT_MODE}
.PHONY: fmt
fmt:
@echo "Verifying gofmt, failures can be fixed with ./scripts/fix.sh"
@!(gofmt -l -s -d ${GOFILES} | grep '[a-z]')
@echo "Verifying goimports, failures can be fixed with ./scripts/fix.sh"
@!(go run golang.org/x/tools/cmd/goimports@latest -l -d ${GOFILES} | grep '[a-z]')
.PHONY: lint
lint:
golangci-lint run ./...
.PHONY: test
test:
@echo "hashmap freelist test"
BBOLT_VERIFY=all TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} -timeout ${TESTFLAGS_TIMEOUT}
BBOLT_VERIFY=all TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} ./internal/...
BBOLT_VERIFY=all TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} ./cmd/bbolt
@echo "array freelist test"
BBOLT_VERIFY=all TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout ${TESTFLAGS_TIMEOUT}
BBOLT_VERIFY=all TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} ./internal/...
BBOLT_VERIFY=all TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} ./cmd/bbolt
.PHONY: coverage
coverage:
@echo "hashmap freelist test"
TEST_FREELIST_TYPE=hashmap go test -v -timeout ${TESTFLAGS_TIMEOUT} \
-coverprofile cover-freelist-hashmap.out -covermode atomic
@echo "array freelist test"
TEST_FREELIST_TYPE=array go test -v -timeout ${TESTFLAGS_TIMEOUT} \
-coverprofile cover-freelist-array.out -covermode atomic
BOLT_CMD=bbolt
build:
go build -o bin/${BOLT_CMD} ./cmd/${BOLT_CMD}
.PHONY: clean
clean: # Clean binaries
rm -f ./bin/${BOLT_CMD}
.PHONY: gofail-enable
gofail-enable: install-gofail
gofail enable .
.PHONY: gofail-disable
gofail-disable: install-gofail
gofail disable .
.PHONY: install-gofail
install-gofail:
go install go.etcd.io/gofail
.PHONY: test-failpoint
test-failpoint:
@echo "[failpoint] hashmap freelist test"
BBOLT_VERIFY=all TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint
@echo "[failpoint] array freelist test"
BBOLT_VERIFY=all TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint
.PHONY: test-robustness # Running robustness tests requires root permission for now
# TODO: Remove sudo once we fully migrate to the prow infrastructure
test-robustness: gofail-enable build
sudo env PATH=$$PATH go test -v ${TESTFLAGS} ./tests/dmflakey -test.root
sudo env PATH=$(PWD)/bin:$$PATH go test -v ${TESTFLAGS} ${ROBUSTNESS_TESTFLAGS} ./tests/robustness -test.root
.PHONY: test-benchmark-compare
# Runs benchmark tests on the current git ref and the given REF, and compares
# the two.
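# Example: make test-benchmark-compare REF=release-1.3 (the ref used by the nightly benchmark workflow).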
test-benchmark-compare: install-benchstat
@git fetch
./scripts/compare_benchmarks.sh $(REF)
.PHONY: install-benchstat
install-benchstat:
go install golang.org/x/perf/cmd/benchstat@latest

10
OWNERS Normal file
View File

@ -0,0 +1,10 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- ahrtr # Benjamin Wang <benjamin.ahrtr@gmail.com> <benjamin.wang@broadcom.com>
- serathius # Marek Siarkowicz <siarkowicz@google.com> <marek.siarkowicz@gmail.com>
- ptabor # Piotr Tabor <piotr.tabor@gmail.com>
- spzala # Sahdev Zala <spzala@us.ibm.com>
reviewers:
- fuweid # Wei Fu <fuweid89@gmail.com>
- tjungblu # Thomas Jungblut <tjungblu@redhat.com>

21
README.md Normal file
View File

@ -0,0 +1,21 @@
# Tutus Bolt
Embedded key/value database for the Tutus blockchain.
## Overview
Tutus Bolt is a pure Go embedded database providing ACID transactions with serializable isolation.
## Installation
```go
import "github.com/tutus-one/tutus-bolt"
```
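## Usage
A minimal sketch of typical usage, following the upstream bbolt API (`bolt.Open`, `db.Update`, `db.View`); the file, bucket, and key names here are illustrative:
```go
package main

import (
	"fmt"
	"log"

	bolt "github.com/tutus-one/tutus-bolt"
)

func main() {
	// Open (or create) the database file. The file is exclusively
	// locked while open, so only one read-write process may use it.
	db, err := bolt.Open("my.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Read-write transaction: create a bucket and store a key.
	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("config"))
		if err != nil {
			return err
		}
		return b.Put([]byte("greeting"), []byte("hello"))
	}); err != nil {
		log.Fatal(err)
	}

	// Read-only transaction: fetch the key back.
	if err := db.View(func(tx *bolt.Tx) error {
		v := tx.Bucket([]byte("config")).Get([]byte("greeting"))
		fmt.Printf("greeting=%s\n", v)
		return nil
	}); err != nil {
		log.Fatal(err)
	}
}
```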
## License
MIT License
---
Part of the [Tutus](https://github.com/tutus-one/tutus-chain) blockchain infrastructure.

39
allocate_test.go Normal file
View File

@ -0,0 +1,39 @@
package bbolt
import (
"testing"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/freelist"
)
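// TestTx_allocatePageStats verifies, for both freelist implementations, that
// allocating all free pages is reflected in the transaction's page-count stats.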
func TestTx_allocatePageStats(t *testing.T) {
for n, f := range map[string]freelist.Interface{"hashmap": freelist.NewHashMapFreelist(), "array": freelist.NewArrayFreelist()} {
t.Run(n, func(t *testing.T) {
ids := []common.Pgid{2, 3}
f.Init(ids)
tx := &Tx{
db: &DB{
freelist: f,
pageSize: common.DefaultPageSize,
},
meta: &common.Meta{},
pages: make(map[common.Pgid]*common.Page),
}
txStats := tx.Stats()
prePageCnt := txStats.GetPageCount()
allocateCnt := f.FreeCount()
if _, err := tx.allocate(allocateCnt); err != nil {
t.Fatal(err)
}
txStats = tx.Stats()
if txStats.GetPageCount() != prePageCnt+int64(allocateCnt) {
t.Errorf("Allocated %d but got %d page in stats", allocateCnt, txStats.GetPageCount())
}
})
}
}

7
bolt_386.go Normal file
View File

@ -0,0 +1,7 @@
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

90
bolt_aix.go Normal file
View File

@ -0,0 +1,90 @@
//go:build aix
package bbolt
import (
"fmt"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := db.file.Fd()
var lockType int16
if exclusive {
lockType = syscall.F_WRLCK
} else {
lockType = syscall.F_RDLCK
}
for {
// Attempt to obtain the lock.
lock := syscall.Flock_t{Type: lockType}
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
if err == nil {
return nil
} else if err != syscall.EAGAIN {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
var lock syscall.Flock_t
lock.Start = 0
lock.Len = 0
lock.Type = syscall.F_UNLCK
lock.Whence = 0
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := unix.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}

7
bolt_amd64.go Normal file
View File

@ -0,0 +1,7 @@
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

90
bolt_android.go Normal file
View File

@ -0,0 +1,90 @@
package bbolt
import (
"fmt"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := db.file.Fd()
var lockType int16
if exclusive {
lockType = syscall.F_WRLCK
} else {
lockType = syscall.F_RDLCK
}
for {
// Attempt to obtain the lock.
lock := syscall.Flock_t{Type: lockType}
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
if err == nil {
return nil
} else if err != syscall.EAGAIN {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
var lock syscall.Flock_t
lock.Start = 0
lock.Len = 0
lock.Type = syscall.F_UNLCK
lock.Whence = 0
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
err = unix.Madvise(b, syscall.MADV_RANDOM)
if err != nil && err != syscall.ENOSYS {
// Ignore not implemented error in kernel because it still works.
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := unix.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}

7
bolt_arm.go Normal file
View File

@ -0,0 +1,7 @@
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

9
bolt_arm64.go Normal file
View File

@ -0,0 +1,9 @@
//go:build arm64
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

10
bolt_linux.go Normal file
View File

@ -0,0 +1,10 @@
package bbolt
import (
"syscall"
)
// fdatasync flushes written data to a file descriptor.
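// Unlike fsync, fdatasync does not flush file metadata (such as timestamps)
// unless that metadata is needed to read the data back, which avoids extra
// journal writes on Linux.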
func fdatasync(db *DB) error {
return syscall.Fdatasync(int(db.file.Fd()))
}

9
bolt_loong64.go Normal file
View File

@ -0,0 +1,9 @@
//go:build loong64
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

9
bolt_mips64x.go Normal file
View File

@ -0,0 +1,9 @@
//go:build mips64 || mips64le
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x8000000000 // 512GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

9
bolt_mipsx.go Normal file
View File

@ -0,0 +1,9 @@
//go:build mips || mipsle
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x40000000 // 1GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

16
bolt_openbsd.go Normal file
View File

@ -0,0 +1,16 @@
package bbolt
import (
"golang.org/x/sys/unix"
)
func msync(db *DB) error {
return unix.Msync(db.data[:db.datasz], unix.MS_INVALIDATE)
}
func fdatasync(db *DB) error {
if db.data != nil {
return msync(db)
}
return db.file.Sync()
}

9
bolt_ppc.go Normal file
View File

@ -0,0 +1,9 @@
//go:build ppc
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0x7FFFFFFF // 2GB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

9
bolt_ppc64.go Normal file
View File

@ -0,0 +1,9 @@
//go:build ppc64
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

9
bolt_ppc64le.go Normal file
View File

@ -0,0 +1,9 @@
//go:build ppc64le
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

9
bolt_riscv64.go Normal file
View File

@ -0,0 +1,9 @@
//go:build riscv64
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

9
bolt_s390x.go Normal file
View File

@ -0,0 +1,9 @@
//go:build s390x
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

88
bolt_solaris.go Normal file
View File

@ -0,0 +1,88 @@
package bbolt
import (
"fmt"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := db.file.Fd()
var lockType int16
if exclusive {
lockType = syscall.F_WRLCK
} else {
lockType = syscall.F_RDLCK
}
for {
// Attempt to obtain the lock.
lock := syscall.Flock_t{Type: lockType}
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
if err == nil {
return nil
} else if err != syscall.EAGAIN {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
var lock syscall.Flock_t
lock.Start = 0
lock.Len = 0
lock.Type = syscall.F_UNLCK
lock.Whence = 0
return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := unix.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}

88
bolt_unix.go Normal file
View File

@ -0,0 +1,88 @@
//go:build !windows && !plan9 && !solaris && !aix && !android
package bbolt
import (
"fmt"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
"github.com/tutus-one/tutus-bolt/errors"
)
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
fd := db.file.Fd()
flag := syscall.LOCK_NB
if exclusive {
flag |= syscall.LOCK_EX
} else {
flag |= syscall.LOCK_SH
}
for {
// Attempt to obtain the lock.
err := syscall.Flock(int(fd), flag)
if err == nil {
return nil
} else if err != syscall.EWOULDBLOCK {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return errors.ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
}
// mmap memory maps a DB's data file.
func mmap(db *DB, sz int) error {
// Map the data file to memory.
b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
if err != nil {
return err
}
// Advise the kernel that the mmap is accessed randomly.
err = unix.Madvise(b, syscall.MADV_RANDOM)
if err != nil && err != syscall.ENOSYS {
// Ignore not implemented error in kernel because it still works.
return fmt.Errorf("madvise: %s", err)
}
// Save the original byte slice and convert to a byte array pointer.
db.dataref = b
db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
db.datasz = sz
return nil
}
// munmap unmaps a DB's data file from memory.
func munmap(db *DB) error {
// Ignore the unmap if we have no mapped data.
if db.dataref == nil {
return nil
}
// Unmap using the original byte slice.
err := unix.Munmap(db.dataref)
db.dataref = nil
db.data = nil
db.datasz = 0
return err
}

132
bolt_windows.go Normal file
View File

@ -0,0 +1,132 @@
package bbolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/windows"
"github.com/tutus-one/tutus-bolt/errors"
)
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()
}
// flock acquires an advisory lock on a file descriptor.
func flock(db *DB, exclusive bool, timeout time.Duration) error {
var t time.Time
if timeout != 0 {
t = time.Now()
}
var flags uint32 = windows.LOCKFILE_FAIL_IMMEDIATELY
if exclusive {
flags |= windows.LOCKFILE_EXCLUSIVE_LOCK
}
for {
// Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
// -1..0 as the lock on the database file.
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
err := windows.LockFileEx(windows.Handle(db.file.Fd()), flags, 0, 1, 0, &windows.Overlapped{
Offset: m1,
OffsetHigh: m1,
})
if err == nil {
return nil
} else if err != windows.ERROR_LOCK_VIOLATION {
return err
}
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return errors.ErrTimeout
}
// Wait for a bit and try again.
time.Sleep(flockRetryTimeout)
}
}
// funlock releases an advisory lock on a file descriptor.
func funlock(db *DB) error {
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
return windows.UnlockFileEx(windows.Handle(db.file.Fd()), 0, 1, 0, &windows.Overlapped{
Offset: m1,
OffsetHigh: m1,
})
}
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
func mmap(db *DB, sz int) error {
var sizelo, sizehi uint32
if !db.readOnly {
if db.MaxSize > 0 && sz > db.MaxSize {
// The max size only limits future writes; however, we don't block opening
// and mapping the database if it already exceeds the limit.
fileSize, err := db.fileSize()
if err != nil {
return fmt.Errorf("could not check existing db file size: %s", err)
}
if sz > fileSize {
return errors.ErrMaxSizeReached
}
}
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)
}
sizehi = uint32(sz >> 32)
sizelo = uint32(sz)
}
// Open a file mapping handle.
h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizehi, sizelo, nil)
if h == 0 {
return os.NewSyscallError("CreateFileMapping", errno)
}
// Create the memory map.
addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, 0)
if addr == 0 {
// Do our best and report error returned from MapViewOfFile.
_ = syscall.CloseHandle(h)
return os.NewSyscallError("MapViewOfFile", errno)
}
// Close mapping handle.
if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
return os.NewSyscallError("CloseHandle", err)
}
// Convert to a byte array.
db.data = (*[maxMapSize]byte)(unsafe.Pointer(addr))
db.datasz = sz
return nil
}
// munmap unmaps a pointer from a file.
// Based on: https://github.com/edsrzf/mmap-go
func munmap(db *DB) error {
if db.data == nil {
return nil
}
addr := (uintptr)(unsafe.Pointer(&db.data[0]))
var err1 error
if err := syscall.UnmapViewOfFile(addr); err != nil {
err1 = os.NewSyscallError("UnmapViewOfFile", err)
}
db.data = nil
db.datasz = 0
return err1
}

8
boltsync_unix.go Normal file
View File

@ -0,0 +1,8 @@
//go:build !windows && !plan9 && !linux && !openbsd
package bbolt
// fdatasync flushes written data to a file descriptor.
func fdatasync(db *DB) error {
return db.file.Sync()
}

1005
bucket.go Normal file

File diff suppressed because it is too large Load Diff

2170
bucket_test.go Normal file

File diff suppressed because it is too large Load Diff

12
cmd/bbolt/OWNERS Normal file
View File

@ -0,0 +1,12 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- ahrtr # Benjamin Wang <benjamin.ahrtr@gmail.com> <benjamin.wang@broadcom.com>
- fuweid # Wei Fu <fuweid89@gmail.com>
- serathius # Marek Siarkowicz <siarkowicz@google.com> <marek.siarkowicz@gmail.com>
- ptabor # Piotr Tabor <piotr.tabor@gmail.com>
- spzala # Sahdev Zala <spzala@us.ibm.com>
- tjungblu # Thomas Jungblut <tjungblu@redhat.com>
reviewers:
- elbehery # Mustafa Elbehery <melbeher@redhat.com>
- ivanvc # Ivan Valdes <ivan@vald.es>

453
cmd/bbolt/README.md Normal file
View File

@ -0,0 +1,453 @@
# Introduction to bbolt command line
`bbolt` provides a command line utility for inspecting and manipulating bbolt database files. To install the bbolt command line tool, please refer to the instructions [here](https://github.com/etcd-io/bbolt#installing).
**Note**: [etcd](https://github.com/etcd-io/etcd) uses bbolt as its backend storage engine. In this document, we take etcd as an example to demonstrate the usage of bbolt commands. Refer to [install etcd](https://etcd.io/docs/v3.5/install/) for installing etcd.
1. Start a single member etcd cluster with this command below:
```bash
$etcd
```
It will create a directory `default.etcd` by default under the current working directory, and the directory structure will look like this:
```bash
$tree default.etcd
default.etcd
└── member
├── snap
│   └── db // this is the bbolt database file
└── wal
└── 0000000000000000-0000000000000000.wal
3 directories, 2 files
```
2. Put some dummy data using [etcdctl](https://github.com/etcd-io/etcd/tree/main/etcdctl), as shown in the example after this list.
3. Stop the etcd instance. Note that a bbolt database file can only be opened by one read-write process at a time, because it is exclusively locked when opened.
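For example, a few dummy key-value pairs can be written like this before stopping etcd (the key and value names are illustrative):
```bash
$etcdctl put key1 value1
$etcdctl put key2 value2
```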
## Usage
- `bbolt command [arguments]`
### help
- `help` prints information about the given command
```bash
$bbolt help
The commands are:
version prints the current version of bbolt
bench run synthetic benchmark against bbolt
buckets print a list of buckets
check verifies integrity of bbolt database
compact copies a bbolt database, compacting it in the process
dump print a hexadecimal dump of a single page
get print the value of a key in a bucket
info print basic info
keys print a list of keys in a bucket
help print this screen
page print one or more pages in human readable format
pages print list of pages with their types
page-item print the key and value of a page item.
stats iterate over all pages and generate usage stats
surgery perform surgery on bbolt database
```
- you can use `help` with any command: `bbolt [command] -h` for more information about command.
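For example, to see the detailed usage and flags of the `check` command (any other command name works the same way):
```bash
$bbolt check -h
```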
## Analyse bbolt database with bbolt command line
### version
- `version` prints the current version information of the bbolt command line tool.
- usage:
`bbolt version`
Example:
```bash
$bbolt version
bbolt version: 1.3.7
Go Version: go1.21.6
Go OS/Arch: darwin/arm64
```
### info
- `info` prints basic information about the given bbolt database.
- usage:
`bbolt info [path to the bbolt database]`
Example:
```bash
$bbolt info ~/default.etcd/member/snap/db
Page Size: 4096
```
- **note**: the page size is given in bytes
- this bbolt database uses a page size of 4 KB
### buckets
- `buckets` prints a list of the buckets that the bbolt database currently contains. Find more information on buckets [here](https://github.com/etcd-io/bbolt#using-buckets)
- usage:
`bbolt buckets [path to the bbolt database]`
Example:
```bash
$bbolt buckets ~/default.etcd/member/snap/db
alarm
auth
authRoles
authUsers
cluster
key
lease
members
members_removed
meta
```
- This shows that when you start etcd, it creates these `10` buckets in its bbolt database.
### check
- `check` opens a database at a given `[PATH]` and runs an exhaustive check to verify that all pages are accessible or are marked as freed. It also verifies that no pages are double referenced.
- usage:
`bbolt check [path to the bbolt database]`
Example:
```bash
$bbolt check ~/default.etcd/member/snap/db
ok
```
- It returns `ok` as our database file `db` is not corrupted.
### stats
- `stats` gathers essential statistics about the bbolt database: it performs an extensive search of the database to track every page reference, starting at the current meta page and recursively iterating through every accessible bucket.
- usage:
`bbolt stats [path to the bbolt database]`
Example:
```bash
$bbolt stats ~/default.etcd/member/snap/db
Aggregate statistics for 10 buckets
Page count statistics
Number of logical branch pages: 0
Number of physical branch overflow pages: 0
Number of logical leaf pages: 0
Number of physical leaf overflow pages: 0
Tree statistics
Number of keys/value pairs: 11
Number of levels in B+tree: 1
Page size utilization
Bytes allocated for physical branch pages: 0
Bytes actually used for branch data: 0 (0%)
Bytes allocated for physical leaf pages: 0
Bytes actually used for leaf data: 0 (0%)
Bucket statistics
Total number of buckets: 10
Total number on inlined buckets: 10 (100%)
Bytes used for inlined buckets: 780 (0%)
```
### inspect
- `inspect` inspects the structure of the database.
- Usage: `bbolt inspect [path to the bbolt database]`
Example:
```bash
$ ./bbolt inspect ~/default.etcd/member/snap/db
{
"name": "root",
"keyN": 0,
"buckets": [
{
"name": "alarm",
"keyN": 0
},
{
"name": "auth",
"keyN": 2
},
{
"name": "authRoles",
"keyN": 1
},
{
"name": "authUsers",
"keyN": 1
},
{
"name": "cluster",
"keyN": 1
},
{
"name": "key",
"keyN": 1285
},
{
"name": "lease",
"keyN": 2
},
{
"name": "members",
"keyN": 1
},
{
"name": "members_removed",
"keyN": 0
},
{
"name": "meta",
"keyN": 3
}
]
}
```
### pages
- Pages prints a table of pages with their type (meta, leaf, branch, freelist).
- The `meta` pages store the metadata of the database.
- The `leaf` and `branch` pages will show a key count in the `items` column.
- The `freelist` page will show the number of free pages, i.e. pages available to be written again.
- The `overflow` column shows the number of blocks that the page spills over into.
- usage:
`bbolt pages [path to the bbolt database]`
Example:
```bash
$bbolt pages ~/default.etcd/member/snap/db
ID TYPE ITEMS OVRFLW
======== ========== ====== ======
0 meta 0
1 meta 0
2 free
3 leaf 10
4 freelist 2
5 free
```
### page
- Page prints one or more pages in human readable format.
- usage:
```bash
bolt page [path to the bbolt database] pageid [pageid...]
or: bolt page --all [path to the bbolt database]
Additional options include:
--all
prints all pages (only skips pages that were considered successful overflow pages)
--format-value=auto|ascii-encoded|hex|bytes|redacted (default: auto)
prints values (on the leaf page) using the given format
```
Example:
```bash
$bbolt page ~/default.etcd/member/snap/db 3
Page ID: 3
Page Type: leaf
Total Size: 4096 bytes
Overflow pages: 0
Item Count: 10
"alarm": <pgid=0,seq=0>
"auth": <pgid=0,seq=0>
"authRoles": <pgid=0,seq=0>
"authUsers": <pgid=0,seq=0>
"cluster": <pgid=0,seq=0>
"key": <pgid=0,seq=0>
"lease": <pgid=0,seq=0>
"members": <pgid=0,seq=0>
"members_removed": <pgid=0,seq=0>
"meta": <pgid=0,seq=0>
```
- It prints the information of the page with `page ID: 3`.
### page-item
- page-item prints a page item's key and value.
- usage:
```bash
bolt page-item [options] [path to the bbolt database] <pageId> <itemId>
Additional options include:
--key-only
Print only the key
--value-only
Print only the value
--format
Output format. One of: auto|ascii-encoded|hex|bytes|redacted (default=auto)
```
Example:
```bash
$bbolt page-item --key-only ~/default.etcd/member/snap/db 3 7
"members"
```
- It returns only the key of the item at `pageID: 3`, `itemID: 7`, because the `--key-only` flag is passed.
### dump
- Dump prints a hexadecimal dump of one or more given pages.
- usage:
`bolt dump [path to the bbolt database] [pageid...]`
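Example (the command prints a raw hexadecimal dump of the page; page `3` is the leaf page shown in the `pages` example above):
```bash
$bbolt dump ~/default.etcd/member/snap/db 3
```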
### keys
- Print a list of keys in the given bucket.
- usage:
```bash
bolt keys [path to the bbolt database] [BucketName]
Additional options include:
--format
Output format. One of: auto|ascii-encoded|hex|bytes|redacted (default=auto)
```
Example 1:
```bash
$bbolt keys ~/default.etcd/member/snap/db meta
confState
consistent_index
term
```
- It lists all the keys in the bucket `meta`
Example 2:
```bash
$bbolt keys ~/default.etcd/member/snap/db members
8e9e05c52164694d
```
- It lists all the keys in the `members` bucket; each key is the `memberId` of an etcd cluster member.
- In this case we are running a single-member etcd cluster, hence only one `memberId` is present. If we had run a `3`-member etcd cluster, it would return `3` memberIds, since `3` cluster members would be present in the `members` bucket.
### get
- Print the value of the given key in the given bucket.
- usage:
```bash
bolt get [path to the bbolt database] [BucketName] [Key]
Additional options include:
--format
Output format. One of: auto|ascii-encoded|hex|bytes|redacted (default=auto)
--parse-format
Input format (of key). One of: ascii-encoded|hex (default=ascii-encoded)"
```
Example 1:
```bash
$bbolt get --format=hex ~/default.etcd/member/snap/db meta term
0000000000000004
```
- It returns the value stored in the bucket `meta` for the key `term`, in hexadecimal format.
Example 2:
```bash
$bbolt get ~/default.etcd/member/snap/db members 8e9e05c52164694d
{"id":10276657743932975437,"peerURLs":["http://localhost:2380"],"name":"default","clientURLs":["http://localhost:2379"]}
```
- It returns the value stored in the bucket `members` for the key `8e9e05c52164694d`.
### compact
- Compact opens a database at the given `[Source Path]` and walks it recursively, copying keys as they are found from all buckets to a newly created database at `[Destination Path]`. The original database is left untouched.
- usage:
```bash
bbolt compact [options] -o [Destination Path] [Source Path]
Additional options include:
-tx-max-size NUM
Specifies the maximum size of individual transactions.
Defaults to 64KB
```
Example:
```bash
$bbolt compact -o ~/db.compact ~/default.etcd/member/snap/db
16805888 -> 32768 bytes (gain=512.88x)
```
- It will create a compacted database file `db.compact` at the given path.
### bench
- `bench` runs a synthetic benchmark against a bbolt database.
- usage:
```bash
Usage:
-batch-size int
-blockprofile string
-count int
(default 1000)
-cpuprofile string
-fill-percent float
(default 0.5)
-key-size int
(default 8)
-memprofile string
-no-sync
-path string
-profile-mode string
(default "rw")
-read-mode string
(default "seq")
-value-size int
(default 32)
-work
-write-mode string
(default "seq")
```
Example:
```bash
$bbolt bench ~/default.etcd/member/snap/db -batch-size 400 -key-size 16
# Write 68.523572ms (68.523µs/op) (14593 op/sec)
# Read 1.000015152s (11ns/op) (90909090 op/sec)
```
- It runs a benchmark with a batch size of `400` and a key size of `16`, with default values used for all other parameters.

View File

@ -0,0 +1,73 @@
package main
import (
"fmt"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
bolt "github.com/tutus-one/tutus-bolt"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
type checkOptions struct {
fromPageID uint64
}
func (o *checkOptions) AddFlags(fs *pflag.FlagSet) {
fs.Uint64VarP(&o.fromPageID, "from-page", "", o.fromPageID, "check db integrity starting from the given page ID")
}
func newCheckCommand() *cobra.Command {
var o checkOptions
checkCmd := &cobra.Command{
Use: "check <bbolt-file>",
Short: "verify integrity of bbolt database data",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return checkFunc(cmd, args[0], o)
},
}
o.AddFlags(checkCmd.Flags())
return checkCmd
}
func checkFunc(cmd *cobra.Command, dbPath string, cfg checkOptions) error {
if _, err := checkSourceDBPath(dbPath); err != nil {
return err
}
// Open database.
db, err := bolt.Open(dbPath, 0600, &bolt.Options{
ReadOnly: true,
PreLoadFreelist: true,
})
if err != nil {
return err
}
defer db.Close()
opts := []bolt.CheckOption{bolt.WithKVStringer(CmdKvStringer())}
if cfg.fromPageID != 0 {
opts = append(opts, bolt.WithPageId(cfg.fromPageID))
}
// Perform consistency check.
return db.View(func(tx *bolt.Tx) error {
var count int
for err := range tx.Check(opts...) {
fmt.Fprintln(cmd.OutOrStdout(), err)
count++
}
// Print summary of errors.
if count > 0 {
fmt.Fprintf(cmd.OutOrStdout(), "%d errors found\n", count)
return guts_cli.ErrCorrupt
}
// Notify user that database is valid.
fmt.Fprintln(cmd.OutOrStdout(), "OK")
return nil
})
}

View File

@ -0,0 +1,66 @@
package main_test
import (
"bytes"
"io"
"testing"
"github.com/stretchr/testify/require"
main "github.com/tutus-one/tutus-bolt/cmd/bbolt"
"github.com/tutus-one/tutus-bolt/internal/btesting"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
func TestCheckCommand_Run(t *testing.T) {
testCases := []struct {
name string
args []string
expErr error
expOutput string
}{
{
name: "check whole db",
args: []string{"check", "path"},
expErr: nil,
expOutput: "OK\n",
},
{
name: "check valid pageId",
args: []string{"check", "path", "--from-page", "3"},
expErr: nil,
expOutput: "OK\n",
},
{
name: "check invalid pageId",
args: []string{"check", "path", "--from-page", "1"},
expErr: guts_cli.ErrCorrupt,
expOutput: "page ID (1) out of range [2, 4)",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Log("Creating sample DB")
db := btesting.MustCreateDB(t)
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
t.Log("Running check cmd")
rootCmd := main.NewRootCommand()
outputBuf := bytes.NewBufferString("") // capture output for assertion
rootCmd.SetOut(outputBuf)
tc.args[1] = db.Path() // path to be replaced with db.Path()
rootCmd.SetArgs(tc.args)
err := rootCmd.Execute()
require.Equal(t, tc.expErr, err)
t.Log("Checking output")
output, err := io.ReadAll(outputBuf)
require.NoError(t, err)
require.Containsf(t, string(output), tc.expOutput, "unexpected stdout:\n\n%s", string(output))
})
}
}

View File

@ -0,0 +1,46 @@
package main
import (
"encoding/json"
"fmt"
"os"
"github.com/spf13/cobra"
bolt "github.com/tutus-one/tutus-bolt"
)
func newInspectCommand() *cobra.Command {
inspectCmd := &cobra.Command{
Use: "inspect <bbolt-file>",
Short: "inspect the structure of the database",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return inspectFunc(args[0])
},
}
return inspectCmd
}
func inspectFunc(srcDBPath string) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
db, err := bolt.Open(srcDBPath, 0600, &bolt.Options{ReadOnly: true})
if err != nil {
return err
}
defer db.Close()
return db.View(func(tx *bolt.Tx) error {
bs := tx.Inspect()
out, err := json.MarshalIndent(bs, "", " ")
if err != nil {
return err
}
fmt.Fprintln(os.Stdout, string(out))
return nil
})
}
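An illustrative run (path hypothetical); the bucket hierarchy returned by `tx.Inspect()` is printed as indented JSON on stdout:
```bash
$ bbolt inspect ./my.db
```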

View File

@ -0,0 +1,27 @@
package main_test
import (
"testing"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
main "github.com/tutus-one/tutus-bolt/cmd/bbolt"
"github.com/tutus-one/tutus-bolt/internal/btesting"
)
func TestInspect(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
rootCmd := main.NewRootCommand()
rootCmd.SetArgs([]string{
"inspect", srcPath,
})
err := rootCmd.Execute()
require.NoError(t, err)
}

27
cmd/bbolt/command_root.go Normal file
View File

@ -0,0 +1,27 @@
package main
import (
"github.com/spf13/cobra"
)
const (
cliName = "bbolt"
cliDescription = "A simple command line tool for inspecting bbolt databases"
)
func NewRootCommand() *cobra.Command {
rootCmd := &cobra.Command{
Use: cliName,
Short: cliDescription,
Version: "dev",
}
rootCmd.AddCommand(
newVersionCommand(),
newSurgeryCommand(),
newInspectCommand(),
newCheckCommand(),
)
return rootCmd
}
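The root command only aggregates the subcommands registered above. Assuming the binary is built as `bbolt`, they are reached as, for example (paths hypothetical):
```bash
$ bbolt version
$ bbolt check ./my.db
$ bbolt surgery --help
```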

View File

@ -0,0 +1,300 @@
package main
import (
"errors"
"fmt"
"os"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
"github.com/tutus-one/tutus-bolt/internal/surgeon"
)
var (
ErrSurgeryFreelistAlreadyExist = errors.New("the file already has a freelist; please abandon the freelist first if you want to forcibly rebuild it")
)
func newSurgeryCommand() *cobra.Command {
surgeryCmd := &cobra.Command{
Use: "surgery <subcommand>",
Short: "surgery related commands",
}
surgeryCmd.AddCommand(newSurgeryRevertMetaPageCommand())
surgeryCmd.AddCommand(newSurgeryCopyPageCommand())
surgeryCmd.AddCommand(newSurgeryClearPageCommand())
surgeryCmd.AddCommand(newSurgeryClearPageElementsCommand())
surgeryCmd.AddCommand(newSurgeryFreelistCommand())
surgeryCmd.AddCommand(newSurgeryMetaCommand())
return surgeryCmd
}
type surgeryBaseOptions struct {
outputDBFilePath string
}
func (o *surgeryBaseOptions) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&o.outputDBFilePath, "output", o.outputDBFilePath, "path to the output db file")
_ = cobra.MarkFlagRequired(fs, "output")
}
func (o *surgeryBaseOptions) Validate() error {
if o.outputDBFilePath == "" {
return errors.New("output database path wasn't given, specify output database file path with --output option")
}
return nil
}
func newSurgeryRevertMetaPageCommand() *cobra.Command {
var o surgeryBaseOptions
revertMetaPageCmd := &cobra.Command{
Use: "revert-meta-page <bbolt-file>",
Short: "Revert the meta page to revert the changes performed by the latest transaction",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryRevertMetaPageFunc(args[0], o)
},
}
o.AddFlags(revertMetaPageCmd.Flags())
return revertMetaPageCmd
}
func surgeryRevertMetaPageFunc(srcDBPath string, cfg surgeryBaseOptions) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[revert-meta-page] copy file failed: %w", err)
}
if err := surgeon.RevertMetaPage(cfg.outputDBFilePath); err != nil {
return fmt.Errorf("revert-meta-page command failed: %w", err)
}
fmt.Fprintln(os.Stdout, "The meta page is reverted.")
return nil
}
type surgeryCopyPageOptions struct {
surgeryBaseOptions
sourcePageId uint64
destinationPageId uint64
}
func (o *surgeryCopyPageOptions) AddFlags(fs *pflag.FlagSet) {
o.surgeryBaseOptions.AddFlags(fs)
fs.Uint64VarP(&o.sourcePageId, "from-page", "", o.sourcePageId, "source page Id")
fs.Uint64VarP(&o.destinationPageId, "to-page", "", o.destinationPageId, "destination page Id")
_ = cobra.MarkFlagRequired(fs, "from-page")
_ = cobra.MarkFlagRequired(fs, "to-page")
}
func (o *surgeryCopyPageOptions) Validate() error {
if err := o.surgeryBaseOptions.Validate(); err != nil {
return err
}
if o.sourcePageId == o.destinationPageId {
return fmt.Errorf("'--from-page' and '--to-page' have the same value: %d", o.sourcePageId)
}
return nil
}
func newSurgeryCopyPageCommand() *cobra.Command {
var o surgeryCopyPageOptions
copyPageCmd := &cobra.Command{
Use: "copy-page <bbolt-file>",
Short: "Copy page from the source page Id to the destination page Id",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryCopyPageFunc(args[0], o)
},
}
o.AddFlags(copyPageCmd.Flags())
return copyPageCmd
}
func surgeryCopyPageFunc(srcDBPath string, cfg surgeryCopyPageOptions) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[copy-page] copy file failed: %w", err)
}
if err := surgeon.CopyPage(cfg.outputDBFilePath, common.Pgid(cfg.sourcePageId), common.Pgid(cfg.destinationPageId)); err != nil {
return fmt.Errorf("copy-page command failed: %w", err)
}
meta, err := readMetaPage(srcDBPath)
if err != nil {
return err
}
if meta.IsFreelistPersisted() {
fmt.Fprintf(os.Stdout, "WARNING: the free list might have changed.\n")
fmt.Fprintf(os.Stdout, "Please consider executing `./bbolt surgery freelist abandon ...`\n")
}
fmt.Fprintf(os.Stdout, "The page %d was successfully copied to page %d\n", cfg.sourcePageId, cfg.destinationPageId)
return nil
}
type surgeryClearPageOptions struct {
surgeryBaseOptions
pageId uint64
}
func (o *surgeryClearPageOptions) AddFlags(fs *pflag.FlagSet) {
o.surgeryBaseOptions.AddFlags(fs)
fs.Uint64VarP(&o.pageId, "pageId", "", o.pageId, "page Id")
_ = cobra.MarkFlagRequired(fs, "pageId")
}
func (o *surgeryClearPageOptions) Validate() error {
if err := o.surgeryBaseOptions.Validate(); err != nil {
return err
}
if o.pageId < 2 {
return fmt.Errorf("the pageId must be at least 2, but got %d", o.pageId)
}
return nil
}
func newSurgeryClearPageCommand() *cobra.Command {
var o surgeryClearPageOptions
clearPageCmd := &cobra.Command{
Use: "clear-page <bbolt-file>",
Short: "Clears all elements from the given page, which can be a branch or leaf page",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryClearPageFunc(args[0], o)
},
}
o.AddFlags(clearPageCmd.Flags())
return clearPageCmd
}
func surgeryClearPageFunc(srcDBPath string, cfg surgeryClearPageOptions) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[clear-page] copy file failed: %w", err)
}
needAbandonFreelist, err := surgeon.ClearPage(cfg.outputDBFilePath, common.Pgid(cfg.pageId))
if err != nil {
return fmt.Errorf("clear-page command failed: %w", err)
}
if needAbandonFreelist {
fmt.Fprintf(os.Stdout, "WARNING: The clearing has abandoned some pages that are not yet referenced from free list.\n")
fmt.Fprintf(os.Stdout, "Please consider executing `./bbolt surgery freelist abandon ...`\n")
}
fmt.Fprintf(os.Stdout, "The page (%d) was cleared\n", cfg.pageId)
return nil
}
type surgeryClearPageElementsOptions struct {
surgeryBaseOptions
pageId uint64
startElementIdx int
endElementIdx int
}
func (o *surgeryClearPageElementsOptions) AddFlags(fs *pflag.FlagSet) {
o.surgeryBaseOptions.AddFlags(fs)
fs.Uint64VarP(&o.pageId, "pageId", "", o.pageId, "page id")
fs.IntVarP(&o.startElementIdx, "from-index", "", o.startElementIdx, "start element index (included) to clear, starting from 0")
fs.IntVarP(&o.endElementIdx, "to-index", "", o.endElementIdx, "end element index (excluded) to clear, starting from 0, -1 means to the end of page")
_ = cobra.MarkFlagRequired(fs, "pageId")
_ = cobra.MarkFlagRequired(fs, "from-index")
_ = cobra.MarkFlagRequired(fs, "to-index")
}
func (o *surgeryClearPageElementsOptions) Validate() error {
if err := o.surgeryBaseOptions.Validate(); err != nil {
return err
}
if o.pageId < 2 {
return fmt.Errorf("the pageId must be at least 2, but got %d", o.pageId)
}
return nil
}
func newSurgeryClearPageElementsCommand() *cobra.Command {
var o surgeryClearPageElementsOptions
clearElementCmd := &cobra.Command{
Use: "clear-page-elements <bbolt-file>",
Short: "Clears elements from the given page, which can be a branch or leaf page",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryClearPageElementFunc(args[0], o)
},
}
o.AddFlags(clearElementCmd.Flags())
return clearElementCmd
}
func surgeryClearPageElementFunc(srcDBPath string, cfg surgeryClearPageElementsOptions) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[clear-page-element] copy file failed: %w", err)
}
needAbandonFreelist, err := surgeon.ClearPageElements(cfg.outputDBFilePath, common.Pgid(cfg.pageId), cfg.startElementIdx, cfg.endElementIdx, false)
if err != nil {
return fmt.Errorf("clear-page-element command failed: %w", err)
}
if needAbandonFreelist {
fmt.Fprintf(os.Stdout, "WARNING: The clearing has abandoned some pages that are not yet referenced from free list.\n")
fmt.Fprintf(os.Stdout, "Please consider executing `./bbolt surgery freelist abandon ...`\n")
}
fmt.Fprintf(os.Stdout, "All elements in [%d, %d) in page %d were cleared\n", cfg.startElementIdx, cfg.endElementIdx, cfg.pageId)
return nil
}
func readMetaPage(path string) (*common.Meta, error) {
pageSize, _, err := guts_cli.ReadPageAndHWMSize(path)
if err != nil {
return nil, fmt.Errorf("read Page size failed: %w", err)
}
m := make([]*common.Meta, 2)
for i := 0; i < 2; i++ {
m[i], _, err = ReadMetaPageAt(path, uint32(i), uint32(pageSize))
if err != nil {
return nil, fmt.Errorf("read meta page %d failed: %w", i, err)
}
}
if m[0].Txid() > m[1].Txid() {
return m[0], nil
}
return m[1], nil
}
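Putting the commands in this file together, a hedged sketch of typical invocations (paths and page IDs are hypothetical; every command writes a repaired copy to `--output` and never modifies the source file):
```bash
# roll back the latest transaction by restoring the non-active meta page
$ bbolt surgery revert-meta-page ./broken.db --output ./reverted.db

# copy the contents of page 3 over page 2
$ bbolt surgery copy-page ./broken.db --output ./fixed.db --from-page 3 --to-page 2

# clear all elements from page 3
$ bbolt surgery clear-page ./broken.db --output ./fixed.db --pageId 3

# clear elements [3, 7) from page 4; a --to-index of -1 means "to the end of the page"
$ bbolt surgery clear-page-elements ./broken.db --output ./fixed.db --pageId 4 --from-index 3 --to-index 7
```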

View File

@ -0,0 +1,111 @@
package main
import (
"fmt"
"os"
"github.com/spf13/cobra"
bolt "github.com/tutus-one/tutus-bolt"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/surgeon"
)
func newSurgeryFreelistCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "freelist <subcommand>",
Short: "freelist related surgery commands",
}
cmd.AddCommand(newSurgeryFreelistAbandonCommand())
cmd.AddCommand(newSurgeryFreelistRebuildCommand())
return cmd
}
func newSurgeryFreelistAbandonCommand() *cobra.Command {
var o surgeryBaseOptions
abandonFreelistCmd := &cobra.Command{
Use: "abandon <bbolt-file>",
Short: "Abandon the freelist from both meta pages",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryFreelistAbandonFunc(args[0], o)
},
}
o.AddFlags(abandonFreelistCmd.Flags())
return abandonFreelistCmd
}
func surgeryFreelistAbandonFunc(srcDBPath string, cfg surgeryBaseOptions) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[freelist abandon] copy file failed: %w", err)
}
if err := surgeon.ClearFreelist(cfg.outputDBFilePath); err != nil {
return fmt.Errorf("abandom-freelist command failed: %w", err)
}
fmt.Fprintf(os.Stdout, "The freelist was abandoned in both meta pages.\nIt may cause some delay on next startup because bbolt needs to scan the whole db to reconstruct the free list.\n")
return nil
}
func newSurgeryFreelistRebuildCommand() *cobra.Command {
var o surgeryBaseOptions
rebuildFreelistCmd := &cobra.Command{
Use: "rebuild <bbolt-file>",
Short: "Rebuild the freelist",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryFreelistRebuildFunc(args[0], o)
},
}
o.AddFlags(rebuildFreelistCmd.Flags())
return rebuildFreelistCmd
}
func surgeryFreelistRebuildFunc(srcDBPath string, cfg surgeryBaseOptions) error {
// Ensure source file exists.
fi, err := checkSourceDBPath(srcDBPath)
if err != nil {
return err
}
// Make sure the freelist isn't present in the source file.
meta, err := readMetaPage(srcDBPath)
if err != nil {
return err
}
if meta.IsFreelistPersisted() {
return ErrSurgeryFreelistAlreadyExist
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[freelist rebuild] copy file failed: %w", err)
}
// bbolt automatically reconstructs & syncs the freelist in write mode.
db, err := bolt.Open(cfg.outputDBFilePath, fi.Mode(), &bolt.Options{NoFreelistSync: false})
if err != nil {
return fmt.Errorf("[freelist rebuild] open db file failed: %w", err)
}
err = db.Close()
if err != nil {
return fmt.Errorf("[freelist rebuild] close db file failed: %w", err)
}
fmt.Fprintf(os.Stdout, "The freelist was successfully rebuilt.\n")
return nil
}
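A hedged sketch of the intended workflow (paths hypothetical): if a freelist is already persisted, abandon it first, then rebuild:
```bash
$ bbolt surgery freelist abandon ./broken.db --output ./no-freelist.db
$ bbolt surgery freelist rebuild ./no-freelist.db --output ./rebuilt.db
```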

View File

@ -0,0 +1,103 @@
package main_test
import (
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
main "github.com/tutus-one/tutus-bolt/cmd/bbolt"
"github.com/tutus-one/tutus-bolt/internal/btesting"
"github.com/tutus-one/tutus-bolt/internal/common"
)
func TestSurgery_Freelist_Abandon(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "db")
rootCmd.SetArgs([]string{
"surgery", "freelist", "abandon", srcPath,
"--output", output,
})
err := rootCmd.Execute()
require.NoError(t, err)
meta0 := loadMetaPage(t, output, 0)
assert.Equal(t, common.PgidNoFreelist, meta0.Freelist())
meta1 := loadMetaPage(t, output, 1)
assert.Equal(t, common.PgidNoFreelist, meta1.Freelist())
}
func TestSurgery_Freelist_Rebuild(t *testing.T) {
testCases := []struct {
name string
hasFreelist bool
expectedError error
}{
{
name: "normal operation",
hasFreelist: false,
expectedError: nil,
},
{
name: "already has freelist",
hasFreelist: true,
expectedError: main.ErrSurgeryFreelistAlreadyExist,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{
PageSize: pageSize,
NoFreelistSync: !tc.hasFreelist,
})
srcPath := db.Path()
err := db.Update(func(tx *bolt.Tx) error {
// do nothing
return nil
})
require.NoError(t, err)
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
// Verify the freelist isn't synced in the beginning
meta := readMetaPage(t, srcPath)
if tc.hasFreelist {
if meta.Freelist() <= 1 || meta.Freelist() >= meta.Pgid() {
t.Fatalf("freelist (%d) isn't in the valid range (1, %d)", meta.Freelist(), meta.Pgid())
}
} else {
require.Equal(t, common.PgidNoFreelist, meta.Freelist())
}
// Execute `surgery freelist rebuild` command
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "db")
rootCmd.SetArgs([]string{
"surgery", "freelist", "rebuild", srcPath,
"--output", output,
})
err = rootCmd.Execute()
require.Equal(t, tc.expectedError, err)
if tc.expectedError == nil {
// Verify the freelist has already been rebuilt.
meta = readMetaPage(t, output)
if meta.Freelist() <= 1 || meta.Freelist() >= meta.Pgid() {
t.Fatalf("freelist (%d) isn't in the valid range (1, %d)", meta.Freelist(), meta.Pgid())
}
}
})
}
}

View File

@ -0,0 +1,275 @@
package main
import (
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/tutus-one/tutus-bolt/internal/common"
)
const (
metaFieldPageSize = "pageSize"
metaFieldRoot = "root"
metaFieldFreelist = "freelist"
metaFieldPgid = "pgid"
)
func newSurgeryMetaCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "meta <subcommand>",
Short: "meta page related surgery commands",
}
cmd.AddCommand(newSurgeryMetaValidateCommand())
cmd.AddCommand(newSurgeryMetaUpdateCommand())
return cmd
}
func newSurgeryMetaValidateCommand() *cobra.Command {
metaValidateCmd := &cobra.Command{
Use: "validate <bbolt-file>",
Short: "Validate both meta pages",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return surgeryMetaValidateFunc(args[0])
},
}
return metaValidateCmd
}
func surgeryMetaValidateFunc(srcDBPath string) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
var pageSize uint32
for i := 0; i <= 1; i++ {
m, _, err := ReadMetaPageAt(srcDBPath, uint32(i), pageSize)
if err != nil {
return fmt.Errorf("read meta page %d failed: %w", i, err)
}
if mValidateErr := m.Validate(); mValidateErr != nil {
fmt.Fprintf(os.Stdout, "WARNING: The meta page %d isn't valid: %v!\n", i, mValidateErr)
} else {
fmt.Fprintf(os.Stdout, "The meta page %d is valid!\n", i)
}
pageSize = m.PageSize()
}
return nil
}
type surgeryMetaUpdateOptions struct {
surgeryBaseOptions
fields []string
metaPageId uint32
}
var allowedMetaUpdateFields = map[string]struct{}{
metaFieldPageSize: {},
metaFieldRoot: {},
metaFieldFreelist: {},
metaFieldPgid: {},
}
// AddFlags sets the flags for `meta update` command.
// Example: --fields root:16,freelist:8 --fields pgid:128
// Result: []string{"root:16", "freelist:8", "pgid:128"}
func (o *surgeryMetaUpdateOptions) AddFlags(fs *pflag.FlagSet) {
o.surgeryBaseOptions.AddFlags(fs)
fs.StringSliceVarP(&o.fields, "fields", "", o.fields, "comma separated list of fields (supported fields: pageSize, root, freelist and pgid) to be updated, and each item is a colon-separated key-value pair")
fs.Uint32VarP(&o.metaPageId, "meta-page", "", o.metaPageId, "the meta page ID to operate on, valid values are 0 and 1")
}
func (o *surgeryMetaUpdateOptions) Validate() error {
if err := o.surgeryBaseOptions.Validate(); err != nil {
return err
}
if o.metaPageId > 1 {
return fmt.Errorf("invalid meta page id: %d", o.metaPageId)
}
for _, field := range o.fields {
kv := strings.Split(field, ":")
if len(kv) != 2 {
return fmt.Errorf("invalid key-value pair: %s", field)
}
if _, ok := allowedMetaUpdateFields[kv[0]]; !ok {
return fmt.Errorf("field %q isn't allowed to be updated", kv[0])
}
if _, err := strconv.ParseUint(kv[1], 10, 64); err != nil {
return fmt.Errorf("invalid value %q for field %q", kv[1], kv[0])
}
}
return nil
}
func newSurgeryMetaUpdateCommand() *cobra.Command {
var o surgeryMetaUpdateOptions
metaUpdateCmd := &cobra.Command{
Use: "update <bbolt-file>",
Short: "Update fields in meta pages",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
if err := o.Validate(); err != nil {
return err
}
return surgeryMetaUpdateFunc(args[0], o)
},
}
o.AddFlags(metaUpdateCmd.Flags())
return metaUpdateCmd
}
func surgeryMetaUpdateFunc(srcDBPath string, cfg surgeryMetaUpdateOptions) error {
if _, err := checkSourceDBPath(srcDBPath); err != nil {
return err
}
if err := common.CopyFile(srcDBPath, cfg.outputDBFilePath); err != nil {
return fmt.Errorf("[meta update] copy file failed: %w", err)
}
// read the page size from the first meta page if we want to edit the second meta page.
var pageSize uint32
if cfg.metaPageId == 1 {
m0, _, err := ReadMetaPageAt(cfg.outputDBFilePath, 0, pageSize)
if err != nil {
return fmt.Errorf("read the first meta page failed: %w", err)
}
pageSize = m0.PageSize()
}
// update the specified meta page
m, buf, err := ReadMetaPageAt(cfg.outputDBFilePath, cfg.metaPageId, pageSize)
if err != nil {
return fmt.Errorf("read meta page %d failed: %w", cfg.metaPageId, err)
}
mChanged := updateMetaField(m, parseFields(cfg.fields))
if mChanged {
if err := writeMetaPageAt(cfg.outputDBFilePath, buf, cfg.metaPageId, pageSize); err != nil {
return fmt.Errorf("[meta update] write meta page %d failed: %w", cfg.metaPageId, err)
}
}
if cfg.metaPageId == 1 && pageSize != m.PageSize() {
fmt.Fprintf(os.Stdout, "WARNING: The page size (%d) in the first meta page doesn't match the second meta page (%d)\n", pageSize, m.PageSize())
}
// Display results
if mChanged {
fmt.Fprintf(os.Stdout, "The meta page %d has been updated!\n", cfg.metaPageId)
} else {
fmt.Fprintln(os.Stdout, "Nothing changed!")
}
return nil
}
func parseFields(fields []string) map[string]uint64 {
fieldsMap := make(map[string]uint64)
for _, field := range fields {
kv := strings.SplitN(field, ":", 2)
val, _ := strconv.ParseUint(kv[1], 10, 64) // the input was already validated by Validate()
fieldsMap[kv[0]] = val
}
return fieldsMap
}
func updateMetaField(m *common.Meta, fields map[string]uint64) bool {
changed := false
for key, val := range fields {
switch key {
case metaFieldPageSize:
m.SetPageSize(uint32(val))
case metaFieldRoot:
m.SetRootBucket(common.NewInBucket(common.Pgid(val), 0))
case metaFieldFreelist:
m.SetFreelist(common.Pgid(val))
case metaFieldPgid:
m.SetPgid(common.Pgid(val))
}
changed = true
}
if m.Magic() != common.Magic {
m.SetMagic(common.Magic)
changed = true
}
if m.Version() != common.Version {
m.SetVersion(common.Version)
changed = true
}
if m.Flags() != common.MetaPageFlag {
m.SetFlags(common.MetaPageFlag)
changed = true
}
newChecksum := m.Sum64()
if m.Checksum() != newChecksum {
m.SetChecksum(newChecksum)
changed = true
}
return changed
}
func ReadMetaPageAt(dbPath string, metaPageId uint32, pageSize uint32) (*common.Meta, []byte, error) {
if metaPageId > 1 {
return nil, nil, fmt.Errorf("invalid metaPageId: %d", metaPageId)
}
f, err := os.OpenFile(dbPath, os.O_RDONLY, 0444)
if err != nil {
return nil, nil, err
}
defer f.Close()
// The meta page is just 64 bytes, and definitely less than 1024 bytes,
// so it's fine to only read 1024 bytes. Note we don't care about the
// pageSize when reading the first meta page, because we always read the
// file starting from offset 0. Actually the passed pageSize is 0 when
// reading the first meta page in the `surgery meta update` command.
buf := make([]byte, 1024)
n, err := f.ReadAt(buf, int64(metaPageId*pageSize))
if n == len(buf) && (err == nil || err == io.EOF) {
return common.LoadPageMeta(buf), buf, nil
}
return nil, nil, err
}
func writeMetaPageAt(dbPath string, buf []byte, metaPageId uint32, pageSize uint32) error {
if metaPageId > 1 {
return fmt.Errorf("invalid metaPageId: %d", metaPageId)
}
f, err := os.OpenFile(dbPath, os.O_RDWR, 0666)
if err != nil {
return err
}
defer f.Close()
n, err := f.WriteAt(buf, int64(metaPageId*pageSize))
if n == len(buf) && (err == nil || err == io.EOF) {
return nil
}
return err
}
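Illustrative invocations of the two subcommands above (paths and field values are hypothetical):
```bash
# report whether each meta page passes Validate()
$ bbolt surgery meta validate ./my.db

# rewrite selected fields of meta page 0; magic, version, flags and checksum are fixed up automatically
$ bbolt surgery meta update ./broken.db --output ./fixed.db --meta-page 0 --fields root:16,pgid:128
```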

View File

@ -0,0 +1,126 @@
package main_test
import (
"fmt"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
main "github.com/tutus-one/tutus-bolt/cmd/bbolt"
"github.com/tutus-one/tutus-bolt/internal/btesting"
"github.com/tutus-one/tutus-bolt/internal/common"
)
func TestSurgery_Meta_Validate(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
// validate the meta pages
rootCmd := main.NewRootCommand()
rootCmd.SetArgs([]string{
"surgery", "meta", "validate", srcPath,
})
err := rootCmd.Execute()
require.NoError(t, err)
// TODO: add one more case where the validation fails. We need to
// make the command output configurable, so that test cases can set
// a customized io.Writer.
}
func TestSurgery_Meta_Update(t *testing.T) {
testCases := []struct {
name string
root common.Pgid
freelist common.Pgid
pgid common.Pgid
}{
{
name: "root changed",
root: 50,
},
{
name: "freelist changed",
freelist: 40,
},
{
name: "pgid changed",
pgid: 600,
},
{
name: "both root and freelist changed",
root: 45,
freelist: 46,
},
{
name: "both pgid and freelist changed",
pgid: 256,
freelist: 47,
},
{
name: "all fields changed",
root: 43,
freelist: 62,
pgid: 256,
},
}
for _, tc := range testCases {
for i := 0; i <= 1; i++ {
tc := tc
metaPageId := uint32(i)
t.Run(tc.name, func(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
var fields []string
if tc.root != 0 {
fields = append(fields, fmt.Sprintf("root:%d", tc.root))
}
if tc.freelist != 0 {
fields = append(fields, fmt.Sprintf("freelist:%d", tc.freelist))
}
if tc.pgid != 0 {
fields = append(fields, fmt.Sprintf("pgid:%d", tc.pgid))
}
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "db")
rootCmd.SetArgs([]string{
"surgery", "meta", "update", srcPath,
"--output", output,
"--meta-page", fmt.Sprintf("%d", metaPageId),
"--fields", strings.Join(fields, ","),
})
err := rootCmd.Execute()
require.NoError(t, err)
m, _, err := main.ReadMetaPageAt(output, metaPageId, 4096)
require.NoError(t, err)
require.Equal(t, common.Magic, m.Magic())
require.Equal(t, common.Version, m.Version())
if tc.root != 0 {
require.Equal(t, tc.root, m.RootBucket().RootPage())
}
if tc.freelist != 0 {
require.Equal(t, tc.freelist, m.Freelist())
}
if tc.pgid != 0 {
require.Equal(t, tc.pgid, m.Pgid())
}
})
}
}
}

View File

@ -0,0 +1,636 @@
package main_test
import (
"fmt"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
main "github.com/tutus-one/tutus-bolt/cmd/bbolt"
"github.com/tutus-one/tutus-bolt/internal/btesting"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
func TestSurgery_RevertMetaPage(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
srcFile, err := os.Open(srcPath)
require.NoError(t, err)
defer srcFile.Close()
// Read both meta0 and meta1 from srcFile
srcBuf0 := readPage(t, srcPath, 0, pageSize)
srcBuf1 := readPage(t, srcPath, 1, pageSize)
meta0Page := common.LoadPageMeta(srcBuf0)
meta1Page := common.LoadPageMeta(srcBuf1)
// Get the non-active meta page
nonActiveSrcBuf := srcBuf0
nonActiveMetaPageId := 0
if meta0Page.Txid() > meta1Page.Txid() {
nonActiveSrcBuf = srcBuf1
nonActiveMetaPageId = 1
}
t.Logf("non active meta page id: %d", nonActiveMetaPageId)
// revert the meta page
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "db")
rootCmd.SetArgs([]string{
"surgery", "revert-meta-page", srcPath,
"--output", output,
})
err = rootCmd.Execute()
require.NoError(t, err)
// read both meta0 and meta1 from dst file
dstBuf0 := readPage(t, output, 0, pageSize)
dstBuf1 := readPage(t, output, 1, pageSize)
// check result. Note we should skip the page ID
assert.Equal(t, pageDataWithoutPageId(nonActiveSrcBuf), pageDataWithoutPageId(dstBuf0))
assert.Equal(t, pageDataWithoutPageId(nonActiveSrcBuf), pageDataWithoutPageId(dstBuf1))
}
func TestSurgery_CopyPage(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
// Insert some sample data
t.Log("Insert some sample data")
err := db.Fill([]byte("data"), 1, 20,
func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
func(tx int, k int) []byte { return make([]byte, 10) },
)
require.NoError(t, err)
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
// copy page 3 to page 2
t.Log("copy page 3 to page 2")
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "dstdb")
rootCmd.SetArgs([]string{
"surgery", "copy-page", srcPath,
"--output", output,
"--from-page", "3",
"--to-page", "2",
})
err = rootCmd.Execute()
require.NoError(t, err)
// The page 2 should have exactly the same data as page 3.
t.Log("Verify result")
srcPageId3Data := readPage(t, srcPath, 3, pageSize)
dstPageId3Data := readPage(t, output, 3, pageSize)
dstPageId2Data := readPage(t, output, 2, pageSize)
assert.Equal(t, srcPageId3Data, dstPageId3Data)
assert.Equal(t, pageDataWithoutPageId(srcPageId3Data), pageDataWithoutPageId(dstPageId2Data))
}
// TODO(ahrtr): add test case below for `surgery clear-page` command:
// 1. The page is a branch page. All its children should become free pages.
func TestSurgery_ClearPage(t *testing.T) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
// Insert some sample data
t.Log("Insert some sample data")
err := db.Fill([]byte("data"), 1, 20,
func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
func(tx int, k int) []byte { return make([]byte, 10) },
)
require.NoError(t, err)
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
// clear page 3
t.Log("clear page 3")
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "dstdb")
rootCmd.SetArgs([]string{
"surgery", "clear-page", srcPath,
"--output", output,
"--pageId", "3",
})
err = rootCmd.Execute()
require.NoError(t, err)
t.Log("Verify result")
dstPageId3Data := readPage(t, output, 3, pageSize)
p := common.LoadPage(dstPageId3Data)
assert.Equal(t, uint16(0), p.Count())
assert.Equal(t, uint32(0), p.Overflow())
}
func TestSurgery_ClearPageElements_Without_Overflow(t *testing.T) {
testCases := []struct {
name string
from int
to int
isBranchPage bool
setEndIdxAsCount bool
removeOnlyOneElement bool // only valid when setEndIdxAsCount == true, and startIdx = endIdx -1 in this case.
expectError bool
}{
// normal range in leaf page
{
name: "normal range in leaf page: [4, 8)",
from: 4,
to: 8,
},
{
name: "normal range in leaf page: [5, -1)",
from: 4,
to: -1,
},
{
name: "normal range in leaf page: all",
from: 0,
to: -1,
},
{
name: "normal range in leaf page: [0, 7)",
from: 0,
to: 7,
},
{
name: "normal range in leaf page: [3, count)",
from: 4,
setEndIdxAsCount: true,
},
// normal range in branch page
{
name: "normal range in branch page: [4, 8)",
from: 4,
to: 8,
isBranchPage: true,
},
{
name: "normal range in branch page: [5, -1)",
from: 4,
to: -1,
isBranchPage: true,
},
{
name: "normal range in branch page: all",
from: 0,
to: -1,
isBranchPage: true,
},
{
name: "normal range in branch page: [0, 7)",
from: 0,
to: 7,
isBranchPage: true,
},
{
name: "normal range in branch page: [3, count)",
from: 4,
isBranchPage: true,
setEndIdxAsCount: true,
},
// remove only one element
{
name: "one element: the first one",
from: 0,
to: 1,
},
{
name: "one element: [6, 7)",
from: 6,
to: 7,
},
{
name: "one element: the last one",
setEndIdxAsCount: true,
removeOnlyOneElement: true,
},
// abnormal range
{
name: "abnormal range: [-1, 4)",
from: -1,
to: 4,
expectError: true,
},
{
name: "abnormal range: [-2, 5)",
from: -2,
to: 5,
expectError: true,
},
{
name: "abnormal range: [3, 3)",
from: 3,
to: 3,
expectError: true,
},
{
name: "abnormal range: [5, 3)",
from: 5,
to: 3,
expectError: true,
},
{
name: "abnormal range: [3, -2)",
from: 3,
to: -2,
expectError: true,
},
{
name: "abnormal range: [3, 1000000)",
from: 3,
to: 1000000,
expectError: true,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
testSurgeryClearPageElementsWithoutOverflow(t, tc.from, tc.to, tc.isBranchPage, tc.setEndIdxAsCount, tc.removeOnlyOneElement, tc.expectError)
})
}
}
func testSurgeryClearPageElementsWithoutOverflow(t *testing.T, startIdx, endIdx int, isBranchPage, setEndIdxAsCount, removeOnlyOne, expectError bool) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
// Generate sample db
t.Log("Generate some sample data")
err := db.Fill([]byte("data"), 10, 200,
func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", tx*10000+k)) },
func(tx int, k int) []byte { return make([]byte, 10) },
)
require.NoError(t, err)
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
// find a page with at least 10 elements
var (
pageId uint64 = 2
elementCount uint16 = 0
)
for {
p, _, err := guts_cli.ReadPage(srcPath, pageId)
require.NoError(t, err)
if isBranchPage {
if p.IsBranchPage() && p.Count() > 10 {
elementCount = p.Count()
break
}
} else {
if p.IsLeafPage() && p.Count() > 10 {
elementCount = p.Count()
break
}
}
pageId++
}
t.Logf("The original element count: %d", elementCount)
if setEndIdxAsCount {
t.Logf("Set the endIdx as the element count: %d", elementCount)
endIdx = int(elementCount)
if removeOnlyOne {
startIdx = endIdx - 1
t.Logf("Set the startIdx as the endIdx-1: %d", startIdx)
}
}
// clear elements [startIdx, endIdx) in the page
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "db")
rootCmd.SetArgs([]string{
"surgery", "clear-page-elements", srcPath,
"--output", output,
"--pageId", fmt.Sprintf("%d", pageId),
"--from-index", fmt.Sprintf("%d", startIdx),
"--to-index", fmt.Sprintf("%d", endIdx),
})
err = rootCmd.Execute()
if expectError {
require.Error(t, err)
return
}
require.NoError(t, err)
// check the element count again
expectedCnt := 0
if endIdx == -1 {
expectedCnt = startIdx
} else {
expectedCnt = int(elementCount) - (endIdx - startIdx)
}
p, _, err := guts_cli.ReadPage(output, pageId)
require.NoError(t, err)
assert.Equal(t, expectedCnt, int(p.Count()))
compareDataAfterClearingElement(t, srcPath, output, pageId, isBranchPage, startIdx, endIdx)
}
func compareDataAfterClearingElement(t *testing.T, srcPath, dstPath string, pageId uint64, isBranchPage bool, startIdx, endIdx int) {
srcPage, _, err := guts_cli.ReadPage(srcPath, pageId)
require.NoError(t, err)
dstPage, _, err := guts_cli.ReadPage(dstPath, pageId)
require.NoError(t, err)
var dstIdx uint16
for i := uint16(0); i < srcPage.Count(); i++ {
// skip the cleared elements
if i >= uint16(startIdx) && (i < uint16(endIdx) || endIdx == -1) {
continue
}
if isBranchPage {
srcElement := srcPage.BranchPageElement(i)
dstElement := dstPage.BranchPageElement(dstIdx)
require.Equal(t, srcElement.Key(), dstElement.Key())
require.Equal(t, srcElement.Pgid(), dstElement.Pgid())
} else {
srcElement := srcPage.LeafPageElement(i)
dstElement := dstPage.LeafPageElement(dstIdx)
require.Equal(t, srcElement.Flags(), dstElement.Flags())
require.Equal(t, srcElement.Key(), dstElement.Key())
require.Equal(t, srcElement.Value(), dstElement.Value())
}
dstIdx++
}
}
func TestSurgery_ClearPageElements_With_Overflow(t *testing.T) {
testCases := []struct {
name string
from int
to int
valueSizes []int
expectedOverflow int
}{
// big element
{
name: "remove a big element at the end",
valueSizes: []int{500, 500, 500, 2600},
from: 3,
to: 4,
expectedOverflow: 0,
},
{
name: "remove a big element at the begin",
valueSizes: []int{2600, 500, 500, 500},
from: 0,
to: 1,
expectedOverflow: 0,
},
{
name: "remove a big element in the middle",
valueSizes: []int{500, 2600, 500, 500},
from: 1,
to: 2,
expectedOverflow: 0,
},
// small element
{
name: "remove a small element at the end",
valueSizes: []int{500, 500, 3100, 100},
from: 3,
to: 4,
expectedOverflow: 1,
},
{
name: "remove a small element at the begin",
valueSizes: []int{100, 500, 3100, 500},
from: 0,
to: 1,
expectedOverflow: 1,
},
{
name: "remove a small element in the middle",
valueSizes: []int{500, 100, 3100, 500},
from: 1,
to: 2,
expectedOverflow: 1,
},
{
name: "remove a small element at the end of page with big overflow",
valueSizes: []int{500, 500, 4096 * 5, 100},
from: 3,
to: 4,
expectedOverflow: 5,
},
{
name: "remove a small element at the begin of page with big overflow",
valueSizes: []int{100, 500, 4096 * 6, 500},
from: 0,
to: 1,
expectedOverflow: 6,
},
{
name: "remove a small element in the middle of page with big overflow",
valueSizes: []int{500, 100, 4096 * 4, 500},
from: 1,
to: 2,
expectedOverflow: 4,
},
// huge element
{
name: "remove a huge element at the end",
valueSizes: []int{500, 500, 500, 4096 * 5},
from: 3,
to: 4,
expectedOverflow: 0,
},
{
name: "remove a huge element at the begin",
valueSizes: []int{4096 * 5, 500, 500, 500},
from: 0,
to: 1,
expectedOverflow: 0,
},
{
name: "remove a huge element in the middle",
valueSizes: []int{500, 4096 * 5, 500, 500},
from: 1,
to: 2,
expectedOverflow: 0,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
testSurgeryClearPageElementsWithOverflow(t, tc.from, tc.to, tc.valueSizes, tc.expectedOverflow)
})
}
}
func testSurgeryClearPageElementsWithOverflow(t *testing.T, startIdx, endIdx int, valueSizes []int, expectedOverflow int) {
pageSize := 4096
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: pageSize})
srcPath := db.Path()
// Generate sample db
err := db.Update(func(tx *bolt.Tx) error {
b, _ := tx.CreateBucketIfNotExists([]byte("data"))
for i, valueSize := range valueSizes {
key := []byte(fmt.Sprintf("%04d", i))
val := make([]byte, valueSize)
if putErr := b.Put(key, val); putErr != nil {
return putErr
}
}
return nil
})
require.NoError(t, err)
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
// find a page with overflow pages
var (
pageId uint64 = 2
elementCount uint16 = 0
)
for {
p, _, err := guts_cli.ReadPage(srcPath, pageId)
require.NoError(t, err)
if p.Overflow() > 0 {
elementCount = p.Count()
break
}
pageId++
}
t.Logf("The original element count: %d", elementCount)
// clear elements [startIdx, endIdx) in the page
rootCmd := main.NewRootCommand()
output := filepath.Join(t.TempDir(), "db")
rootCmd.SetArgs([]string{
"surgery", "clear-page-elements", srcPath,
"--output", output,
"--pageId", fmt.Sprintf("%d", pageId),
"--from-index", fmt.Sprintf("%d", startIdx),
"--to-index", fmt.Sprintf("%d", endIdx),
})
err = rootCmd.Execute()
require.NoError(t, err)
// check the element count again
expectedCnt := 0
if endIdx == -1 {
expectedCnt = startIdx
} else {
expectedCnt = int(elementCount) - (endIdx - startIdx)
}
p, _, err := guts_cli.ReadPage(output, pageId)
require.NoError(t, err)
assert.Equal(t, expectedCnt, int(p.Count()))
assert.Equal(t, expectedOverflow, int(p.Overflow()))
compareDataAfterClearingElement(t, srcPath, output, pageId, false, startIdx, endIdx)
}
func TestSurgeryRequiredFlags(t *testing.T) {
errMsgFmt := `required flag(s) "%s" not set`
testCases := []struct {
name string
args []string
expectedErrMsg string
}{
// --output is required for all surgery commands
{
name: "no output flag for revert-meta-page",
args: []string{"surgery", "revert-meta-page", "db"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "output"),
},
{
name: "no output flag for copy-page",
args: []string{"surgery", "copy-page", "db", "--from-page", "3", "--to-page", "2"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "output"),
},
{
name: "no output flag for clear-page",
args: []string{"surgery", "clear-page", "db", "--pageId", "3"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "output"),
},
{
name: "no output flag for clear-page-element",
args: []string{"surgery", "clear-page-elements", "db", "--pageId", "4", "--from-index", "3", "--to-index", "5"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "output"),
},
{
name: "no output flag for freelist abandon",
args: []string{"surgery", "freelist", "abandon", "db"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "output"),
},
{
name: "no output flag for freelist rebuild",
args: []string{"surgery", "freelist", "rebuild", "db"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "output"),
},
// --from-page and --to-page are required for 'surgery copy-page' command
{
name: "no from-page flag for copy-page",
args: []string{"surgery", "copy-page", "db", "--output", "db", "--to-page", "2"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "from-page"),
},
{
name: "no to-page flag for copy-page",
args: []string{"surgery", "copy-page", "db", "--output", "db", "--from-page", "2"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "to-page"),
},
// --pageId is required for 'surgery clear-page' command
{
name: "no pageId flag for clear-page",
args: []string{"surgery", "clear-page", "db", "--output", "db"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "pageId"),
},
// --pageId, --from-index and --to-index are required for 'surgery clear-page-element' command
{
name: "no pageId flag for clear-page-element",
args: []string{"surgery", "clear-page-elements", "db", "--output", "newdb", "--from-index", "3", "--to-index", "5"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "pageId"),
},
{
name: "no from-index flag for clear-page-element",
args: []string{"surgery", "clear-page-elements", "db", "--output", "newdb", "--pageId", "2", "--to-index", "5"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "from-index"),
},
{
name: "no to-index flag for clear-page-element",
args: []string{"surgery", "clear-page-elements", "db", "--output", "newdb", "--pageId", "2", "--from-index", "3"},
expectedErrMsg: fmt.Sprintf(errMsgFmt, "to-index"),
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
rootCmd := main.NewRootCommand()
rootCmd.SetArgs(tc.args)
err := rootCmd.Execute()
require.ErrorContains(t, err, tc.expectedErrMsg)
})
}
}

View File

@ -0,0 +1,25 @@
package main
import (
"fmt"
"runtime"
"github.com/spf13/cobra"
"github.com/tutus-one/tutus-bolt/version"
)
func newVersionCommand() *cobra.Command {
versionCmd := &cobra.Command{
Use: "version",
Short: "print the current version of bbolt",
Long: "print the current version of bbolt",
Run: func(cmd *cobra.Command, args []string) {
fmt.Printf("bbolt Version: %s\n", version.Version)
fmt.Printf("Go Version: %s\n", runtime.Version())
fmt.Printf("Go OS/Arch: %s/%s\n", runtime.GOOS, runtime.GOARCH)
},
}
return versionCmd
}

1795
cmd/bbolt/main.go Normal file

File diff suppressed because it is too large

754
cmd/bbolt/main_test.go Normal file
View File

@ -0,0 +1,754 @@
package main_test
import (
"bytes"
crypto "crypto/rand"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
"math/rand"
"os"
"strconv"
"strings"
"sync"
"testing"
"github.com/tutus-one/tutus-bolt/internal/btesting"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
main "github.com/tutus-one/tutus-bolt/cmd/bbolt"
)
// Ensure the "info" command can print information about a database.
func TestInfoCommand_Run(t *testing.T) {
db := btesting.MustCreateDB(t)
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
// Run the info command.
m := NewMain()
if err := m.Run("info", db.Path()); err != nil {
t.Fatal(err)
}
}
// Ensure the "stats" command executes correctly with an empty database.
func TestStatsCommand_Run_EmptyDatabase(t *testing.T) {
// Skip if the system does not use a 4KB page size.
if os.Getpagesize() != 4096 {
t.Skip("system does not use 4KB page size")
}
db := btesting.MustCreateDB(t)
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
// Generate expected result.
exp := "Aggregate statistics for 0 buckets\n\n" +
"Page count statistics\n" +
"\tNumber of logical branch pages: 0\n" +
"\tNumber of physical branch overflow pages: 0\n" +
"\tNumber of logical leaf pages: 0\n" +
"\tNumber of physical leaf overflow pages: 0\n" +
"Tree statistics\n" +
"\tNumber of keys/value pairs: 0\n" +
"\tNumber of levels in B+tree: 0\n" +
"Page size utilization\n" +
"\tBytes allocated for physical branch pages: 0\n" +
"\tBytes actually used for branch data: 0 (0%)\n" +
"\tBytes allocated for physical leaf pages: 0\n" +
"\tBytes actually used for leaf data: 0 (0%)\n" +
"Bucket statistics\n" +
"\tTotal number of buckets: 0\n" +
"\tTotal number on inlined buckets: 0 (0%)\n" +
"\tBytes used for inlined buckets: 0 (0%)\n"
// Run the command.
m := NewMain()
if err := m.Run("stats", db.Path()); err != nil {
t.Fatal(err)
} else if m.Stdout.String() != exp {
t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String())
}
}
func TestDumpCommand_Run(t *testing.T) {
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: 4096})
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
exp := `0000010 edda 0ced 0200 0000 0010 0000 0000 0000`
m := NewMain()
err := m.Run("dump", db.Path(), "0")
require.NoError(t, err)
if !strings.Contains(m.Stdout.String(), exp) {
t.Fatalf("unexpected stdout:\n%s\n", m.Stdout.String())
}
}
func TestPageCommand_Run(t *testing.T) {
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: 4096})
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
exp := "Page ID: 0\n" +
"Page Type: meta\n" +
"Total Size: 4096 bytes\n" +
"Overflow pages: 0\n" +
"Version: 2\n" +
"Page Size: 4096 bytes\n" +
"Flags: 00000000\n" +
"Root: <pgid=3>\n" +
"Freelist: <pgid=2>\n" +
"HWM: <pgid=4>\n" +
"Txn ID: 0\n" +
"Checksum: 07516e114689fdee\n\n"
m := NewMain()
err := m.Run("page", db.Path(), "0")
require.NoError(t, err)
if m.Stdout.String() != exp {
t.Fatalf("unexpected stdout:\n%s\n%s", m.Stdout.String(), exp)
}
}
func TestPageItemCommand_Run(t *testing.T) {
testCases := []struct {
name string
printable bool
itemId string
expectedKey string
expectedValue string
}{
{
name: "printable items",
printable: true,
itemId: "0",
expectedKey: "key_0",
expectedValue: "value_0",
},
{
name: "non printable items",
printable: false,
itemId: "0",
expectedKey: hex.EncodeToString(convertInt64IntoBytes(0 + 1)),
expectedValue: hex.EncodeToString(convertInt64IntoBytes(0 + 2)),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: 4096})
srcPath := db.Path()
t.Log("Insert some sample data")
err := db.Update(func(tx *bolt.Tx) error {
b, bErr := tx.CreateBucketIfNotExists([]byte("data"))
if bErr != nil {
return bErr
}
for i := 0; i < 100; i++ {
if tc.printable {
if bErr = b.Put([]byte(fmt.Sprintf("key_%d", i)), []byte(fmt.Sprintf("value_%d", i))); bErr != nil {
return bErr
}
} else {
k, v := convertInt64IntoBytes(int64(i+1)), convertInt64IntoBytes(int64(i+2))
if bErr = b.Put(k, v); bErr != nil {
return bErr
}
}
}
return nil
})
require.NoError(t, err)
defer requireDBNoChange(t, dbData(t, srcPath), srcPath)
meta := readMetaPage(t, srcPath)
leafPageId := 0
for i := 2; i < int(meta.Pgid()); i++ {
p, _, err := guts_cli.ReadPage(srcPath, uint64(i))
require.NoError(t, err)
if p.IsLeafPage() && p.Count() > 1 {
leafPageId = int(p.Id())
}
}
require.NotEqual(t, 0, leafPageId)
m := NewMain()
err = m.Run("page-item", db.Path(), fmt.Sprintf("%d", leafPageId), tc.itemId)
require.NoError(t, err)
if !strings.Contains(m.Stdout.String(), tc.expectedKey) || !strings.Contains(m.Stdout.String(), tc.expectedValue) {
t.Fatalf("Unexpected output:\n%s\n", m.Stdout.String())
}
})
}
}
// Ensure the "stats" command can execute correctly.
func TestStatsCommand_Run(t *testing.T) {
// Skip if the system does not use a 4KB page size.
if os.Getpagesize() != 4096 {
t.Skip("system does not use 4KB page size")
}
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
// Create "foo" bucket.
b, err := tx.CreateBucket([]byte("foo"))
if err != nil {
return err
}
for i := 0; i < 10; i++ {
if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
return err
}
}
// Create "bar" bucket.
b, err = tx.CreateBucket([]byte("bar"))
if err != nil {
return err
}
for i := 0; i < 100; i++ {
if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
return err
}
}
// Create "baz" bucket.
b, err = tx.CreateBucket([]byte("baz"))
if err != nil {
return err
}
if err := b.Put([]byte("key"), []byte("value")); err != nil {
return err
}
return nil
}); err != nil {
t.Fatal(err)
}
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
// Generate expected result.
exp := "Aggregate statistics for 3 buckets\n\n" +
"Page count statistics\n" +
"\tNumber of logical branch pages: 0\n" +
"\tNumber of physical branch overflow pages: 0\n" +
"\tNumber of logical leaf pages: 1\n" +
"\tNumber of physical leaf overflow pages: 0\n" +
"Tree statistics\n" +
"\tNumber of keys/value pairs: 111\n" +
"\tNumber of levels in B+tree: 1\n" +
"Page size utilization\n" +
"\tBytes allocated for physical branch pages: 0\n" +
"\tBytes actually used for branch data: 0 (0%)\n" +
"\tBytes allocated for physical leaf pages: 4096\n" +
"\tBytes actually used for leaf data: 1996 (48%)\n" +
"Bucket statistics\n" +
"\tTotal number of buckets: 3\n" +
"\tTotal number on inlined buckets: 2 (66%)\n" +
"\tBytes used for inlined buckets: 236 (11%)\n"
// Run the command.
m := NewMain()
if err := m.Run("stats", db.Path()); err != nil {
t.Fatal(err)
} else if m.Stdout.String() != exp {
t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String())
}
}
// Ensure the "buckets" command can print a list of buckets.
func TestBucketsCommand_Run(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
for _, name := range []string{"foo", "bar", "baz"} {
_, err := tx.CreateBucket([]byte(name))
if err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
expected := "bar\nbaz\nfoo\n"
// Run the command.
m := NewMain()
if err := m.Run("buckets", db.Path()); err != nil {
t.Fatal(err)
} else if actual := m.Stdout.String(); actual != expected {
t.Fatalf("unexpected stdout:\n\n%s", actual)
}
}
// Ensure the "keys" command can print a list of keys for a bucket.
func TestKeysCommand_Run(t *testing.T) {
testCases := []struct {
name string
printable bool
testBucket string
expected string
}{
{
name: "printable keys",
printable: true,
testBucket: "foo",
expected: "foo-0\nfoo-1\nfoo-2\n",
},
{
name: "non printable keys",
printable: false,
testBucket: "bar",
expected: convertInt64KeysIntoHexString(100001, 100002, 100003),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Logf("creating test database for subtest '%s'", tc.name)
db := btesting.MustCreateDB(t)
err := db.Update(func(tx *bolt.Tx) error {
t.Logf("creating test bucket %s", tc.testBucket)
b, bErr := tx.CreateBucketIfNotExists([]byte(tc.testBucket))
if bErr != nil {
return fmt.Errorf("error creating test bucket %q: %v", tc.testBucket, bErr)
}
t.Logf("inserting test data into test bucket %s", tc.testBucket)
if tc.printable {
for i := 0; i < 3; i++ {
key := fmt.Sprintf("%s-%d", tc.testBucket, i)
if pErr := b.Put([]byte(key), []byte{0}); pErr != nil {
return pErr
}
}
} else {
for i := 100001; i < 100004; i++ {
k := convertInt64IntoBytes(int64(i))
if pErr := b.Put(k, []byte{0}); pErr != nil {
return pErr
}
}
}
return nil
})
require.NoError(t, err)
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
t.Log("running Keys cmd")
m := NewMain()
kErr := m.Run("keys", db.Path(), tc.testBucket)
require.NoError(t, kErr)
actual := m.Stdout.String()
assert.Equal(t, tc.expected, actual)
})
}
}
// Ensure the "get" command can print the value of a key in a bucket.
func TestGetCommand_Run(t *testing.T) {
testCases := []struct {
name string
printable bool
testBucket string
testKey string
expectedValue string
}{
{
name: "printable data",
printable: true,
testBucket: "foo",
testKey: "foo-1",
expectedValue: "val-foo-1\n",
},
{
name: "non printable data",
printable: false,
testBucket: "bar",
testKey: "100001",
expectedValue: hex.EncodeToString(convertInt64IntoBytes(100001)) + "\n",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte(tc.testBucket))
if err != nil {
return err
}
if tc.printable {
val := fmt.Sprintf("val-%s", tc.testKey)
if err := b.Put([]byte(tc.testKey), []byte(val)); err != nil {
return err
}
} else {
if err := b.Put([]byte(tc.testKey), convertInt64IntoBytes(100001)); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
// Run the command.
m := NewMain()
if err := m.Run("get", db.Path(), tc.testBucket, tc.testKey); err != nil {
t.Fatal(err)
}
actual := m.Stdout.String()
assert.Equal(t, tc.expectedValue, actual)
})
}
}
// Ensure the "pages" command neither panic, nor change the db file.
func TestPagesCommand_Run(t *testing.T) {
db := btesting.MustCreateDB(t)
err := db.Update(func(tx *bolt.Tx) error {
for _, name := range []string{"foo", "bar"} {
b, err := tx.CreateBucket([]byte(name))
if err != nil {
return err
}
for i := 0; i < 3; i++ {
key := fmt.Sprintf("%s-%d", name, i)
val := fmt.Sprintf("val-%s-%d", name, i)
if err := b.Put([]byte(key), []byte(val)); err != nil {
return err
}
}
}
return nil
})
require.NoError(t, err)
db.Close()
defer requireDBNoChange(t, dbData(t, db.Path()), db.Path())
// Run the command.
m := NewMain()
err = m.Run("pages", db.Path())
require.NoError(t, err)
}
// Ensure the "bench" command runs and exits without errors
func TestBenchCommand_Run(t *testing.T) {
tests := map[string]struct {
args []string
}{
"no-args": {},
"100k count": {[]string{"-count", "100000"}},
}
for name, test := range tests {
t.Run(name, func(t *testing.T) {
// Run the command.
m := NewMain()
args := append([]string{"bench"}, test.args...)
if err := m.Run(args...); err != nil {
t.Fatal(err)
}
stderr := m.Stderr.String()
stdout := m.Stdout.String()
if !strings.Contains(stderr, "starting write benchmark.") || !strings.Contains(stderr, "starting read benchmark.") {
t.Fatal(fmt.Errorf("benchmark result does not contain read/write start output:\n%s", stderr))
}
if strings.Contains(stderr, "iter mismatch") {
t.Fatal(fmt.Errorf("found iter mismatch in stdout:\n%s", stderr))
}
if !strings.Contains(stdout, "# Write") || !strings.Contains(stdout, "# Read") {
t.Fatal(fmt.Errorf("benchmark result does not contain read/write output:\n%s", stdout))
}
})
}
}
type ConcurrentBuffer struct {
m sync.Mutex
buf bytes.Buffer
}
func (b *ConcurrentBuffer) Read(p []byte) (n int, err error) {
b.m.Lock()
defer b.m.Unlock()
return b.buf.Read(p)
}
func (b *ConcurrentBuffer) Write(p []byte) (n int, err error) {
b.m.Lock()
defer b.m.Unlock()
return b.buf.Write(p)
}
func (b *ConcurrentBuffer) String() string {
b.m.Lock()
defer b.m.Unlock()
return b.buf.String()
}
// Main represents a test wrapper for main.Main that records output.
type Main struct {
*main.Main
Stdin ConcurrentBuffer
Stdout ConcurrentBuffer
Stderr ConcurrentBuffer
}
// NewMain returns a new instance of Main.
func NewMain() *Main {
m := &Main{Main: main.NewMain()}
m.Main.Stdin = &m.Stdin
m.Main.Stdout = &m.Stdout
m.Main.Stderr = &m.Stderr
return m
}
func TestCompactCommand_Run(t *testing.T) {
dstdb := btesting.MustCreateDB(t)
dstdb.Close()
// fill the db
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
n := 2 + rand.Intn(5)
for i := 0; i < n; i++ {
k := []byte(fmt.Sprintf("b%d", i))
b, err := tx.CreateBucketIfNotExists(k)
if err != nil {
return err
}
if err := b.SetSequence(uint64(i)); err != nil {
return err
}
if err := fillBucket(b, append(k, '.')); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
// make the db grow by adding large values, and delete them.
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucketIfNotExists([]byte("large_vals"))
if err != nil {
return err
}
n := 5 + rand.Intn(5)
for i := 0; i < n; i++ {
v := make([]byte, 1000*1000*(1+rand.Intn(5)))
_, err := crypto.Read(v)
if err != nil {
return err
}
if err := b.Put([]byte(fmt.Sprintf("l%d", i)), v); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.Update(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("large_vals")).Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
if err := c.Delete(); err != nil {
return err
}
}
return tx.DeleteBucket([]byte("large_vals"))
}); err != nil {
t.Fatal(err)
}
db.Close()
dbChk, err := chkdb(db.Path())
if err != nil {
t.Fatal(err)
}
m := NewMain()
if err := m.Run("compact", "-o", dstdb.Path(), db.Path()); err != nil {
t.Fatal(err)
}
dbChkAfterCompact, err := chkdb(db.Path())
if err != nil {
t.Fatal(err)
}
dstdbChk, err := chkdb(dstdb.Path())
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(dbChk, dbChkAfterCompact) {
t.Error("the original db has been touched")
}
if !bytes.Equal(dbChk, dstdbChk) {
t.Error("the compacted db data isn't the same than the original db")
}
}
func TestCommands_Run_NoArgs(t *testing.T) {
testCases := []struct {
name string
cmd string
expErr error
}{
{
name: "get",
cmd: "get",
expErr: main.ErrNotEnoughArgs,
},
{
name: "keys",
cmd: "keys",
expErr: main.ErrNotEnoughArgs,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
m := NewMain()
err := m.Run(tc.cmd)
require.ErrorIs(t, err, tc.expErr)
})
}
}
func fillBucket(b *bolt.Bucket, prefix []byte) error {
n := 10 + rand.Intn(50)
for i := 0; i < n; i++ {
v := make([]byte, 10*(1+rand.Intn(4)))
_, err := crypto.Read(v)
if err != nil {
return err
}
k := append(prefix, []byte(fmt.Sprintf("k%d", i))...)
if err := b.Put(k, v); err != nil {
return err
}
}
// limit depth of subbuckets
s := 2 + rand.Intn(4)
if len(prefix) > (2*s + 1) {
return nil
}
n = 1 + rand.Intn(3)
for i := 0; i < n; i++ {
k := append(prefix, []byte(fmt.Sprintf("b%d", i))...)
sb, err := b.CreateBucket(k)
if err != nil {
return err
}
if err := fillBucket(sb, append(k, '.')); err != nil {
return err
}
}
return nil
}
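// chkdb opens the db at path read-only and serializes its entire logical
// contents into a byte slice, so two databases can be compared for equality.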
func chkdb(path string) ([]byte, error) {
db, err := bolt.Open(path, 0600, &bolt.Options{ReadOnly: true})
if err != nil {
return nil, err
}
defer db.Close()
var buf bytes.Buffer
err = db.View(func(tx *bolt.Tx) error {
return tx.ForEach(func(name []byte, b *bolt.Bucket) error {
return walkBucket(b, name, nil, &buf)
})
})
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
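// walkBucket writes one line per key/value (prefixed with the owning bucket's
// sequence number) and recurses into nested buckets (entries with v == nil).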
func walkBucket(parent *bolt.Bucket, k []byte, v []byte, w io.Writer) error {
if _, err := fmt.Fprintf(w, "%d:%x=%x\n", parent.Sequence(), k, v); err != nil {
return err
}
// not a bucket, exit.
if v != nil {
return nil
}
return parent.ForEach(func(k, v []byte) error {
if v == nil {
return walkBucket(parent.Bucket(k), k, nil, w)
}
return walkBucket(parent, k, v, w)
})
}
func dbData(t *testing.T, filePath string) []byte {
data, err := os.ReadFile(filePath)
require.NoError(t, err)
return data
}
func requireDBNoChange(t *testing.T, oldData []byte, filePath string) {
newData, err := os.ReadFile(filePath)
require.NoError(t, err)
noChange := bytes.Equal(oldData, newData)
require.True(t, noChange)
}
func convertInt64IntoBytes(num int64) []byte {
buf := make([]byte, binary.MaxVarintLen64)
n := binary.PutVarint(buf, num)
return buf[:n]
}
func convertInt64KeysIntoHexString(nums ...int64) string {
var res []string
for _, num := range nums {
res = append(res, hex.EncodeToString(convertInt64IntoBytes(num)))
}
return strings.Join(res, "\n") + "\n" // last newline char
}

290
cmd/bbolt/page_command.go Normal file
View File

@ -0,0 +1,290 @@
package main
import (
"bytes"
"flag"
"fmt"
"io"
"os"
"strings"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
// pageCommand represents the "page" command execution.
type pageCommand struct {
baseCommand
}
// newPageCommand returns a pageCommand.
func newPageCommand(m *Main) *pageCommand {
c := &pageCommand{}
c.baseCommand = m.baseCommand
return c
}
// Run executes the command.
func (cmd *pageCommand) Run(args ...string) error {
// Parse flags.
fs := flag.NewFlagSet("", flag.ContinueOnError)
help := fs.Bool("h", false, "")
all := fs.Bool("all", false, "list all pages")
formatValue := fs.String("format-value", "auto", "One of: "+FORMAT_MODES+". Applies to values on the leaf page.")
if err := fs.Parse(args); err != nil {
return err
} else if *help {
fmt.Fprintln(cmd.Stderr, cmd.Usage())
return ErrUsage
}
// Require database path and page id.
path := fs.Arg(0)
if path == "" {
return ErrPathRequired
} else if _, err := os.Stat(path); os.IsNotExist(err) {
return ErrFileNotFound
}
if !*all {
// Read page ids.
pageIDs, err := stringToPages(fs.Args()[1:])
if err != nil {
return err
} else if len(pageIDs) == 0 {
return ErrPageIDRequired
}
cmd.printPages(pageIDs, path, formatValue)
} else {
cmd.printAllPages(path, formatValue)
}
return nil
}
func (cmd *pageCommand) printPages(pageIDs []uint64, path string, formatValue *string) {
// Print each page listed.
for i, pageID := range pageIDs {
// Print a separator.
if i > 0 {
fmt.Fprintln(cmd.Stdout, "===============================================")
}
_, err2 := cmd.printPage(path, pageID, *formatValue)
if err2 != nil {
fmt.Fprintf(cmd.Stdout, "Prining page %d failed: %s. Continuing...\n", pageID, err2)
}
}
}
func (cmd *pageCommand) printAllPages(path string, formatValue *string) {
_, hwm, err := guts_cli.ReadPageAndHWMSize(path)
if err != nil {
fmt.Fprintf(cmd.Stdout, "cannot read number of pages: %v", err)
}
// Print each page listed.
for pageID := uint64(0); pageID < uint64(hwm); {
// Print a separator.
if pageID > 0 {
fmt.Fprintln(cmd.Stdout, "===============================================")
}
overflow, err2 := cmd.printPage(path, pageID, *formatValue)
if err2 != nil {
fmt.Fprintf(cmd.Stdout, "Prining page %d failed: %s. Continuing...\n", pageID, err2)
pageID++
} else {
pageID += uint64(overflow) + 1
}
}
}
// printPage prints the given page to cmd.Stdout and returns the page's overflow count, or an error.
func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string) (numPages uint32, reterr error) {
defer func() {
if err := recover(); err != nil {
reterr = fmt.Errorf("%s", err)
}
}()
// Retrieve page info and page size.
p, buf, err := guts_cli.ReadPage(path, pageID)
if err != nil {
return 0, err
}
// Print basic page info.
fmt.Fprintf(cmd.Stdout, "Page ID: %d\n", p.Id())
fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Typ())
fmt.Fprintf(cmd.Stdout, "Total Size: %d bytes\n", len(buf))
fmt.Fprintf(cmd.Stdout, "Overflow pages: %d\n", p.Overflow())
// Print type-specific data.
switch p.Typ() {
case "meta":
err = cmd.PrintMeta(cmd.Stdout, buf)
case "leaf":
err = cmd.PrintLeaf(cmd.Stdout, buf, formatValue)
case "branch":
err = cmd.PrintBranch(cmd.Stdout, buf)
case "freelist":
err = cmd.PrintFreelist(cmd.Stdout, buf)
}
if err != nil {
return 0, err
}
return p.Overflow(), nil
}
// PrintMeta prints the data from the meta page.
func (cmd *pageCommand) PrintMeta(w io.Writer, buf []byte) error {
m := common.LoadPageMeta(buf)
m.Print(w)
return nil
}
// PrintLeaf prints the data for a leaf page.
func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) error {
p := common.LoadPage(buf)
// Print number of items.
fmt.Fprintf(w, "Item Count: %d\n", p.Count())
fmt.Fprintf(w, "\n")
// Print each key/value.
for i := uint16(0); i < p.Count(); i++ {
e := p.LeafPageElement(i)
// Format key as string.
var k string
if isPrintable(string(e.Key())) {
k = fmt.Sprintf("%q", string(e.Key()))
} else {
k = fmt.Sprintf("%x", string(e.Key()))
}
// Format value as string.
var v string
if e.IsBucketEntry() {
b := e.Bucket()
v = b.String()
} else {
var err error
v, err = formatBytes(e.Value(), formatValue)
if err != nil {
return err
}
}
fmt.Fprintf(w, "%s: %s\n", k, v)
}
fmt.Fprintf(w, "\n")
return nil
}
// PrintBranch prints the data for a branch page.
func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
p := common.LoadPage(buf)
// Print number of items.
fmt.Fprintf(w, "Item Count: %d\n", p.Count())
fmt.Fprintf(w, "\n")
// Print each key/value.
for i := uint16(0); i < p.Count(); i++ {
e := p.BranchPageElement(i)
// Format key as string.
var k string
if isPrintable(string(e.Key())) {
k = fmt.Sprintf("%q", string(e.Key()))
} else {
k = fmt.Sprintf("%x", string(e.Key()))
}
fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.Pgid())
}
fmt.Fprintf(w, "\n")
return nil
}
// PrintFreelist prints the data for a freelist page.
func (cmd *pageCommand) PrintFreelist(w io.Writer, buf []byte) error {
p := common.LoadPage(buf)
// Print number of items.
_, cnt := p.FreelistPageCount()
fmt.Fprintf(w, "Item Count: %d\n", cnt)
fmt.Fprintf(w, "Overflow: %d\n", p.Overflow())
fmt.Fprintf(w, "\n")
// Print each page in the freelist.
ids := p.FreelistPageIds()
for _, id := range ids {
fmt.Fprintf(w, "%d\n", id)
}
fmt.Fprintf(w, "\n")
return nil
}
// PrintPage prints a given page as hexadecimal.
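// Identical consecutive lines are collapsed, hexdump(1)-style, into a single
// line marked with "*". Illustrative output (the values are made up):
//
//	0002000 0b00 0000 0000 0000 0200 0000 0000 0000
//	0002010 *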
func (cmd *pageCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSize int) error {
const bytesPerLineN = 16
// Read page into buffer.
buf := make([]byte, pageSize)
addr := pageID * pageSize
if n, err := r.ReadAt(buf, int64(addr)); err != nil {
return err
} else if n != pageSize {
return io.ErrUnexpectedEOF
}
// Write out to writer in 16-byte lines.
var prev []byte
var skipped bool
for offset := 0; offset < pageSize; offset += bytesPerLineN {
// Retrieve current 16-byte line.
line := buf[offset : offset+bytesPerLineN]
isLastLine := offset == (pageSize - bytesPerLineN)
// If it's the same as the previous line then print a skip.
if bytes.Equal(line, prev) && !isLastLine {
if !skipped {
fmt.Fprintf(w, "%07x *\n", addr+offset)
skipped = true
}
} else {
// Print line as hexadecimal in 2-byte groups.
fmt.Fprintf(w, "%07x %04x %04x %04x %04x %04x %04x %04x %04x\n", addr+offset,
line[0:2], line[2:4], line[4:6], line[6:8],
line[8:10], line[10:12], line[12:14], line[14:16],
)
skipped = false
}
// Save the previous line.
prev = line
}
fmt.Fprint(w, "\n")
return nil
}
// Usage returns the help message.
func (cmd *pageCommand) Usage() string {
return strings.TrimLeft(`
usage: bolt page PATH pageid [pageid...]
or: bolt page --all PATH
Additional options include:
--all
prints all pages (skipping only pages already accounted for as overflow of a successfully printed page)
--format-value=`+FORMAT_MODES+` (default: auto)
prints values (on the leaf page) using the given format.
Page prints one or more pages in human readable format.
`, "\n")
}

16
cmd/bbolt/utils.go Normal file
View File

@ -0,0 +1,16 @@
package main
import (
"fmt"
"os"
)
func checkSourceDBPath(srcPath string) (os.FileInfo, error) {
fi, err := os.Stat(srcPath)
if os.IsNotExist(err) {
return nil, fmt.Errorf("source database file %q doesn't exist", srcPath)
} else if err != nil {
return nil, fmt.Errorf("failed to open source database file %q: %v", srcPath, err)
}
return fi, nil
}

46
cmd/bbolt/utils_test.go Normal file
View File

@ -0,0 +1,46 @@
package main_test
import (
"os"
"testing"
"github.com/stretchr/testify/require"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
func loadMetaPage(t *testing.T, dbPath string, pageID uint64) *common.Meta {
_, buf, err := guts_cli.ReadPage(dbPath, pageID)
require.NoError(t, err)
return common.LoadPageMeta(buf)
}
func readMetaPage(t *testing.T, path string) *common.Meta {
_, activeMetaPageId, err := guts_cli.GetRootPage(path)
require.NoError(t, err)
_, buf, err := guts_cli.ReadPage(path, uint64(activeMetaPageId))
require.NoError(t, err)
return common.LoadPageMeta(buf)
}
func readPage(t *testing.T, path string, pageId int, pageSize int) []byte {
dbFile, err := os.Open(path)
require.NoError(t, err)
defer dbFile.Close()
fi, err := dbFile.Stat()
require.NoError(t, err)
require.GreaterOrEqual(t, fi.Size(), int64((pageId+1)*pageSize))
buf := make([]byte, pageSize)
byteRead, err := dbFile.ReadAt(buf, int64(pageId*pageSize))
require.NoError(t, err)
require.Equal(t, pageSize, byteRead)
return buf
}
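// pageDataWithoutPageId strips the leading 8 bytes (the page id field of the
// page header) so that pages stored at different ids can be compared
// byte-for-byte.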
func pageDataWithoutPageId(buf []byte) []byte {
return buf[8:]
}

3
code-of-conduct.md Normal file
View File

@ -0,0 +1,3 @@
# etcd Community Code of Conduct
Please refer to [etcd Community Code of Conduct](https://github.com/etcd-io/etcd/blob/main/code-of-conduct.md).

119
compact.go Normal file
View File

@ -0,0 +1,119 @@
package bbolt

import (
berrors "github.com/tutus-one/tutus-bolt/errors"
)
// Compact will create a copy of the source DB in the destination DB. This may
// reclaim space that the source database no longer has use for. txMaxSize can be
// used to limit the transaction size of this process and may trigger intermittent
// commits. A value of zero will ignore transaction sizes.
// TODO: merge with: https://github.com/etcd-io/etcd/blob/b7f0f52a16dbf83f18ca1d803f7892d750366a94/mvcc/backend/backend.go#L349
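//
// A minimal usage sketch (the file names are illustrative):
//
//	src, _ := Open("source.db", 0600, &Options{ReadOnly: true})
//	defer src.Close()
//	dst, _ := Open("compacted.db", 0600, nil)
//	defer dst.Close()
//	// Commit roughly every 1 MiB of copied key/value data.
//	if err := Compact(dst, src, 1<<20); err != nil {
//		// handle error
//	}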
func Compact(dst, src *DB, txMaxSize int64) (err error) {
// commit regularly, or we'll run out of memory for large datasets if using one transaction.
var size int64
tx, err := dst.Begin(true)
if err != nil {
return err
}
defer func() {
// Rollback after a successful Commit returns ErrTxClosed; ignore it so
// a successful compaction isn't reported as a failure.
if tempErr := tx.Rollback(); tempErr != nil && tempErr != berrors.ErrTxClosed {
err = tempErr
}
}()
if err := walk(src, func(keys [][]byte, k, v []byte, seq uint64) error {
// On each key/value, check if we have exceeded tx size.
sz := int64(len(k) + len(v))
if size+sz > txMaxSize && txMaxSize != 0 {
// Commit previous transaction.
if err := tx.Commit(); err != nil {
return err
}
// Start new transaction.
tx, err = dst.Begin(true)
if err != nil {
return err
}
size = 0
}
size += sz
// Create bucket on the root transaction if this is the first level.
nk := len(keys)
if nk == 0 {
bkt, err := tx.CreateBucket(k)
if err != nil {
return err
}
if err := bkt.SetSequence(seq); err != nil {
return err
}
return nil
}
// Create buckets on subsequent levels, if necessary.
b := tx.Bucket(keys[0])
if nk > 1 {
for _, k := range keys[1:] {
b = b.Bucket(k)
}
}
// Fill the entire page for best compaction.
b.FillPercent = 1.0
// If there is no value then this is a bucket call.
if v == nil {
bkt, err := b.CreateBucket(k)
if err != nil {
return err
}
if err := bkt.SetSequence(seq); err != nil {
return err
}
return nil
}
// Otherwise treat it as a key/value pair.
return b.Put(k, v)
}); err != nil {
return err
}
err = tx.Commit()
return err
}
// walkFunc is the type of the function called for keys (buckets and "normal"
// values) discovered by walk. keys is the path of bucket names leading to the
// bucket that owns the discovered key/value pair k/v.
type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error
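// For example, a key/value pair stored in the nested bucket "a"/"b" is
// reported as fn([][]byte{[]byte("a"), []byte("b")}, k, v, seq).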
// walk recursively walks the bolt database db, calling walkFn for each key it finds.
func walk(db *DB, walkFn walkFunc) error {
return db.View(func(tx *Tx) error {
return tx.ForEach(func(name []byte, b *Bucket) error {
return walkBucket(b, nil, name, nil, b.Sequence(), walkFn)
})
})
}
func walkBucket(b *Bucket, keypath [][]byte, k, v []byte, seq uint64, fn walkFunc) error {
// Execute callback.
if err := fn(keypath, k, v, seq); err != nil {
return err
}
// If this is not a bucket then stop.
if v != nil {
return nil
}
// Iterate over each child key/value.
keypath = append(keypath, k)
return b.ForEach(func(k, v []byte) error {
if v == nil {
bkt := b.Bucket(k)
return walkBucket(bkt, keypath, k, nil, bkt.Sequence(), fn)
}
return walkBucket(b, keypath, k, v, b.Sequence(), fn)
})
}

956
concurrent_test.go Normal file
View File

@ -0,0 +1,956 @@
package bbolt_test
import (
"bytes"
crand "crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"io"
mrand "math/rand"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"testing"
"time"
"unicode/utf8"
"github.com/stretchr/testify/require"
"golang.org/x/sync/errgroup"
bolt "github.com/tutus-one/tutus-bolt"
)
const (
bucketPrefix = "bucket"
keyPrefix = "key"
noopTxKey = "%magic-no-op-key%"
// testConcurrentCaseDuration is the name of the env variable used to
// specify the concurrent test case duration.
testConcurrentCaseDuration = "TEST_CONCURRENT_CASE_DURATION"
defaultConcurrentTestDuration = 30 * time.Second
)
type duration struct {
min time.Duration
max time.Duration
}
type bytesRange struct {
min int
max int
}
type operationChance struct {
operation OperationType
chance int
}
type concurrentConfig struct {
bucketCount int
keyCount int
workInterval duration
operationRatio []operationChance
readInterval duration // only used by readOperation
noopWriteRatio int // only used by writeOperation
writeBytes bytesRange // only used by writeOperation
}
/*
TestConcurrentGenericReadAndWrite verifies:
1. Repeatable read: a read transaction should always see the same data
view during its lifecycle.
2. Any data written by a writing transaction should be visible to any
following reading transactions (with txid >= previous writing txid).
3. The txid should never decrease.
*/
func TestConcurrentGenericReadAndWrite(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
testDuration := concurrentTestDuration(t)
conf := concurrentConfig{
bucketCount: 5,
keyCount: 10000,
workInterval: duration{},
operationRatio: []operationChance{
{operation: Read, chance: 60},
{operation: Write, chance: 20},
{operation: Delete, chance: 20},
},
readInterval: duration{
min: 50 * time.Millisecond,
max: 100 * time.Millisecond,
},
noopWriteRatio: 20,
writeBytes: bytesRange{
min: 200,
max: 16000,
},
}
testCases := []struct {
name string
workerCount int
conf concurrentConfig
testDuration time.Duration
}{
{
name: "1 worker",
workerCount: 1,
conf: conf,
testDuration: testDuration,
},
{
name: "10 workers",
workerCount: 10,
conf: conf,
testDuration: testDuration,
},
{
name: "50 workers",
workerCount: 50,
conf: conf,
testDuration: testDuration,
},
{
name: "100 workers",
workerCount: 100,
conf: conf,
testDuration: testDuration,
},
{
name: "200 workers",
workerCount: 200,
conf: conf,
testDuration: testDuration,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
concurrentReadAndWrite(t,
tc.workerCount,
tc.conf,
tc.testDuration)
})
}
}
func concurrentTestDuration(t *testing.T) time.Duration {
durationInEnv := strings.ToLower(os.Getenv(testConcurrentCaseDuration))
if durationInEnv == "" {
t.Logf("%q not set, defaults to %s", testConcurrentCaseDuration, defaultConcurrentTestDuration)
return defaultConcurrentTestDuration
}
d, err := time.ParseDuration(durationInEnv)
if err != nil {
t.Logf("Failed to parse %s=%s, error: %v, defaults to %s", testConcurrentCaseDuration, durationInEnv, err, defaultConcurrentTestDuration)
return defaultConcurrentTestDuration
}
t.Logf("Concurrent test duration set by %s=%s", testConcurrentCaseDuration, d)
return d
}
func concurrentReadAndWrite(t *testing.T,
workerCount int,
conf concurrentConfig,
testDuration time.Duration) {
t.Log("Preparing db.")
db := mustCreateDB(t, &bolt.Options{
PageSize: 4096,
})
defer db.Close()
err := db.Update(func(tx *bolt.Tx) error {
for i := 0; i < conf.bucketCount; i++ {
if _, err := tx.CreateBucketIfNotExists(bucketName(i)); err != nil {
return err
}
}
return nil
})
require.NoError(t, err)
var records historyRecords
// t.Failed() returns false during panicking. We need to forcibly
// save data on panicking.
// Refer to: https://github.com/golang/go/issues/49929
panicked := true
defer func() {
t.Log("Save data if failed.")
saveDataIfFailed(t, db, records, panicked)
}()
t.Log("Starting workers.")
records = runWorkers(t,
db,
workerCount,
conf,
testDuration)
t.Log("Analyzing the history records.")
if err := validateSequential(records); err != nil {
t.Errorf("The history records are not sequential:\n %v", err)
}
t.Log("Checking database consistency.")
if err := checkConsistency(t, db); err != nil {
t.Errorf("The data isn't consistency: %v", err)
}
panicked = false
// TODO (ahrtr):
// 1. intentionally inject a random failpoint.
}
// mustCreateDB is used in place of `btesting.MustCreateDB`, and it's
// only supposed to be used by the concurrent test case. The purpose is
// to ensure the test case can be executed on old branches or versions,
// e.g. `release-1.3` or `1.3.[5-7]`.
func mustCreateDB(t *testing.T, o *bolt.Options) *bolt.DB {
f := filepath.Join(t.TempDir(), "db")
return mustOpenDB(t, f, o)
}
func mustReOpenDB(t *testing.T, db *bolt.DB, o *bolt.Options) *bolt.DB {
f := db.Path()
t.Logf("Closing bbolt DB at: %s", f)
err := db.Close()
require.NoError(t, err)
return mustOpenDB(t, f, o)
}
func mustOpenDB(t *testing.T, dbPath string, o *bolt.Options) *bolt.DB {
t.Logf("Opening bbolt DB at: %s", dbPath)
if o == nil {
o = bolt.DefaultOptions
}
freelistType := bolt.FreelistArrayType
if env := os.Getenv("TEST_FREELIST_TYPE"); env == string(bolt.FreelistMapType) {
freelistType = bolt.FreelistMapType
}
o.FreelistType = freelistType
db, err := bolt.Open(dbPath, 0600, o)
require.NoError(t, err)
return db
}
func checkConsistency(t *testing.T, db *bolt.DB) error {
return db.View(func(tx *bolt.Tx) error {
cnt := 0
for err := range tx.Check() {
t.Errorf("Consistency error: %v", err)
cnt++
}
if cnt > 0 {
return fmt.Errorf("%d consistency errors found", cnt)
}
return nil
})
}
/*
*********************************************************
Data structures and functions/methods for running concurrent
workers, which execute different operations, including `Read`,
`Write` and `Delete`.
*********************************************************
*/
func runWorkers(t *testing.T,
db *bolt.DB,
workerCount int,
conf concurrentConfig,
testDuration time.Duration) historyRecords {
stopCh := make(chan struct{}, 1)
errCh := make(chan error, workerCount)
var mu sync.Mutex
var rs historyRecords
g := new(errgroup.Group)
for i := 0; i < workerCount; i++ {
w := &worker{
id: i,
db: db,
conf: conf,
errCh: errCh,
stopCh: stopCh,
t: t,
}
g.Go(func() error {
wrs, err := runWorker(t, w, errCh)
mu.Lock()
rs = append(rs, wrs...)
mu.Unlock()
return err
})
}
t.Logf("Keep all workers running for about %s.", testDuration)
select {
case <-time.After(testDuration):
case <-errCh:
}
close(stopCh)
t.Log("Waiting for all workers to finish.")
if err := g.Wait(); err != nil {
t.Errorf("Received error: %v", err)
}
return rs
}
func runWorker(t *testing.T, w *worker, errCh chan error) (historyRecords, error) {
rs, err := w.run()
if len(rs) > 0 && err == nil {
if terr := validateIncrementalTxid(rs); terr != nil {
txidErr := fmt.Errorf("[%s]: %w", w.name(), terr)
t.Error(txidErr)
errCh <- txidErr
return rs, txidErr
}
}
return rs, err
}
type worker struct {
id int
db *bolt.DB
conf concurrentConfig
errCh chan error
stopCh chan struct{}
t *testing.T
}
func (w *worker) name() string {
return fmt.Sprintf("worker-%d", w.id)
}
func (w *worker) run() (historyRecords, error) {
var rs historyRecords
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for {
select {
case <-w.stopCh:
return rs, nil
default:
}
err := w.db.Update(func(tx *bolt.Tx) error {
for {
op := w.pickOperation()
bucket, key := w.pickBucket(), w.pickKey()
rec, eerr := executeOperation(op, tx, bucket, key, w.conf)
if eerr != nil {
opErr := fmt.Errorf("[%s: %s]: %w", w.name(), op, eerr)
w.t.Error(opErr)
w.errCh <- opErr
return opErr
}
rs = append(rs, rec)
if w.conf.workInterval != (duration{}) {
time.Sleep(randomDurationInRange(w.conf.workInterval.min, w.conf.workInterval.max))
}
select {
case <-ticker.C:
return nil
case <-w.stopCh:
return nil
default:
}
}
})
if err != nil {
return rs, err
}
}
}
func (w *worker) pickBucket() []byte {
return bucketName(mrand.Intn(w.conf.bucketCount))
}
func bucketName(index int) []byte {
bucket := fmt.Sprintf("%s_%d", bucketPrefix, index)
return []byte(bucket)
}
func (w *worker) pickKey() []byte {
key := fmt.Sprintf("%s_%d", keyPrefix, mrand.Intn(w.conf.keyCount))
return []byte(key)
}
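// pickOperation picks an operation type by weighted random selection over
// conf.operationRatio. For example, with Read=60, Write=20, Delete=20 and a
// roll of 75, Read is skipped (75 >= 60), the roll drops to 15, and Write is
// returned (15 < 20).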
func (w *worker) pickOperation() OperationType {
sum := 0
for _, op := range w.conf.operationRatio {
sum += op.chance
}
roll := mrand.Int() % sum
for _, op := range w.conf.operationRatio {
if roll < op.chance {
return op.operation
}
roll -= op.chance
}
panic("unexpected")
}
func executeOperation(op OperationType, tx *bolt.Tx, bucket []byte, key []byte, conf concurrentConfig) (historyRecord, error) {
switch op {
case Read:
return executeRead(tx, bucket, key, conf.readInterval)
case Write:
return executeWrite(tx, bucket, key, conf.writeBytes, conf.noopWriteRatio)
case Delete:
return executeDelete(tx, bucket, key)
default:
panic(fmt.Sprintf("unexpected operation type: %s", op))
}
}
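// executeRead reads the same key twice within one transaction, sleeping a
// random interval in between, and reports an error if the two reads differ
// (a repeatable-read violation).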
func executeRead(tx *bolt.Tx, bucket []byte, key []byte, readInterval duration) (historyRecord, error) {
var rec historyRecord
b := tx.Bucket(bucket)
initialVal := b.Get(key)
time.Sleep(randomDurationInRange(readInterval.min, readInterval.max))
val := b.Get(key)
if !bytes.Equal(initialVal, val) {
return rec, fmt.Errorf("read different values for the same key (%q), value1: %q, value2: %q",
string(key), formatBytes(initialVal), formatBytes(val))
}
clonedVal := make([]byte, len(val))
copy(clonedVal, val)
rec = historyRecord{
OperationType: Read,
Bucket: string(bucket),
Key: string(key),
Value: clonedVal,
Txid: tx.ID(),
}
return rec, nil
}
func executeWrite(tx *bolt.Tx, bucket []byte, key []byte, writeBytes bytesRange, noopWriteRatio int) (historyRecord, error) {
var rec historyRecord
if mrand.Intn(100) < noopWriteRatio {
// A no-op write transaction has two consequences:
// 1. The txid increases by 1;
// 2. Two meta pages point to the same root page.
rec = historyRecord{
OperationType: Write,
Bucket: string(bucket),
Key: noopTxKey,
Value: nil,
Txid: tx.ID(),
}
return rec, nil
}
b := tx.Bucket(bucket)
valueBytes := randomIntInRange(writeBytes.min, writeBytes.max)
v := make([]byte, valueBytes)
if _, cErr := crand.Read(v); cErr != nil {
return rec, cErr
}
putErr := b.Put(key, v)
if putErr == nil {
rec = historyRecord{
OperationType: Write,
Bucket: string(bucket),
Key: string(key),
Value: v,
Txid: tx.ID(),
}
}
return rec, putErr
}
func executeDelete(tx *bolt.Tx, bucket []byte, key []byte) (historyRecord, error) {
var rec historyRecord
b := tx.Bucket(bucket)
err := b.Delete(key)
if err == nil {
rec = historyRecord{
OperationType: Delete,
Bucket: string(bucket),
Key: string(key),
Txid: tx.ID(),
}
}
return rec, err
}
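// randomDurationInRange returns a random duration in [min, max). It assumes
// max > min, since mrand.Intn panics on a non-positive argument; callers in
// this file only pass non-empty ranges.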
func randomDurationInRange(min, max time.Duration) time.Duration {
d := int64(max) - int64(min)
d = int64(mrand.Intn(int(d))) + int64(min)
return time.Duration(d)
}
func randomIntInRange(min, max int) int {
return mrand.Intn(max-min) + min
}
func formatBytes(val []byte) string {
if utf8.ValidString(string(val)) {
return string(val)
}
return hex.EncodeToString(val)
}
/*
*********************************************************
Functions for persisting test data, including db file
and operation history
*********************************************************
*/
func saveDataIfFailed(t *testing.T, db *bolt.DB, rs historyRecords, force bool) {
if t.Failed() || force {
t.Log("Saving data...")
dbPath := db.Path()
if err := db.Close(); err != nil {
t.Errorf("Failed to close db: %v", err)
}
backupPath := testResultsDirectory(t)
backupDB(t, dbPath, backupPath)
persistHistoryRecords(t, rs, backupPath)
}
}
func backupDB(t *testing.T, srcPath string, dstPath string) {
targetFile := filepath.Join(dstPath, "db.bak")
t.Logf("Saving the DB file to %s", targetFile)
err := copyFile(srcPath, targetFile)
require.NoError(t, err)
t.Logf("DB file saved to %s", targetFile)
}
func copyFile(srcPath, dstPath string) error {
// Ensure source file exists.
_, err := os.Stat(srcPath)
if os.IsNotExist(err) {
return fmt.Errorf("source file %q not found", srcPath)
} else if err != nil {
return err
}
// Ensure the output file does not exist.
_, err = os.Stat(dstPath)
if err == nil {
return fmt.Errorf("output file %q already exists", dstPath)
} else if !os.IsNotExist(err) {
return err
}
srcDB, err := os.Open(srcPath)
if err != nil {
return fmt.Errorf("failed to open source file %q: %w", srcPath, err)
}
defer srcDB.Close()
dstDB, err := os.Create(dstPath)
if err != nil {
return fmt.Errorf("failed to create output file %q: %w", dstPath, err)
}
defer dstDB.Close()
written, err := io.Copy(dstDB, srcDB)
if err != nil {
return fmt.Errorf("failed to copy database file from %q to %q: %w", srcPath, dstPath, err)
}
srcFi, err := srcDB.Stat()
if err != nil {
return fmt.Errorf("failed to get source file info %q: %w", srcPath, err)
}
initialSize := srcFi.Size()
if initialSize != written {
return fmt.Errorf("the byte copied (%q: %d) isn't equal to the initial db size (%q: %d)", dstPath, written, srcPath, initialSize)
}
return nil
}
func persistHistoryRecords(t *testing.T, rs historyRecords, path string) {
recordFilePath := filepath.Join(path, "history_records.json")
t.Logf("Saving history records to %s", recordFilePath)
recordFile, err := os.OpenFile(recordFilePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0755)
require.NoError(t, err)
defer recordFile.Close()
encoder := json.NewEncoder(recordFile)
for _, rec := range rs {
err := encoder.Encode(rec)
require.NoError(t, err)
}
}
func testResultsDirectory(t *testing.T) string {
resultsDirectory, ok := os.LookupEnv("RESULTS_DIR")
var err error
if !ok {
resultsDirectory, err = os.MkdirTemp("", "*.db")
require.NoError(t, err)
}
resultsDirectory, err = filepath.Abs(resultsDirectory)
require.NoError(t, err)
path, err := filepath.Abs(filepath.Join(resultsDirectory, strings.ReplaceAll(t.Name(), "/", "_")))
require.NoError(t, err)
err = os.RemoveAll(path)
require.NoError(t, err)
err = os.MkdirAll(path, 0700)
require.NoError(t, err)
return path
}
/*
*********************************************************
Data structures and functions for analyzing history records
*********************************************************
*/
type OperationType string
const (
Read OperationType = "read"
Write OperationType = "write"
Delete OperationType = "delete"
)
type historyRecord struct {
OperationType OperationType `json:"operationType,omitempty"`
Txid int `json:"txid,omitempty"`
Bucket string `json:"bucket,omitempty"`
Key string `json:"key,omitempty"`
Value []byte `json:"value,omitempty"`
}
type historyRecords []historyRecord
func (rs historyRecords) Len() int {
return len(rs)
}
func (rs historyRecords) Less(i, j int) bool {
// Sort by (bucket, key) first: all records with the same
// (bucket, key) are grouped together.
bucketCmp := strings.Compare(rs[i].Bucket, rs[j].Bucket)
if bucketCmp != 0 {
return bucketCmp < 0
}
keyCmp := strings.Compare(rs[i].Key, rs[j].Key)
if keyCmp != 0 {
return keyCmp < 0
}
// Then sort by txid.
return rs[i].Txid < rs[j].Txid
}
func (rs historyRecords) Swap(i, j int) {
rs[i], rs[j] = rs[j], rs[i]
}
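// validateIncrementalTxid verifies that the txids recorded by a single
// worker never decrease over time.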
func validateIncrementalTxid(rs historyRecords) error {
lastTxid := rs[0].Txid
for i := 1; i < len(rs); i++ {
if rs[i].Txid < lastTxid {
return fmt.Errorf("detected non-incremental txid(%d, %d) in %s mode", lastTxid, rs[i].Txid, rs[i].OperationType)
}
lastTxid = rs[i].Txid
}
return nil
}
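// validateSequential sorts the records by (bucket, key, txid) and verifies,
// for each (bucket, key), that every read observes the value of the most
// recent write, and that the first read of a never-written key observes nil.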
func validateSequential(rs historyRecords) error {
sort.Stable(rs)
type bucketAndKey struct {
bucket string
key string
}
lastWriteKeyValueMap := make(map[bucketAndKey]*historyRecord)
for _, rec := range rs {
bk := bucketAndKey{
bucket: rec.Bucket,
key: rec.Key,
}
if v, ok := lastWriteKeyValueMap[bk]; ok {
if rec.OperationType == Write {
v.Txid = rec.Txid
if rec.Key != noopTxKey {
v.Value = rec.Value
}
} else if rec.OperationType == Delete {
delete(lastWriteKeyValueMap, bk)
} else {
if !bytes.Equal(v.Value, rec.Value) {
return fmt.Errorf("readOperation[txid: %d, bucket: %s, key: %s] read %x, \nbut writer[txid: %d] wrote %x",
rec.Txid, rec.Bucket, rec.Key, rec.Value, v.Txid, v.Value)
}
}
} else {
if rec.OperationType == Write && rec.Key != noopTxKey {
lastWriteKeyValueMap[bk] = &historyRecord{
OperationType: Write,
Bucket: rec.Bucket,
Key: rec.Key,
Value: rec.Value,
Txid: rec.Txid,
}
} else if rec.OperationType == Read {
if len(rec.Value) != 0 {
return fmt.Errorf("expected the first readOperation[txid: %d, bucket: %s, key: %s] read nil, \nbut got %x",
rec.Txid, rec.Bucket, rec.Key, rec.Value)
}
}
}
}
return nil
}
/*
TestConcurrentRepeatableRead verifies repeatable read. The case
intentionally creates a scenario that read and write transactions
are interleaved. It performs several writing operations after starting
// each long-running read transaction to ensure it has a larger txid
// than the previous read transaction. It verifies that bbolt correctly
releases free pages, and will not pollute (e.g. prematurely release)
any pages which are still being used by any read transaction.
*/
func TestConcurrentRepeatableRead(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
testCases := []struct {
name string
noFreelistSync bool
freelistType bolt.FreelistType
}{
// [array] freelist
{
name: "sync array freelist",
noFreelistSync: false,
freelistType: bolt.FreelistArrayType,
},
{
name: "not sync array freelist",
noFreelistSync: true,
freelistType: bolt.FreelistArrayType,
},
// [map] freelist
{
name: "sync map freelist",
noFreelistSync: false,
freelistType: bolt.FreelistMapType,
},
{
name: "not sync map freelist",
noFreelistSync: true,
freelistType: bolt.FreelistMapType,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Log("Preparing db.")
var (
bucket = []byte("data")
key = []byte("mykey")
option = &bolt.Options{
PageSize: 4096,
NoFreelistSync: tc.noFreelistSync,
FreelistType: tc.freelistType,
}
)
db := mustCreateDB(t, option)
defer func() {
// The db will be reopened later, so put `db.Close()` in a function
// to avoid premature evaluation of `db`. Note that the execution
// of a deferred function is deferred to the moment the surrounding
// function returns, but the function value and parameters to the
// call are evaluated as usual and saved anew.
db.Close()
}()
// Create lots of K/V to allocate some pages
err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucketIfNotExists(bucket)
if err != nil {
return err
}
for i := 0; i < 1000; i++ {
k := fmt.Sprintf("key_%d", i)
if err := b.Put([]byte(k), make([]byte, 1024)); err != nil {
return err
}
}
return nil
})
require.NoError(t, err)
// Remove all K/V to create some free pages
err = db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket(bucket)
for i := 0; i < 1000; i++ {
k := fmt.Sprintf("key_%d", i)
if err := b.Delete([]byte(k)); err != nil {
return err
}
}
return b.Put(key, []byte("randomValue"))
})
require.NoError(t, err)
// bbolt will not release free pages directly after committing
// a writing transaction; instead, all freed pages are put
// into a pending list. Accordingly, the free pages might not
// be reusable by subsequent writing transactions. So we
// reopen the db to completely release all free pages.
db = mustReOpenDB(t, db, option)
var (
wg sync.WaitGroup
longRunningReaderCount = 10
stopCh = make(chan struct{})
errCh = make(chan error, longRunningReaderCount)
readInterval = duration{5 * time.Millisecond, 10 * time.Millisecond}
writeOperationCountInBetween = 5
writeBytes = bytesRange{10, 20}
testDuration = 10 * time.Second
)
for i := 0; i < longRunningReaderCount; i++ {
readWorkerName := fmt.Sprintf("reader_%d", i)
t.Logf("Starting long running read operation: %s", readWorkerName)
wg.Add(1)
go func() {
defer wg.Done()
rErr := executeLongRunningRead(t, readWorkerName, db, bucket, key, readInterval, stopCh)
if rErr != nil {
errCh <- rErr
}
}()
time.Sleep(500 * time.Millisecond)
t.Logf("Perform %d write operations after starting a long running read operation", writeOperationCountInBetween)
for j := 0; j < writeOperationCountInBetween; j++ {
err := db.Update(func(tx *bolt.Tx) error {
_, eerr := executeWrite(tx, bucket, key, writeBytes, 0)
return eerr
})
require.NoError(t, err)
}
}
t.Log("Perform lots of write operations to check whether the long running read operations will read dirty data")
wg.Add(1)
go func() {
defer wg.Done()
cnt := longRunningReaderCount * writeOperationCountInBetween
for i := 0; i < cnt; i++ {
select {
case <-stopCh:
return
default:
}
err := db.Update(func(tx *bolt.Tx) error {
_, eerr := executeWrite(tx, bucket, key, writeBytes, 0)
return eerr
})
require.NoError(t, err)
}
}()
t.Log("Waiting for result")
select {
case err := <-errCh:
close(stopCh)
t.Errorf("Detected dirty read: %v", err)
case <-time.After(testDuration):
close(stopCh)
}
wg.Wait()
})
}
}
func executeLongRunningRead(t *testing.T, name string, db *bolt.DB, bucket []byte, key []byte, readInterval duration, stopCh chan struct{}) error {
err := db.View(func(tx *bolt.Tx) error {
b := tx.Bucket(bucket)
initialVal := b.Get(key)
for {
select {
case <-stopCh:
t.Logf("%q finished.", name)
return nil
default:
}
time.Sleep(randomDurationInRange(readInterval.min, readInterval.max))
val := b.Get(key)
if !bytes.Equal(initialVal, val) {
dirtyReadErr := fmt.Errorf("read different values for the same key (%q), value1: %q, value2: %q",
string(key), formatBytes(initialVal), formatBytes(val))
return dirtyReadErr
}
}
})
return err
}

432
cursor.go Normal file
View File

@ -0,0 +1,432 @@
package bbolt
import (
"bytes"
"fmt"
"sort"
"github.com/tutus-one/tutus-bolt/errors"
"github.com/tutus-one/tutus-bolt/internal/common"
)
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket
// in lexicographical order.
// Cursors see nested buckets with value == nil.
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
//
// Keys and values returned from the cursor are only valid for the life of the transaction.
//
// Changing data while traversing with a cursor may cause it to be invalidated
// and return unexpected keys and/or values. You must reposition your cursor
// after mutating data.
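//
// A minimal iteration sketch (the bucket name is illustrative):
//
//	err := db.View(func(tx *Tx) error {
//		c := tx.Bucket([]byte("widgets")).Cursor()
//		for k, v := c.First(); k != nil; k, v = c.Next() {
//			fmt.Printf("%s=%s\n", k, v)
//		}
//		return nil
//	})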
type Cursor struct {
bucket *Bucket
stack []elemRef
}
// Bucket returns the bucket that this cursor was created from.
func (c *Cursor) Bucket() *Bucket {
return c.bucket
}
// First moves the cursor to the first item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) First() (key []byte, value []byte) {
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.first()
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.RootPage())
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
c.goToFirstElementOnTheStack()
// If we land on an empty page then move to the next value.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
c.next()
}
k, v, flags := c.keyValue()
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil, flags
}
return k, v, flags
}
// Last moves the cursor to the last item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Last() (key []byte, value []byte) {
common.Assert(c.bucket.tx.db != nil, "tx closed")
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.RootPage())
ref := elemRef{page: p, node: n}
ref.index = ref.count() - 1
c.stack = append(c.stack, ref)
c.last()
// If this is an empty page (calling Delete may result in empty pages)
// we call prev to find the last page that is not empty
for len(c.stack) > 1 && c.stack[len(c.stack)-1].count() == 0 {
c.prev()
}
if len(c.stack) == 0 {
return nil, nil
}
k, v, flags := c.keyValue()
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Next moves the cursor to the next item in the bucket and returns its key and value.
// If the cursor is at the end of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Next() (key []byte, value []byte) {
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.next()
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Prev moves the cursor to the previous item in the bucket and returns its key and value.
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Prev() (key []byte, value []byte) {
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.prev()
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Seek moves the cursor to a given key using a b-tree search and returns it.
// If the key does not exist then the next key is used. If no keys
// follow, a nil key is returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.seek(seek)
// If we ended up after the last element of a page then move to the next one.
if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() {
k, v, flags = c.next()
}
if k == nil {
return nil, nil
} else if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
}
// Delete removes the current key/value under the cursor from the bucket.
// Delete fails if current key/value is a bucket or if the transaction is not writable.
func (c *Cursor) Delete() error {
if c.bucket.tx.db == nil {
return errors.ErrTxClosed
} else if !c.bucket.Writable() {
return errors.ErrTxNotWritable
}
key, _, flags := c.keyValue()
// Return an error if current value is a bucket.
if (flags & common.BucketLeafFlag) != 0 {
return errors.ErrIncompatibleValue
}
c.node().del(key)
return nil
}
// seek moves the cursor to a given key and returns it.
// If the key does not exist then the next key is used.
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
// Start from root page/node and traverse to correct page.
c.stack = c.stack[:0]
c.search(seek, c.bucket.RootPage())
// If this is a bucket then return a nil value.
return c.keyValue()
}
// goToFirstElementOnTheStack moves the cursor to the first leaf element under the last page in the stack.
func (c *Cursor) goToFirstElementOnTheStack() {
for {
// Exit when we hit a leaf page.
var ref = &c.stack[len(c.stack)-1]
if ref.isLeaf() {
break
}
// Keep adding pages pointing to the first element to the stack.
var pgId common.Pgid
if ref.node != nil {
pgId = ref.node.inodes[ref.index].Pgid()
} else {
pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
}
p, n := c.bucket.pageNode(pgId)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
}
}
// last moves the cursor to the last leaf element under the last page in the stack.
func (c *Cursor) last() {
for {
// Exit when we hit a leaf page.
ref := &c.stack[len(c.stack)-1]
if ref.isLeaf() {
break
}
// Keep adding pages pointing to the last element in the stack.
var pgId common.Pgid
if ref.node != nil {
pgId = ref.node.inodes[ref.index].Pgid()
} else {
pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
}
p, n := c.bucket.pageNode(pgId)
var nextRef = elemRef{page: p, node: n}
nextRef.index = nextRef.count() - 1
c.stack = append(c.stack, nextRef)
}
}
// next moves to the next leaf element and returns the key and value.
// If the cursor is at the last leaf element then it stays there and returns nil.
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
for {
// Attempt to move over one element until we're successful.
// Move up the stack as we hit the end of each page in our stack.
var i int
for i = len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index < elem.count()-1 {
elem.index++
break
}
}
// If we've hit the root page then stop and return. This will leave the
// cursor on the last element of the last page.
if i == -1 {
return nil, nil, 0
}
// Otherwise start from where we left off in the stack and find the
// first element of the first leaf page.
c.stack = c.stack[:i+1]
c.goToFirstElementOnTheStack()
// If this is an empty page then restart and move back up the stack.
// https://github.com/boltdb/bolt/issues/450
if c.stack[len(c.stack)-1].count() == 0 {
continue
}
return c.keyValue()
}
}
// prev moves the cursor to the previous item in the bucket and returns its key and value.
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
func (c *Cursor) prev() (key []byte, value []byte, flags uint32) {
// Attempt to move back one element until we're successful.
// Move up the stack as we hit the beginning of each page in our stack.
for i := len(c.stack) - 1; i >= 0; i-- {
elem := &c.stack[i]
if elem.index > 0 {
elem.index--
break
}
// If we've hit the beginning, we should stop moving the cursor
// and stay at the first element, so that users can reverse
// direction and continue iterating by calling `Next`.
// We should return nil in such a case.
// Refer to https://github.com/etcd-io/bbolt/issues/733
if len(c.stack) == 1 {
c.first()
return nil, nil, 0
}
c.stack = c.stack[:i]
}
// If we've hit the end then return nil.
if len(c.stack) == 0 {
return nil, nil, 0
}
// Move down the stack to find the last element of the last leaf under this branch.
c.last()
return c.keyValue()
}
// search recursively performs a binary search against a given page/node until it finds a given key.
func (c *Cursor) search(key []byte, pgId common.Pgid) {
p, n := c.bucket.pageNode(pgId)
if p != nil && !p.IsBranchPage() && !p.IsLeafPage() {
panic(fmt.Sprintf("invalid page type: %d: %x", p.Id(), p.Flags()))
}
e := elemRef{page: p, node: n}
c.stack = append(c.stack, e)
// If we're on a leaf page/node then find the specific node.
if e.isLeaf() {
c.nsearch(key)
return
}
if n != nil {
c.searchNode(key, n)
return
}
c.searchPage(key, p)
}
func (c *Cursor) searchNode(key []byte, n *node) {
var exact bool
index := sort.Search(len(n.inodes), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(n.inodes[i].Key(), key)
if ret == 0 {
exact = true
}
return ret != -1
})
if !exact && index > 0 {
index--
}
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, n.inodes[index].Pgid())
}
func (c *Cursor) searchPage(key []byte, p *common.Page) {
// Binary search for the correct range.
inodes := p.BranchPageElements()
var exact bool
index := sort.Search(int(p.Count()), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(inodes[i].Key(), key)
if ret == 0 {
exact = true
}
return ret != -1
})
if !exact && index > 0 {
index--
}
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, inodes[index].Pgid())
}
// nsearch searches the leaf node on the top of the stack for a key.
func (c *Cursor) nsearch(key []byte) {
e := &c.stack[len(c.stack)-1]
p, n := e.page, e.node
// If we have a node then search its inodes.
if n != nil {
index := sort.Search(len(n.inodes), func(i int) bool {
return bytes.Compare(n.inodes[i].Key(), key) != -1
})
e.index = index
return
}
// If we have a page then search its leaf elements.
inodes := p.LeafPageElements()
index := sort.Search(int(p.Count()), func(i int) bool {
return bytes.Compare(inodes[i].Key(), key) != -1
})
e.index = index
}
// keyValue returns the key and value of the current leaf element.
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
ref := &c.stack[len(c.stack)-1]
// If the cursor is pointing to the end of page/node then return nil.
if ref.count() == 0 || ref.index >= ref.count() {
return nil, nil, 0
}
// Retrieve value from node.
if ref.node != nil {
inode := &ref.node.inodes[ref.index]
return inode.Key(), inode.Value(), inode.Flags()
}
// Or retrieve value from page.
elem := ref.page.LeafPageElement(uint16(ref.index))
return elem.Key(), elem.Value(), elem.Flags()
}
// node returns the node that the cursor is currently positioned on.
func (c *Cursor) node() *node {
common.Assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
// If the top of the stack is a leaf node then just return it.
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
return ref.node
}
// Start from root and traverse down the hierarchy.
var n = c.stack[0].node
if n == nil {
n = c.bucket.node(c.stack[0].page.Id(), nil)
}
for _, ref := range c.stack[:len(c.stack)-1] {
common.Assert(!n.isLeaf, "expected branch node")
n = n.childAt(ref.index)
}
common.Assert(n.isLeaf, "expected leaf node")
return n
}
// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
page *common.Page
node *node
index int
}
// isLeaf returns whether the ref is pointing at a leaf page/node.
func (r *elemRef) isLeaf() bool {
if r.node != nil {
return r.node.isLeaf
}
return r.page.IsLeafPage()
}
// count returns the number of inodes or page elements.
func (r *elemRef) count() int {
if r.node != nil {
return len(r.node.inodes)
}
return int(r.page.Count())
}

986
cursor_test.go Normal file
View File

@ -0,0 +1,986 @@
package bbolt_test
import (
"bytes"
"encoding/binary"
"fmt"
"log"
"os"
"reflect"
"sort"
"testing"
"testing/quick"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
"github.com/tutus-one/tutus-bolt/errors"
"github.com/tutus-one/tutus-bolt/internal/btesting"
)
// TestCursor_RepeatOperations verifies that a cursor can continue to
// iterate over all elements in the reverse direction after it has
// already reached the end or the beginning.
// Refer to https://github.com/etcd-io/bbolt/issues/733
func TestCursor_RepeatOperations(t *testing.T) {
testCases := []struct {
name string
testFunc func(t2 *testing.T, bucket *bolt.Bucket)
}{
{
name: "Repeat NextPrevNext",
testFunc: testRepeatCursorOperations_NextPrevNext,
},
{
name: "Repeat PrevNextPrev",
testFunc: testRepeatCursorOperations_PrevNextPrev,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
db := btesting.MustCreateDBWithOption(t, &bolt.Options{PageSize: 4096})
bucketName := []byte("data")
_ = db.Update(func(tx *bolt.Tx) error {
b, _ := tx.CreateBucketIfNotExists(bucketName)
testCursorRepeatOperations_PrepareData(t, b)
return nil
})
_ = db.View(func(tx *bolt.Tx) error {
b := tx.Bucket(bucketName)
tc.testFunc(t, b)
return nil
})
})
}
}
func testCursorRepeatOperations_PrepareData(t *testing.T, b *bolt.Bucket) {
// ensure we have at least one branch page.
for i := 0; i < 1000; i++ {
k := []byte(fmt.Sprintf("%05d", i))
err := b.Put(k, k)
require.NoError(t, err)
}
}
func testRepeatCursorOperations_NextPrevNext(t *testing.T, b *bolt.Bucket) {
c := b.Cursor()
c.First()
startKey := []byte(fmt.Sprintf("%05d", 2))
returnedKey, _ := c.Seek(startKey)
require.Equal(t, startKey, returnedKey)
// Step 1: verify next
for i := 3; i < 1000; i++ {
expectedKey := []byte(fmt.Sprintf("%05d", i))
actualKey, _ := c.Next()
require.Equal(t, expectedKey, actualKey)
}
// Once we've reached the end, it should always return nil no matter how many times we call `Next`.
for i := 0; i < 10; i++ {
k, _ := c.Next()
require.Equal(t, []byte(nil), k)
}
// Step 2: verify prev
for i := 998; i >= 0; i-- {
expectedKey := []byte(fmt.Sprintf("%05d", i))
actualKey, _ := c.Prev()
require.Equal(t, expectedKey, actualKey)
}
// Once we've reached the beginning, it should always return nil no matter how many times we call `Prev`.
for i := 0; i < 10; i++ {
k, _ := c.Prev()
require.Equal(t, []byte(nil), k)
}
// Step 3: verify next again
for i := 1; i < 1000; i++ {
expectedKey := []byte(fmt.Sprintf("%05d", i))
actualKey, _ := c.Next()
require.Equal(t, expectedKey, actualKey)
}
}
func testRepeatCursorOperations_PrevNextPrev(t *testing.T, b *bolt.Bucket) {
c := b.Cursor()
startKey := []byte(fmt.Sprintf("%05d", 998))
returnedKey, _ := c.Seek(startKey)
require.Equal(t, startKey, returnedKey)
// Step 1: verify prev
for i := 997; i >= 0; i-- {
expectedKey := []byte(fmt.Sprintf("%05d", i))
actualKey, _ := c.Prev()
require.Equal(t, expectedKey, actualKey)
}
// Once we've reached the beginning, it should always return nil no matter how many times we call `Prev`.
for i := 0; i < 10; i++ {
k, _ := c.Prev()
require.Equal(t, []byte(nil), k)
}
// Step 2: verify next
for i := 1; i < 1000; i++ {
expectedKey := []byte(fmt.Sprintf("%05d", i))
actualKey, _ := c.Next()
require.Equal(t, expectedKey, actualKey)
}
// Once we've reached the end, it should always return nil no matter how many times we call `Next`.
for i := 0; i < 10; i++ {
k, _ := c.Next()
require.Equal(t, []byte(nil), k)
}
// Step 3: verify prev again
for i := 998; i >= 0; i-- {
expectedKey := []byte(fmt.Sprintf("%05d", i))
actualKey, _ := c.Prev()
require.Equal(t, expectedKey, actualKey)
}
}
// Ensure that a cursor can return a reference to the bucket that created it.
func TestCursor_Bucket(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if cb := b.Cursor().Bucket(); !reflect.DeepEqual(cb, b) {
t.Fatal("cursor bucket mismatch")
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can seek to the appropriate keys.
func TestCursor_Seek(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("foo"), []byte("0001")); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("bar"), []byte("0002")); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("baz"), []byte("0003")); err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("bkt")); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("widgets")).Cursor()
// Exact match should go to the key.
if k, v := c.Seek([]byte("bar")); !bytes.Equal(k, []byte("bar")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte("0002")) {
t.Fatalf("unexpected value: %v", v)
}
// Inexact match should go to the next key.
if k, v := c.Seek([]byte("bas")); !bytes.Equal(k, []byte("baz")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte("0003")) {
t.Fatalf("unexpected value: %v", v)
}
// Low key should go to the first key.
if k, v := c.Seek([]byte("")); !bytes.Equal(k, []byte("bar")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte("0002")) {
t.Fatalf("unexpected value: %v", v)
}
// High key should return no key.
if k, v := c.Seek([]byte("zzz")); k != nil {
t.Fatalf("expected nil key: %v", k)
} else if v != nil {
t.Fatalf("expected nil value: %v", v)
}
// Buckets should return their key but no value.
if k, v := c.Seek([]byte("bkt")); !bytes.Equal(k, []byte("bkt")) {
t.Fatalf("unexpected key: %v", k)
} else if v != nil {
t.Fatalf("expected nil value: %v", v)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
func TestCursor_Delete(t *testing.T) {
db := btesting.MustCreateDB(t)
const count = 1000
// Insert every other key between 0 and $count.
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
for i := 0; i < count; i += 1 {
k := make([]byte, 8)
binary.BigEndian.PutUint64(k, uint64(i))
if err := b.Put(k, make([]byte, 100)); err != nil {
t.Fatal(err)
}
}
if _, err := b.CreateBucket([]byte("sub")); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.Update(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("widgets")).Cursor()
bound := make([]byte, 8)
binary.BigEndian.PutUint64(bound, uint64(count/2))
for key, _ := c.First(); bytes.Compare(key, bound) < 0; key, _ = c.Next() {
if err := c.Delete(); err != nil {
t.Fatal(err)
}
}
c.Seek([]byte("sub"))
if err := c.Delete(); err != errors.ErrIncompatibleValue {
t.Fatalf("unexpected error: %s", err)
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
stats := tx.Bucket([]byte("widgets")).Stats()
if stats.KeyN != count/2+1 {
t.Fatalf("unexpected KeyN: %d", stats.KeyN)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can seek to the appropriate keys when there are a
// large number of keys. This test also checks that seek will always move
// forward to the next key.
//
// Related: https://github.com/boltdb/bolt/pull/187
func TestCursor_Seek_Large(t *testing.T) {
db := btesting.MustCreateDB(t)
var count = 10000
// Insert every other key between 0 and $count.
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
for i := 0; i < count; i += 100 {
for j := i; j < i+100; j += 2 {
k := make([]byte, 8)
binary.BigEndian.PutUint64(k, uint64(j))
if err := b.Put(k, make([]byte, 100)); err != nil {
t.Fatal(err)
}
}
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("widgets")).Cursor()
for i := 0; i < count; i++ {
seek := make([]byte, 8)
binary.BigEndian.PutUint64(seek, uint64(i))
k, _ := c.Seek(seek)
// The last seek is beyond the end of the range so
// it should return nil.
if i == count-1 {
if k != nil {
t.Fatal("expected nil key")
}
continue
}
// Otherwise we should seek to the exact key or the next key.
num := binary.BigEndian.Uint64(k)
if i%2 == 0 {
if num != uint64(i) {
t.Fatalf("unexpected num: %d", num)
}
} else {
if num != uint64(i+1) {
t.Fatalf("unexpected num: %d", num)
}
}
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a cursor can iterate over an empty bucket without error.
func TestCursor_EmptyBucket(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte("widgets"))
return err
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("widgets")).Cursor()
k, v := c.First()
if k != nil {
t.Fatalf("unexpected key: %v", k)
} else if v != nil {
t.Fatalf("unexpected value: %v", v)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can reverse iterate over an empty bucket without error.
func TestCursor_EmptyBucketReverse(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte("widgets"))
return err
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("widgets")).Cursor()
k, v := c.Last()
if k != nil {
t.Fatalf("unexpected key: %v", k)
} else if v != nil {
t.Fatalf("unexpected value: %v", v)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can iterate over a single root with a couple elements.
func TestCursor_Iterate_Leaf(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("baz"), []byte{}); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("foo"), []byte{0}); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("bar"), []byte{1}); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
tx, err := db.Begin(false)
if err != nil {
t.Fatal(err)
}
defer func() { _ = tx.Rollback() }()
c := tx.Bucket([]byte("widgets")).Cursor()
k, v := c.First()
if !bytes.Equal(k, []byte("bar")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte{1}) {
t.Fatalf("unexpected value: %v", v)
}
k, v = c.Next()
if !bytes.Equal(k, []byte("baz")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte{}) {
t.Fatalf("unexpected value: %v", v)
}
k, v = c.Next()
if !bytes.Equal(k, []byte("foo")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte{0}) {
t.Fatalf("unexpected value: %v", v)
}
k, v = c.Next()
if k != nil {
t.Fatalf("expected nil key: %v", k)
} else if v != nil {
t.Fatalf("expected nil value: %v", v)
}
k, v = c.Next()
if k != nil {
t.Fatalf("expected nil key: %v", k)
} else if v != nil {
t.Fatalf("expected nil value: %v", v)
}
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can iterate in reverse over a single root with a couple elements.
func TestCursor_LeafRootReverse(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("baz"), []byte{}); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("foo"), []byte{0}); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("bar"), []byte{1}); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
tx, err := db.Begin(false)
if err != nil {
t.Fatal(err)
}
c := tx.Bucket([]byte("widgets")).Cursor()
if k, v := c.Last(); !bytes.Equal(k, []byte("foo")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte{0}) {
t.Fatalf("unexpected value: %v", v)
}
if k, v := c.Prev(); !bytes.Equal(k, []byte("baz")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte{}) {
t.Fatalf("unexpected value: %v", v)
}
if k, v := c.Prev(); !bytes.Equal(k, []byte("bar")) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, []byte{1}) {
t.Fatalf("unexpected value: %v", v)
}
if k, v := c.Prev(); k != nil {
t.Fatalf("expected nil key: %v", k)
} else if v != nil {
t.Fatalf("expected nil value: %v", v)
}
if k, v := c.Prev(); k != nil {
t.Fatalf("expected nil key: %v", k)
} else if v != nil {
t.Fatalf("expected nil value: %v", v)
}
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can restart from the beginning.
func TestCursor_Restart(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("bar"), []byte{}); err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("foo"), []byte{}); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
tx, err := db.Begin(false)
if err != nil {
t.Fatal(err)
}
c := tx.Bucket([]byte("widgets")).Cursor()
if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) {
t.Fatalf("unexpected key: %v", k)
}
if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) {
t.Fatalf("unexpected key: %v", k)
}
if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) {
t.Fatalf("unexpected key: %v", k)
}
if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) {
t.Fatalf("unexpected key: %v", k)
}
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
}
// Ensure that a cursor can skip over empty pages that have been deleted.
func TestCursor_First_EmptyPages(t *testing.T) {
db := btesting.MustCreateDB(t)
// Create 1000 keys in the "widgets" bucket.
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
for i := 0; i < 1000; i++ {
if err := b.Put(u64tob(uint64(i)), []byte{}); err != nil {
t.Fatal(err)
}
}
return nil
}); err != nil {
t.Fatal(err)
}
// Delete half the keys and then try to iterate.
if err := db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("widgets"))
for i := 0; i < 600; i++ {
if err := b.Delete(u64tob(uint64(i))); err != nil {
t.Fatal(err)
}
}
c := b.Cursor()
var n int
for k, _ := c.First(); k != nil; k, _ = c.Next() {
n++
}
if n != 400 {
t.Fatalf("unexpected key count: %d", n)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a cursor can skip over empty pages that have been deleted.
func TestCursor_Last_EmptyPages(t *testing.T) {
db := btesting.MustCreateDB(t)
// Create 1000 keys in the "widgets" bucket.
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
for i := 0; i < 1000; i++ {
if err := b.Put(u64tob(uint64(i)), []byte{}); err != nil {
t.Fatal(err)
}
}
return nil
}); err != nil {
t.Fatal(err)
}
// Delete last 800 elements to ensure last page is empty
if err := db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("widgets"))
for i := 200; i < 1000; i++ {
if err := b.Delete(u64tob(uint64(i))); err != nil {
t.Fatal(err)
}
}
c := b.Cursor()
var n int
for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
n++
}
if n != 200 {
t.Fatalf("unexpected key count: %d", n)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx can iterate over all elements in a bucket.
func TestCursor_QuickCheck(t *testing.T) {
f := func(items testdata) bool {
db := btesting.MustCreateDB(t)
defer db.MustClose()
// Bulk insert all values.
tx, err := db.Begin(true)
if err != nil {
t.Fatal(err)
}
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
for _, item := range items {
if err := b.Put(item.Key, item.Value); err != nil {
t.Fatal(err)
}
}
if err := tx.Commit(); err != nil {
t.Fatal(err)
}
// Sort test data.
sort.Sort(items)
// Iterate over all items and check consistency.
var index = 0
tx, err = db.Begin(false)
if err != nil {
t.Fatal(err)
}
c := tx.Bucket([]byte("widgets")).Cursor()
for k, v := c.First(); k != nil && index < len(items); k, v = c.Next() {
if !bytes.Equal(k, items[index].Key) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, items[index].Value) {
t.Fatalf("unexpected value: %v", v)
}
index++
}
if len(items) != index {
t.Fatalf("unexpected item count: %v, expected %v", len(items), index)
}
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
return true
}
if err := quick.Check(f, qconfig()); err != nil {
t.Error(err)
}
}
// Ensure that a transaction can iterate over all elements in a bucket in reverse.
func TestCursor_QuickCheck_Reverse(t *testing.T) {
f := func(items testdata) bool {
db := btesting.MustCreateDB(t)
defer db.MustClose()
// Bulk insert all values.
tx, err := db.Begin(true)
if err != nil {
t.Fatal(err)
}
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
for _, item := range items {
if err := b.Put(item.Key, item.Value); err != nil {
t.Fatal(err)
}
}
if err := tx.Commit(); err != nil {
t.Fatal(err)
}
// Sort test data.
sort.Sort(revtestdata(items))
// Iterate over all items and check consistency.
var index = 0
tx, err = db.Begin(false)
if err != nil {
t.Fatal(err)
}
c := tx.Bucket([]byte("widgets")).Cursor()
for k, v := c.Last(); k != nil && index < len(items); k, v = c.Prev() {
if !bytes.Equal(k, items[index].Key) {
t.Fatalf("unexpected key: %v", k)
} else if !bytes.Equal(v, items[index].Value) {
t.Fatalf("unexpected value: %v", v)
}
index++
}
if len(items) != index {
t.Fatalf("unexpected item count: %v, expected %v", len(items), index)
}
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
return true
}
if err := quick.Check(f, qconfig()); err != nil {
t.Error(err)
}
}
// Ensure that a Tx cursor can iterate over subbuckets.
func TestCursor_QuickCheck_BucketsOnly(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("foo")); err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("bar")); err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("baz")); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
var names []string
c := tx.Bucket([]byte("widgets")).Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
names = append(names, string(k))
if v != nil {
t.Fatalf("unexpected value: %v", v)
}
}
if !reflect.DeepEqual(names, []string{"bar", "baz", "foo"}) {
t.Fatalf("unexpected names: %+v", names)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
// Ensure that a Tx cursor can reverse iterate over subbuckets.
func TestCursor_QuickCheck_BucketsOnly_Reverse(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("widgets"))
if err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("foo")); err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("bar")); err != nil {
t.Fatal(err)
}
if _, err := b.CreateBucket([]byte("baz")); err != nil {
t.Fatal(err)
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.View(func(tx *bolt.Tx) error {
var names []string
c := tx.Bucket([]byte("widgets")).Cursor()
for k, v := c.Last(); k != nil; k, v = c.Prev() {
names = append(names, string(k))
if v != nil {
t.Fatalf("unexpected value: %v", v)
}
}
if !reflect.DeepEqual(names, []string{"foo", "baz", "bar"}) {
t.Fatalf("unexpected names: %+v", names)
}
return nil
}); err != nil {
t.Fatal(err)
}
}
func ExampleCursor() {
// Open the database.
db, err := bolt.Open(tempfile(), 0600, nil)
if err != nil {
log.Fatal(err)
}
defer os.Remove(db.Path())
// Start a read-write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
// Create a new bucket.
b, err := tx.CreateBucket([]byte("animals"))
if err != nil {
return err
}
// Insert data into a bucket.
if err := b.Put([]byte("dog"), []byte("fun")); err != nil {
log.Fatal(err)
}
if err := b.Put([]byte("cat"), []byte("lame")); err != nil {
log.Fatal(err)
}
if err := b.Put([]byte("liger"), []byte("awesome")); err != nil {
log.Fatal(err)
}
// Create a cursor for iteration.
c := b.Cursor()
// Iterate over items in sorted key order. This starts from the
// first key/value pair and updates the k/v variables to the
// next key/value on each iteration.
//
// The loop finishes at the end of the cursor when a nil key is returned.
for k, v := c.First(); k != nil; k, v = c.Next() {
fmt.Printf("A %s is %s.\n", k, v)
}
return nil
}); err != nil {
log.Fatal(err)
}
if err := db.Close(); err != nil {
log.Fatal(err)
}
// Output:
// A cat is lame.
// A dog is fun.
// A liger is awesome.
}
func ExampleCursor_reverse() {
// Open the database.
db, err := bolt.Open(tempfile(), 0600, nil)
if err != nil {
log.Fatal(err)
}
defer os.Remove(db.Path())
// Start a read-write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
// Create a new bucket.
b, err := tx.CreateBucket([]byte("animals"))
if err != nil {
return err
}
// Insert data into a bucket.
if err := b.Put([]byte("dog"), []byte("fun")); err != nil {
log.Fatal(err)
}
if err := b.Put([]byte("cat"), []byte("lame")); err != nil {
log.Fatal(err)
}
if err := b.Put([]byte("liger"), []byte("awesome")); err != nil {
log.Fatal(err)
}
// Create a cursor for iteration.
c := b.Cursor()
// Iterate over items in reverse sorted key order. This starts
// from the last key/value pair and updates the k/v variables to
// the previous key/value on each iteration.
//
// The loop finishes at the beginning of the cursor when a nil key
// is returned.
for k, v := c.Last(); k != nil; k, v = c.Prev() {
fmt.Printf("A %s is %s.\n", k, v)
}
return nil
}); err != nil {
log.Fatal(err)
}
// Close the database to release the file lock.
if err := db.Close(); err != nil {
log.Fatal(err)
}
// Output:
// A liger is awesome.
// A dog is fun.
// A cat is lame.
}
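A common pattern built on the Cursor API exercised above is a prefix scan: Seek jumps to the first key at or after the prefix, and iteration stops as soon as a key no longer shares it. The helper below is an illustrative sketch, not part of this commit; the function name and callback shape are assumptions, and it presumes the usual bytes and bolt imports.

// scanPrefix visits every key/value pair in b whose key starts with
// prefix. Because keys are stored in sorted order, Seek can jump
// straight to the first candidate. (Illustrative sketch.)
func scanPrefix(b *bolt.Bucket, prefix []byte, fn func(k, v []byte) error) error {
	c := b.Cursor()
	for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() {
		if err := fn(k, v); err != nil {
			return err
		}
	}
	return nil
}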

db.go (new file, 1417 lines): diff suppressed because it is too large

db_test.go (new file, 1904 lines): diff suppressed because it is too large

db_whitebox_test.go (new file, 126 lines)
@@ -0,0 +1,126 @@
package bbolt
import (
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/tutus-one/tutus-bolt/errors"
)
func TestOpenWithPreLoadFreelist(t *testing.T) {
testCases := []struct {
name string
readonly bool
preLoadFreePage bool
expectedFreePagesLoaded bool
}{
{
name: "write mode always load free pages",
readonly: false,
preLoadFreePage: false,
expectedFreePagesLoaded: true,
},
{
name: "readonly mode load free pages when flag set",
readonly: true,
preLoadFreePage: true,
expectedFreePagesLoaded: true,
},
{
name: "readonly mode doesn't load free pages when flag not set",
readonly: true,
preLoadFreePage: false,
expectedFreePagesLoaded: false,
},
}
fileName, err := prepareData(t)
require.NoError(t, err)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
db, err := Open(fileName, 0666, &Options{
ReadOnly: tc.readonly,
PreLoadFreelist: tc.preLoadFreePage,
})
require.NoError(t, err)
assert.Equal(t, tc.expectedFreePagesLoaded, db.freelist != nil)
assert.NoError(t, db.Close())
})
}
}
func TestMethodPage(t *testing.T) {
testCases := []struct {
name string
readonly bool
preLoadFreePage bool
expectedError error
}{
{
name: "write mode",
readonly: false,
preLoadFreePage: false,
expectedError: nil,
},
{
name: "readonly mode with preloading free pages",
readonly: true,
preLoadFreePage: true,
expectedError: nil,
},
{
name: "readonly mode without preloading free pages",
readonly: true,
preLoadFreePage: false,
expectedError: errors.ErrFreePagesNotLoaded,
},
}
fileName, err := prepareData(t)
require.NoError(t, err)
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
db, err := Open(fileName, 0666, &Options{
ReadOnly: tc.readonly,
PreLoadFreelist: tc.preLoadFreePage,
})
require.NoError(t, err)
defer db.Close()
tx, err := db.Begin(!tc.readonly)
require.NoError(t, err)
_, err = tx.Page(0)
require.Equal(t, tc.expectedError, err)
if tc.readonly {
require.NoError(t, tx.Rollback())
} else {
require.NoError(t, tx.Commit())
}
require.NoError(t, db.Close())
})
}
}
func prepareData(t *testing.T) (string, error) {
fileName := filepath.Join(t.TempDir(), "db")
db, err := Open(fileName, 0666, nil)
if err != nil {
return "", err
}
if err := db.Close(); err != nil {
return "", err
}
return fileName, nil
}

doc.go (new file, 40 lines)
@@ -0,0 +1,40 @@
/*
Package bbolt implements a low-level key/value store in pure Go. It supports
fully serializable transactions, ACID semantics, and lock-free MVCC with
multiple readers and a single writer. Bolt can be used for projects that
want a simple data store without the need to add large dependencies such as
Postgres or MySQL.
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
optimized for fast read access and does not require recovery in the event of a
system crash. Transactions which have not finished committing will simply be
rolled back in the event of a crash.
The design of Bolt is based on Howard Chu's LMDB database project.
Bolt currently works on Windows, Mac OS X, and Linux.
# Basics
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
a collection of buckets and is represented by a single file on disk. A bucket is
a collection of unique keys that are associated with values.
Transactions provide either read-only or read-write access to the database.
Read-only transactions can retrieve key/value pairs and can use Cursors to
iterate over the dataset sequentially. Read-write transactions can create and
delete buckets and can insert and remove keys. Only one read-write transaction
is allowed at a time.
# Caveats
The database uses a read-only, memory-mapped data file to ensure that
applications cannot corrupt the database; however, this means that keys and
values returned from Bolt cannot be changed. Writing to a read-only byte slice
will cause Go to panic.
Keys and values retrieved from the database are only valid for the life of
the transaction. When used outside the transaction, these byte slices can
point to different data or can point to invalid memory which will cause a panic.
*/
package bbolt
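As a concrete companion to the Basics section above, a minimal round trip through the API might look like the sketch below. It is not part of doc.go; the file name, bucket name, and key are arbitrary, and it assumes imports of fmt, log, and bolt "github.com/tutus-one/tutus-bolt".

// Open (or create) a database file, write one key in a read-write
// transaction, then read it back in a read-only transaction.
db, err := bolt.Open("my.db", 0600, nil)
if err != nil {
	log.Fatal(err)
}
defer db.Close()

if err := db.Update(func(tx *bolt.Tx) error {
	b, err := tx.CreateBucketIfNotExists([]byte("config"))
	if err != nil {
		return err
	}
	return b.Put([]byte("greeting"), []byte("hello"))
}); err != nil {
	log.Fatal(err)
}

if err := db.View(func(tx *bolt.Tx) error {
	// The returned slice is only valid for the life of this transaction.
	fmt.Printf("%s\n", tx.Bucket([]byte("config")).Get([]byte("greeting")))
	return nil
}); err != nil {
	log.Fatal(err)
}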

errors.go (new file, 108 lines)
@@ -0,0 +1,108 @@
package bbolt
import "github.com/tutus-one/tutus-bolt/errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrDatabaseNotOpen = errors.ErrDatabaseNotOpen
// ErrInvalid is returned when both meta pages on a database are invalid.
// This typically occurs when a file is not a bolt database.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrInvalid = errors.ErrInvalid
// ErrInvalidMapping is returned when the database file fails to get mapped.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrInvalidMapping = errors.ErrInvalidMapping
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrVersionMismatch = errors.ErrVersionMismatch
// ErrChecksum is returned when a checksum mismatch occurs on either of the two meta pages.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrChecksum = errors.ErrChecksum
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrTimeout = errors.ErrTimeout
)
// These errors can occur when beginning or committing a Tx.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrTxNotWritable = errors.ErrTxNotWritable
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrTxClosed = errors.ErrTxClosed
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrDatabaseReadOnly = errors.ErrDatabaseReadOnly
// ErrFreePagesNotLoaded is returned when a readonly transaction without
// preloading the free pages is trying to access the free pages.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrFreePagesNotLoaded = errors.ErrFreePagesNotLoaded
)
// These errors can occur when putting or deleting a value or a bucket.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrBucketNotFound = errors.ErrBucketNotFound
// ErrBucketExists is returned when creating a bucket that already exists.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrBucketExists = errors.ErrBucketExists
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrBucketNameRequired = errors.ErrBucketNameRequired
// ErrKeyRequired is returned when inserting a zero-length key.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrKeyRequired = errors.ErrKeyRequired
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrKeyTooLarge = errors.ErrKeyTooLarge
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrValueTooLarge = errors.ErrValueTooLarge
	// ErrIncompatibleValue is returned when trying to create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
//
// Deprecated: Use the error variables defined in the bbolt/errors package.
ErrIncompatibleValue = errors.ErrIncompatibleValue
)
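Because the aliases above are deprecated and point at the sentinels defined in the bbolt/errors package, callers can match failures with errors.Is against either spelling. A small sketch of the preferred form (hypothetical consumer code, not part of this file):

package app // hypothetical consumer package

import (
	stderrors "errors"

	berrors "github.com/tutus-one/tutus-bolt/errors"
)

// isBucketExists reports whether err is (or wraps) ErrBucketExists.
func isBucketExists(err error) bool {
	return stderrors.Is(err, berrors.ErrBucketExists)
}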

errors/errors.go (new file, 87 lines)
@@ -0,0 +1,87 @@
// Package errors defines the error variables that may be returned
// during bbolt operations.
package errors
import "errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
ErrDatabaseNotOpen = errors.New("database not open")
// ErrInvalid is returned when both meta pages on a database are invalid.
// This typically occurs when a file is not a bolt database.
ErrInvalid = errors.New("invalid database")
// ErrInvalidMapping is returned when the database file fails to get mapped.
ErrInvalidMapping = errors.New("database isn't correctly mapped")
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
// ErrChecksum is returned when a checksum mismatch occurs on either of the two meta pages.
ErrChecksum = errors.New("checksum error")
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
ErrTimeout = errors.New("timeout")
)
// These errors can occur when beginning or committing a Tx.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
ErrTxNotWritable = errors.New("tx not writable")
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
// ErrFreePagesNotLoaded is returned when a readonly transaction without
// preloading the free pages is trying to access the free pages.
ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded")
)
// These errors can occur when putting or deleting a value or a bucket.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
ErrBucketNotFound = errors.New("bucket not found")
// ErrBucketExists is returned when creating a bucket that already exists.
ErrBucketExists = errors.New("bucket already exists")
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
ErrBucketNameRequired = errors.New("bucket name required")
// ErrKeyRequired is returned when inserting a zero-length key.
ErrKeyRequired = errors.New("key required")
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
ErrKeyTooLarge = errors.New("key too large")
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrMaxSizeReached is returned when the configured maximum size of the data file is reached.
ErrMaxSizeReached = errors.New("database reached maximum size")
// ErrIncompatibleValue is returned when trying to create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
ErrIncompatibleValue = errors.New("incompatible value")
// ErrSameBuckets is returned when trying to move a sub-bucket between
// source and target buckets, while source and target buckets are the same.
ErrSameBuckets = errors.New("the source and target are the same bucket")
// ErrDifferentDB is returned when trying to move a sub-bucket between
// source and target buckets, while source and target buckets are in different database files.
ErrDifferentDB = errors.New("the source and target buckets are in different database files")
)

go.mod (new file, 21 lines)
@@ -0,0 +1,21 @@
module github.com/tutus-one/tutus-bolt
go 1.24
toolchain go1.24.3
require (
github.com/spf13/cobra v1.9.1
github.com/spf13/pflag v1.0.6
github.com/stretchr/testify v1.10.0
go.etcd.io/gofail v0.2.0
golang.org/x/sync v0.14.0
golang.org/x/sys v0.33.0
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

go.sum (new file, 24 lines)
@@ -0,0 +1,24 @@
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.etcd.io/gofail v0.2.0 h1:p19drv16FKK345a09a1iubchlw/vmRuksmRzgBIGjcA=
go.etcd.io/gofail v0.2.0/go.mod h1:nL3ILMGfkXTekKI3clMBNazKnjUZjYLKmBHzsVAnC1o=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

@@ -0,0 +1,230 @@
package btesting
import (
"flag"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
bolt "github.com/tutus-one/tutus-bolt"
)
var statsFlag = flag.Bool("stats", false, "show performance stats")
const (
	// TestFreelistType is the env variable name tests use to select the freelist backend type.
	TestFreelistType = "TEST_FREELIST_TYPE"
	// TestEnableStrictMode is the env variable name used to enable strict-mode checks by default after opening each DB.
	TestEnableStrictMode = "TEST_ENABLE_STRICT_MODE"
)
// DB is a test wrapper for bolt.DB.
type DB struct {
*bolt.DB
f string
o *bolt.Options
t testing.TB
}
// MustCreateDB returns a new, open DB at a temporary location.
func MustCreateDB(t testing.TB) *DB {
return MustCreateDBWithOption(t, nil)
}
// MustCreateDBWithOption returns a new, open DB at a temporary location with given options.
func MustCreateDBWithOption(t testing.TB, o *bolt.Options) *DB {
f := filepath.Join(t.TempDir(), "db")
return MustOpenDBWithOption(t, f, o)
}
func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
db, err := OpenDBWithOption(t, f, o)
require.NoError(t, err)
require.NotNil(t, db)
return db
}
func OpenDBWithOption(t testing.TB, f string, o *bolt.Options) (*DB, error) {
t.Logf("Opening bbolt DB at: %s", f)
if o == nil {
o = bolt.DefaultOptions
}
freelistType := bolt.FreelistArrayType
if env := os.Getenv(TestFreelistType); env == string(bolt.FreelistMapType) {
freelistType = bolt.FreelistMapType
}
o.FreelistType = freelistType
db, err := bolt.Open(f, 0600, o)
if err != nil {
return nil, err
}
resDB := &DB{
DB: db,
f: f,
o: o,
t: t,
}
resDB.strictModeEnabledDefault()
t.Cleanup(resDB.PostTestCleanup)
return resDB, nil
}
func (db *DB) PostTestCleanup() {
// Check database consistency after every test.
if db.DB != nil {
db.MustCheck()
db.MustClose()
}
}
// Close closes the database but does NOT delete the underlying file.
func (db *DB) Close() error {
if db.DB != nil {
// Log statistics.
if *statsFlag {
db.PrintStats()
}
db.t.Logf("Closing bbolt DB at: %s", db.f)
err := db.DB.Close()
if err != nil {
return err
}
db.DB = nil
}
return nil
}
// MustClose closes the database but does NOT delete the underlying file.
func (db *DB) MustClose() {
err := db.Close()
require.NoError(db.t, err)
}
func (db *DB) MustDeleteFile() {
err := os.Remove(db.Path())
require.NoError(db.t, err)
}
func (db *DB) SetOptions(o *bolt.Options) {
db.o = o
}
// MustReopen reopens the database. Panics on error.
func (db *DB) MustReopen() {
if db.DB != nil {
panic("Please call Close() before MustReopen()")
}
db.t.Logf("Reopening bbolt DB at: %s", db.f)
indb, err := bolt.Open(db.Path(), 0600, db.o)
require.NoError(db.t, err)
db.DB = indb
db.strictModeEnabledDefault()
}
// MustCheck runs a consistency check on the database and aborts the process
// (after saving a copy of the DB) if any errors are found.
func (db *DB) MustCheck() {
err := db.View(func(tx *bolt.Tx) error {
// Collect all the errors.
var errors []error
for err := range tx.Check() {
errors = append(errors, err)
if len(errors) > 10 {
break
}
}
// If errors occurred, copy the DB and print the errors.
if len(errors) > 0 {
var path = filepath.Join(db.t.TempDir(), "db.backup")
err := tx.CopyFile(path, 0600)
require.NoError(db.t, err)
// Print errors.
fmt.Print("\n\n")
fmt.Printf("consistency check failed (%d errors)\n", len(errors))
for _, err := range errors {
fmt.Println(err)
}
fmt.Println("")
fmt.Println("db saved to:")
fmt.Println(path)
fmt.Print("\n\n")
os.Exit(-1)
}
return nil
})
require.NoError(db.t, err)
}
// Fill populates the DB using numTx transactions with numKeysPerTx keys per transaction.
func (db *DB) Fill(bucket []byte, numTx int, numKeysPerTx int,
keyGen func(tx int, key int) []byte,
valueGen func(tx int, key int) []byte) error {
for tr := 0; tr < numTx; tr++ {
err := db.Update(func(tx *bolt.Tx) error {
b, _ := tx.CreateBucketIfNotExists(bucket)
for i := 0; i < numKeysPerTx; i++ {
if err := b.Put(keyGen(tr, i), valueGen(tr, i)); err != nil {
return err
}
}
return nil
})
if err != nil {
return err
}
}
return nil
}
func (db *DB) Path() string {
return db.f
}
// CopyTempFile copies a database to a temporary file.
func (db *DB) CopyTempFile() {
path := filepath.Join(db.t.TempDir(), "db.copy")
err := db.View(func(tx *bolt.Tx) error {
return tx.CopyFile(path, 0600)
})
require.NoError(db.t, err)
fmt.Println("db copied to: ", path)
}
// PrintStats prints the database stats
func (db *DB) PrintStats() {
var stats = db.Stats()
fmt.Printf("[db] %-20s %-20s %-20s\n",
fmt.Sprintf("pg(%d/%d)", stats.TxStats.GetPageCount(), stats.TxStats.GetPageAlloc()),
fmt.Sprintf("cur(%d)", stats.TxStats.GetCursorCount()),
fmt.Sprintf("node(%d/%d)", stats.TxStats.GetNodeCount(), stats.TxStats.GetNodeDeref()),
)
fmt.Printf(" %-20s %-20s %-20s\n",
fmt.Sprintf("rebal(%d/%v)", stats.TxStats.GetRebalance(), truncDuration(stats.TxStats.GetRebalanceTime())),
fmt.Sprintf("spill(%d/%v)", stats.TxStats.GetSpill(), truncDuration(stats.TxStats.GetSpillTime())),
fmt.Sprintf("w(%d/%v)", stats.TxStats.GetWrite(), truncDuration(stats.TxStats.GetWriteTime())),
)
}
func truncDuration(d time.Duration) string {
return regexp.MustCompile(`^(\d+)(\.\d+)`).ReplaceAllString(d.String(), "$1")
}
func (db *DB) strictModeEnabledDefault() {
strictModeEnabled := strings.ToLower(os.Getenv(TestEnableStrictMode))
db.StrictMode = strictModeEnabled == "true"
}
func (db *DB) ForceDisableStrictMode() {
db.StrictMode = false
}
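A sketch of how a test might lean on this harness (the test is hypothetical, not part of this commit, and assumes imports of fmt, testing, require, and this btesting package): Fill writes through db.Update, MustClose and MustReopen cycle the file, and the consistency check registered via t.Cleanup runs automatically afterwards.

// TestFillAndReopen is an illustrative example of the harness API.
func TestFillAndReopen(t *testing.T) {
	db := btesting.MustCreateDB(t)
	err := db.Fill([]byte("data"), 1, 10,
		func(tx int, k int) []byte { return []byte(fmt.Sprintf("key-%d-%d", tx, k)) },
		func(tx int, k int) []byte { return make([]byte, 16) },
	)
	require.NoError(t, err)
	db.MustClose()
	db.MustReopen()
}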

internal/common/bucket.go (new file, 54 lines)
@@ -0,0 +1,54 @@
package common
import (
"fmt"
"unsafe"
)
const BucketHeaderSize = int(unsafe.Sizeof(InBucket{}))
// InBucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
type InBucket struct {
root Pgid // page id of the bucket's root-level page
sequence uint64 // monotonically incrementing, used by NextSequence()
}
func NewInBucket(root Pgid, seq uint64) InBucket {
return InBucket{
root: root,
sequence: seq,
}
}
func (b *InBucket) RootPage() Pgid {
return b.root
}
func (b *InBucket) SetRootPage(id Pgid) {
b.root = id
}
// InSequence returns the sequence. It is not named `Sequence` to avoid
// colliding with `(*Bucket) Sequence()`.
func (b *InBucket) InSequence() uint64 {
return b.sequence
}
func (b *InBucket) SetInSequence(v uint64) {
b.sequence = v
}
func (b *InBucket) IncSequence() {
b.sequence++
}
func (b *InBucket) InlinePage(v []byte) *Page {
return (*Page)(unsafe.Pointer(&v[BucketHeaderSize]))
}
func (b *InBucket) String() string {
return fmt.Sprintf("<pgid=%d,seq=%d>", b.root, b.sequence)
}

internal/common/inode.go (new file, 115 lines)
@@ -0,0 +1,115 @@
package common
import "unsafe"
// Inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
type Inode struct {
flags uint32
pgid Pgid
key []byte
value []byte
}
type Inodes []Inode
func (in *Inode) Flags() uint32 {
return in.flags
}
func (in *Inode) SetFlags(flags uint32) {
in.flags = flags
}
func (in *Inode) Pgid() Pgid {
return in.pgid
}
func (in *Inode) SetPgid(id Pgid) {
in.pgid = id
}
func (in *Inode) Key() []byte {
return in.key
}
func (in *Inode) SetKey(key []byte) {
in.key = key
}
func (in *Inode) Value() []byte {
return in.value
}
func (in *Inode) SetValue(value []byte) {
in.value = value
}
func ReadInodeFromPage(p *Page) Inodes {
inodes := make(Inodes, int(p.Count()))
isLeaf := p.IsLeafPage()
for i := 0; i < int(p.Count()); i++ {
inode := &inodes[i]
if isLeaf {
elem := p.LeafPageElement(uint16(i))
inode.SetFlags(elem.Flags())
inode.SetKey(elem.Key())
inode.SetValue(elem.Value())
} else {
elem := p.BranchPageElement(uint16(i))
inode.SetPgid(elem.Pgid())
inode.SetKey(elem.Key())
}
Assert(len(inode.Key()) > 0, "read: zero-length inode key")
}
return inodes
}
func WriteInodeToPage(inodes Inodes, p *Page) uint32 {
// Loop over each item and write it to the page.
// off tracks the offset into p of the start of the next data.
off := unsafe.Sizeof(*p) + p.PageElementSize()*uintptr(len(inodes))
isLeaf := p.IsLeafPage()
for i, item := range inodes {
Assert(len(item.Key()) > 0, "write: zero-length inode key")
// Create a slice to write into of needed size and advance
// byte pointer for next iteration.
sz := len(item.Key()) + len(item.Value())
b := UnsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
off += uintptr(sz)
// Write the page element.
if isLeaf {
elem := p.LeafPageElement(uint16(i))
elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
elem.SetFlags(item.Flags())
elem.SetKsize(uint32(len(item.Key())))
elem.SetVsize(uint32(len(item.Value())))
} else {
elem := p.BranchPageElement(uint16(i))
elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
elem.SetKsize(uint32(len(item.Key())))
elem.SetPgid(item.Pgid())
Assert(elem.Pgid() != p.Id(), "write: circular dependency occurred")
}
// Write data for the element to the end of the page.
l := copy(b, item.Key())
copy(b[l:], item.Value())
}
return uint32(off)
}
func UsedSpaceInPage(inodes Inodes, p *Page) uint32 {
off := unsafe.Sizeof(*p) + p.PageElementSize()*uintptr(len(inodes))
for _, item := range inodes {
sz := len(item.Key()) + len(item.Value())
off += uintptr(sz)
}
return uint32(off)
}
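WriteInodeToPage and ReadInodeFromPage are inverses over a page buffer: the caller sets the page flags and count, writes the inodes, then reads them back. A round-trip sketch (illustrative only, assuming it lives in package common; the buffer size and keys are arbitrary):

// inodeRoundTripDemo serializes two inodes into a leaf page backed by
// a plain byte buffer, then decodes them again.
func inodeRoundTripDemo() {
	buf := make([]byte, 4096)
	p := LoadPage(buf)
	p.SetFlags(LeafPageFlag)
	p.SetCount(2)

	inodes := make(Inodes, 2)
	inodes[0].SetKey([]byte("a"))
	inodes[0].SetValue([]byte("1"))
	inodes[1].SetKey([]byte("b"))
	inodes[1].SetValue([]byte("2"))

	WriteInodeToPage(inodes, p) // returns the bytes used; ignored here
	decoded := ReadInodeFromPage(p)
	_ = decoded // decoded[0].Key() == "a" and decoded[1].Value() == "2"
}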

internal/common/meta.go (new file, 161 lines)
@@ -0,0 +1,161 @@
package common
import (
"fmt"
"hash/fnv"
"io"
"unsafe"
"github.com/tutus-one/tutus-bolt/errors"
)
type Meta struct {
magic uint32
version uint32
pageSize uint32
flags uint32
root InBucket
freelist Pgid
pgid Pgid
txid Txid
checksum uint64
}
// Validate checks the marker bytes and version of the meta page to ensure it matches this binary.
func (m *Meta) Validate() error {
if m.magic != Magic {
return errors.ErrInvalid
} else if m.version != Version {
return errors.ErrVersionMismatch
} else if m.checksum != m.Sum64() {
return errors.ErrChecksum
}
return nil
}
// Copy copies one meta object to another.
func (m *Meta) Copy(dest *Meta) {
*dest = *m
}
// Write writes the meta onto a page.
func (m *Meta) Write(p *Page) {
if m.root.root >= m.pgid {
panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
} else if m.freelist >= m.pgid && m.freelist != PgidNoFreelist {
// TODO: reject pgidNoFreeList if !NoFreelistSync
panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
}
// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
p.id = Pgid(m.txid % 2)
p.SetFlags(MetaPageFlag)
// Calculate the checksum.
m.checksum = m.Sum64()
m.Copy(p.Meta())
}
// Sum64 generates the checksum for the meta.
func (m *Meta) Sum64() uint64 {
var h = fnv.New64a()
_, _ = h.Write((*[unsafe.Offsetof(Meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
return h.Sum64()
}
func (m *Meta) Magic() uint32 {
return m.magic
}
func (m *Meta) SetMagic(v uint32) {
m.magic = v
}
func (m *Meta) Version() uint32 {
return m.version
}
func (m *Meta) SetVersion(v uint32) {
m.version = v
}
func (m *Meta) PageSize() uint32 {
return m.pageSize
}
func (m *Meta) SetPageSize(v uint32) {
m.pageSize = v
}
func (m *Meta) Flags() uint32 {
return m.flags
}
func (m *Meta) SetFlags(v uint32) {
m.flags = v
}
func (m *Meta) SetRootBucket(b InBucket) {
m.root = b
}
func (m *Meta) RootBucket() *InBucket {
return &m.root
}
func (m *Meta) Freelist() Pgid {
return m.freelist
}
func (m *Meta) SetFreelist(v Pgid) {
m.freelist = v
}
func (m *Meta) IsFreelistPersisted() bool {
return m.freelist != PgidNoFreelist
}
func (m *Meta) Pgid() Pgid {
return m.pgid
}
func (m *Meta) SetPgid(id Pgid) {
m.pgid = id
}
func (m *Meta) Txid() Txid {
return m.txid
}
func (m *Meta) SetTxid(id Txid) {
m.txid = id
}
func (m *Meta) IncTxid() {
m.txid += 1
}
func (m *Meta) DecTxid() {
m.txid -= 1
}
func (m *Meta) Checksum() uint64 {
return m.checksum
}
func (m *Meta) SetChecksum(v uint64) {
m.checksum = v
}
func (m *Meta) Print(w io.Writer) {
fmt.Fprintf(w, "Version: %d\n", m.version)
fmt.Fprintf(w, "Page Size: %d bytes\n", m.pageSize)
fmt.Fprintf(w, "Flags: %08x\n", m.flags)
fmt.Fprintf(w, "Root: <pgid=%d>\n", m.root.root)
fmt.Fprintf(w, "Freelist: <pgid=%d>\n", m.freelist)
fmt.Fprintf(w, "HWM: <pgid=%d>\n", m.pgid)
fmt.Fprintf(w, "Txn ID: %d\n", m.txid)
fmt.Fprintf(w, "Checksum: %016x\n", m.checksum)
fmt.Fprintf(w, "\n")
}
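Because Sum64 hashes every byte of the struct up to, but not including, the checksum field, Validate fails as soon as anything before the checksum is mutated after it was computed. A small sketch (illustrative only, assuming it lives in package common with fmt imported):

// metaChecksumDemo shows that a Meta validates only while magic,
// version, and checksum all agree.
func metaChecksumDemo() {
	var m Meta
	m.SetMagic(Magic)
	m.SetVersion(Version)
	m.SetChecksum(m.Sum64())
	fmt.Println(m.Validate()) // <nil>

	m.SetPgid(42) // mutate a field covered by the checksum
	fmt.Println(m.Validate()) // checksum error
}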

internal/common/page.go (new file, 391 lines)
@@ -0,0 +1,391 @@
package common
import (
"fmt"
"os"
"sort"
"unsafe"
)
const PageHeaderSize = unsafe.Sizeof(Page{})
const MinKeysPerPage = 2
const BranchPageElementSize = unsafe.Sizeof(branchPageElement{})
const LeafPageElementSize = unsafe.Sizeof(leafPageElement{})
const pgidSize = unsafe.Sizeof(Pgid(0))
const (
BranchPageFlag = 0x01
LeafPageFlag = 0x02
MetaPageFlag = 0x04
FreelistPageFlag = 0x10
)
const (
BucketLeafFlag = 0x01
)
type Pgid uint64
type Page struct {
id Pgid
flags uint16
count uint16
overflow uint32
}
func NewPage(id Pgid, flags, count uint16, overflow uint32) *Page {
return &Page{
id: id,
flags: flags,
count: count,
overflow: overflow,
}
}
// Typ returns a human-readable page type string used for debugging.
func (p *Page) Typ() string {
if p.IsBranchPage() {
return "branch"
} else if p.IsLeafPage() {
return "leaf"
} else if p.IsMetaPage() {
return "meta"
} else if p.IsFreelistPage() {
return "freelist"
}
return fmt.Sprintf("unknown<%02x>", p.flags)
}
func (p *Page) IsBranchPage() bool {
return p.flags == BranchPageFlag
}
func (p *Page) IsLeafPage() bool {
return p.flags == LeafPageFlag
}
func (p *Page) IsMetaPage() bool {
return p.flags == MetaPageFlag
}
func (p *Page) IsFreelistPage() bool {
return p.flags == FreelistPageFlag
}
// Meta returns a pointer to the metadata section of the page.
func (p *Page) Meta() *Meta {
return (*Meta)(UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
}
func (p *Page) FastCheck(id Pgid) {
Assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id)
// Only one flag of page-type can be set.
Assert(p.IsBranchPage() ||
p.IsLeafPage() ||
p.IsMetaPage() ||
p.IsFreelistPage(),
"page %v: has unexpected type/flags: %x", p.id, p.flags)
}
// LeafPageElement retrieves the leaf node by index
func (p *Page) LeafPageElement(index uint16) *leafPageElement {
return (*leafPageElement)(UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
LeafPageElementSize, int(index)))
}
// LeafPageElements retrieves a list of leaf nodes.
func (p *Page) LeafPageElements() []leafPageElement {
if p.count == 0 {
return nil
}
data := UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
elems := unsafe.Slice((*leafPageElement)(data), int(p.count))
return elems
}
// BranchPageElement retrieves the branch node by index
func (p *Page) BranchPageElement(index uint16) *branchPageElement {
return (*branchPageElement)(UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
unsafe.Sizeof(branchPageElement{}), int(index)))
}
// BranchPageElements retrieves a list of branch nodes.
func (p *Page) BranchPageElements() []branchPageElement {
if p.count == 0 {
return nil
}
data := UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
elems := unsafe.Slice((*branchPageElement)(data), int(p.count))
return elems
}
func (p *Page) FreelistPageCount() (int, int) {
Assert(p.IsFreelistPage(), fmt.Sprintf("can't get freelist page count from a non-freelist page: %2x", p.flags))
// If the page.count is at the max uint16 value (64k) then it's considered
// an overflow and the size of the freelist is stored as the first element.
var idx, count = 0, int(p.count)
if count == 0xFFFF {
idx = 1
c := *(*Pgid)(UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
count = int(c)
if count < 0 {
panic(fmt.Sprintf("leading element count %d overflows int", c))
}
}
return idx, count
}
func (p *Page) FreelistPageIds() []Pgid {
Assert(p.IsFreelistPage(), fmt.Sprintf("can't get freelist page IDs from a non-freelist page: %2x", p.flags))
idx, count := p.FreelistPageCount()
if count == 0 {
return nil
}
data := UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), pgidSize, idx)
ids := unsafe.Slice((*Pgid)(data), count)
return ids
}
// hexdump writes n bytes of the page to STDERR as hex output.
func (p *Page) hexdump(n int) {
buf := UnsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
fmt.Fprintf(os.Stderr, "%x\n", buf)
}
func (p *Page) PageElementSize() uintptr {
if p.IsLeafPage() {
return LeafPageElementSize
}
return BranchPageElementSize
}
func (p *Page) Id() Pgid {
return p.id
}
func (p *Page) SetId(target Pgid) {
p.id = target
}
func (p *Page) Flags() uint16 {
return p.flags
}
func (p *Page) SetFlags(v uint16) {
p.flags = v
}
func (p *Page) Count() uint16 {
return p.count
}
func (p *Page) SetCount(target uint16) {
p.count = target
}
func (p *Page) Overflow() uint32 {
return p.overflow
}
func (p *Page) SetOverflow(target uint32) {
p.overflow = target
}
func (p *Page) String() string {
return fmt.Sprintf("ID: %d, Type: %s, count: %d, overflow: %d", p.id, p.Typ(), p.count, p.overflow)
}
type Pages []*Page
func (s Pages) Len() int { return len(s) }
func (s Pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s Pages) Less(i, j int) bool { return s[i].id < s[j].id }
// branchPageElement represents a node on a branch page.
type branchPageElement struct {
pos uint32
ksize uint32
pgid Pgid
}
func (n *branchPageElement) Pos() uint32 {
return n.pos
}
func (n *branchPageElement) SetPos(v uint32) {
n.pos = v
}
func (n *branchPageElement) Ksize() uint32 {
return n.ksize
}
func (n *branchPageElement) SetKsize(v uint32) {
n.ksize = v
}
func (n *branchPageElement) Pgid() Pgid {
return n.pgid
}
func (n *branchPageElement) SetPgid(v Pgid) {
n.pgid = v
}
// Key returns a byte slice of the node key.
func (n *branchPageElement) Key() []byte {
return UnsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
}
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
flags uint32
pos uint32
ksize uint32
vsize uint32
}
func NewLeafPageElement(flags, pos, ksize, vsize uint32) *leafPageElement {
return &leafPageElement{
flags: flags,
pos: pos,
ksize: ksize,
vsize: vsize,
}
}
func (n *leafPageElement) Flags() uint32 {
return n.flags
}
func (n *leafPageElement) SetFlags(v uint32) {
n.flags = v
}
func (n *leafPageElement) Pos() uint32 {
return n.pos
}
func (n *leafPageElement) SetPos(v uint32) {
n.pos = v
}
func (n *leafPageElement) Ksize() uint32 {
return n.ksize
}
func (n *leafPageElement) SetKsize(v uint32) {
n.ksize = v
}
func (n *leafPageElement) Vsize() uint32 {
return n.vsize
}
func (n *leafPageElement) SetVsize(v uint32) {
n.vsize = v
}
// Key returns a byte slice of the node key.
func (n *leafPageElement) Key() []byte {
i := int(n.pos)
j := i + int(n.ksize)
return UnsafeByteSlice(unsafe.Pointer(n), 0, i, j)
}
// Value returns a byte slice of the node value.
func (n *leafPageElement) Value() []byte {
i := int(n.pos) + int(n.ksize)
j := i + int(n.vsize)
return UnsafeByteSlice(unsafe.Pointer(n), 0, i, j)
}
func (n *leafPageElement) IsBucketEntry() bool {
return n.flags&uint32(BucketLeafFlag) != 0
}
func (n *leafPageElement) Bucket() *InBucket {
if n.IsBucketEntry() {
return LoadBucket(n.Value())
} else {
return nil
}
}
// PageInfo represents human readable information about a page.
type PageInfo struct {
ID int
Type string
Count int
OverflowCount int
}
type Pgids []Pgid
func (s Pgids) Len() int { return len(s) }
func (s Pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s Pgids) Less(i, j int) bool { return s[i] < s[j] }
// Merge returns the sorted union of a and b.
func (s Pgids) Merge(b Pgids) Pgids {
// Return the opposite slice if one is nil.
if len(s) == 0 {
return b
}
if len(b) == 0 {
return s
}
merged := make(Pgids, len(s)+len(b))
Mergepgids(merged, s, b)
return merged
}
// Mergepgids copies the sorted union of a and b into dst.
// If dst is too small, it panics.
func Mergepgids(dst, a, b Pgids) {
if len(dst) < len(a)+len(b) {
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
}
// Copy in the opposite slice if one is nil.
if len(a) == 0 {
copy(dst, b)
return
}
if len(b) == 0 {
copy(dst, a)
return
}
// Merged will hold all elements from both lists.
merged := dst[:0]
// Assign lead to the slice with a lower starting value, follow to the higher value.
lead, follow := a, b
if b[0] < a[0] {
lead, follow = b, a
}
// Continue while there are elements in the lead.
for len(lead) > 0 {
// Merge largest prefix of lead that is ahead of follow[0].
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
merged = append(merged, lead[:n]...)
if n >= len(lead) {
break
}
// Swap lead and follow.
lead, follow = follow, lead[n:]
}
// Append what's left in follow.
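	// The result of append is deliberately discarded: merged was created
	// as dst[:0], so it shares dst's backing array and the copied
	// elements land directly in dst.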
_ = append(merged, follow...)
}

@@ -0,0 +1,72 @@
package common
import (
"reflect"
"sort"
"testing"
"testing/quick"
)
// Ensure that the page type can be returned in human readable format.
func TestPage_typ(t *testing.T) {
if typ := (&Page{flags: BranchPageFlag}).Typ(); typ != "branch" {
t.Fatalf("exp=branch; got=%v", typ)
}
if typ := (&Page{flags: LeafPageFlag}).Typ(); typ != "leaf" {
t.Fatalf("exp=leaf; got=%v", typ)
}
if typ := (&Page{flags: MetaPageFlag}).Typ(); typ != "meta" {
t.Fatalf("exp=meta; got=%v", typ)
}
if typ := (&Page{flags: FreelistPageFlag}).Typ(); typ != "freelist" {
t.Fatalf("exp=freelist; got=%v", typ)
}
if typ := (&Page{flags: 20000}).Typ(); typ != "unknown<4e20>" {
t.Fatalf("exp=unknown<4e20>; got=%v", typ)
}
}
// Ensure that the hexdump debugging function doesn't blow up.
func TestPage_dump(t *testing.T) {
(&Page{id: 256}).hexdump(16)
}
func TestPgids_merge(t *testing.T) {
a := Pgids{4, 5, 6, 10, 11, 12, 13, 27}
b := Pgids{1, 3, 8, 9, 25, 30}
c := a.Merge(b)
if !reflect.DeepEqual(c, Pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) {
t.Errorf("mismatch: %v", c)
}
a = Pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36}
b = Pgids{8, 9, 25, 30}
c = a.Merge(b)
if !reflect.DeepEqual(c, Pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) {
t.Errorf("mismatch: %v", c)
}
}
func TestPgids_merge_quick(t *testing.T) {
if err := quick.Check(func(a, b Pgids) bool {
// Sort incoming lists.
sort.Sort(a)
sort.Sort(b)
// Merge the two lists together.
got := a.Merge(b)
// The expected value should be the two lists combined and sorted.
exp := append(a, b...)
sort.Sort(exp)
if !reflect.DeepEqual(exp, got) {
t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
return false
}
return true
}, nil); err != nil {
t.Fatal(err)
}
}

internal/common/types.go (new file, 40 lines)
@@ -0,0 +1,40 @@
package common
import (
"os"
"runtime"
"time"
)
// MaxMmapStep is the largest step that can be taken when remapping the mmap.
const MaxMmapStep = 1 << 30 // 1GB
// Version represents the data file format version.
const Version uint32 = 2
// Magic represents a marker value to indicate that a file is a Bolt DB.
const Magic uint32 = 0xED0CDAED
const PgidNoFreelist Pgid = 0xffffffffffffffff
// DO NOT EDIT. Copied from the "bolt" package.
const pageMaxAllocSize = 0xFFFFFFF
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"
// Default values if not set in a DB instance.
const (
DefaultMaxBatchSize int = 1000
DefaultMaxBatchDelay = 10 * time.Millisecond
DefaultAllocSize = 16 * 1024 * 1024
)
// DefaultPageSize is the default page size for db which is set to the OS page size.
var DefaultPageSize = os.Getpagesize()
// Txid represents the internal transaction identifier.
type Txid uint64

internal/common/unsafe.go (new file, 27 lines)
@@ -0,0 +1,27 @@
package common
import (
"unsafe"
)
func UnsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset)
}
func UnsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz)
}
func UnsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
//
// This memory is not allocated from C, but it is unmanaged by Go's
// garbage collector and should behave similarly, and the compiler
// should produce similar code. Note that this conversion allows a
// subslice to begin after the base address, with an optional offset,
// while the URL above does not cover this case and only slices from
// index 0. However, the wiki never says that the address must be to
// the beginning of a C allocation (or even that malloc was used at
// all), so this is believed to be correct.
return (*[pageMaxAllocSize]byte)(UnsafeAdd(base, offset))[i:j:j]
}
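The i and j indices are relative to base+offset, so the returned slice is effectively (base+offset)[i:j] with capacity j-i. A tiny illustration (not part of this file, assuming it lives in package common with fmt imported):

// unsafeByteSliceDemo views bytes 2 through 4 of arr without copying.
func unsafeByteSliceDemo() {
	arr := [8]byte{0, 1, 2, 3, 4, 5, 6, 7}
	b := UnsafeByteSlice(unsafe.Pointer(&arr[0]), 0, 2, 5)
	fmt.Println(b, len(b), cap(b)) // [2 3 4] 3 3
}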

internal/common/utils.go (new file, 64 lines)
@@ -0,0 +1,64 @@
package common
import (
"fmt"
"io"
"os"
"unsafe"
)
func LoadBucket(buf []byte) *InBucket {
return (*InBucket)(unsafe.Pointer(&buf[0]))
}
func LoadPage(buf []byte) *Page {
return (*Page)(unsafe.Pointer(&buf[0]))
}
func LoadPageMeta(buf []byte) *Meta {
return (*Meta)(unsafe.Pointer(&buf[PageHeaderSize]))
}
func CopyFile(srcPath, dstPath string) error {
// Ensure source file exists.
_, err := os.Stat(srcPath)
if os.IsNotExist(err) {
return fmt.Errorf("source file %q not found", srcPath)
} else if err != nil {
return err
}
	// Ensure the output file does not exist.
_, err = os.Stat(dstPath)
if err == nil {
return fmt.Errorf("output file %q already exists", dstPath)
} else if !os.IsNotExist(err) {
return err
}
srcDB, err := os.Open(srcPath)
if err != nil {
return fmt.Errorf("failed to open source file %q: %w", srcPath, err)
}
defer srcDB.Close()
dstDB, err := os.Create(dstPath)
if err != nil {
return fmt.Errorf("failed to create output file %q: %w", dstPath, err)
}
defer dstDB.Close()
written, err := io.Copy(dstDB, srcDB)
if err != nil {
return fmt.Errorf("failed to copy database file from %q to %q: %w", srcPath, dstPath, err)
}
srcFi, err := srcDB.Stat()
if err != nil {
return fmt.Errorf("failed to get source file info %q: %w", srcPath, err)
}
initialSize := srcFi.Size()
if initialSize != written {
return fmt.Errorf("the byte copied (%q: %d) isn't equal to the initial db size (%q: %d)", dstPath, written, srcPath, initialSize)
}
return nil
}

internal/common/verify.go (new file, 67 lines)
@@ -0,0 +1,67 @@
// Copied from https://github.com/etcd-io/etcd/blob/main/client/pkg/verify/verify.go
package common
import (
"fmt"
"os"
"strings"
)
const ENV_VERIFY = "BBOLT_VERIFY"
type VerificationType string
const (
ENV_VERIFY_VALUE_ALL VerificationType = "all"
ENV_VERIFY_VALUE_ASSERT VerificationType = "assert"
)
func getEnvVerify() string {
return strings.ToLower(os.Getenv(ENV_VERIFY))
}
func IsVerificationEnabled(verification VerificationType) bool {
env := getEnvVerify()
return env == string(ENV_VERIFY_VALUE_ALL) || env == strings.ToLower(string(verification))
}
// EnableVerifications sets `ENV_VERIFY` and returns a function that
// can be used to restore the original settings.
func EnableVerifications(verification VerificationType) func() {
previousEnv := getEnvVerify()
os.Setenv(ENV_VERIFY, string(verification))
return func() {
os.Setenv(ENV_VERIFY, previousEnv)
}
}
// EnableAllVerifications enables verification and returns a function
// that can be used to restore the original settings.
func EnableAllVerifications() func() {
return EnableVerifications(ENV_VERIFY_VALUE_ALL)
}
// DisableVerifications unsets `ENV_VERIFY` and returns a function that
// can be used to restore the original settings.
func DisableVerifications() func() {
previousEnv := getEnvVerify()
os.Unsetenv(ENV_VERIFY)
return func() {
os.Setenv(ENV_VERIFY, previousEnv)
}
}
// Verify executes f if assertion verification is enabled.
// In the default setup it runs in tests and is skipped in production code.
func Verify(f func()) {
if IsVerificationEnabled(ENV_VERIFY_VALUE_ASSERT) {
f()
}
}
// Assert will panic with a given formatted message if the given condition is false.
func Assert(condition bool, msg string, v ...any) {
if !condition {
panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
}
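A typical usage sketch (illustrative only, within package common): a test opts in to assertion-level verification for its own duration, and hot paths wrap expensive invariant checks in Verify so production runs skip them.

// verifyUsageDemo enables assertion checks for its scope, restoring
// the previous BBOLT_VERIFY value when it returns.
func verifyUsageDemo() {
	defer EnableVerifications(ENV_VERIFY_VALUE_ASSERT)()

	Verify(func() {
		// Expensive invariant checks go here; this closure runs only
		// when BBOLT_VERIFY is "assert" or "all".
	})
}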

internal/freelist/array.go (new file, 108 lines)
@@ -0,0 +1,108 @@
package freelist
import (
"fmt"
"sort"
"github.com/tutus-one/tutus-bolt/internal/common"
)
type array struct {
*shared
ids []common.Pgid // all free and available free page ids.
}
func (f *array) Init(ids common.Pgids) {
f.ids = ids
f.reindex()
}
func (f *array) Allocate(txid common.Txid, n int) common.Pgid {
if len(f.ids) == 0 {
return 0
}
var initial, previd common.Pgid
for i, id := range f.ids {
if id <= 1 {
panic(fmt.Sprintf("invalid page allocation: %d", id))
}
// Reset initial page if this is not contiguous.
if previd == 0 || id-previd != 1 {
initial = id
}
// If we found a contiguous block then remove it and return it.
if (id-initial)+1 == common.Pgid(n) {
// If we're allocating off the beginning then take the fast path
// and just adjust the existing slice. This will use extra memory
// temporarily but the append() in free() will realloc the slice
// as is necessary.
if (i + 1) == n {
f.ids = f.ids[i+1:]
} else {
copy(f.ids[i-n+1:], f.ids[i+1:])
f.ids = f.ids[:len(f.ids)-n]
}
// Remove from the free cache.
for i := common.Pgid(0); i < common.Pgid(n); i++ {
delete(f.cache, initial+i)
}
f.allocs[initial] = txid
return initial
}
previd = id
}
return 0
}
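// Worked example (illustrative): with ids = [3 4 5 6 7 9 12 13 18],
// Allocate(tx, 3) finds the contiguous run 3-4-5, trims it from ids,
// records allocs[3] = tx, and returns 3. Once a later Allocate(tx, 1)
// has served page 6, a request for 3 contiguous pages returns 0, since
// only runs of length <= 2 remain ([7], [9], [12 13], [18]).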
func (f *array) FreeCount() int {
return len(f.ids)
}
func (f *array) freePageIds() common.Pgids {
return f.ids
}
func (f *array) mergeSpans(ids common.Pgids) {
sort.Sort(ids)
common.Verify(func() {
idsIdx := make(map[common.Pgid]struct{})
for _, id := range f.ids {
// The existing f.ids shouldn't have duplicated free ID.
if _, ok := idsIdx[id]; ok {
panic(fmt.Sprintf("detected duplicated free page ID: %d in existing f.ids: %v", id, f.ids))
}
idsIdx[id] = struct{}{}
}
prev := common.Pgid(0)
for _, id := range ids {
// The ids shouldn't have duplicated free ID. Note page 0 and 1
// are reserved for meta pages, so they can never be free page IDs.
if prev == id {
panic(fmt.Sprintf("detected duplicated free ID: %d in ids: %v", id, ids))
}
prev = id
// The ids shouldn't have any overlap with the existing f.ids.
if _, ok := idsIdx[id]; ok {
panic(fmt.Sprintf("detected overlapped free page ID: %d between ids: %v and existing f.ids: %v", id, ids, f.ids))
}
}
})
f.ids = common.Pgids(f.ids).Merge(ids)
}
func NewArrayFreelist() Interface {
a := &array{
shared: newShared(),
ids: []common.Pgid{},
}
a.Interface = a
return a
}
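Allocation in this backend is a linear scan for a contiguous run of n page ids, as the loop above shows. A short sketch of the observable behavior (demoArrayAllocate is a hypothetical helper assumed to live in this package):

package freelist
import (
	"fmt"
	"github.com/tutus-one/tutus-bolt/internal/common"
)
func demoArrayAllocate() {
	f := NewArrayFreelist()
	// Pages 3-5 form the only contiguous run of length 3.
	f.Init(common.Pgids{3, 4, 5, 9, 10})
	fmt.Println(f.Allocate(common.Txid(1), 3)) // 3: the run 3,4,5 is removed
	// Only pages 9 and 10 remain, so another 3-page request returns 0.
	fmt.Println(f.Allocate(common.Txid(1), 3)) // 0
}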

91
internal/freelist/array_test.go Normal file
View File

@ -0,0 +1,91 @@
package freelist
import (
"reflect"
"testing"
"github.com/stretchr/testify/require"
"github.com/tutus-one/tutus-bolt/internal/common"
)
// Ensure that a freelist can find contiguous blocks of pages.
func TestFreelistArray_allocate(t *testing.T) {
f := NewArrayFreelist()
ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.Init(ids)
if id := int(f.Allocate(1, 3)); id != 3 {
t.Fatalf("exp=3; got=%v", id)
}
if id := int(f.Allocate(1, 1)); id != 6 {
t.Fatalf("exp=6; got=%v", id)
}
if id := int(f.Allocate(1, 3)); id != 0 {
t.Fatalf("exp=0; got=%v", id)
}
if id := int(f.Allocate(1, 2)); id != 12 {
t.Fatalf("exp=12; got=%v", id)
}
if id := int(f.Allocate(1, 1)); id != 7 {
t.Fatalf("exp=7; got=%v", id)
}
if id := int(f.Allocate(1, 0)); id != 0 {
t.Fatalf("exp=0; got=%v", id)
}
if id := int(f.Allocate(1, 0)); id != 0 {
t.Fatalf("exp=0; got=%v", id)
}
if exp := common.Pgids([]common.Pgid{9, 18}); !reflect.DeepEqual(exp, f.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f.freePageIds())
}
if id := int(f.Allocate(1, 1)); id != 9 {
t.Fatalf("exp=9; got=%v", id)
}
if id := int(f.Allocate(1, 1)); id != 18 {
t.Fatalf("exp=18; got=%v", id)
}
if id := int(f.Allocate(1, 1)); id != 0 {
t.Fatalf("exp=0; got=%v", id)
}
if exp := common.Pgids([]common.Pgid{}); !reflect.DeepEqual(exp, f.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f.freePageIds())
}
}
func TestInvalidArrayAllocation(t *testing.T) {
f := NewArrayFreelist()
// page 0 and 1 are reserved for meta pages, so they should never be free pages.
ids := []common.Pgid{1}
f.Init(ids)
require.Panics(t, func() {
f.Allocate(common.Txid(1), 1)
})
}
func Test_Freelist_Array_Rollback(t *testing.T) {
f := newTestArrayFreelist()
f.Init([]common.Pgid{3, 5, 6, 7, 12, 13})
f.Free(100, common.NewPage(20, 0, 0, 1))
f.Allocate(100, 3)
f.Free(100, common.NewPage(25, 0, 0, 0))
f.Allocate(100, 2)
require.Equal(t, map[common.Pgid]common.Txid{5: 100, 12: 100}, f.allocs)
require.Equal(t, map[common.Txid]*txPending{100: {
ids: []common.Pgid{20, 21, 25},
alloctx: []common.Txid{0, 0, 0},
}}, f.pending)
f.Rollback(100)
require.Equal(t, map[common.Pgid]common.Txid{}, f.allocs)
require.Equal(t, map[common.Txid]*txPending{}, f.pending)
}
func newTestArrayFreelist() *array {
f := NewArrayFreelist()
return f.(*array)
}

82
internal/freelist/freelist.go Normal file
View File

@ -0,0 +1,82 @@
package freelist
import (
"github.com/tutus-one/tutus-bolt/internal/common"
)
type ReadWriter interface {
// Read calls Init with the page ids stored in the given page.
Read(page *common.Page)
// Write writes the freelist into the given page.
Write(page *common.Page)
// EstimatedWritePageSize returns the size in bytes of the freelist after serialization in Write.
// This should never underestimate the size.
EstimatedWritePageSize() int
}
type Interface interface {
ReadWriter
// Init initializes this freelist with the given list of pages.
Init(ids common.Pgids)
// Allocate tries to allocate the given number of contiguous pages
// from the free list pages. It returns the starting page ID if
// available; otherwise, it returns 0.
Allocate(txid common.Txid, numPages int) common.Pgid
// Count returns the number of free and pending pages.
Count() int
// FreeCount returns the number of free pages.
FreeCount() int
// PendingCount returns the number of pending pages.
PendingCount() int
// AddReadonlyTXID adds a given read-only transaction id for pending page tracking.
AddReadonlyTXID(txid common.Txid)
// RemoveReadonlyTXID removes a given read-only transaction id for pending page tracking.
RemoveReadonlyTXID(txid common.Txid)
// ReleasePendingPages releases any pages associated with closed read-only transactions.
ReleasePendingPages()
// Free releases a page and its overflow for a given transaction id.
// If the page is already free or is one of the meta pages, then a panic will occur.
Free(txId common.Txid, p *common.Page)
// Freed returns whether a given page is in the free list.
Freed(pgId common.Pgid) bool
// Rollback removes the pages from a given pending tx.
Rollback(txId common.Txid)
// Copyall copies a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
Copyall(dst []common.Pgid)
// Reload reads the freelist from a page and filters out pending items.
Reload(p *common.Page)
// NoSyncReload reads the freelist from Pgids and filters out pending items.
NoSyncReload(pgIds common.Pgids)
// freePageIds returns the IDs of all free pages. Returns an empty slice if no free pages are available.
freePageIds() common.Pgids
// pendingPageIds returns all pending pages by transaction id.
pendingPageIds() map[common.Txid]*txPending
// release moves all page ids for a transaction id (or older) to the freelist.
release(txId common.Txid)
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
releaseRange(begin, end common.Txid)
// mergeSpans merges the given pages into the freelist.
mergeSpans(ids common.Pgids)
}
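The contract behind this interface: pages freed by a write transaction first become pending, and only move to the free list once no read-only transaction can still observe them. A hedged sketch of that lifecycle (demoLifecycle is a hypothetical helper inside this package; it works with either backend):

package freelist
import (
	"fmt"
	"github.com/tutus-one/tutus-bolt/internal/common"
)
func demoLifecycle(f Interface) {
	f.Init(common.Pgids{})
	// Tx 2 frees page 7 (no overflow): the page becomes pending, not free.
	f.Free(common.Txid(2), common.NewPage(7, common.LeafPageFlag, 0, 0))
	fmt.Println(f.FreeCount(), f.PendingCount()) // 0 1
	// With no open read-only transactions, releasing makes it free.
	f.ReleasePendingPages()
	fmt.Println(f.FreeCount(), f.PendingCount()) // 1 0
	// The page can now be handed out again.
	fmt.Println(f.Allocate(common.Txid(3), 1)) // 7
}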

622
internal/freelist/freelist_test.go Normal file
View File

@ -0,0 +1,622 @@
package freelist
import (
"fmt"
"math"
"math/rand"
"os"
"reflect"
"slices"
"sort"
"testing"
"testing/quick"
"unsafe"
"github.com/stretchr/testify/require"
"github.com/tutus-one/tutus-bolt/internal/common"
)
// TestFreelistType is the name of the env variable tests use to select the freelist backend type
const TestFreelistType = "TEST_FREELIST_TYPE"
// Ensure that a page is added to a transaction's freelist.
func TestFreelist_free(t *testing.T) {
f := newTestFreelist()
f.Free(100, common.NewPage(12, 0, 0, 0))
if !reflect.DeepEqual([]common.Pgid{12}, f.pendingPageIds()[100].ids) {
t.Fatalf("exp=%v; got=%v", []common.Pgid{12}, f.pendingPageIds()[100].ids)
}
}
// Ensure that a page and its overflow is added to a transaction's freelist.
func TestFreelist_free_overflow(t *testing.T) {
f := newTestFreelist()
f.Free(100, common.NewPage(12, 0, 0, 3))
if exp := []common.Pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pendingPageIds()[100].ids) {
t.Fatalf("exp=%v; got=%v", exp, f.pendingPageIds()[100].ids)
}
}
// Ensure that double freeing a page is causing a panic
func TestFreelist_free_double_free_panics(t *testing.T) {
f := newTestFreelist()
f.Free(100, common.NewPage(12, 0, 0, 3))
require.Panics(t, func() {
f.Free(100, common.NewPage(12, 0, 0, 3))
})
}
// Ensure that attempting to free the meta page panics
func TestFreelist_free_meta_panics(t *testing.T) {
f := newTestFreelist()
require.Panics(t, func() {
f.Free(100, common.NewPage(0, 0, 0, 0))
})
require.Panics(t, func() {
f.Free(100, common.NewPage(1, 0, 0, 0))
})
}
func TestFreelist_free_freelist(t *testing.T) {
f := newTestFreelist()
f.Free(100, common.NewPage(12, common.FreelistPageFlag, 0, 0))
pp := f.pendingPageIds()[100]
require.Equal(t, []common.Pgid{12}, pp.ids)
require.Equal(t, []common.Txid{0}, pp.alloctx)
}
func TestFreelist_free_freelist_alloctx(t *testing.T) {
f := newTestFreelist()
f.Free(100, common.NewPage(12, common.FreelistPageFlag, 0, 0))
f.Rollback(100)
require.Empty(t, f.freePageIds())
require.Empty(t, f.pendingPageIds())
require.False(t, f.Freed(12))
f.Free(101, common.NewPage(12, common.FreelistPageFlag, 0, 0))
require.True(t, f.Freed(12))
if exp := []common.Pgid{12}; !reflect.DeepEqual(exp, f.pendingPageIds()[101].ids) {
t.Fatalf("exp=%v; got=%v", exp, f.pendingPageIds()[101].ids)
}
f.ReleasePendingPages()
require.True(t, f.Freed(12))
require.Empty(t, f.pendingPageIds())
if exp := common.Pgids([]common.Pgid{12}); !reflect.DeepEqual(exp, f.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f.freePageIds())
}
}
// Ensure that a transaction's free pages can be released.
func TestFreelist_release(t *testing.T) {
f := newTestFreelist()
f.Free(100, common.NewPage(12, 0, 0, 1))
f.Free(100, common.NewPage(9, 0, 0, 0))
f.Free(102, common.NewPage(39, 0, 0, 0))
f.release(100)
f.release(101)
if exp := common.Pgids([]common.Pgid{9, 12, 13}); !reflect.DeepEqual(exp, f.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f.freePageIds())
}
f.release(102)
if exp := common.Pgids([]common.Pgid{9, 12, 13, 39}); !reflect.DeepEqual(exp, f.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f.freePageIds())
}
}
// Ensure that releaseRange handles boundary conditions correctly
func TestFreelist_releaseRange(t *testing.T) {
type testRange struct {
begin, end common.Txid
}
type testPage struct {
id common.Pgid
n int
allocTxn common.Txid
freeTxn common.Txid
}
var releaseRangeTests = []struct {
title string
pagesIn []testPage
releaseRanges []testRange
wantFree []common.Pgid
}{
{
title: "Single pending in range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{1, 300}},
wantFree: []common.Pgid{3},
},
{
title: "Single pending with minimum end range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{1, 200}},
wantFree: []common.Pgid{3},
},
{
title: "Single pending outside minimum end range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{1, 199}},
wantFree: []common.Pgid{},
},
{
title: "Single pending with minimum begin range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{100, 300}},
wantFree: []common.Pgid{3},
},
{
title: "Single pending outside minimum begin range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{101, 300}},
wantFree: []common.Pgid{},
},
{
title: "Single pending in minimum range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}},
releaseRanges: []testRange{{199, 200}},
wantFree: []common.Pgid{3},
},
{
title: "Single pending and read transaction at 199",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}},
releaseRanges: []testRange{{100, 198}, {200, 300}},
wantFree: []common.Pgid{},
},
{
title: "Adjacent pending and read transactions at 199, 200",
pagesIn: []testPage{
{id: 3, n: 1, allocTxn: 199, freeTxn: 200},
{id: 4, n: 1, allocTxn: 200, freeTxn: 201},
},
releaseRanges: []testRange{
{100, 198},
{200, 199}, // Simulate the ranges db.freePages might produce.
{201, 300},
},
wantFree: []common.Pgid{},
},
{
title: "Out of order ranges",
pagesIn: []testPage{
{id: 3, n: 1, allocTxn: 199, freeTxn: 200},
{id: 4, n: 1, allocTxn: 200, freeTxn: 201},
},
releaseRanges: []testRange{
{201, 199},
{201, 200},
{200, 200},
},
wantFree: []common.Pgid{},
},
{
title: "Multiple pending, read transaction at 150",
pagesIn: []testPage{
{id: 3, n: 1, allocTxn: 100, freeTxn: 200},
{id: 4, n: 1, allocTxn: 100, freeTxn: 125},
{id: 5, n: 1, allocTxn: 125, freeTxn: 150},
{id: 6, n: 1, allocTxn: 125, freeTxn: 175},
{id: 7, n: 2, allocTxn: 150, freeTxn: 175},
{id: 9, n: 2, allocTxn: 175, freeTxn: 200},
},
releaseRanges: []testRange{{50, 149}, {151, 300}},
wantFree: []common.Pgid{4, 9, 10},
},
}
for _, c := range releaseRangeTests {
t.Run(c.title, func(t *testing.T) {
f := newTestFreelist()
var ids []common.Pgid
for _, p := range c.pagesIn {
for i := uint64(0); i < uint64(p.n); i++ {
ids = append(ids, common.Pgid(uint64(p.id)+i))
}
}
f.Init(ids)
for _, p := range c.pagesIn {
f.Allocate(p.allocTxn, p.n)
}
for _, p := range c.pagesIn {
f.Free(p.freeTxn, common.NewPage(p.id, 0, 0, uint32(p.n-1)))
}
for _, r := range c.releaseRanges {
f.releaseRange(r.begin, r.end)
}
require.Equal(t, common.Pgids(c.wantFree), f.freePageIds())
})
}
}
func TestFreeList_init(t *testing.T) {
buf := make([]byte, 4096)
f := newTestFreelist()
f.Init(common.Pgids{5, 6, 8})
p := common.LoadPage(buf)
f.Write(p)
f2 := newTestFreelist()
f2.Read(p)
require.Equal(t, common.Pgids{5, 6, 8}, f2.freePageIds())
// When initializing the freelist with an empty list of page ID,
// it should reset the freelist page IDs.
f2.Init([]common.Pgid{})
require.Equal(t, common.Pgids{}, f2.freePageIds())
}
func TestFreeList_reload(t *testing.T) {
buf := make([]byte, 4096)
f := newTestFreelist()
f.Init(common.Pgids{5, 6, 8})
p := common.LoadPage(buf)
f.Write(p)
f2 := newTestFreelist()
f2.Read(p)
require.Equal(t, common.Pgids{5, 6, 8}, f2.freePageIds())
f2.Free(common.Txid(5), common.NewPage(10, common.LeafPageFlag, 0, 2))
// reload shouldn't affect the pending list
f2.Reload(p)
require.Equal(t, common.Pgids{5, 6, 8}, f2.freePageIds())
require.Equal(t, []common.Pgid{10, 11, 12}, f2.pendingPageIds()[5].ids)
}
// Ensure that the txIDx Swap, Less and Len methods are properly implemented
func TestTxidSorting(t *testing.T) {
require.NoError(t, quick.Check(func(a []uint64) bool {
var txids []common.Txid
for _, txid := range a {
txids = append(txids, common.Txid(txid))
}
sort.Sort(txIDx(txids))
var r []uint64
for _, txid := range txids {
r = append(r, uint64(txid))
}
if !slices.IsSorted(r) {
t.Errorf("txids were not sorted correctly=%v", txids)
return false
}
return true
}, nil))
}
// Ensure that a freelist can deserialize from a freelist page.
func TestFreelist_read(t *testing.T) {
// Create a page.
var buf [4096]byte
page := (*common.Page)(unsafe.Pointer(&buf[0]))
page.SetFlags(common.FreelistPageFlag)
page.SetCount(2)
// Insert 2 page ids.
ids := (*[3]common.Pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
ids[0] = 23
ids[1] = 50
// Deserialize page into a freelist.
f := newTestFreelist()
f.Read(page)
// Ensure that there are two page ids in the freelist.
if exp := common.Pgids([]common.Pgid{23, 50}); !reflect.DeepEqual(exp, f.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f.freePageIds())
}
}
// Ensure that we never read a non-freelist page
func TestFreelist_read_panics(t *testing.T) {
buf := make([]byte, 4096)
page := common.LoadPage(buf)
page.SetFlags(common.BranchPageFlag)
page.SetCount(2)
f := newTestFreelist()
require.Panics(t, func() {
f.Read(page)
})
}
// Ensure that a freelist can serialize into a freelist page.
func TestFreelist_write(t *testing.T) {
// Create a freelist and write it to a page.
var buf [4096]byte
f := newTestFreelist()
f.Init([]common.Pgid{12, 39})
f.pendingPageIds()[100] = &txPending{ids: []common.Pgid{28, 11}}
f.pendingPageIds()[101] = &txPending{ids: []common.Pgid{3}}
p := (*common.Page)(unsafe.Pointer(&buf[0]))
f.Write(p)
// Read the page back out.
f2 := newTestFreelist()
f2.Read(p)
// Ensure that the freelist is correct.
// All pages should be present and in sorted order.
if exp := common.Pgids([]common.Pgid{3, 11, 12, 28, 39}); !reflect.DeepEqual(exp, f2.freePageIds()) {
t.Fatalf("exp=%v; got=%v", exp, f2.freePageIds())
}
}
func TestFreelist_E2E_HappyPath(t *testing.T) {
f := newTestFreelist()
f.Init([]common.Pgid{})
requirePages(t, f, common.Pgids{}, common.Pgids{})
allocated := f.Allocate(common.Txid(1), 5)
require.Equal(t, common.Pgid(0), allocated)
// tx.go may now allocate more space, and eventually we need to free a page again
f.Free(common.Txid(2), common.NewPage(5, common.LeafPageFlag, 0, 0))
f.Free(common.Txid(2), common.NewPage(3, common.LeafPageFlag, 0, 0))
f.Free(common.Txid(2), common.NewPage(8, common.LeafPageFlag, 0, 0))
// the above will only mark the pages as pending, so free pages should not return anything
requirePages(t, f, common.Pgids{}, common.Pgids{3, 5, 8})
// someone wants to do a read on top of the next tx id
f.AddReadonlyTXID(common.Txid(3))
// this should free the above pages for tx 2 entirely
f.ReleasePendingPages()
requirePages(t, f, common.Pgids{3, 5, 8}, common.Pgids{})
// since no span of two contiguous pages is available, allocation should yield a zero page id
require.Equal(t, common.Pgid(0), f.Allocate(common.Txid(4), 2))
// we should be able to allocate those pages independently however,
// map and array differ in the order they return the pages
expectedPgids := map[common.Pgid]struct{}{3: {}, 5: {}, 8: {}}
for i := 0; i < 3; i++ {
allocated = f.Allocate(common.Txid(4), 1)
require.Contains(t, expectedPgids, allocated, "expected to find pgid %d", allocated)
require.False(t, f.Freed(allocated))
delete(expectedPgids, allocated)
}
require.Emptyf(t, expectedPgids, "expected all pages to have been allocated, but some were left over")
// no more free pages to allocate
require.Equal(t, common.Pgid(0), f.Allocate(common.Txid(4), 1))
}
func TestFreelist_E2E_MultiSpanOverflows(t *testing.T) {
f := newTestFreelist()
f.Init([]common.Pgid{})
f.Free(common.Txid(10), common.NewPage(20, common.LeafPageFlag, 0, 1))
f.Free(common.Txid(10), common.NewPage(25, common.LeafPageFlag, 0, 2))
f.Free(common.Txid(10), common.NewPage(35, common.LeafPageFlag, 0, 3))
f.Free(common.Txid(10), common.NewPage(39, common.LeafPageFlag, 0, 2))
f.Free(common.Txid(10), common.NewPage(45, common.LeafPageFlag, 0, 4))
requirePages(t, f, common.Pgids{}, common.Pgids{20, 21, 25, 26, 27, 35, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49})
f.ReleasePendingPages()
requirePages(t, f, common.Pgids{20, 21, 25, 26, 27, 35, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49}, common.Pgids{})
// that sequence, regardless of implementation, should always yield the same blocks of pages
allocSequence := []int{7, 5, 3, 2}
expectedSpanStarts := []common.Pgid{35, 45, 25, 20}
for i, pageNums := range allocSequence {
allocated := f.Allocate(common.Txid(11), pageNums)
require.Equal(t, expectedSpanStarts[i], allocated)
// ensure all pages in that span are not considered free anymore
for i := 0; i < pageNums; i++ {
require.False(t, f.Freed(allocated+common.Pgid(i)))
}
}
}
func TestFreelist_E2E_Rollbacks(t *testing.T) {
freelist := newTestFreelist()
freelist.Init([]common.Pgid{})
freelist.Free(common.Txid(2), common.NewPage(5, common.LeafPageFlag, 0, 1))
freelist.Free(common.Txid(2), common.NewPage(8, common.LeafPageFlag, 0, 0))
requirePages(t, freelist, common.Pgids{}, common.Pgids{5, 6, 8})
freelist.Rollback(common.Txid(2))
requirePages(t, freelist, common.Pgids{}, common.Pgids{})
// unknown transaction should not trigger anything
freelist.Free(common.Txid(4), common.NewPage(13, common.LeafPageFlag, 0, 3))
requirePages(t, freelist, common.Pgids{}, common.Pgids{13, 14, 15, 16})
freelist.ReleasePendingPages()
requirePages(t, freelist, common.Pgids{13, 14, 15, 16}, common.Pgids{})
freelist.Rollback(common.Txid(1337))
requirePages(t, freelist, common.Pgids{13, 14, 15, 16}, common.Pgids{})
}
func TestFreelist_E2E_RollbackPanics(t *testing.T) {
freelist := newTestFreelist()
freelist.Init([]common.Pgid{5})
requirePages(t, freelist, common.Pgids{5}, common.Pgids{})
_ = freelist.Allocate(common.Txid(5), 1)
require.Panics(t, func() {
// depending on the verification level, either call should panic
freelist.Free(common.Txid(5), common.NewPage(5, common.LeafPageFlag, 0, 0))
freelist.Rollback(5)
})
}
// tests the reloading from another physical page
func TestFreelist_E2E_Reload(t *testing.T) {
freelist := newTestFreelist()
freelist.Init([]common.Pgid{})
freelist.Free(common.Txid(2), common.NewPage(5, common.LeafPageFlag, 0, 1))
freelist.Free(common.Txid(2), common.NewPage(8, common.LeafPageFlag, 0, 0))
freelist.ReleasePendingPages()
requirePages(t, freelist, common.Pgids{5, 6, 8}, common.Pgids{})
buf := make([]byte, 4096)
p := common.LoadPage(buf)
freelist.Write(p)
freelist.Free(common.Txid(3), common.NewPage(3, common.LeafPageFlag, 0, 1))
freelist.Free(common.Txid(3), common.NewPage(10, common.LeafPageFlag, 0, 2))
requirePages(t, freelist, common.Pgids{5, 6, 8}, common.Pgids{3, 4, 10, 11, 12})
otherBuf := make([]byte, 4096)
px := common.LoadPage(otherBuf)
freelist.Write(px)
loadFreeList := newTestFreelist()
loadFreeList.Init([]common.Pgid{})
loadFreeList.Read(px)
requirePages(t, loadFreeList, common.Pgids{3, 4, 5, 6, 8, 10, 11, 12}, common.Pgids{})
// restore the original freelist again
loadFreeList.Reload(p)
requirePages(t, loadFreeList, common.Pgids{5, 6, 8}, common.Pgids{})
// reload another page with different free pages to test we are deduplicating the free pages with the pending ones correctly
freelist = newTestFreelist()
freelist.Init([]common.Pgid{})
freelist.Free(common.Txid(5), common.NewPage(5, common.LeafPageFlag, 0, 4))
freelist.Reload(p)
requirePages(t, freelist, common.Pgids{}, common.Pgids{5, 6, 7, 8, 9})
}
// tests the loading and reloading from physical pages
func TestFreelist_E2E_SerDe_HappyPath(t *testing.T) {
freelist := newTestFreelist()
freelist.Init([]common.Pgid{})
freelist.Free(common.Txid(2), common.NewPage(5, common.LeafPageFlag, 0, 1))
freelist.Free(common.Txid(2), common.NewPage(8, common.LeafPageFlag, 0, 0))
freelist.ReleasePendingPages()
requirePages(t, freelist, common.Pgids{5, 6, 8}, common.Pgids{})
freelist.Free(common.Txid(3), common.NewPage(3, common.LeafPageFlag, 0, 1))
freelist.Free(common.Txid(3), common.NewPage(10, common.LeafPageFlag, 0, 2))
requirePages(t, freelist, common.Pgids{5, 6, 8}, common.Pgids{3, 4, 10, 11, 12})
buf := make([]byte, 4096)
p := common.LoadPage(buf)
require.Equal(t, 80, freelist.EstimatedWritePageSize())
freelist.Write(p)
loadFreeList := newTestFreelist()
loadFreeList.Init([]common.Pgid{})
loadFreeList.Read(p)
requirePages(t, loadFreeList, common.Pgids{3, 4, 5, 6, 8, 10, 11, 12}, common.Pgids{})
}
// tests the loading of a freelist against other implementations with various sizes
func TestFreelist_E2E_SerDe_AcrossImplementations(t *testing.T) {
testSizes := []int{0, 1, 10, 100, 1000, math.MaxUint16, math.MaxUint16 + 1, math.MaxUint16 * 2}
for _, size := range testSizes {
t.Run(fmt.Sprintf("n=%d", size), func(t *testing.T) {
freelist := newTestFreelist()
expectedFreePgids := common.Pgids{}
for i := 0; i < size; i++ {
pgid := common.Pgid(i + 2)
freelist.Free(common.Txid(1), common.NewPage(pgid, common.LeafPageFlag, 0, 0))
expectedFreePgids = append(expectedFreePgids, pgid)
}
freelist.ReleasePendingPages()
requirePages(t, freelist, expectedFreePgids, common.Pgids{})
buf := make([]byte, freelist.EstimatedWritePageSize())
p := common.LoadPage(buf)
freelist.Write(p)
for n, loadFreeList := range map[string]Interface{
"hashmap": NewHashMapFreelist(),
"array": NewArrayFreelist(),
} {
t.Run(n, func(t *testing.T) {
loadFreeList.Read(p)
requirePages(t, loadFreeList, expectedFreePgids, common.Pgids{})
})
}
})
}
}
func requirePages(t *testing.T, f Interface, freePageIds common.Pgids, pendingPageIds common.Pgids) {
require.Equal(t, f.FreeCount()+f.PendingCount(), f.Count())
require.Equalf(t, freePageIds, f.freePageIds(), "unexpected free pages")
require.Equal(t, len(freePageIds), f.FreeCount())
pp := allPendingPages(f.pendingPageIds())
require.Equalf(t, pendingPageIds, pp, "unexpected pending pages")
require.Equal(t, len(pp), f.PendingCount())
for _, pgid := range f.freePageIds() {
require.Truef(t, f.Freed(pgid), "expected free page to return true on Freed")
}
for _, pgid := range pp {
require.Truef(t, f.Freed(pgid), "expected pending page to return true on Freed")
}
}
func allPendingPages(p map[common.Txid]*txPending) common.Pgids {
pgids := common.Pgids{}
for _, pending := range p {
pgids = append(pgids, pending.ids...)
}
sort.Sort(pgids)
return pgids
}
func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) }
func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) }
func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) }
func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) }
func benchmark_FreelistRelease(b *testing.B, size int) {
ids := randomPgids(size)
pending := randomPgids(len(ids) / 400)
b.ResetTimer()
for i := 0; i < b.N; i++ {
txp := &txPending{ids: pending}
f := newTestFreelist()
f.pendingPageIds()[1] = txp
f.Init(ids)
f.release(1)
}
}
func randomPgids(n int) []common.Pgid {
pgids := make(common.Pgids, n)
for i := range pgids {
pgids[i] = common.Pgid(rand.Int63())
}
sort.Sort(pgids)
return pgids
}
func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) {
f := newTestFreelist()
exp := common.Pgids([]common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18})
f.Init(exp)
if got := f.freePageIds(); !reflect.DeepEqual(exp, got) {
t.Fatalf("exp=%v; got=%v", exp, got)
}
f2 := newTestFreelist()
exp2 := []common.Pgid{}
f2.Init(exp2)
if got2 := f2.freePageIds(); !reflect.DeepEqual(got2, common.Pgids(exp2)) {
t.Fatalf("exp2=%#v; got2=%#v", exp2, got2)
}
}
// newTestFreelist gets the freelist type from the env variable and initializes the freelist accordingly
func newTestFreelist() Interface {
if env := os.Getenv(TestFreelistType); env == "hashmap" {
return NewHashMapFreelist()
}
return NewArrayFreelist()
}
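Because the backend is selected through the TEST_FREELIST_TYPE environment variable, an individual test can pin a specific backend with t.Setenv. A hypothetical example (TestWithHashmapBackend is not part of this commit):

package freelist
import "testing"
func TestWithHashmapBackend(t *testing.T) {
	// t.Setenv restores the previous value automatically when the test ends.
	t.Setenv(TestFreelistType, "hashmap")
	f := newTestFreelist()
	if _, ok := f.(*hashMap); !ok {
		t.Fatalf("expected a hashmap freelist, got %T", f)
	}
}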

292
internal/freelist/hashmap.go Normal file
View File

@ -0,0 +1,292 @@
package freelist
import (
"fmt"
"reflect"
"sort"
"github.com/tutus-one/tutus-bolt/internal/common"
)
// pidSet holds the set of starting pgids which have the same span size
type pidSet map[common.Pgid]struct{}
type hashMap struct {
*shared
freePagesCount uint64 // count of free pages (hashmap version)
freemaps map[uint64]pidSet // key is the size of a continuous page span, value is the set of starting pgids of spans with that size
forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size
backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size
}
func (f *hashMap) Init(pgids common.Pgids) {
// reset the counter when the freelist is initialized
f.freePagesCount = 0
f.freemaps = make(map[uint64]pidSet)
f.forwardMap = make(map[common.Pgid]uint64)
f.backwardMap = make(map[common.Pgid]uint64)
if len(pgids) == 0 {
return
}
if !sort.SliceIsSorted([]common.Pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
panic("pgids not sorted")
}
size := uint64(1)
start := pgids[0]
for i := 1; i < len(pgids); i++ {
// continuous page
if pgids[i] == pgids[i-1]+1 {
size++
} else {
f.addSpan(start, size)
size = 1
start = pgids[i]
}
}
// init the tail
if size != 0 && start != 0 {
f.addSpan(start, size)
}
f.reindex()
}
func (f *hashMap) Allocate(txid common.Txid, n int) common.Pgid {
if n == 0 {
return 0
}
// if we have an exact size match, take the short path
if bm, ok := f.freemaps[uint64(n)]; ok {
for pid := range bm {
// remove the span
f.delSpan(pid, uint64(n))
f.allocs[pid] = txid
for i := common.Pgid(0); i < common.Pgid(n); i++ {
delete(f.cache, pid+i)
}
return pid
}
}
// look up the map to find a larger span
for size, bm := range f.freemaps {
if size < uint64(n) {
continue
}
for pid := range bm {
// remove the initial span
f.delSpan(pid, size)
f.allocs[pid] = txid
remain := size - uint64(n)
// add remain span
f.addSpan(pid+common.Pgid(n), remain)
for i := common.Pgid(0); i < common.Pgid(n); i++ {
delete(f.cache, pid+i)
}
return pid
}
}
return 0
}
func (f *hashMap) FreeCount() int {
common.Verify(func() {
expectedFreePageCount := f.hashmapFreeCountSlow()
common.Assert(int(f.freePagesCount) == expectedFreePageCount,
"freePagesCount (%d) is out of sync with free pages map (%d)", f.freePagesCount, expectedFreePageCount)
})
return int(f.freePagesCount)
}
func (f *hashMap) freePageIds() common.Pgids {
count := f.FreeCount()
if count == 0 {
return common.Pgids{}
}
m := make([]common.Pgid, 0, count)
startPageIds := make([]common.Pgid, 0, len(f.forwardMap))
for k := range f.forwardMap {
startPageIds = append(startPageIds, k)
}
sort.Sort(common.Pgids(startPageIds))
for _, start := range startPageIds {
if size, ok := f.forwardMap[start]; ok {
for i := 0; i < int(size); i++ {
m = append(m, start+common.Pgid(i))
}
}
}
return m
}
func (f *hashMap) hashmapFreeCountSlow() int {
count := 0
for _, size := range f.forwardMap {
count += int(size)
}
return count
}
func (f *hashMap) addSpan(start common.Pgid, size uint64) {
f.backwardMap[start-1+common.Pgid(size)] = size
f.forwardMap[start] = size
if _, ok := f.freemaps[size]; !ok {
f.freemaps[size] = make(map[common.Pgid]struct{})
}
f.freemaps[size][start] = struct{}{}
f.freePagesCount += size
}
func (f *hashMap) delSpan(start common.Pgid, size uint64) {
delete(f.forwardMap, start)
delete(f.backwardMap, start+common.Pgid(size-1))
delete(f.freemaps[size], start)
if len(f.freemaps[size]) == 0 {
delete(f.freemaps, size)
}
f.freePagesCount -= size
}
func (f *hashMap) mergeSpans(ids common.Pgids) {
common.Verify(func() {
ids1Freemap := f.idsFromFreemaps()
ids2Forward := f.idsFromForwardMap()
ids3Backward := f.idsFromBackwardMap()
if !reflect.DeepEqual(ids1Freemap, ids2Forward) {
panic(fmt.Sprintf("Detected mismatch, f.freemaps: %v, f.forwardMap: %v", f.freemaps, f.forwardMap))
}
if !reflect.DeepEqual(ids1Freemap, ids3Backward) {
panic(fmt.Sprintf("Detected mismatch, f.freemaps: %v, f.backwardMap: %v", f.freemaps, f.backwardMap))
}
sort.Sort(ids)
prev := common.Pgid(0)
for _, id := range ids {
// The ids shouldn't have duplicated free ID.
if prev == id {
panic(fmt.Sprintf("detected duplicated free ID: %d in ids: %v", id, ids))
}
prev = id
// The ids shouldn't have any overlap with the existing f.freemaps.
if _, ok := ids1Freemap[id]; ok {
panic(fmt.Sprintf("detected overlapped free page ID: %d between ids: %v and existing f.freemaps: %v", id, ids, f.freemaps))
}
}
})
for _, id := range ids {
// try to see if we can merge and update
f.mergeWithExistingSpan(id)
}
}
// mergeWithExistingSpan merges pid into the existing free spans, trying to merge it backward and forward
func (f *hashMap) mergeWithExistingSpan(pid common.Pgid) {
prev := pid - 1
next := pid + 1
preSize, mergeWithPrev := f.backwardMap[prev]
nextSize, mergeWithNext := f.forwardMap[next]
newStart := pid
newSize := uint64(1)
if mergeWithPrev {
// merge with previous span
start := prev + 1 - common.Pgid(preSize)
f.delSpan(start, preSize)
newStart -= common.Pgid(preSize)
newSize += preSize
}
if mergeWithNext {
// merge with next span
f.delSpan(next, nextSize)
newSize += nextSize
}
f.addSpan(newStart, newSize)
}
// idsFromFreemaps gets all free page IDs from f.freemaps.
// Used by tests only.
func (f *hashMap) idsFromFreemaps() map[common.Pgid]struct{} {
ids := make(map[common.Pgid]struct{})
for size, idSet := range f.freemaps {
for start := range idSet {
for i := 0; i < int(size); i++ {
id := start + common.Pgid(i)
if _, ok := ids[id]; ok {
panic(fmt.Sprintf("detected duplicated free page ID: %d in f.freemaps: %v", id, f.freemaps))
}
ids[id] = struct{}{}
}
}
}
return ids
}
// idsFromForwardMap gets all free page IDs from f.forwardMap.
// Used by tests only.
func (f *hashMap) idsFromForwardMap() map[common.Pgid]struct{} {
ids := make(map[common.Pgid]struct{})
for start, size := range f.forwardMap {
for i := 0; i < int(size); i++ {
id := start + common.Pgid(i)
if _, ok := ids[id]; ok {
panic(fmt.Sprintf("detected duplicated free page ID: %d in f.forwardMap: %v", id, f.forwardMap))
}
ids[id] = struct{}{}
}
}
return ids
}
// idsFromBackwardMap gets all free page IDs from f.backwardMap.
// Used by tests only.
func (f *hashMap) idsFromBackwardMap() map[common.Pgid]struct{} {
ids := make(map[common.Pgid]struct{})
for end, size := range f.backwardMap {
for i := 0; i < int(size); i++ {
id := end - common.Pgid(i)
if _, ok := ids[id]; ok {
panic(fmt.Sprintf("detected duplicated free page ID: %d in f.backwardMap: %v", id, f.backwardMap))
}
ids[id] = struct{}{}
}
}
return ids
}
func NewHashMapFreelist() Interface {
hm := &hashMap{
shared: newShared(),
freemaps: make(map[uint64]pidSet),
forwardMap: make(map[common.Pgid]uint64),
backwardMap: make(map[common.Pgid]uint64),
}
hm.Interface = hm
return hm
}
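The three maps are kept consistent so that freeing a page can coalesce neighbouring spans in constant time. A hypothetical in-package illustration (fmt prints map keys in sorted order, so the outputs shown are deterministic):

package freelist
import (
	"fmt"
	"github.com/tutus-one/tutus-bolt/internal/common"
)
func demoSpanMerge() {
	f := NewHashMapFreelist().(*hashMap)
	f.Init(common.Pgids{3, 4, 8})
	fmt.Println(f.forwardMap) // map[3:2 8:1] - spans {3,4} and {8}
	// Merging 5, 6 and 7 bridges the gap into a single span of size 6.
	f.mergeSpans(common.Pgids{5, 6, 7})
	fmt.Println(f.forwardMap) // map[3:6]
}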

187
internal/freelist/hashmap_test.go Normal file
View File

@ -0,0 +1,187 @@
package freelist
import (
"math/rand"
"reflect"
"sort"
"testing"
"github.com/stretchr/testify/require"
"github.com/tutus-one/tutus-bolt/internal/common"
)
func TestFreelistHashmap_init_panics(t *testing.T) {
f := NewHashMapFreelist()
require.Panics(t, func() {
// init expects sorted input
f.Init([]common.Pgid{25, 5})
})
}
func TestFreelistHashmap_allocate(t *testing.T) {
f := NewHashMapFreelist()
ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.Init(ids)
f.Allocate(1, 3)
if x := f.FreeCount(); x != 6 {
t.Fatalf("exp=6; got=%v", x)
}
f.Allocate(1, 2)
if x := f.FreeCount(); x != 4 {
t.Fatalf("exp=4; got=%v", x)
}
f.Allocate(1, 1)
if x := f.FreeCount(); x != 3 {
t.Fatalf("exp=3; got=%v", x)
}
f.Allocate(1, 0)
if x := f.FreeCount(); x != 3 {
t.Fatalf("exp=3; got=%v", x)
}
}
func TestFreelistHashmap_mergeWithExist(t *testing.T) {
bm1 := pidSet{1: struct{}{}}
bm2 := pidSet{5: struct{}{}}
tests := []struct {
name string
ids common.Pgids
pgid common.Pgid
want common.Pgids
wantForwardmap map[common.Pgid]uint64
wantBackwardmap map[common.Pgid]uint64
wantfreemap map[uint64]pidSet
}{
{
name: "test1",
ids: []common.Pgid{1, 2, 4, 5, 6},
pgid: 3,
want: []common.Pgid{1, 2, 3, 4, 5, 6},
wantForwardmap: map[common.Pgid]uint64{1: 6},
wantBackwardmap: map[common.Pgid]uint64{6: 6},
wantfreemap: map[uint64]pidSet{6: bm1},
},
{
name: "test2",
ids: []common.Pgid{1, 2, 5, 6},
pgid: 3,
want: []common.Pgid{1, 2, 3, 5, 6},
wantForwardmap: map[common.Pgid]uint64{1: 3, 5: 2},
wantBackwardmap: map[common.Pgid]uint64{6: 2, 3: 3},
wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2},
},
{
name: "test3",
ids: []common.Pgid{1, 2},
pgid: 3,
want: []common.Pgid{1, 2, 3},
wantForwardmap: map[common.Pgid]uint64{1: 3},
wantBackwardmap: map[common.Pgid]uint64{3: 3},
wantfreemap: map[uint64]pidSet{3: bm1},
},
{
name: "test4",
ids: []common.Pgid{2, 3},
pgid: 1,
want: []common.Pgid{1, 2, 3},
wantForwardmap: map[common.Pgid]uint64{1: 3},
wantBackwardmap: map[common.Pgid]uint64{3: 3},
wantfreemap: map[uint64]pidSet{3: bm1},
},
}
for _, tt := range tests {
f := newTestHashMapFreelist()
f.Init(tt.ids)
f.mergeWithExistingSpan(tt.pgid)
if got := f.freePageIds(); !reflect.DeepEqual(tt.want, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.want, got)
}
if got := f.forwardMap; !reflect.DeepEqual(tt.wantForwardmap, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantForwardmap, got)
}
if got := f.backwardMap; !reflect.DeepEqual(tt.wantBackwardmap, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantBackwardmap, got)
}
if got := f.freemaps; !reflect.DeepEqual(tt.wantfreemap, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantfreemap, got)
}
}
}
func TestFreelistHashmap_GetFreePageIDs(t *testing.T) {
f := newTestHashMapFreelist()
N := int32(100000)
fm := make(map[common.Pgid]uint64)
i := int32(0)
val := int32(0)
for i = 0; i < N; {
val = rand.Int31n(1000)
fm[common.Pgid(i)] = uint64(val)
i += val
f.freePagesCount += uint64(val)
}
f.forwardMap = fm
res := f.freePageIds()
if !sort.SliceIsSorted(res, func(i, j int) bool { return res[i] < res[j] }) {
t.Fatalf("pgids not sorted")
}
}
func Test_Freelist_Hashmap_Rollback(t *testing.T) {
f := newTestHashMapFreelist()
f.Init([]common.Pgid{3, 5, 6, 7, 12, 13})
f.Free(100, common.NewPage(20, 0, 0, 1))
f.Allocate(100, 3)
f.Free(100, common.NewPage(25, 0, 0, 0))
f.Allocate(100, 2)
require.Equal(t, map[common.Pgid]common.Txid{5: 100, 12: 100}, f.allocs)
require.Equal(t, map[common.Txid]*txPending{100: {
ids: []common.Pgid{20, 21, 25},
alloctx: []common.Txid{0, 0, 0},
}}, f.pending)
f.Rollback(100)
require.Equal(t, map[common.Pgid]common.Txid{}, f.allocs)
require.Equal(t, map[common.Txid]*txPending{}, f.pending)
}
func Benchmark_freelist_hashmapGetFreePageIDs(b *testing.B) {
f := newTestHashMapFreelist()
N := int32(100000)
fm := make(map[common.Pgid]uint64)
i := int32(0)
val := int32(0)
for i = 0; i < N; {
val = rand.Int31n(1000)
fm[common.Pgid(i)] = uint64(val)
i += val
}
f.forwardMap = fm
b.ReportAllocs()
b.ResetTimer()
for n := 0; n < b.N; n++ {
f.freePageIds()
}
}
func newTestHashMapFreelist() *hashMap {
f := NewHashMapFreelist()
return f.(*hashMap)
}

310
internal/freelist/shared.go Normal file
View File

@ -0,0 +1,310 @@
package freelist
import (
"fmt"
"math"
"sort"
"unsafe"
"github.com/tutus-one/tutus-bolt/internal/common"
)
type txPending struct {
ids []common.Pgid
alloctx []common.Txid // txids allocating the ids
lastReleaseBegin common.Txid // beginning txid of last matching releaseRange
}
type shared struct {
Interface
readonlyTXIDs []common.Txid // all readonly transaction IDs.
allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid.
cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids.
pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx.
}
func newShared() *shared {
return &shared{
pending: make(map[common.Txid]*txPending),
allocs: make(map[common.Pgid]common.Txid),
cache: make(map[common.Pgid]struct{}),
}
}
func (t *shared) pendingPageIds() map[common.Txid]*txPending {
return t.pending
}
func (t *shared) PendingCount() int {
var count int
for _, txp := range t.pending {
count += len(txp.ids)
}
return count
}
func (t *shared) Count() int {
return t.FreeCount() + t.PendingCount()
}
func (t *shared) Freed(pgId common.Pgid) bool {
_, ok := t.cache[pgId]
return ok
}
func (t *shared) Free(txid common.Txid, p *common.Page) {
if p.Id() <= 1 {
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.Id()))
}
// Free page and all its overflow pages.
txp := t.pending[txid]
if txp == nil {
txp = &txPending{}
t.pending[txid] = txp
}
allocTxid, ok := t.allocs[p.Id()]
common.Verify(func() {
if allocTxid == txid {
panic(fmt.Sprintf("free: freed page (%d) was allocated by the same transaction (%d)", p.Id(), txid))
}
})
if ok {
delete(t.allocs, p.Id())
}
for id := p.Id(); id <= p.Id()+common.Pgid(p.Overflow()); id++ {
// Verify that page is not already free.
if _, ok := t.cache[id]; ok {
panic(fmt.Sprintf("page %d already freed", id))
}
// Add to the freelist and cache.
txp.ids = append(txp.ids, id)
txp.alloctx = append(txp.alloctx, allocTxid)
t.cache[id] = struct{}{}
}
}
func (t *shared) Rollback(txid common.Txid) {
// Remove page ids from cache.
txp := t.pending[txid]
if txp == nil {
return
}
for i, pgid := range txp.ids {
delete(t.cache, pgid)
tx := txp.alloctx[i]
if tx == 0 {
continue
}
if tx != txid {
// Pending free aborted; restore page back to alloc list.
t.allocs[pgid] = tx
} else {
// A writing TXN should never free a page which was allocated by itself.
panic(fmt.Sprintf("rollback: freed page (%d) was allocated by the same transaction (%d)", pgid, txid))
}
}
// Remove pages from pending list and mark as free if allocated by txid.
delete(t.pending, txid)
// Remove pgids which are allocated by this txid
for pgid, tid := range t.allocs {
if tid == txid {
delete(t.allocs, pgid)
}
}
}
func (t *shared) AddReadonlyTXID(tid common.Txid) {
t.readonlyTXIDs = append(t.readonlyTXIDs, tid)
}
func (t *shared) RemoveReadonlyTXID(tid common.Txid) {
for i := range t.readonlyTXIDs {
if t.readonlyTXIDs[i] == tid {
last := len(t.readonlyTXIDs) - 1
t.readonlyTXIDs[i] = t.readonlyTXIDs[last]
t.readonlyTXIDs = t.readonlyTXIDs[:last]
break
}
}
}
type txIDx []common.Txid
func (t txIDx) Len() int { return len(t) }
func (t txIDx) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t txIDx) Less(i, j int) bool { return t[i] < t[j] }
func (t *shared) ReleasePendingPages() {
// Free all pending pages prior to the earliest open transaction.
sort.Sort(txIDx(t.readonlyTXIDs))
minid := common.Txid(math.MaxUint64)
if len(t.readonlyTXIDs) > 0 {
minid = t.readonlyTXIDs[0]
}
if minid > 0 {
t.release(minid - 1)
}
// Release unused txid extents.
for _, tid := range t.readonlyTXIDs {
t.releaseRange(minid, tid-1)
minid = tid + 1
}
t.releaseRange(minid, common.Txid(math.MaxUint64))
// Any page both allocated and freed in an extent is safe to release.
}
func (t *shared) release(txid common.Txid) {
m := make(common.Pgids, 0)
for tid, txp := range t.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
// Don't remove from the cache since the page is still free.
m = append(m, txp.ids...)
delete(t.pending, tid)
}
}
t.mergeSpans(m)
}
func (t *shared) releaseRange(begin, end common.Txid) {
if begin > end {
return
}
m := common.Pgids{}
for tid, txp := range t.pending {
if tid < begin || tid > end {
continue
}
// Don't recompute freed pages if the ranges haven't changed.
if txp.lastReleaseBegin == begin {
continue
}
for i := 0; i < len(txp.ids); i++ {
if atx := txp.alloctx[i]; atx < begin || atx > end {
continue
}
m = append(m, txp.ids[i])
txp.ids[i] = txp.ids[len(txp.ids)-1]
txp.ids = txp.ids[:len(txp.ids)-1]
txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
i--
}
txp.lastReleaseBegin = begin
if len(txp.ids) == 0 {
delete(t.pending, tid)
}
}
t.mergeSpans(m)
}
// Copyall copies a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
func (t *shared) Copyall(dst []common.Pgid) {
m := make(common.Pgids, 0, t.PendingCount())
for _, txp := range t.pendingPageIds() {
m = append(m, txp.ids...)
}
sort.Sort(m)
common.Mergepgids(dst, t.freePageIds(), m)
}
func (t *shared) Reload(p *common.Page) {
t.Read(p)
t.NoSyncReload(t.freePageIds())
}
func (t *shared) NoSyncReload(pgIds common.Pgids) {
// Build a cache of only pending pages.
pcache := make(map[common.Pgid]bool)
for _, txp := range t.pending {
for _, pendingID := range txp.ids {
pcache[pendingID] = true
}
}
// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
a := []common.Pgid{}
for _, id := range pgIds {
if !pcache[id] {
a = append(a, id)
}
}
t.Init(a)
}
// reindex rebuilds the free cache based on available and pending free lists.
func (t *shared) reindex() {
free := t.freePageIds()
pending := t.pendingPageIds()
t.cache = make(map[common.Pgid]struct{}, len(free))
for _, id := range free {
t.cache[id] = struct{}{}
}
for _, txp := range pending {
for _, pendingID := range txp.ids {
t.cache[pendingID] = struct{}{}
}
}
}
func (t *shared) Read(p *common.Page) {
if !p.IsFreelistPage() {
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.Id(), p.Typ()))
}
ids := p.FreelistPageIds()
// Copy the list of page ids from the freelist.
if len(ids) == 0 {
t.Init([]common.Pgid{})
} else {
// copy the ids, so we don't modify the freelist page directly
idsCopy := make([]common.Pgid, len(ids))
copy(idsCopy, ids)
// Make sure they're sorted.
sort.Sort(common.Pgids(idsCopy))
t.Init(idsCopy)
}
}
func (t *shared) EstimatedWritePageSize() int {
n := t.Count()
if n >= 0xFFFF {
// The first element will be used to store the count. See freelist.write.
n++
}
return int(common.PageHeaderSize) + (int(unsafe.Sizeof(common.Pgid(0))) * n)
}
func (t *shared) Write(p *common.Page) {
// Combine the old free pgids and pgids waiting on an open transaction.
// Update the header flag.
p.SetFlags(common.FreelistPageFlag)
// The page.count can only hold up to 64k elements so if we overflow that
// number then we handle it by putting the size in the first element.
l := t.Count()
if l == 0 {
p.SetCount(uint16(l))
} else if l < 0xFFFF {
p.SetCount(uint16(l))
data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
ids := unsafe.Slice((*common.Pgid)(data), l)
t.Copyall(ids)
} else {
p.SetCount(0xFFFF)
data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
ids := unsafe.Slice((*common.Pgid)(data), l+1)
ids[0] = common.Pgid(l)
t.Copyall(ids[1:])
}
}
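Write stores the complete list (free plus pending) in a single page; because page.count is a uint16, a list of 0xFFFF or more entries stores its real length in the first pgid slot instead. Below that threshold a roundtrip looks like this (a sketch assuming in-package access; the 40 bytes are the 16-byte page header plus three 8-byte pgids, consistent with the EstimatedWritePageSize of 80 asserted for 8 entries in the tests earlier in this commit):

package freelist
import (
	"fmt"
	"github.com/tutus-one/tutus-bolt/internal/common"
)
func demoSerializeRoundTrip() {
	f := NewArrayFreelist()
	f.Init(common.Pgids{5, 6, 8})
	fmt.Println(f.EstimatedWritePageSize()) // 40
	buf := make([]byte, 4096)
	p := common.LoadPage(buf)
	f.Write(p)
	// Reading the page back reconstructs an equivalent freelist.
	f2 := NewArrayFreelist()
	f2.Read(p)
	fmt.Println(f2.FreeCount()) // 3
}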

141
internal/guts_cli/guts_cli.go Normal file
View File

@ -0,0 +1,141 @@
package guts_cli
// Low level access to pages / data-structures of the bbolt file.
import (
"errors"
"fmt"
"io"
"os"
"github.com/tutus-one/tutus-bolt/internal/common"
)
var (
// ErrCorrupt is returned when checking a data file finds errors.
ErrCorrupt = errors.New("invalid value")
)
// ReadPage reads Page info & full Page data from a path.
// This is not transactionally safe.
func ReadPage(path string, pageID uint64) (*common.Page, []byte, error) {
// Find Page size.
pageSize, hwm, err := ReadPageAndHWMSize(path)
if err != nil {
return nil, nil, fmt.Errorf("read Page size: %w", err)
}
// Open database file.
f, err := os.Open(path)
if err != nil {
return nil, nil, err
}
defer f.Close()
// Read one block into buffer.
buf := make([]byte, pageSize)
if n, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil {
return nil, nil, err
} else if n != len(buf) {
return nil, nil, io.ErrUnexpectedEOF
}
// Determine total number of blocks.
p := common.LoadPage(buf)
if p.Id() != common.Pgid(pageID) {
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID)
}
overflowN := p.Overflow()
if overflowN >= uint32(hwm)-3 { // we exclude 2 Meta pages and the current Page.
return nil, nil, fmt.Errorf("error: %w, Page claims to have %d overflow pages (>=hwm=%d). Interrupting to avoid risky OOM", ErrCorrupt, overflowN, hwm)
}
if overflowN == 0 {
return p, buf, nil
}
// Re-read entire Page (with overflow) into buffer.
buf = make([]byte, (uint64(overflowN)+1)*pageSize)
if n, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil {
return nil, nil, err
} else if n != len(buf) {
return nil, nil, io.ErrUnexpectedEOF
}
p = common.LoadPage(buf)
if p.Id() != common.Pgid(pageID) {
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID)
}
return p, buf, nil
}
func WritePage(path string, pageBuf []byte) error {
page := common.LoadPage(pageBuf)
pageSize, _, err := ReadPageAndHWMSize(path)
if err != nil {
return err
}
expectedLen := pageSize * (uint64(page.Overflow()) + 1)
if expectedLen != uint64(len(pageBuf)) {
return fmt.Errorf("WritePage: len(buf):%d != pageSize*(overflow+1):%d", len(pageBuf), expectedLen)
}
f, err := os.OpenFile(path, os.O_WRONLY, 0)
if err != nil {
return err
}
defer f.Close()
_, err = f.WriteAt(pageBuf, int64(page.Id())*int64(pageSize))
return err
}
// ReadPageAndHWMSize reads Page size and HWM (id of the last+1 Page).
// This is not transactionally safe.
func ReadPageAndHWMSize(path string) (uint64, common.Pgid, error) {
// Open database file.
f, err := os.Open(path)
if err != nil {
return 0, 0, err
}
defer f.Close()
// Read 4KB chunk.
buf := make([]byte, 4096)
if _, err := io.ReadFull(f, buf); err != nil {
return 0, 0, err
}
// Read Page size from metadata.
m := common.LoadPageMeta(buf)
if m.Magic() != common.Magic {
return 0, 0, fmt.Errorf("the Meta Page has wrong (unexpected) magic")
}
return uint64(m.PageSize()), common.Pgid(m.Pgid()), nil
}
// GetRootPage returns the root-page (according to the most recent transaction).
func GetRootPage(path string) (root common.Pgid, activeMeta common.Pgid, err error) {
m, id, err := GetActiveMetaPage(path)
if err != nil {
return 0, id, err
}
return m.RootBucket().RootPage(), id, nil
}
// GetActiveMetaPage returns the active meta page and its page ID (0 or 1).
func GetActiveMetaPage(path string) (*common.Meta, common.Pgid, error) {
_, buf0, err0 := ReadPage(path, 0)
if err0 != nil {
return nil, 0, err0
}
m0 := common.LoadPageMeta(buf0)
_, buf1, err1 := ReadPage(path, 1)
if err1 != nil {
return nil, 1, err1
}
m1 := common.LoadPageMeta(buf1)
if m0.Txid() < m1.Txid() {
return m1, 1, nil
} else {
return m0, 0, nil
}
}
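A hypothetical inspection helper built on these primitives (it must be compiled inside this module, since guts_cli is an internal package; "my.db" is a placeholder path):

package main
import (
	"fmt"
	"log"
	"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
func main() {
	// Resolve the most recent meta page and the root bucket page it points to.
	root, activeMeta, err := guts_cli.GetRootPage("my.db")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("active meta page: %d, root bucket page: %d\n", activeMeta, root)
}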

156
internal/surgeon/surgeon.go Normal file
View File

@ -0,0 +1,156 @@
package surgeon
import (
"fmt"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
func CopyPage(path string, srcPage common.Pgid, target common.Pgid) error {
p1, d1, err1 := guts_cli.ReadPage(path, uint64(srcPage))
if err1 != nil {
return err1
}
p1.SetId(target)
return guts_cli.WritePage(path, d1)
}
func ClearPage(path string, pgId common.Pgid) (bool, error) {
return ClearPageElements(path, pgId, 0, -1, false)
}
// ClearPageElements supports clearing elements in both branch and leaf
// pages. Note that if ${abandonFreelist} is true, the freelist may be cleared
// from the meta pages in the following two cases, in which case bbolt needs
// to scan the db to reconstruct the free list. This may cause some delay on
// the next startup, depending on the db size.
// 1. Any branch elements are cleared;
// 2. An object saved in overflow pages is cleared.
//
// Usually ${abandonFreelist} defaults to false, meaning the freelist in the
// meta pages is not cleared automatically. Users receive a warning message
// reminding them to explicitly execute `bbolt surgery abandon-freelist`
// afterwards; the first return parameter is true in that case. But if the
// freelist isn't synced at all, no warning message is displayed.
func ClearPageElements(path string, pgId common.Pgid, start, end int, abandonFreelist bool) (bool, error) {
// Read the page
p, buf, err := guts_cli.ReadPage(path, uint64(pgId))
if err != nil {
return false, fmt.Errorf("ReadPage failed: %w", err)
}
if !p.IsLeafPage() && !p.IsBranchPage() {
return false, fmt.Errorf("can't clear elements in %q page", p.Typ())
}
elementCnt := int(p.Count())
if elementCnt == 0 {
return false, nil
}
if start < 0 || start >= elementCnt {
return false, fmt.Errorf("the start index (%d) is out of range [0, %d)", start, elementCnt)
}
if (end < 0 || end > elementCnt) && end != -1 {
return false, fmt.Errorf("the end index (%d) is out of range [0, %d]", end, elementCnt)
}
if start > end && end != -1 {
return false, fmt.Errorf("the start index (%d) is bigger than the end index (%d)", start, end)
}
if start == end {
return false, fmt.Errorf("invalid: the start index (%d) is equal to the end index (%d)", start, end)
}
preOverflow := p.Overflow()
var (
dataWritten uint32
)
if end == int(p.Count()) || end == -1 {
inodes := common.ReadInodeFromPage(p)
inodes = inodes[:start]
p.SetCount(uint16(start))
// no need to write inode & data again, we just need to get
// the data size which will be kept.
dataWritten = common.UsedSpaceInPage(inodes, p)
} else {
inodes := common.ReadInodeFromPage(p)
inodes = append(inodes[:start], inodes[end:]...)
p.SetCount(uint16(len(inodes)))
dataWritten = common.WriteInodeToPage(inodes, p)
}
pageSize, _, err := guts_cli.ReadPageAndHWMSize(path)
if err != nil {
return false, fmt.Errorf("ReadPageAndHWMSize failed: %w", err)
}
if dataWritten%uint32(pageSize) == 0 {
p.SetOverflow(dataWritten/uint32(pageSize) - 1)
} else {
p.SetOverflow(dataWritten / uint32(pageSize))
}
datasz := pageSize * (uint64(p.Overflow()) + 1)
if err := guts_cli.WritePage(path, buf[0:datasz]); err != nil {
return false, fmt.Errorf("WritePage failed: %w", err)
}
if preOverflow != p.Overflow() || p.IsBranchPage() {
if abandonFreelist {
return false, ClearFreelist(path)
}
return true, nil
}
return false, nil
}
func ClearFreelist(path string) error {
if err := clearFreelistInMetaPage(path, 0); err != nil {
return fmt.Errorf("clearFreelist on meta page 0 failed: %w", err)
}
if err := clearFreelistInMetaPage(path, 1); err != nil {
return fmt.Errorf("clearFreelist on meta page 1 failed: %w", err)
}
return nil
}
func clearFreelistInMetaPage(path string, pageId uint64) error {
_, buf, err := guts_cli.ReadPage(path, pageId)
if err != nil {
return fmt.Errorf("ReadPage %d failed: %w", pageId, err)
}
meta := common.LoadPageMeta(buf)
meta.SetFreelist(common.PgidNoFreelist)
meta.SetChecksum(meta.Sum64())
if err := guts_cli.WritePage(path, buf); err != nil {
return fmt.Errorf("WritePage %d failed: %w", pageId, err)
}
return nil
}
// RevertMetaPage replaces the newer meta page with the older one.
// This usually means that one transaction is lost. But frequently data
// corruption happens on the pages of the last transaction, while the
// previous state is consistent.
func RevertMetaPage(path string) error {
_, activeMetaPage, err := guts_cli.GetRootPage(path)
if err != nil {
return err
}
if activeMetaPage == 0 {
return CopyPage(path, 1, 0)
} else {
return CopyPage(path, 0, 1)
}
}
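A sketch of how the warning flag returned by ClearPage is meant to be handled (hypothetical; "my.db" and page 7 are placeholders, the database must not be open elsewhere, and in-module compilation is assumed):

package main
import (
	"log"
	"github.com/tutus-one/tutus-bolt/internal/common"
	"github.com/tutus-one/tutus-bolt/internal/surgeon"
)
func main() {
	needAbandonFreelist, err := surgeon.ClearPage("my.db", common.Pgid(7))
	if err != nil {
		log.Fatal(err)
	}
	if needAbandonFreelist {
		// The freelist stored in the meta pages is now stale; clear it so
		// bbolt reconstructs it by scanning the db on the next startup.
		if err := surgeon.ClearFreelist("my.db"); err != nil {
			log.Fatal(err)
		}
	}
}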

57
internal/surgeon/surgeon_test.go Normal file
View File

@ -0,0 +1,57 @@
package surgeon_test
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
bolt "github.com/tutus-one/tutus-bolt"
"github.com/tutus-one/tutus-bolt/internal/btesting"
"github.com/tutus-one/tutus-bolt/internal/surgeon"
)
func TestRevertMetaPage(t *testing.T) {
db := btesting.MustCreateDB(t)
assert.NoError(t,
db.Fill([]byte("data"), 1, 500,
func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
func(tx int, k int) []byte { return make([]byte, 100) },
))
assert.NoError(t,
db.Update(
func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("data"))
assert.NoError(t, b.Put([]byte("0123"), []byte("new Value for 123")))
assert.NoError(t, b.Put([]byte("1234b"), []byte("additional object")))
assert.NoError(t, b.Delete([]byte("0246")))
return nil
}))
assert.NoError(t,
db.View(
func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("data"))
assert.Equal(t, []byte("new Value for 123"), b.Get([]byte("0123")))
assert.Equal(t, []byte("additional object"), b.Get([]byte("1234b")))
assert.Nil(t, b.Get([]byte("0246")))
return nil
}))
db.Close()
// This causes the whole tree to be linked to the previous state
assert.NoError(t, surgeon.RevertMetaPage(db.Path()))
db.MustReopen()
db.MustCheck()
assert.NoError(t,
db.View(
func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("data"))
assert.Equal(t, make([]byte, 100), b.Get([]byte("0123")))
assert.Nil(t, b.Get([]byte("1234b")))
assert.Equal(t, make([]byte, 100), b.Get([]byte("0246")))
return nil
}))
}

102
internal/surgeon/xray.go Normal file
View File

@ -0,0 +1,102 @@
package surgeon
// Library contains raw access to bbolt files for the sake of testing or fixing corrupted files.
//
// The library must not be used by the bbolt btree itself - only by the CLI or tests.
// It's not optimized for performance.
import (
"bytes"
"fmt"
"github.com/tutus-one/tutus-bolt/internal/common"
"github.com/tutus-one/tutus-bolt/internal/guts_cli"
)
type XRay struct {
path string
}
func NewXRay(path string) XRay {
return XRay{path}
}
func (n XRay) traverse(stack []common.Pgid, callback func(page *common.Page, stack []common.Pgid) error) error {
p, data, err := guts_cli.ReadPage(n.path, uint64(stack[len(stack)-1]))
if err != nil {
return fmt.Errorf("failed reading page (stack %v): %w", stack, err)
}
err = callback(p, stack)
if err != nil {
return fmt.Errorf("failed callback for page (stack %v): %w", stack, err)
}
switch p.Typ() {
case "meta":
{
m := common.LoadPageMeta(data)
r := m.RootBucket().RootPage()
return n.traverse(append(stack, r), callback)
}
case "branch":
{
for i := uint16(0); i < p.Count(); i++ {
bpe := p.BranchPageElement(i)
if err := n.traverse(append(stack, bpe.Pgid()), callback); err != nil {
return err
}
}
}
case "leaf":
for i := uint16(0); i < p.Count(); i++ {
lpe := p.LeafPageElement(i)
if lpe.IsBucketEntry() {
pgid := lpe.Bucket().RootPage()
if pgid > 0 {
if err := n.traverse(append(stack, pgid), callback); err != nil {
return err
}
} else {
inlinePage := lpe.Bucket().InlinePage(lpe.Value())
if err := callback(inlinePage, stack); err != nil {
return fmt.Errorf("failed callback for inline page (stack %v): %w", stack, err)
}
}
}
}
case "freelist":
return nil // Freelist pages do not have children.
}
return nil
}
// FindPathsToKey finds all paths from root to the page that contains the given key.
// As it traverses multiple buckets, in theory there might be multiple keys with the given name.
// Note: For simplicity it's currently implemented as traversing of the whole reachable tree.
// If key is a bucket name, a page-path referencing the key will be returned as well.
func (n XRay) FindPathsToKey(key []byte) ([][]common.Pgid, error) {
var found [][]common.Pgid
rootPage, _, err := guts_cli.GetRootPage(n.path)
if err != nil {
return nil, err
}
err = n.traverse([]common.Pgid{rootPage},
func(page *common.Page, stack []common.Pgid) error {
if page.Typ() == "leaf" {
for i := uint16(0); i < page.Count(); i++ {
if bytes.Equal(page.LeafPageElement(i).Key(), key) {
var copyPath []common.Pgid
copyPath = append(copyPath, stack...)
found = append(found, copyPath)
}
}
}
return nil
})
if err != nil {
return nil, err
} else {
return found, nil
}
}
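A hypothetical lookup using the x-ray traversal (again assuming in-module compilation; the key "0123" is a placeholder):

package main
import (
	"fmt"
	"log"
	"github.com/tutus-one/tutus-bolt/internal/surgeon"
)
func main() {
	xray := surgeon.NewXRay("my.db")
	paths, err := xray.FindPathsToKey([]byte("0123"))
	if err != nil {
		log.Fatal(err)
	}
	for _, p := range paths {
		// Each path lists page ids from the root down to the containing leaf.
		fmt.Println(p)
	}
}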

Some files were not shown because too many files have changed in this diff