diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml index ebe783406..560213466 100644 --- a/.github/workflows/branch-e2e.yml +++ b/.github/workflows/branch-e2e.yml @@ -24,6 +24,7 @@ jobs: run_core_e2e: ${{ steps.labels.outputs.run_core_e2e }} run_gpu_e2e: ${{ steps.labels.outputs.run_gpu_e2e }} run_kubernetes_ha_e2e: ${{ steps.labels.outputs.run_kubernetes_ha_e2e }} + run_centos_e2e: ${{ steps.labels.outputs.run_centos_e2e }} run_any_e2e: ${{ steps.labels.outputs.run_any_e2e }} steps: - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 @@ -41,12 +42,14 @@ jobs: run_core_e2e=true run_gpu_e2e=true run_kubernetes_ha_e2e=true + run_centos_e2e=true else run_core_e2e="$(jq -r 'index("test:e2e") != null' <<< "$LABELS_JSON")" run_gpu_e2e="$(jq -r 'index("test:e2e-gpu") != null' <<< "$LABELS_JSON")" run_kubernetes_ha_e2e="$(jq -r 'index("test:e2e-kubernetes") != null' <<< "$LABELS_JSON")" + run_centos_e2e="$(jq -r 'index("test:e2e-centos") != null' <<< "$LABELS_JSON")" fi - if [ "$run_core_e2e" = "true" ] || [ "$run_gpu_e2e" = "true" ] || [ "$run_kubernetes_ha_e2e" = "true" ]; then + if [ "$run_core_e2e" = "true" ] || [ "$run_gpu_e2e" = "true" ] || [ "$run_kubernetes_ha_e2e" = "true" ] || [ "$run_centos_e2e" = "true" ]; then run_any_e2e=true else run_any_e2e=false @@ -55,6 +58,7 @@ jobs: echo "run_core_e2e=$run_core_e2e" echo "run_gpu_e2e=$run_gpu_e2e" echo "run_kubernetes_ha_e2e=$run_kubernetes_ha_e2e" + echo "run_centos_e2e=$run_centos_e2e" echo "run_any_e2e=$run_any_e2e" } >> "$GITHUB_OUTPUT" @@ -145,6 +149,19 @@ jobs: extra-helm-values: deploy/helm/openshell/ci/values-high-availability.yaml external-postgres-secret: openshell-ha-pg + centos-selinux-e2e: + needs: [pr_metadata, build-supervisor] + if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_centos_e2e == 'true' + # Optional proof-of-life suite (boots a CentOS Stream 10 VM under + # QEMU/KVM on the existing linux-amd64-cpu8 runner). Not part of the + # required E2E gate; see kubernetes-ha-e2e above for the same pattern. + permissions: + contents: read + packages: read + uses: ./.github/workflows/e2e-centos-selinux.yml + with: + image-tag: ${{ github.sha }} + core-e2e-result: name: Core E2E result needs: [pr_metadata, build-gateway, build-supervisor, e2e, kubernetes-e2e] diff --git a/.github/workflows/e2e-centos-selinux.yml b/.github/workflows/e2e-centos-selinux.yml new file mode 100644 index 000000000..fef4de989 --- /dev/null +++ b/.github/workflows/e2e-centos-selinux.yml @@ -0,0 +1,477 @@ +name: CentOS Stream 10 Docker/Podman/SELinux Compatibility + +# Exercises the Docker and Podman compute drivers on CentOS Stream 10 with +# SELinux enforcing. This is the cross-compatibility check requested in +# https://github.com/NVIDIA/OpenShell/pull/2092: RHEL-family distros ship +# Podman but not Docker, so this job installs both engines, confirms they +# coexist on one host, and runs the standard Docker/Podman e2e suites +# against a real SELinux-enforcing kernel (unlike the Ubuntu-based +# `ghcr.io/nvidia/openshell/ci` image used by e2e-test.yml, which does not +# enforce SELinux). This gives bind-mount relabeling changes (e.g. the +# `selinux_label`/`:z`/`:Z` support added in #2092) a host that can +# actually deny mislabeled mounts. +# +# Unlike a bare RHEL 10 runner, this does NOT require an org admin to +# provision a new GitHub-hosted runner class. Instead, it boots a real +# CentOS Stream 10 VM under QEMU/KVM on the existing self-hosted +# `linux-amd64-cpu8` runner (the same runner class already used by +# driver-vm-linux.yml with `--privileged`), and drives the guest entirely +# over SSH. This does require that runner class to expose `/dev/kvm`; +# the "Require KVM device on runner" step fails fast with a clear error if +# it doesn't. +# +# The host job itself runs in the same Ubuntu-based CI container as the +# rest of the suite; only the guest VM is CentOS Stream 10 with SELinux +# enforcing. + +on: + workflow_call: + inputs: + image-tag: + description: "Image tag to test (typically the commit SHA)" + required: true + type: string + runner: + description: "GitHub Actions runner label for the host job (must expose /dev/kvm)" + required: false + type: string + default: "linux-amd64-cpu8" + checkout-ref: + description: "Git ref to check out for test inputs (defaults to the workflow SHA)" + required: false + type: string + default: "" + mise-version: + description: "mise version to install in the CentOS Stream guest" + required: false + type: string + default: "v2026.4.25" + centos-image-url: + description: "CentOS Stream 10 GenericCloud qcow2 image URL" + required: false + type: string + default: "https://cloud.centos.org/centos/10-stream/x86_64/images/CentOS-Stream-GenericCloud-10-latest.x86_64.qcow2" + vm-memory-mb: + description: "Memory (MB) allocated to the guest VM" + required: false + type: string + default: "8192" + vm-cpus: + description: "vCPUs allocated to the guest VM" + required: false + type: string + default: "4" + vm-disk-gb: + description: "Guest root disk size (GB) after resize" + required: false + type: string + default: "40" + ssh-port: + description: "Host-side TCP port forwarded to the guest's SSH daemon" + required: false + type: string + default: "2222" + vm-boot-timeout-minutes: + description: "Minutes to wait for the guest SSH daemon to become reachable" + required: false + type: string + default: "10" + +permissions: + contents: read + packages: read + +jobs: + centos-selinux-compat: + name: CentOS Stream 10 Docker + Podman + SELinux + runs-on: ${{ inputs.runner }} + timeout-minutes: 45 + permissions: + contents: read + packages: read + container: + image: ghcr.io/nvidia/openshell/ci:latest + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --privileged --device=/dev/kvm + env: + MISE_VERSION: ${{ inputs.mise-version }} + IMAGE_TAG: ${{ inputs.image-tag }} + OPENSHELL_SUPERVISOR_IMAGE: ${{ format('ghcr.io/nvidia/openshell/supervisor:{0}', inputs.image-tag) }} + OPENSHELL_REGISTRY: ghcr.io/nvidia/openshell + OPENSHELL_REGISTRY_HOST: ghcr.io + OPENSHELL_REGISTRY_NAMESPACE: nvidia/openshell + OPENSHELL_REGISTRY_USERNAME: ${{ github.actor }} + CENTOS_IMAGE_URL: ${{ inputs.centos-image-url }} + VM_MEMORY_MB: ${{ inputs.vm-memory-mb }} + VM_CPUS: ${{ inputs.vm-cpus }} + VM_DISK_GB: ${{ inputs.vm-disk-gb }} + SSH_PORT: ${{ inputs.ssh-port }} + VM_BOOT_TIMEOUT_MINUTES: ${{ inputs.vm-boot-timeout-minutes }} + GUEST_USER: e2e + GUEST_KEY: /tmp/openshell-e2e-centos/guest_ed25519 + GUEST_WORKDIR: /home/e2e/openshell + SSH_OPTS: -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o BatchMode=yes + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + ref: ${{ inputs['checkout-ref'] || github.sha }} + + - name: Record host facts + run: | + set -euo pipefail + cat /etc/os-release + uname -a + ls -l /dev/kvm || echo "/dev/kvm not present" + + - name: Require KVM device on runner + run: | + set -euo pipefail + if [ ! -c /dev/kvm ]; then + echo "::error::/dev/kvm is not available on this runner. This suite boots a real CentOS Stream 10 VM under QEMU/KVM and requires the '${{ inputs.runner }}' runner class to expose hardware virtualization." + exit 1 + fi + if [ ! -r /dev/kvm ] || [ ! -w /dev/kvm ]; then + echo "::error::/dev/kvm exists but is not readable/writable by this job. Check the container's --device/--privileged options." + exit 1 + fi + + - name: Install host VM tooling + run: | + set -euo pipefail + apt-get update + apt-get install -y --no-install-recommends \ + qemu-system-x86 \ + qemu-utils \ + genisoimage \ + openssh-client \ + curl \ + ca-certificates + rm -rf /var/lib/apt/lists/* + + - name: Generate ephemeral guest SSH keypair + run: | + set -euo pipefail + mkdir -p "$(dirname "${GUEST_KEY}")" + ssh-keygen -t ed25519 -N "" -C "openshell-e2e-centos" -f "${GUEST_KEY}" + + - name: Download CentOS Stream 10 GenericCloud image + working-directory: /tmp/openshell-e2e-centos + run: | + set -euo pipefail + curl -fsSL -o centos.qcow2 "${CENTOS_IMAGE_URL}" + if curl -fsSL -o centos.qcow2.SHA256SUM "${CENTOS_IMAGE_URL}.SHA256SUM"; then + expected="$(grep -oE '[0-9a-f]{64}' centos.qcow2.SHA256SUM | head -n1)" + actual="$(sha256sum centos.qcow2 | awk '{print $1}')" + if [ "${expected}" != "${actual}" ]; then + echo "::error::CentOS Stream image checksum mismatch (expected ${expected}, got ${actual})" + exit 1 + fi + echo "Verified image checksum: ${actual}" + else + echo "::warning::No SHA256SUM published alongside the image; skipping checksum verification" + fi + + - name: Grow guest disk image + working-directory: /tmp/openshell-e2e-centos + run: qemu-img resize centos.qcow2 "${VM_DISK_GB}G" + + - name: Build cloud-init seed image + working-directory: /tmp/openshell-e2e-centos + # Written line-by-line (not an indented heredoc) so the generated + # files have no leading whitespace: cloud-init requires "#cloud-config" + # to be the exact first bytes of user-data to recognize the format. + run: | + set -euo pipefail + GUEST_PUBKEY="$(cat "${GUEST_KEY}.pub")" + { + echo "instance-id: openshell-e2e-centos" + echo "local-hostname: openshell-e2e-guest" + } > meta-data + { + echo "#cloud-config" + echo "hostname: openshell-e2e-guest" + echo "manage_etc_hosts: true" + echo "disable_root: true" + echo "ssh_pwauth: false" + echo "users:" + echo " - name: ${GUEST_USER}" + echo " gecos: OpenShell E2E" + echo " groups: [wheel]" + echo " sudo: [\"ALL=(ALL) NOPASSWD:ALL\"]" + echo " shell: /bin/bash" + echo " lock_passwd: true" + echo " ssh_authorized_keys:" + echo " - ${GUEST_PUBKEY}" + echo "package_update: false" + echo "package_upgrade: false" + } > user-data + head -n1 user-data | grep -qx '#cloud-config' + genisoimage -output seed.iso -volid cidata -joliet -rock user-data meta-data + + - name: Boot the CentOS Stream 10 VM + working-directory: /tmp/openshell-e2e-centos + run: | + set -euo pipefail + qemu-system-x86_64 \ + -name openshell-e2e-centos \ + -machine q35,accel=kvm -cpu host \ + -smp "${VM_CPUS}" -m "${VM_MEMORY_MB}" \ + -drive file=centos.qcow2,if=virtio,format=qcow2 \ + -drive file=seed.iso,if=virtio,format=raw \ + -netdev "user,id=net0,hostfwd=tcp::${SSH_PORT}-:22" \ + -device virtio-net-pci,netdev=net0 \ + -nographic \ + -serial file:/tmp/openshell-e2e-centos/console.log \ + -pidfile /tmp/openshell-e2e-centos/qemu.pid \ + -daemonize + + - name: Wait for guest SSH + run: | + set -euo pipefail + deadline=$(( $(date +%s) + VM_BOOT_TIMEOUT_MINUTES * 60 )) + # shellcheck disable=SC2086 + until ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" true 2>/dev/null; do + if [ "$(date +%s)" -ge "${deadline}" ]; then + echo "::error::Guest SSH did not become reachable within ${VM_BOOT_TIMEOUT_MINUTES} minutes" + echo "=== console log ===" + cat /tmp/openshell-e2e-centos/console.log || true + exit 1 + fi + sleep 5 + done + echo "Guest SSH is reachable." + + - name: Copy repository into the guest + run: | + set -euo pipefail + tar -C "${GITHUB_WORKSPACE}" -czf /tmp/openshell-e2e-centos/repo.tar.gz --exclude=.git . + # shellcheck disable=SC2086 + scp -P "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} \ + /tmp/openshell-e2e-centos/repo.tar.gz "${GUEST_USER}@127.0.0.1:/tmp/repo.tar.gz" + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" \ + "mkdir -p '${GUEST_WORKDIR}' && tar -xzf /tmp/repo.tar.gz -C '${GUEST_WORKDIR}'" + + - name: Write guest environment file + run: | + set -euo pipefail + # shellcheck disable=SC2086,SC2087 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" \ + "cat > '${GUEST_WORKDIR}/.e2e.env' && chmod 600 '${GUEST_WORKDIR}/.e2e.env'" <> "$GITHUB_OUTPUT" + + - name: Install build and test dependencies (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + sudo dnf install -y \ + ca-certificates \ + curl \ + git \ + gcc \ + gcc-c++ \ + make \ + clang \ + clang-devel \ + z3-devel \ + pkgconf-pkg-config \ + openssl-devel \ + openssh-clients \ + python3 \ + cmake \ + socat \ + unzip \ + xz \ + jq \ + rsync \ + zstd \ + audit \ + policycoreutils + REMOTE + + - name: Install Docker Engine and Podman (guest) + # CentOS Stream's GenericCloud image ships neither by default, per + # the RHEL-family cross-compatibility check requested in + # https://github.com/NVIDIA/OpenShell/pull/2092#issuecomment-4856873804. + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + curl -fsSL https://get.docker.com | sudo bash + sudo sed -i "s#/usr/bin/dockerd#/usr/bin/dockerd --ip-forward-no-drop#g" /usr/lib/systemd/system/docker.service + sudo systemctl daemon-reload + sudo systemctl enable --now docker + sudo dnf install -y podman + REMOTE + + - name: Grant guest user access to the Docker socket + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + sudo groupadd -f docker + sudo usermod -aG docker "$(id -un)" + sudo chmod 666 /var/run/docker.sock + REMOTE + + - name: Verify Docker and Podman coexist (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + docker version + docker info + podman version + podman info + REMOTE + + - name: Install mise (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + source ~/openshell/.e2e.env + curl https://mise.run | MISE_VERSION="${MISE_VERSION}" sh + REMOTE + + - name: Install tools (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + source ~/openshell/.e2e.env + export PATH="$HOME/.local/bin:$HOME/.local/share/mise/shims:$PATH" + cd ~/openshell + mise install --locked + REMOTE + + - name: Log in to GHCR with Docker (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + source ~/openshell/.e2e.env + echo "${OPENSHELL_REGISTRY_PASSWORD}" | docker login ghcr.io -u "${OPENSHELL_REGISTRY_USERNAME}" --password-stdin + REMOTE + + - name: Log in to GHCR with Podman (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + source ~/openshell/.e2e.env + echo "${OPENSHELL_REGISTRY_PASSWORD}" | podman login ghcr.io -u "${OPENSHELL_REGISTRY_USERNAME}" --password-stdin + REMOTE + + - name: Run Docker-backed e2e suite (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + source ~/openshell/.e2e.env + export PATH="$HOME/.local/bin:$HOME/.local/share/mise/shims:$PATH" + cd ~/openshell + mise run --no-deps --skip-deps e2e:docker + REMOTE + + - name: Run Podman-backed e2e suite (guest) + run: | + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" bash -s <<'REMOTE' + set -euo pipefail + source ~/openshell/.e2e.env + export PATH="$HOME/.local/bin:$HOME/.local/share/mise/shims:$PATH" + cd ~/openshell + mise run --no-deps --skip-deps e2e:podman + REMOTE + + - name: Check for unexpected SELinux denials (guest) + env: + AUDIT_WINDOW_START: ${{ steps.audit_window.outputs.start }} + run: | + set -euo pipefail + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" \ + "AUDIT_WINDOW_START='${AUDIT_WINDOW_START}' bash -s" <<'REMOTE' + set -euo pipefail + echo "Searching audit log for AVC denials since ${AUDIT_WINDOW_START}" + if sudo ausearch -m avc,user_avc -ts "${AUDIT_WINDOW_START}" 2>/dev/null | tee /tmp/avc-denials.log | grep -q .; then + echo "::error::SELinux denied one or more operations during the e2e suites; see the log below" + cat /tmp/avc-denials.log + exit 1 + fi + echo "No SELinux denials recorded." + REMOTE + + - name: Collect SELinux/audit context on failure (guest) + if: failure() + env: + AUDIT_WINDOW_START: ${{ steps.audit_window.outputs.start }} + run: | + set +e + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" \ + "AUDIT_WINDOW_START='${AUDIT_WINDOW_START}' bash -s" <<'REMOTE' + echo "=== getenforce ===" + getenforce + echo "=== recent AVC denials ===" + sudo ausearch -m avc,user_avc -ts "${AUDIT_WINDOW_START}" 2>/dev/null + echo "=== dockerd status ===" + sudo systemctl status docker --no-pager + echo "=== recent kernel log ===" + sudo journalctl -k --no-pager | tail -n 200 + REMOTE + echo "=== host QEMU console log ===" + cat /tmp/openshell-e2e-centos/console.log + exit 0 + + - name: Shut down the CentOS Stream VM + if: always() + run: | + set +e + # shellcheck disable=SC2086 + ssh -p "${SSH_PORT}" -i "${GUEST_KEY}" ${SSH_OPTS} "${GUEST_USER}@127.0.0.1" sudo poweroff + if [ -f /tmp/openshell-e2e-centos/qemu.pid ]; then + pid="$(cat /tmp/openshell-e2e-centos/qemu.pid)" + for _ in $(seq 1 20); do + kill -0 "${pid}" 2>/dev/null || break + sleep 1 + done + kill -9 "${pid}" 2>/dev/null + fi + exit 0 diff --git a/.github/workflows/e2e-label-help.yml b/.github/workflows/e2e-label-help.yml index 1190bcd3d..157734a5b 100644 --- a/.github/workflows/e2e-label-help.yml +++ b/.github/workflows/e2e-label-help.yml @@ -19,7 +19,7 @@ permissions: {} jobs: hint: name: Post next-step hint for E2E label - if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' || github.event.label.name == 'test:e2e-kubernetes' + if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' || github.event.label.name == 'test:e2e-kubernetes' || github.event.label.name == 'test:e2e-centos' runs-on: ubuntu-latest permissions: pull-requests: write @@ -55,6 +55,11 @@ jobs: build_summary="gateway and supervisor images" status_summary="This is an optional proof-of-life suite; failures are visible in the workflow run but do not publish a required CI gate status." ;; + test:e2e-centos) + suite_summary="the CentOS Stream 10 Docker/Podman/SELinux compatibility suite" + build_summary="supervisor image" + status_summary="This is an optional proof-of-life suite; failures are visible in the workflow run but do not publish a required CI gate status." + ;; *) echo "Unrecognized label $LABEL_NAME"; exit 1 ;; esac diff --git a/CI.md b/CI.md index d04668aaf..5760e729f 100644 --- a/CI.md +++ b/CI.md @@ -10,15 +10,17 @@ PR CI that runs on NVIDIA self-hosted runners uses NVIDIA's copy-pr-bot. The bot `Branch Checks` run automatically after copy-pr-bot mirrors the PR. `Required CI Gates` posts PR-head statuses that verify the mirror exists, is current, and ran the expected push-based workflows. E2E suites are opt-in because they are more expensive and publish temporary images. -Three opt-in labels enable the long-running E2E suites: +Four opt-in labels enable the long-running E2E suites: - `test:e2e` runs the standard E2E suite in `Branch E2E Checks` - `test:e2e-gpu` runs GPU E2E in `Branch E2E Checks` - `test:e2e-kubernetes` runs Kubernetes E2E with the HA Helm overlay (`replicaCount: 2` and bundled PostgreSQL) in `Branch E2E Checks` +- `test:e2e-centos` runs the Docker/Podman/SELinux compatibility suite + against a CentOS Stream 10 VM booted under QEMU/KVM in `Branch E2E Checks` When multiple labels are present, `Branch E2E Checks` builds the shared gateway and supervisor images once and fans out all enabled suites in parallel. -The `OpenShell / E2E` and `OpenShell / GPU E2E` required statuses are evaluated from separate suite result jobs inside that workflow. `test:e2e-kubernetes` is optional while HA behavior is under active iteration: failures are visible in the workflow run but do not publish a required CI gate status. +The `OpenShell / E2E` and `OpenShell / GPU E2E` required statuses are evaluated from separate suite result jobs inside that workflow. `test:e2e-kubernetes` and `test:e2e-centos` are optional: failures are visible in the workflow run but do not publish a required CI gate status. The GitHub ruleset should require the `OpenShell / ...` statuses published by `Required CI Gates`, not the push-triggered workflow jobs directly. @@ -71,7 +73,7 @@ Flow: 1. Open the PR. copy-pr-bot mirrors it to `pull-request/` automatically. 2. The mirror push runs `Branch Checks` automatically. `Required CI Gates` keeps the PR blocked until the mirror exists, matches the PR head SHA, and the required push-based workflow succeeds. The first `Branch E2E Checks` run only resolves metadata and skips expensive jobs unless an E2E label is already set. -3. A maintainer applies `test:e2e`, `test:e2e-gpu`, and/or `test:e2e-kubernetes`. `E2E Label Help` posts a comment with a link to the existing gated workflow run. +3. A maintainer applies `test:e2e`, `test:e2e-gpu`, `test:e2e-kubernetes`, and/or `test:e2e-centos`. `E2E Label Help` posts a comment with a link to the existing gated workflow run. 4. The maintainer opens that link and clicks **Re-run all jobs**. This time `pr_metadata` sees the label and the build/E2E jobs run. 5. When the run finishes, the matching `OpenShell / ...` gate status flips to green automatically. 6. New commits push to the mirror automatically and re-trigger `Branch Checks` plus any labeled E2E jobs in `Branch E2E Checks`. @@ -110,7 +112,8 @@ The bot's full administrator documentation is internal to NVIDIA. The only comma | File | Role | |---|---| | `.github/workflows/branch-checks.yml` | Required non-E2E PR checks. Triggers on `push: pull-request/[0-9]+`. | -| `.github/workflows/branch-e2e.yml` | Opt-in standard, GPU, and Kubernetes HA E2E. Triggers on `push: pull-request/[0-9]+` and runs jobs selected by `test:e2e`, `test:e2e-gpu`, or `test:e2e-kubernetes`. | +| `.github/workflows/branch-e2e.yml` | Opt-in standard, GPU, Kubernetes HA, and CentOS Stream 10 compatibility E2E. Triggers on `push: pull-request/[0-9]+` and runs jobs selected by `test:e2e`, `test:e2e-gpu`, `test:e2e-kubernetes`, or `test:e2e-centos`. | +| `.github/workflows/e2e-centos-selinux.yml` | Reusable workflow that boots a CentOS Stream 10 VM under QEMU/KVM on the `linux-amd64-cpu8` runner, installs Docker alongside Podman, verifies SELinux is enforcing, and runs the Docker/Podman e2e suites. | | `.github/workflows/helm-lint.yml` | Helm chart validation. Triggers on `push: pull-request/[0-9]+` and skips lint jobs unless Helm inputs changed. | | `.github/actions/pr-gate/action.yml` | Composite action that resolves PR metadata and verifies the required label is set. | | `.github/actions/pr-merge-base/action.yml` | Composite action that resolves and fetches the merge-base commit for `pull-request/` push workflows. | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b78f36522..b66f9c353 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -305,4 +305,4 @@ DCO sign-off is separate from cryptographic commit signing. CI requires signing ## CI -How PR CI runs, the `test:e2e`, `test:e2e-gpu`, and `test:e2e-kubernetes` labels, copy-pr-bot, and commit-signing setup are documented in [CI.md](CI.md). +How PR CI runs, the `test:e2e`, `test:e2e-gpu`, `test:e2e-kubernetes`, and `test:e2e-centos` labels, copy-pr-bot, and commit-signing setup are documented in [CI.md](CI.md).