commit 260f0a3cb48e2df47c873b9f44c65ffbfda83619
Author: redxef
Date:   Mon May 13 19:14:45 2024 +0200

    Initial commit.

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8abf09a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+SRCS := qemu-vm qemu-vm-*
+TARGET ?= /usr/local
+
+install: $(SRCS)
+	install -Dm 0755 --owner=root --group=root -t $(TARGET)/bin $(SRCS)
+
+.PHONY: install
diff --git a/qemu-vm b/qemu-vm
new file mode 100755
--- /dev/null
+++ b/qemu-vm
@@ -0,0 +1,168 @@
+#!/bin/bash
+#
+# qemu-vm: start the virtual machine named by $VMNAME.
+#
+# Reads configuration from $XDG_CONFIG_HOME/qemu-vm, binds the listed PCI
+# devices to vfio-pci, prepares networking and hugepages through the
+# qemu-vm-net, qemu-vm-pci and qemu-vm-mem helpers, launches QEMU, pins its
+# threads with qemu-affinity and undoes the host changes once QEMU exits.
+#
+# Required environment: VMNAME. MONITOR, NUM_MEM and NUM_PROCESSORS must come
+# from the environment or from one of the sourced config files.
+
+set -eo pipefail
+
+SUDO=sudo
+# Validate sudo credentials before any work is done.
+$SUDO true
+
+if [[ -z "$VMNAME" ]]; then
+    echo "VMNAME not specified, aborting" >&2
+    exit 1
+fi
+
+set -u
+
+# Run the optional hook script $1 with SUDO in its environment. A missing
+# hook is not an error; this also keeps the exit status of the whole script
+# at 0 when the last hook does not exist.
+run_hook() {
+    if [[ -e "$1" ]]; then
+        SUDO="$SUDO" "$1"
+    fi
+}
+
+# Print the lines of the global and the VM specific vfio device list,
+# skipping a list that does not exist.
+vfio_device_lines() {
+    local list
+    for list in "$CONFIG_PATH/vfio-devices.txt" \
+        "$CONFIG_PATH/vfio-devices-$VMNAME.txt"; do
+        if [[ -e "$list" ]]; then
+            cat "$list"
+            # terminate a last line that lacks its newline
+            echo
+        fi
+    done
+}
+
+# locate config directory
+_="${XDG_CONFIG_HOME:=$HOME/.config}"
+CONFIG_PATH="$XDG_CONFIG_HOME/qemu-vm"
+CONFIG_PATH_ARGUMENTS="$CONFIG_PATH/arguments.d"
+CONFIG_PATH_ARGUMENTS_SPECIFIC="$CONFIG_PATH/$VMNAME/arguments.d"
+
+# load config
+if [[ -e "$CONFIG_PATH/config.conf" ]]; then
+    source "$CONFIG_PATH/config.conf"
+fi
+if [[ -e "$CONFIG_PATH/config-$VMNAME.conf" ]]; then
+    source "$CONFIG_PATH/config-$VMNAME.conf"
+fi
+
+run_hook "$CONFIG_PATH/pre-config"
+run_hook "$CONFIG_PATH/pre-config-$VMNAME"
+
+# efi variables
+EFI_FIRMWARE=/usr/share/ovmf/x64/OVMF_CODE.secboot.fd
+EFI_VARS="$($SUDO mktemp)"
+$SUDO cp /usr/share/ovmf/x64/OVMF_VARS.fd "$EFI_VARS"
+
+# setup network
+NET_CONF_FILE="$($SUDO mktemp)"
+NET_CONF_FILE="$NET_CONF_FILE" $SUDO --preserve-env=PATH,NET_CONF_FILE \
+    qemu-vm-net create || true
+
+# rebind devices; every list line is "<pci id> [<override>]"
+while read -r device override _; do
+    [[ -n "$device" ]] || continue
+    [[ "$override" == 'true' ]] || continue
+    $SUDO --preserve-env=SUDO,PATH \
+        qemu-vm-pci vfio_override_device "$device" < /dev/null
+done < <(vfio_device_lines)
+$SUDO modprobe vfio-pci
+while read -r device override _; do
+    [[ -n "$device" ]] || continue
+    $SUDO --preserve-env=SUDO,PATH \
+        qemu-vm-pci vfio_rebind_device "$device" "$override" < /dev/null
+done < <(vfio_device_lines)
+
+# memory backend
+MEM_PATH="$(SUDO="$SUDO" qemu-vm-mem init "$NUM_MEM")"
+
+# generate arguments: each *.sh is sourced and the file of the same name
+# without ".sh" is expanded with envsubst; other *.conf files are copied.
+t="$(mktemp -d)"
+for f in "$CONFIG_PATH_ARGUMENTS/"*.sh "$CONFIG_PATH_ARGUMENTS_SPECIFIC/"*.sh; do
+    # an unmatched glob stays literal; skip it instead of sourcing it
+    [[ -e "$f" ]] || continue
+    (
+        export SUDO="$SUDO"
+        export MEM_PATH="$MEM_PATH"
+        source "$f"
+        newfile="$(basename "$f" .sh)"
+        envsubst < "${f%.sh}" > "$t/$newfile"
+    )
+done
+for f in "$CONFIG_PATH_ARGUMENTS/"*.conf "$CONFIG_PATH_ARGUMENTS_SPECIFIC/"*.conf; do
+    [[ -e "$f" ]] || continue
+    [[ -e "$f.sh" ]] && continue
+    cp "$f" "$t/"
+done
+
+qemu_arguments=()
+for f in "$t/"*.conf; do
+    [[ -e "$f" ]] || continue
+    read -ra _qemu_arguments -d '' < "$f" || true
+    qemu_arguments+=("${_qemu_arguments[@]}")
+done
+unset _qemu_arguments
+
+# pre-run callbacks
+run_hook "$CONFIG_PATH/pre-run"
+run_hook "$CONFIG_PATH/pre-run-$VMNAME"
+
+# run and set affinity
+set -x
+$SUDO "qemu-system-$(uname -m)" \
+    -name "$VMNAME,process=$VMNAME,debug-threads=on" \
+    -monitor unix:"$MONITOR",server,nowait \
+    -drive if=pflash,format=raw,readonly=on,file="$EFI_FIRMWARE" \
+    -drive if=pflash,format=raw,file="$EFI_VARS" \
+    "${qemu_arguments[@]}" &
+qemu_pid="$!"
+set +x
+# $! is the sudo process; poll until its child process shows up
+while :; do
+    _l="$(ps --ppid "$qemu_pid" | tail -n+2 || true)"
+    if [ "$_l" = '' ]; then
+        echo waiting
+        sleep .1
+        continue
+    fi
+    qemu_pid="$(awk '{print $1}' <<< "$_l")"
+    break
+done
+vm_cpus="$(qemu-vm-cpus compute_vm "$NUM_PROCESSORS")"
+# decompress_seq prints a space separated list that is split on purpose so
+# each CPU becomes its own argument; "*:" is quoted to keep it from globbing.
+# shellcheck disable=SC2046
+$SUDO qemu-affinity \
+    -k $(qemu-vm-cpus decompress_seq "$vm_cpus") \
+    -i "*:$vm_cpus" \
+    -- "$qemu_pid"
+
+echo 'Startup complete'
+tail --pid="$qemu_pid" -f /dev/null
+echo 'VM stopped'
+
+SUDO="$SUDO" qemu-vm-mem restore "$MEM_PATH"
+NET_CONF_FILE="$NET_CONF_FILE" $SUDO --preserve-env=PATH,NET_CONF_FILE \
+    qemu-vm-net delete
+
+# remove the temporary EFI variable store and the generated arguments
+$SUDO rm -f -- "$EFI_VARS"
+rm -rf -- "${t:?}"
+
+run_hook "$CONFIG_PATH/post-run"
+run_hook "$CONFIG_PATH/post-run-$VMNAME"
diff --git a/qemu-vm-cpus b/qemu-vm-cpus
new file mode 100755
--- /dev/null
+++ b/qemu-vm-cpus
@@ -0,0 +1,139 @@
+#!/bin/sh
+#
+# qemu-vm-cpus: helpers to split the logical processors between host and VM.
+# Usage: qemu-vm-cpus <function> [arguments...]
+
+# Print one "<processor> <core id>" line per logical processor, read from the
+# "processor" and "core id" fields of /proc/cpuinfo.
+all_processors() {
+    grep -E '^(processor|core id)' /proc/cpuinfo | while : ; do
+        read -r line0 || break
+        read -r line1 || break
+        if echo "$line0" | grep -q 'processor'; then
+            processor="$(echo "$line0" | sed -En 's/^[^0-9]*([0-9]+)$/\1/p')"
+            core="$(echo "$line1" | sed -En 's/^[^0-9]*([0-9]+)$/\1/p')"
+        elif echo "$line1" | grep -q 'processor'; then
+            processor="$(echo "$line1" | sed -En 's/^[^0-9]*([0-9]+)$/\1/p')"
+            core="$(echo "$line0" | sed -En 's/^[^0-9]*([0-9]+)$/\1/p')"
+        fi
+        echo "$processor $core"
+    done
+}
+
+# Print the first $1 processors in core id order (the host's share).
+take_host_processors() {
+    all_processors | sort -hk2 | head -n "$1"
+}
+
+# Print all but the first $1 processors in core id order (the VM's share).
+take_vm_processors() {
+    all_processors | sort -hk2 | tail -n "+$(($1 + 1))"
+}
+
+# Print the run of consecutive numbers given as arguments as "N," or
+# "FIRST-LAST,".
+_compress_seq_sub() {
+    first="$(echo "$@" | awk '{print $1}')"
+    last="$(echo "$@" | awk '{print $NF}')"
+    if [ "$first" = "$last" ]; then
+        printf '%s,' "$first"
+    else
+        printf '%s-%s,' "$first" "$last"
+    fi
+}
+
+# Read one number per line from stdin and print them as comma terminated
+# ranges, e.g. "0-3,8,".
+_compress_seq() {
+    buffer=
+    while read -r item; do
+        if [ -z "$buffer" ]; then
+            buffer=$item
+            continue
+        fi
+        if [ $(($(echo "$buffer" | awk '{print $NF}') + 1)) -eq "$item" ]; then
+            buffer="$buffer $item"
+        else
+            # word splitting is intended: one argument per buffered number
+            _compress_seq_sub $buffer
+            buffer=$item
+        fi
+    done
+    _compress_seq_sub $buffer
+    echo
+}
+
+# Same as _compress_seq, with the trailing comma removed.
+compress_seq() {
+    _compress_seq | rev | cut -c2- | rev
+}
+
+# Expand a compressed list such as "0-3,8" to "0 1 2 3 8 ", trailing space
+# included.
+_decompress_seq() {
+    (
+        # split the arguments on commas only
+        IFS=,
+        for item in $@; do
+            case "$item" in
+                *-*)
+                    num0="${item%%-*}"
+                    num1="${item#*-}"
+                    printf '%s ' "$(seq "$num0" "$num1" | xargs echo)"
+                    ;;
+                *)
+                    printf '%s ' "$item"
+                    ;;
+            esac
+        done
+        echo
+    )
+}
+
+# Same as _decompress_seq, with the trailing space removed.
+decompress_seq() {
+    _decompress_seq "$1" | rev | cut -c2- | rev
+}
+
+# Succeed if $1 is an integer; otherwise print an error and return 1.
+check_argument_is_number() {
+    if [ -z "$1" ]; then
+        echo "Error: must specify number of processors" >&2
+        return 1
+    fi
+    # -eq fails with an error unless both operands are integers
+    if [ "$1" -eq "$1" ] 2>/dev/null; then
+        true
+    else
+        echo "Error: provided argument '$1' is not a number" >&2
+        return 1
+    fi
+}
+
+# Print all processors as a compressed list.
+compute_all() {
+    all_processors | sort -hk1 | awk '{print $1}' | compress_seq
+}
+
+# Print the host's share ($1 processors) as a compressed list.
+compute_host() {
+    # "if ! check; then return $?" returned 0 here, since $? then holds the
+    # status of the negated condition; fail explicitly instead.
+    check_argument_is_number "$1" || return 1
+    take_host_processors "$1" | sort -hk1 | awk '{print $1}' | compress_seq
+}
+
+# Print the VM's share (everything but $1 processors) as a compressed list.
+compute_vm() {
+    check_argument_is_number "$1" || return 1
+    take_vm_processors "$1" | sort -hk1 | awk '{print $1}' | compress_seq
+}
+
+# Print the number of logical processors per distinct core id.
+processors_per_core() {
+    processor_count=$(all_processors | awk '{print $1}' | sort -h | uniq | wc -l)
+    core_count=$(all_processors | awk '{print $2}' | sort -h | uniq | wc -l)
+    echo $((processor_count/core_count))
+}
+
+"$@"
diff --git a/qemu-vm-mem b/qemu-vm-mem
new file mode 100755
--- /dev/null
+++ b/qemu-vm-mem
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# qemu-vm-mem: reserve 1 GiB hugepages for a VM and release them again.
+# Usage: qemu-vm-mem init <number of pages> | restore <mount point>
+
+set -euo pipefail
+
+HUGEPAGES_PATH=/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
+
+# Fall back to sudo when the caller did not pass SUDO (set -u would abort on
+# the first use otherwise), as qemu-vm-pci does.
+_="${SUDO:=sudo}"
+
+# Reserve $1 hugepages, mount a hugetlbfs on a new temporary directory and
+# print the path of that directory.
+init() {
+    echo "$1" | $SUDO tee "$HUGEPAGES_PATH" >/dev/null
+    d="$($SUDO mktemp -d)"
+    $SUDO mount -t hugetlbfs -o pagesize=1024M hugetlbfs "$d"
+    echo "$d"
+}
+
+# Unmount the hugetlbfs at $1, remove the mount point and release the
+# hugepages.
+restore() {
+    $SUDO umount "$1"
+    # best effort: a leftover directory must not keep the pages reserved
+    $SUDO rmdir "$1" || echo "Warning: could not remove '$1'" >&2
+    echo 0 | $SUDO tee "$HUGEPAGES_PATH" >/dev/null
+}
+
+"$@"
diff --git a/qemu-vm-net b/qemu-vm-net
new file mode 100755
--- /dev/null
+++ b/qemu-vm-net
@@ -0,0 +1,253 @@
+#!/bin/bash
+#
+# qemu-vm-net: create and delete the bridge/tap network of a VM.
+# Usage (as root): NET_CONF_FILE=<file> qemu-vm-net create|delete
+# "create" stores the state that "delete" needs as shell assignments in
+# NET_CONF_FILE.
+
+set -euo pipefail
+
+# NOTE(review): BASE_BRIDGE_NAME, BASE_TAP_NAME and randstr are not referenced
+# anywhere else in this script.
+BASE_BRIDGE_NAME=br-q
+BASE_TAP_NAME=tap-q
+
+# Print three random bytes as six hex digits.
+randstr() {
+    dd if=/dev/urandom count=1 bs=3 2>/dev/null | xxd -p -g 0
+}
+
+# Print the device of the first default route, wherever it is listed.
+default_route() {
+    ip route show default | awk '
+        !found && /^default/ {
+            for (i = 2; i < NF; i++) {
+                if ($i == "dev") {
+                    print $(i + 1)
+                    found = 1
+                    break
+                }
+            }
+        }'
+}
+
+# Print the first "172.N" prefix without a route, N in 20..31 so that the
+# result stays inside the private 172.16.0.0/12 block; fail if all are used.
+find_next_subnet() {
+    local i routes
+    # list the routes once; piping "ip route" into "grep -q" per iteration
+    # can fail spuriously under pipefail when grep exits early
+    routes="$(ip route)"
+    for i in {20..31}; do
+        if ! grep -q "^172\.$i\." <<< "$routes"; then
+            echo "172.$i"
+            return 0
+        fi
+    done
+    echo "Error: no unused 172.20-172.31 subnet left" >&2
+    return 1
+}
+
+# Run "nft $@" and append, to the restore script recorded in NET_CONF_FILE,
+# the commands that delete the added rule again. Only the "input" and
+# "forward" rule shapes used by create() can be reversed; any other rule is
+# applied with a warning and is not removed by delete().
+nft_rev() {
+    local family table chain protocol field port policy key ifname
+    local filter remove
+
+    source "$NET_CONF_FILE"
+
+    family="$3"
+    table="$4"
+    chain="$5"
+    if [[ "$chain" = 'input' ]]; then
+        protocol="$6"
+        field="$7"
+        port="$8"
+        policy="$9"
+
+        filter="
+            .nftables[] | select(.rule) | .rule
+            | select(.family|test(\"$family\"))
+            | select(.table|test(\"$table\"))
+            | select(.chain|test(\"$chain\"))
+            | select(.expr[0]?.match?.op)
+            | select(.expr[0]?.match?.left?.payload?.protocol)
+            | select(.expr[0]?.match?.left?.payload?.field)
+            | select(.expr[0]?.match?.right)
+            | select(.expr[1]?.$policy == null)
+            | select(.expr[0].match.op == \"==\")
+            | select(.expr[0].match.left.payload.protocol == \"$protocol\")
+            | select(.expr[0].match.left.payload.field == \"$field\")
+            | select(.expr[0].match.right == $port)
+            | .handle
+        "
+    elif [[ "$chain" = 'forward' ]]; then
+        key="$6"
+        ifname="$7"
+        policy="${13}"
+
+        filter="
+            .nftables[] | select(.rule) | .rule
+            | select(.family|test(\"$family\"))
+            | select(.table|test(\"$table\"))
+            | select(.chain|test(\"$chain\"))
+            | select(.expr[0]?.match?.op)
+            | select(.expr[0]?.match?.left?.meta?.key)
+            | select(.expr[0]?.match?.right)
+            | select(.expr[2]?.$policy == null)
+            | select(.expr[0].match.op == \"==\")
+            | select(.expr[0].match.left.meta.key == \"$key\")
+            | select(.expr[0].match.right == \"$ifname\")
+            | .handle
+        "
+    else
+        echo "Warning: Don't know how to reverse 'nft $@'" 1>&2
+        nft "$@"
+        return 0
+    fi
+    # the rule handle is looked up by the restore script itself ($handle)
+    remove="
+        echo nft delete rule $family $table $chain handle \$handle
+        nft delete rule $family $table $chain handle \$handle
+    "
+
+    # restore_nft_file comes from NET_CONF_FILE once the first rule was added
+    if [[ -z "${restore_nft_file:-}" ]]; then
+        restore_nft_file="$(mktemp)"
+        echo "#!/usr/bin/env sh" > "$restore_nft_file"
+        chmod 700 "$restore_nft_file"
+        chown root:root "$restore_nft_file"
+        echo "restore_nft_file=$restore_nft_file" >> "$NET_CONF_FILE"
+    fi
+
+    echo "handle=\"\$(nft --json list ruleset | jq '$filter')\"" >> "$restore_nft_file"
+    echo "$remove" >> "$restore_nft_file"
+    cat "$restore_nft_file"
+
+    nft "$@"
+}
+
+# Print the first of "<prefix>1" .. "<prefix>255" that is not the name of an
+# existing link; return 1 when all of them are taken.
+_next_free_link_name() {
+    local prefix="$1"
+    local i candidate link_names
+    # names only, without the "@<peer>" suffix "ip link" adds to some links
+    link_names="$(ip link list \
+        | awk -F': ' '/^[0-9]+: / {sub(/@.*/, "", $2); print $2}')"
+    for i in {1..255}; do
+        candidate="$prefix$i"
+        # compare whole names: "grep -v" on the list succeeded as soon as
+        # any other link existed, so a name already in use could be returned
+        if ! grep -qxF -- "$candidate" <<< "$link_names"; then
+            echo "$candidate"
+            return 0
+        fi
+    done
+    return 1
+}
+
+# Print an unused bridge name (br1 .. br255).
+get_bridge_name() {
+    _next_free_link_name br
+}
+
+# Print an unused tap name (tap1 .. tap255).
+get_tap_name() {
+    _next_free_link_name tap
+}
+
+# Create bridge and tap device, start dnsmasq on the bridge and add the
+# firewall rules; everything delete() needs is written to NET_CONF_FILE.
+create() {
+    local next_subnet bridge_name dhcp_subnet dhcp_range tap_name dnsmasq_pid
+
+    next_subnet="$(find_next_subnet)"
+    dhcp_subnet="$next_subnet.0.1/24"
+    dhcp_range="$next_subnet.0.100,$next_subnet.0.200"
+
+    bridge_name="$(get_bridge_name)"
+    tap_name="$(get_tap_name)"
+    echo "$bridge_name"
+    echo "$tap_name"
+
+    echo > "$NET_CONF_FILE"
+
+    set -x
+    # "modprobe tun tap" handed "tap" to the tun module as a parameter; tun
+    # alone is what gets loaded, so name only that module
+    modprobe tun
+    ip link add "$bridge_name" type bridge && sleep .1
+    ip tuntap add dev "$tap_name" mode tap && sleep .1
+    ip link set dev "$tap_name" master "$bridge_name" && sleep .1
+    ip link set dev "$bridge_name" up && sleep .1
+    ip link set dev "$tap_name" up && sleep .1
+    ip addr add "$dhcp_subnet" dev "$bridge_name" && sleep .1
+    ip link set dev "$bridge_name" up && sleep .1
+    ip link set dev "$tap_name" up && sleep .1
+    set +x
+    ip addr
+    dnsmasq -d --interface="$bridge_name" --bind-interfaces \
+        --dhcp-range="$dhcp_range" &
+    dnsmasq_pid="$!"
+    echo "bridge_name='$bridge_name'" >> "$NET_CONF_FILE"
+    echo "tap_name='$tap_name'" >> "$NET_CONF_FILE"
+    echo "dnsmasq_pid='$dnsmasq_pid'" >> "$NET_CONF_FILE"
+    disown -h "$dnsmasq_pid"
+
+    # %q keeps the file sourceable even if the ruleset contains quotes
+    printf 'nft_ruleset=%q\n' "$(nft -s list ruleset)" >> "$NET_CONF_FILE"
+
+    # dhcp
+    nft_rev add rule ip filter input udp dport 67 accept
+    nft_rev add rule ip filter input tcp dport 67 accept
+    # dns
+    nft_rev add rule ip filter input udp dport 53 accept
+    nft_rev add rule ip filter input tcp dport 53 accept
+    # scream
+    nft_rev add rule ip filter input udp dport 4010 accept
+    nft_rev add rule ip filter input tcp dport 4010 accept
+
+    # forward bridge
+    nft_rev add rule ip filter forward iifname "$bridge_name" \
+        counter packets 0 bytes 0 accept
+    nft_rev add rule ip filter forward oifname "$bridge_name" \
+        counter packets 0 bytes 0 accept
+    nft_rev add rule ip nat postrouting oifname "$(default_route)" \
+        counter masquerade
+}
+
+# Undo create(): stop dnsmasq, remove the links and run the restore script.
+delete() {
+    source "$NET_CONF_FILE"
+
+    # dnsmasq may have exited already; keep tearing down the rest
+    kill "$dnsmasq_pid" || true
+    ip link del "$tap_name"
+    ip link del "$bridge_name"
+    # nft flush ruleset
+    # nft -f - <<< "$nft_ruleset"
+    # only set if nft_rev recorded at least one reversible rule
+    if [[ -n "${restore_nft_file:-}" ]]; then
+        sh "$restore_nft_file"
+        rm "$restore_nft_file"
+    fi
+    rm "$NET_CONF_FILE"
+}
+
+# NET_CONF_FILE may be unset: expand with a default so set -u does not abort
+# before the message is printed
+if [[ -z "${NET_CONF_FILE:-}" ]]; then
+    echo Please specify the configuration file path \
+        with NET_CONF_FILE >&2
+    exit 1
+fi
+
+if [[ "$EUID" -ne 0 ]]; then
+    echo "Please run as root" >&2
+    exit 2
+fi
+
+"$@"
diff --git a/qemu-vm-pci b/qemu-vm-pci
new file mode 100755
--- /dev/null
+++ b/qemu-vm-pci
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# qemu-vm-pci: hand PCI devices over to the vfio-pci driver.
+# Usage: qemu-vm-pci vfio_override_device|vfio_rebind_device <pci id>
+
+set -euo pipefail
+
+BASE_PATH=/sys/bus/pci/
+
+# sanitize variables: drop one trailing slash
+BASE_PATH="${BASE_PATH%/}"
+
+_="${SUDO:=sudo}"
+
+# Write "vfio-pci" to the driver_override of device $1, if the device exists.
+vfio_override_device() {
+    local pci_id
+    pci_id="$1"
+    if [[ -e "$BASE_PATH/devices/$pci_id" ]]; then
+        echo "vfio-pci" | $SUDO tee \
+            "$BASE_PATH/devices/$pci_id/driver_override" > /dev/null
+    fi
+}
+
+# Unbind device $1 from its current driver, if it has one, and bind it to
+# vfio-pci. Nothing happens if the device does not exist.
+vfio_rebind_device() {
+    local pci_id
+    pci_id="$1"
+    if [[ -e "$BASE_PATH/devices/$pci_id" ]]; then
+        if [[ -e "$BASE_PATH/devices/$pci_id/driver/unbind" ]]; then
+            echo "$pci_id" | $SUDO tee \
+                "$BASE_PATH/devices/$pci_id/driver/unbind" > /dev/null
+        fi
+        echo "$pci_id" | $SUDO tee \
+            "$BASE_PATH/drivers/vfio-pci/bind" > /dev/null
+    fi
+}
+
+"$@"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f7d23f9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+qemu-full
+qemu-affinity
+swtpm
+dnsmasq