VEGA support pointed me to a site they had used for the GRIR443 benchmark. The Slurm batch script there is quite helpful if, like me, you are new to Singularity.
#!/bin/bash
#SBATCH --nodes=2
#SBATCH --time 04:00:00
set -e; set -o pipefail
# Load required modules, if any
module load singularity
# Set cluster/experiment specific variables
readonly gpus_per_node=#   # set to the number of GPUs per node on the cluster
readonly benchmark_dir=#   # set to the directory holding the benchmark files
readonly kpoints=2
readonly npool=2
readonly procs_per_gpu=2   # >1 means ranks share a GPU, so MPS is started below
readonly procs_per_node=$((gpus_per_node*procs_per_gpu))
readonly qe_sif="${benchmark_dir}/qe_6.8.sif"
# Build the SIF from the NGC image, if it doesn't exist yet
if [[ ! -f "${qe_sif}" ]]; then
    singularity build "${qe_sif}" docker://nvcr.io/hpc/quantum_espresso:v6.8
fi
# Start the CUDA MPS daemon on each node so that multiple MPI ranks can share a GPU
if (( procs_per_gpu > 1 )); then
    srun --ntasks-per-node=1 /bin/bash -c "nvidia-cuda-mps-control -d; sleep infinity" &
fi
echo "INFO: Running Quantum ESPRESSO with:"
echo " ${SLURM_JOB_NUM_NODES:-$SLURM_NNODES} Nodes"
echo " ${gpus_per_node} GPUs per node"
echo " ${procs_per_node} MPI processes per node"
echo " ${procs_per_gpu} MPI processes per GPU"
echo " ${npool} Pools"
echo " ${kpoints} KPoints"
# Launch the parallel QE experiment inside the container
srun --mpi=pmi2 \
     --ntasks-per-node=${procs_per_node} \
     singularity run --nv -B "${benchmark_dir}:/host_pwd" --pwd /host_pwd \
     "${qe_sif}" \
     pw.x \
     -input /host_pwd/ausurf.in \
     -npool ${npool} \
     2>&1 | tee qe_log.txt
# Shut down the MPS daemon (harmless no-op if it was never started)
srun --ntasks-per-node=1 /bin/bash -c "echo quit | nvidia-cuda-mps-control || true"