https://forums.developer.nvidia.com/t/text-generation-webui-install-error/319717 nataraj@nvidiaai:~/aidata/jetson-containers$ jetson-containers build text-generation-webui Namespace(packages=['text-generation-webui'], name='', base='', multiple=False, build_flags='', build_args='', use_proxy=False, package_dirs=[''], list_packages=False, show_packages=False, skip_packages=[''], skip_errors=False, skip_tests=[''], test_only=[''], simulate=False, push='', logs='', verbose=False, no_github_api=False) -- L4T_VERSION=36.4.3 -- JETPACK_VERSION=6.2 -- CUDA_VERSION=12.6 -- PYTHON_VERSION=3.10 -- LSB_RELEASE=22.04 (jammy) -- copying /etc/nv_tegra_release to /home/nataraj/aidata/jetson-containers/packages/llm/ollama/nv_tegra_release Failed to fetch version information. Status code: 404 Failed to fetch version information. Status code: 404 -- Building containers ['build-essential', 'pip_cache:cu126', 'cuda:12.6', 'cudnn', 'python', 'numpy', 'cmake', 'onnx', 'pytorch:2.5', 'torchvision', 'huggingface_hub', 'rust', 'transformers', 'auto_gptq', 'flash-attention', 'exllama', 'sudonim', 'llama_cpp', 'triton', 'auto_awq', 'text-generation-webui'] -- Building container text-generation-webui:r36.4.3-build-essential DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-build-essential \ --file /home/nataraj/aidata/jetson-containers/packages/build/build-essential/Dockerfile \ --build-arg BASE_IMAGE=ubuntu:22.04 \ /home/nataraj/aidata/jetson-containers/packages/build/build-essential \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-build-essential.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 19.97kB Step 1/5 : ARG BASE_IMAGE Step 2/5 : FROM ${BASE_IMAGE} ---> 560582227a09 Step 3/5 : ENV DEBIAN_FRONTEND=noninteractive LANGUAGE=en_US:en LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 ---> Using cache ---> b3e5e53f4c74 Step 4/5 : RUN set -ex && apt-get update && apt-get install -y --no-install-recommends locales locales-all tzdata && locale-gen en_US $LANG && update-locale LC_ALL=$LC_ALL LANG=$LANG && locale && apt-get install -y --no-install-recommends build-essential software-properties-common apt-transport-https ca-certificates lsb-release pkg-config gnupg git git-lfs gdb wget wget2 curl nano zip unzip time sshpass ssh-client && apt-get clean && rm -rf /var/lib/apt/lists/* && gcc --version && g++ --version ---> Using cache ---> d423e827637e Step 5/5 : COPY tarpack /usr/local/bin/ ---> Using cache ---> 88010dcf6174 Successfully built 88010dcf6174 Successfully tagged text-generation-webui:r36.4.3-build-essential -- Building container text-generation-webui:r36.4.3-pip_cache_cu126 DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-pip_cache_cu126 \ --file /home/nataraj/aidata/jetson-containers/packages/cuda/cuda/Dockerfile.pip \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-build-essential \ --build-arg TAR_INDEX_URL="https://apt.jetson-ai-lab.dev/jp6/cu126" \ --build-arg PIP_INDEX_REPO="https://pypi.jetson-ai-lab.dev/jp6/cu126" \ --build-arg PIP_UPLOAD_REPO="http://localhost/jp6/cu126" \ --build-arg PIP_UPLOAD_USER="jp6" \ --build-arg PIP_UPLOAD_PASS="none" \ --build-arg SCP_UPLOAD_URL="localhost:/dist/jp6/cu126" \ --build-arg SCP_UPLOAD_USER="None" \ --build-arg SCP_UPLOAD_PASS="None" \ /home/nataraj/aidata/jetson-containers/packages/cuda/cuda \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-pip_cache_cu126.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. 
BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 40.45kB Step 1/4 : ARG BASE_IMAGE Step 2/4 : FROM ${BASE_IMAGE} ---> 88010dcf6174 Step 3/4 : ARG PIP_INDEX_REPO PIP_UPLOAD_REPO PIP_UPLOAD_USER PIP_UPLOAD_PASS PIP_TRUSTED_HOSTS TAR_INDEX_URL SCP_UPLOAD_URL SCP_UPLOAD_USER SCP_UPLOAD_PASS ---> Using cache ---> 4071a0b6e287 Step 4/4 : ENV TAR_INDEX_URL=${TAR_INDEX_URL} PIP_INDEX_URL=${PIP_INDEX_REPO} PIP_TRUSTED_HOST=${PIP_TRUSTED_HOSTS} TWINE_REPOSITORY_URL=${PIP_UPLOAD_REPO} TWINE_USERNAME=${PIP_UPLOAD_USER} TWINE_PASSWORD=${PIP_UPLOAD_PASS} SCP_UPLOAD_URL=${SCP_UPLOAD_URL} SCP_UPLOAD_USER=${SCP_UPLOAD_USER} SCP_UPLOAD_PASS=${SCP_UPLOAD_PASS} ---> Using cache ---> aeba093eaf91 Successfully built aeba093eaf91 Successfully tagged text-generation-webui:r36.4.3-pip_cache_cu126 -- Building container text-generation-webui:r36.4.3-cuda_12.6 DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-cuda_12.6 \ --file /home/nataraj/aidata/jetson-containers/packages/cuda/cuda/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-pip_cache_cu126 \ --build-arg CUDA_URL="https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-tegra-repo-ubuntu2204-12-6-local_12.6.3-1_arm64.deb" \ --build-arg CUDA_DEB="cuda-tegra-repo-ubuntu2204-12-6-local" \ --build-arg CUDA_PACKAGES="cuda-toolkit*" \ --build-arg CUDA_ARCH_LIST="87" \ --build-arg DISTRO="ubuntu2204" \ /home/nataraj/aidata/jetson-containers/packages/cuda/cuda \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-cuda_12.6.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 40.45kB Step 1/9 : ARG BASE_IMAGE Step 2/9 : FROM ${BASE_IMAGE} ---> aeba093eaf91 Step 3/9 : ARG CUDA_URL CUDA_DEB CUDA_PACKAGES CUDA_ARCH_LIST DISTRO="ubuntu2004" ---> Using cache ---> 653bdd97bd38 Step 4/9 : COPY install.sh /tmp/install_cuda.sh ---> Using cache ---> 90abf54ebc2b Step 5/9 : RUN /tmp/install_cuda.sh ---> Using cache ---> 365f85a65b4c Step 6/9 : ENV CUDA_HOME="/usr/local/cuda" ---> Using cache ---> 6b585838a38e Step 7/9 : ENV NVCC_PATH="$CUDA_HOME/bin/nvcc" ---> Using cache ---> d8a089ce5bb1 Step 8/9 : ENV NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=all CUDAARCHS=${CUDA_ARCH_LIST} CUDA_ARCHITECTURES=${CUDA_ARCH_LIST} CUDA_HOME="/usr/local/cuda" CUDNN_LIB_PATH="/usr/lib/aarch64-linux-gnu" CUDNN_LIB_INCLUDE_PATH="/usr/include" CMAKE_CUDA_COMPILER=${NVCC_PATH} CUDA_NVCC_EXECUTABLE=${NVCC_PATH} CUDACXX=${NVCC_PATH} TORCH_NVCC_FLAGS="-Xfatbin -compress-all" CUDA_BIN_PATH="${CUDA_HOME}/bin" CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" PATH="$CUDA_HOME/bin:${PATH}" LD_LIBRARY_PATH="${CUDA_HOME}/compat:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" DEBIAN_FRONTEND=noninteractive ---> Using cache ---> d66d2aee3da1 Step 9/9 : WORKDIR / ---> Using cache ---> 76c24886dcc6 Successfully built 76c24886dcc6 Successfully tagged text-generation-webui:r36.4.3-cuda_12.6 -- Testing container text-generation-webui:r36.4.3-cuda_12.6 (cuda:12.6/test.sh) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/cuda/cuda:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-cuda_12.6 \ /bin/bash -c '/bin/bash test.sh' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-cuda_12.6_test.sh.txt; exit ${PIPESTATUS[0]} { "cuda" : { "name" : "CUDA SDK", "version" : "12.6.3" }, "cuda_cccl" : { "name" : "CUDA C++ Core Compute Libraries", "version" : "12.6.77" }, "cuda_cudart" : { 
"name" : "CUDA Runtime (cudart)", "version" : "12.6.77" }, "cuda_cuobjdump" : { "name" : "cuobjdump", "version" : "12.6.77" }, "cuda_cupti" : { "name" : "CUPTI", "version" : "12.6.80" }, "cuda_cuxxfilt" : { "name" : "CUDA cu++ filt", "version" : "12.6.77" }, "cuda_gdb" : { "name" : "CUDA GDB", "version" : "12.6.77" }, "cuda_nvcc" : { "name" : "CUDA NVCC", "version" : "12.6.85" }, "cuda_nvdisasm" : { "name" : "CUDA nvdisasm", "version" : "12.6.77" }, "cuda_nvml_dev" : { "name" : "CUDA NVML Headers", "version" : "12.6.77" }, "cuda_nvprune" : { "name" : "CUDA nvprune", "version" : "12.6.77" }, "cuda_nvrtc" : { "name" : "CUDA NVRTC", "version" : "12.6.85" }, "cuda_nvtx" : { "name" : "CUDA NVTX", "version" : "12.6.77" }, "cuda_sanitizer_api" : { "name" : "CUDA Compute Sanitizer API", "version" : "12.6.77" }, "libcublas" : { "name" : "CUDA cuBLAS", "version" : "12.6.4.1" }, "libcudla" : { "name" : "CUDA cuDLA", "version" : "12.6.77" }, "libcufft" : { "name" : "CUDA cuFFT", "version" : "11.3.0.4" }, "libcufile" : { "name" : "GPUDirect Storage (cufile)", "version" : "1.11.1.6" }, "libcurand" : { "name" : "CUDA cuRAND", "version" : "10.3.7.77" }, "libcusolver" : { "name" : "CUDA cuSOLVER", "version" : "11.7.1.2" }, "libcusparse" : { "name" : "CUDA cuSPARSE", "version" : "12.5.4.2" }, "libnpp" : { "name" : "CUDA NPP", "version" : "12.3.1.54" }, "libnvfatbin" : { "name" : "Fatbin interaction library", "version" : "12.6.77" }, "libnvjitlink" : { "name" : "JIT Linker Library", "version" : "12.6.85" }, "libnvjpeg" : { "name" : "CUDA nvJPEG", "version" : "12.3.3.54" }, "nsight_compute" : { "name" : "Nsight Compute", "version" : "2024.3.2.3" }, "nvidia_fs" : { "name" : "NVIDIA file-system", "version" : "2.22.3" } } -- Building container text-generation-webui:r36.4.3-cudnn DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-cudnn \ --file /home/nataraj/aidata/jetson-containers/packages/cuda/cudnn/Dockerfile \ --build-arg 
BASE_IMAGE=text-generation-webui:r36.4.3-cuda_12.6 \ --build-arg CUDNN_URL="https://developer.download.nvidia.com/compute/cudnn/9.4.0/local_installers/cudnn-local-tegra-repo-ubuntu2204-9.4.0_1.0-1_arm64.deb" \ --build-arg CUDNN_DEB="cudnn-local-tegra-repo-ubuntu2204-9.4.0" \ --build-arg CUDNN_PACKAGES="libcudnn*-dev libcudnn*-samples" \ /home/nataraj/aidata/jetson-containers/packages/cuda/cudnn \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-cudnn.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 23.04kB Step 1/7 : ARG BASE_IMAGE Step 2/7 : FROM ${BASE_IMAGE} ---> 76c24886dcc6 Step 3/7 : ARG CUDNN_URL ---> Using cache ---> 25debca578e6 Step 4/7 : ARG CUDNN_DEB ---> Using cache ---> 765ecc909178 Step 5/7 : ARG CUDNN_PACKAGES ---> Using cache ---> a774d86f8492 Step 6/7 : RUN echo "Downloading ${CUDNN_DEB}" && mkdir /tmp/cudnn && cd /tmp/cudnn && wget --quiet --show-progress --progress=bar:force:noscroll ${CUDNN_URL} && dpkg -i *.deb && cp /var/cudnn-local-tegra-repo-*/cudnn-local-tegra-*-keyring.gpg /usr/share/keyrings/ && apt-get update && apt-cache search cudnn && apt-get install -y --no-install-recommends ${CUDNN_PACKAGES} && rm -rf /var/lib/apt/lists/* && apt-get clean && dpkg --list | grep cudnn && dpkg -P ${CUDNN_DEB} && rm -rf /tmp/cudnn ---> Using cache ---> 7dbbdc548581 Step 7/7 : RUN cd /usr/src/cudnn_samples_v*/conv_sample/ && make -j$(nproc) ---> Using cache ---> 7fcac1f4e50f Successfully built 7fcac1f4e50f Successfully tagged text-generation-webui:r36.4.3-cudnn -- Testing container text-generation-webui:r36.4.3-cudnn (cudnn:9.4/test.sh) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/cuda/cudnn:/test \ --volume 
/home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-cudnn \ /bin/bash -c '/bin/bash test.sh' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-cudnn_test.sh.txt; exit ${PIPESTATUS[0]} #define CUDNN_MAJOR 9 #define CUDNN_MINOR 4 #define CUDNN_VERSION (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL) #define CUDNN_MAX_SM_MAJOR_NUMBER 9 #define CUDNN_MAX_SM_MINOR_NUMBER 0 #define CUDNN_MAX_DEVICE_VERSION (CUDNN_MAX_SM_MAJOR_NUMBER * 100 + CUDNN_MAX_SM_MINOR_NUMBER * 10) Executing: conv_sample Using format CUDNN_TENSOR_NCHW (for INT8x4 and INT8x32 tests use CUDNN_TENSOR_NCHW_VECT_C) Testing single precision ====USER DIMENSIONS==== input dims are 1, 32, 4, 4 filter dims are 32, 32, 1, 1 output dims are 1, 32, 4, 4 ====PADDING DIMENSIONS==== padded input dims are 1, 32, 4, 4 padded filter dims are 32, 32, 1, 1 padded output dims are 1, 32, 4, 4 Testing conv ^^^^ CUDA : elapsed = 0.00122761 sec, Test PASSED Testing half precision (math in single precision) ====USER DIMENSIONS==== input dims are 1, 32, 4, 4 filter dims are 32, 32, 1, 1 output dims are 1, 32, 4, 4 ====PADDING DIMENSIONS==== padded input dims are 1, 32, 4, 4 padded filter dims are 32, 32, 1, 1 padded output dims are 1, 32, 4, 4 Testing conv ^^^^ CUDA : elapsed = 0.0235721 sec, Test PASSED -- Building container text-generation-webui:r36.4.3-python DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-python \ --file /home/nataraj/aidata/jetson-containers/packages/build/python/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-cudnn \ --build-arg PYTHON_VERSION_ARG="3.10" \ /home/nataraj/aidata/jetson-containers/packages/build/python \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-python.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future 
release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 22.02kB Step 1/6 : ARG BASE_IMAGE Step 2/6 : FROM ${BASE_IMAGE} ---> 7fcac1f4e50f Step 3/6 : ARG PYTHON_VERSION_ARG ---> Using cache ---> 06fa0318ea7b Step 4/6 : ENV PYTHON_VERSION=${PYTHON_VERSION_ARG} PIP_DISABLE_PIP_VERSION_CHECK=on PIP_DEFAULT_TIMEOUT=100 PYTHONFAULTHANDLER=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=utf-8 PYTHONHASHSEED=random PIP_NO_CACHE_DIR=off PIP_CACHE_PURGE=true PIP_ROOT_USER_ACTION=ignore TWINE_NON_INTERACTIVE=1 DEBIAN_FRONTEND=noninteractive ---> Using cache ---> f35d90d7e565 Step 5/6 : COPY install.sh /tmp/install_python.sh ---> Using cache ---> 7914a84fe7de Step 6/6 : RUN /tmp/install_python.sh ---> Using cache ---> b19b5e59c456 Successfully built b19b5e59c456 Successfully tagged text-generation-webui:r36.4.3-python -- Building container text-generation-webui:r36.4.3-numpy DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-numpy \ --file /home/nataraj/aidata/jetson-containers/packages/numeric/numpy/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-python \ --build-arg NUMPY_PACKAGE="numpy<2" \ /home/nataraj/aidata/jetson-containers/packages/numeric/numpy \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-numpy.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 19.46kB Step 1/7 : ARG BASE_IMAGE Step 2/7 : FROM ${BASE_IMAGE} ---> b19b5e59c456 Step 3/7 : ENV OPENBLAS_CORETYPE=ARMV8 ---> Using cache ---> e67487ca6c3e Step 4/7 : ARG NUMPY_PACKAGE="numpy" ---> Using cache ---> 73df60ecdd4f Step 5/7 : ENV NUMPY_PACKAGE="$NUMPY_PACKAGE" ---> Using cache ---> c50c1bd51759 Step 6/7 : COPY install.sh /tmp/numpy/ ---> Using cache ---> 467d91cd30aa Step 7/7 : RUN bash /tmp/numpy/install.sh ---> Using cache ---> 59b40e741afe Successfully built 59b40e741afe Successfully tagged text-generation-webui:r36.4.3-numpy -- Testing container text-generation-webui:r36.4.3-numpy (numpy/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/numeric/numpy:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-numpy \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-numpy_test.py.txt; exit ${PIPESTATUS[0]} testing numpy... 
numpy version: 1.26.4 /usr/local/lib/python3.10/dist-packages/numpy/__config__.py:155: UserWarning: Install `pyyaml` for better output warnings.warn("Install `pyyaml` for better output", stacklevel=1) { "Compilers": { "c": { "name": "gcc", "linker": "ld.bfd", "version": "10.2.1", "commands": "cc", "args": "-fno-strict-aliasing", "linker args": "-Wl,--strip-debug, -fno-strict-aliasing" }, "cython": { "name": "cython", "linker": "cython", "version": "3.0.8", "commands": "cython" }, "c++": { "name": "gcc", "linker": "ld.bfd", "version": "10.2.1", "commands": "c++", "linker args": "-Wl,--strip-debug" } }, "Machine Information": { "host": { "cpu": "aarch64", "family": "aarch64", "endian": "little", "system": "linux" }, "build": { "cpu": "aarch64", "family": "aarch64", "endian": "little", "system": "linux" } }, "Build Dependencies": { "blas": { "name": "openblas64", "found": true, "version": "0.3.23.dev", "detection method": "pkgconfig", "include directory": "/usr/local/include", "lib directory": "/usr/local/lib", "openblas configuration": "USE_64BITINT=1 DYNAMIC_ARCH=1 DYNAMIC_OLDER= NO_CBLAS= NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP= NEOVERSEN1 MAX_THREADS=80", "pc file directory": "/usr/local/lib/pkgconfig" }, "lapack": { "name": "dep281473538264272", "found": true, "version": "1.26.4", "detection method": "internal", "include directory": "unknown", "lib directory": "unknown", "openblas configuration": "unknown", "pc file directory": "unknown" } }, "Python Information": { "path": "/opt/python/cp310-cp310/bin/python", "version": "3.10" }, "SIMD Extensions": { "baseline": [ "NEON", "NEON_FP16", "NEON_VFPV4", "ASIMD" ], "found": [ "ASIMDHP" ], "not found": [ "ASIMDFHM" ] } } None numpy OK -- Building container text-generation-webui:r36.4.3-cmake DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-cmake \ --file /home/nataraj/aidata/jetson-containers/packages/build/cmake/cmake_pip/Dockerfile \ --build-arg 
BASE_IMAGE=text-generation-webui:r36.4.3-numpy \ /home/nataraj/aidata/jetson-containers/packages/build/cmake/cmake_pip \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-cmake.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 12.8kB Step 1/3 : ARG BASE_IMAGE Step 2/3 : FROM ${BASE_IMAGE} ---> 59b40e741afe Step 3/3 : RUN set -ex && pip3 install --upgrade --force-reinstall --no-cache-dir --verbose cmake && cmake --version && which cmake ---> Using cache ---> c1f77e40f53d Successfully built c1f77e40f53d Successfully tagged text-generation-webui:r36.4.3-cmake -- Building container text-generation-webui:r36.4.3-onnx DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-onnx \ --file /home/nataraj/aidata/jetson-containers/packages/ml/onnx/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-cmake \ --build-arg ONNX_VERSION="main" \ /home/nataraj/aidata/jetson-containers/packages/ml/onnx \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-onnx.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 17.41kB Step 1/4 : ARG BASE_IMAGE Step 2/4 : FROM ${BASE_IMAGE} ---> c1f77e40f53d Step 3/4 : ARG ONNX_VERSION ---> Using cache ---> 3e14fe05a524 Step 4/4 : RUN pip3 install --no-cache-dir --verbose onnx || pip3 install --no-cache-dir --verbose git+https://github.com/onnx/onnx@${ONNX_VERSION} && pip3 show onnx && python3 -c 'import onnx; print(onnx.__version__)' ---> Using cache ---> 574674d7099c Successfully built 574674d7099c Successfully tagged text-generation-webui:r36.4.3-onnx -- Testing container text-generation-webui:r36.4.3-onnx (onnx/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/ml/onnx:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-onnx \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-onnx_test.py.txt; exit ${PIPESTATUS[0]} testing onnx... onnx version: 1.17.0 onnx OK -- Building container text-generation-webui:r36.4.3-pytorch_2.5 DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-pytorch_2.5 \ --file /home/nataraj/aidata/jetson-containers/packages/pytorch/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-onnx \ --build-arg TORCH_CUDA_ARCH_ARGS="8.7" \ --build-arg TORCH_VERSION="2.5" \ --build-arg PYTORCH_BUILD_VERSION="2.5.0" \ --build-arg USE_NCCL="1" \ --build-arg USE_MPI="0" \ /home/nataraj/aidata/jetson-containers/packages/pytorch \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-pytorch_2.5.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 141.3kB Step 1/6 : ARG BASE_IMAGE Step 2/6 : FROM ${BASE_IMAGE} ---> 574674d7099c Step 3/6 : ARG TORCH_CUDA_ARCH_ARGS TORCH_VERSION PYTORCH_BUILD_VERSION USE_NCCL=0 USE_GLOO=1 USE_MPI=1 USE_NNPACK=1 USE_XNNPACK=1 USE_PYTORCH_QNNPACK=1 FORCE_BUILD=off ---> Using cache ---> bbc1fcdc2bb1 Step 4/6 : ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_ARGS} TORCH_HOME=/data/models/torch ---> Using cache ---> 315b08ec40bb Step 5/6 : COPY install.sh build.sh /tmp/pytorch/ ---> Using cache ---> 36ce6d0d613a Step 6/6 : RUN /tmp/pytorch/install.sh || /tmp/pytorch/build.sh ---> Using cache ---> d2819d1344dc Successfully built d2819d1344dc Successfully tagged text-generation-webui:r36.4.3-pytorch_2.5 -- Testing container text-generation-webui:r36.4.3-pytorch_2.5 (pytorch:2.5/test.sh) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/pytorch:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-pytorch_2.5 \ /bin/bash -c '/bin/bash test.sh' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-pytorch_2.5_test.sh.txt; exit ${PIPESTATUS[0]} /usr/lib/python3.10/runpy.py:126: RuntimeWarning: 'torch.utils.collect_env' found in sys.modules after import of package 'torch.utils', but prior to execution of 'torch.utils.collect_env'; this may result in unpredictable behaviour warn(RuntimeWarning(msg)) Collecting environment information... 
PyTorch version: 2.5.0 Is debug build: False CUDA used to build PyTorch: 12.6 ROCM used to build PyTorch: N/A OS: Ubuntu 22.04.5 LTS (aarch64) GCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 Clang version: Could not collect CMake version: version 3.31.4 Libc version: glibc-2.35 Python version: 3.10.12 (main, Jan 17 2025, 14:35:34) [GCC 11.4.0] (64-bit runtime) Python platform: Linux-5.15.148-tegra-aarch64-with-glibc2.35 Is CUDA available: True CUDA runtime version: 12.6.85 CUDA_MODULE_LOADING set to: LAZY GPU models and configuration: GPU 0: Orin (nvgpu) Nvidia driver version: 540.4.0 cuDNN version: Probably one of the following: /usr/lib/aarch64-linux-gnu/libcudnn.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_adv.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_cnn.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_engines_precompiled.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_graph.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_heuristic.so.9.4.0 /usr/lib/aarch64-linux-gnu/libcudnn_ops.so.9.4.0 HIP runtime version: N/A MIOpen runtime version: N/A Is XNNPACK available: True CPU: Architecture: aarch64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 6 On-line CPU(s) list: 0-5 Vendor ID: ARM Model name: Cortex-A78AE Model: 1 Thread(s) per core: 1 Core(s) per cluster: 3 Socket(s): - Cluster(s): 2 Stepping: r0p1 CPU max MHz: 1728.0000 CPU min MHz: 115.2000 BogoMIPS: 62.50 Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp uscat ilrcpc flagm paca pacg L1d cache: 384 KiB (6 instances) L1i cache: 384 KiB (6 instances) L2 cache: 1.5 MiB (6 instances) L3 cache: 4 MiB (2 instances) NUMA node(s): 1 NUMA node0 CPU(s): 0-5 Vulnerability Gather data sampling: Not affected Vulnerability Itlb multihit: Not affected Vulnerability L1tf: Not affected Vulnerability Mds: Not affected Vulnerability Meltdown: Not affected Vulnerability Mmio stale data: Not 
affected Vulnerability Retbleed: Not affected Vulnerability Spec rstack overflow: Not affected Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl Vulnerability Spectre v1: Mitigation; __user pointer sanitization Vulnerability Spectre v2: Mitigation; CSV2, but not BHB Vulnerability Srbds: Not affected Vulnerability Tsx async abort: Not affected Versions of relevant libraries: [pip3] numpy==1.26.4 [pip3] onnx==1.17.0 [pip3] torch==2.5.0 [conda] Could not collect -- Testing container text-generation-webui:r36.4.3-pytorch_2.5 (pytorch:2.5/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/pytorch:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-pytorch_2.5 \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-pytorch_2.5_test.py.txt; exit ${PIPESTATUS[0]} testing PyTorch... PyTorch version: 2.5.0 CUDA available: True cuDNN version: 90400 PyTorch built with: - GCC 11.4 - C++ Version: 201703 - OpenMP 201511 (a.k.a. 
OpenMP 4.5) - LAPACK is enabled (usually provided by MKL) - NNPACK is enabled - CPU capability usage: NO AVX - CUDA Runtime 12.6 - NVCC architecture flags: -gencode;arch=compute_87,code=sm_87 - CuDNN 90.4 - Build settings: BLAS_INFO=open, BUILD_TYPE=Release, CUDA_VERSION=12.6, CUDNN_VERSION=9.4.0, CXX_COMPILER=/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=open, TORCH_VERSION=2.5.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=OFF, USE_EIGEN_FOR_BLAS=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=1, USE_MKL=OFF, USE_MKLDNN=OFF, USE_MPI=0, USE_NCCL=1, USE_NNPACK=1, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, PyTorch 2.5.0 built with: * CUDA 12.6 (Orin) * cuDNN 90400 * CUDA BLAS _BlasBackend.Cublas * CUDA linalg _BlasBackend.Cublas * CUDA flash_attn True * CUDA flash_sdp True * CUDA cudnn_sdp True * CUDA math_sdp True * CUDA mem_efficient_sdp_enabled True * CUDA fp16_bf16_reduction_math_sdp False torch.distributed: True * NCCL backend: True * GLOO backend: True * MPI backend: False PACKAGING_VERSION=2.5.0 TORCH_CUDA_ARCH_LIST=8.7 /test/test.py:44: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. 
It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at /opt/pytorch/torch/csrc/tensor/python_tensor.cpp:78.) a = torch.cuda.FloatTensor(2).zero_() Tensor a = tensor([0., 0.], device='cuda:0') Tensor b = tensor([-1.3115, -0.3192], device='cuda:0') Tensor c = tensor([-1.3115, -0.3192], device='cuda:0') testing LAPACK (OpenBLAS)... done testing LAPACK (OpenBLAS) testing torch.nn (cuDNN)... done testing torch.nn (cuDNN) testing CPU tensor vector operations... /test/test.py:83: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument. cpu_y = F.softmax(cpu_x) Tensor cpu_x = tensor([12.3450]) Tensor softmax = tensor([1.]) Tensor exp (float32) = tensor([[2.7183, 2.7183, 2.7183], [2.7183, 2.7183, 2.7183], [2.7183, 2.7183, 2.7183]]) Tensor exp (float64) = tensor([[2.7183, 2.7183, 2.7183], [2.7183, 2.7183, 2.7183], [2.7183, 2.7183, 2.7183]], dtype=torch.float64) Tensor exp (diff) = 7.429356050359104e-07 PyTorch OK -- Building container text-generation-webui:r36.4.3-torchvision DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-torchvision \ --file /home/nataraj/aidata/jetson-containers/packages/pytorch/torchvision/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-pytorch_2.5 \ --build-arg TORCHVISION_VERSION="0.20.0" \ /home/nataraj/aidata/jetson-containers/packages/pytorch/torchvision \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-torchvision.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 34.3kB Step 1/6 : ARG BASE_IMAGE Step 2/6 : FROM ${BASE_IMAGE} ---> d2819d1344dc Step 3/6 : ARG TORCHVISION_VERSION FORCE_BUILD=off ---> Using cache ---> 2d1ddadcc5b0 Step 4/6 : COPY build.sh install.sh /tmp/torchvision/ ---> Using cache ---> d9b6f5a3bb3c Step 5/6 : RUN apt-get update && apt-get install -y --no-install-recommends libjpeg-dev libpng-dev zlib1g-dev && rm -rf /var/lib/apt/lists/* && apt-get clean ---> Using cache ---> 9d82caae820b Step 6/6 : RUN /tmp/torchvision/install.sh || /tmp/torchvision/build.sh ---> Using cache ---> ade6c648be32 Successfully built ade6c648be32 Successfully tagged text-generation-webui:r36.4.3-torchvision -- Testing container text-generation-webui:r36.4.3-torchvision (torchvision:0.20.0/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/pytorch/torchvision:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-torchvision \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-torchvision_test.py.txt; exit ${PIPESTATUS[0]} testing torchvision... torchvision version: 0.20.0 testing torchvision extensions... 
torchvision classification models: alexnet | convnext_base | convnext_large | convnext_small | convnext_tiny | densenet121 | densenet161 | densenet169 | densenet201 | efficientnet_b0 | efficientnet_b1 | efficientnet_b2 | efficientnet_b3 | efficientnet_b4 | efficientnet_b5 | efficientnet_b6 | efficientnet_b7 | efficientnet_v2_l | efficientnet_v2_m | efficientnet_v2_s | get_model | get_model_builder | get_model_weights | get_weight | googlenet | inception_v3 | list_models | maxvit_t | mnasnet0_5 | mnasnet0_75 | mnasnet1_0 | mnasnet1_3 | mobilenet_v2 | mobilenet_v3_large | mobilenet_v3_small | regnet_x_16gf | regnet_x_1_6gf | regnet_x_32gf | regnet_x_3_2gf | regnet_x_400mf | regnet_x_800mf | regnet_x_8gf | regnet_y_128gf | regnet_y_16gf | regnet_y_1_6gf | regnet_y_32gf | regnet_y_3_2gf | regnet_y_400mf | regnet_y_800mf | regnet_y_8gf | resnet101 | resnet152 | resnet18 | resnet34 | resnet50 | resnext101_32x8d | resnext101_64x4d | resnext50_32x4d | shufflenet_v2_x0_5 | shufflenet_v2_x1_0 | shufflenet_v2_x1_5 | shufflenet_v2_x2_0 | squeezenet1_0 | squeezenet1_1 | swin_b | swin_s | swin_t | swin_v2_b | swin_v2_s | swin_v2_t | vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn | vgg19 | vgg19_bn | vit_b_16 | vit_b_32 | vit_h_14 | vit_l_16 | vit_l_32 | wide_resnet101_2 | wide_resnet50_2 Namespace(data_url='https://nvidia.box.com/shared/static/y1ygiahv8h75yiyh0pt50jqdqt7pohgx.gz', data_tar='ILSVRC2012_img_val_subset_5k.tar.gz', models=['resnet18'], resolution=224, workers=2, batch_size=8, print_freq=25, test_threshold=-10.0, use_cuda=True) using CUDA dataset classes: 1000 dataset images: 5000 batch size: 8 --------------------------------------------- -- resnet18 --------------------------------------------- loading model 'resnet18' /usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. 
warnings.warn( /usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg) loaded model 'resnet18' resnet18 [ 0/625] Time 0.578 ( 0.578) Acc@1 75.00 ( 75.00) Acc@5 100.00 (100.00) resnet18 [ 25/625] Time 0.011 ( 0.055) Acc@1 62.50 ( 79.81) Acc@5 87.50 ( 94.71) resnet18 [ 50/625] Time 0.059 ( 0.047) Acc@1 100.00 ( 73.53) Acc@5 100.00 ( 91.91) resnet18 [ 75/625] Time 0.010 ( 0.043) Acc@1 62.50 ( 76.48) Acc@5 87.50 ( 92.27) resnet18 [100/625] Time 0.056 ( 0.041) Acc@1 87.50 ( 78.22) Acc@5 100.00 ( 93.07) resnet18 [125/625] Time 0.010 ( 0.040) Acc@1 87.50 ( 77.38) Acc@5 100.00 ( 93.35) resnet18 [150/625] Time 0.050 ( 0.039) Acc@1 12.50 ( 76.49) Acc@5 100.00 ( 93.38) resnet18 [175/625] Time 0.010 ( 0.038) Acc@1 62.50 ( 76.42) Acc@5 100.00 ( 93.75) resnet18 [200/625] Time 0.009 ( 0.038) Acc@1 100.00 ( 76.55) Acc@5 100.00 ( 93.72) resnet18 [225/625] Time 0.023 ( 0.038) Acc@1 87.50 ( 76.77) Acc@5 100.00 ( 93.58) resnet18 [250/625] Time 0.051 ( 0.038) Acc@1 37.50 ( 76.64) Acc@5 62.50 ( 93.63) resnet18 [275/625] Time 0.009 ( 0.038) Acc@1 87.50 ( 75.72) Acc@5 100.00 ( 93.25) resnet18 [300/625] Time 0.059 ( 0.038) Acc@1 50.00 ( 74.38) Acc@5 87.50 ( 92.07) resnet18 [325/625] Time 0.026 ( 0.038) Acc@1 75.00 ( 73.27) Acc@5 100.00 ( 91.53) resnet18 [350/625] Time 0.061 ( 0.038) Acc@1 62.50 ( 72.69) Acc@5 87.50 ( 91.10) resnet18 [375/625] Time 0.011 ( 0.038) Acc@1 25.00 ( 72.64) Acc@5 62.50 ( 90.89) resnet18 [400/625] Time 0.058 ( 0.038) Acc@1 75.00 ( 71.95) Acc@5 87.50 ( 90.27) resnet18 [425/625] Time 0.010 ( 0.037) Acc@1 62.50 ( 71.33) Acc@5 87.50 ( 89.91) resnet18 [450/625] Time 0.064 ( 0.037) Acc@1 75.00 ( 71.26) Acc@5 87.50 ( 89.99) resnet18 
[475/625] Time 0.009 ( 0.037) Acc@1 37.50 ( 70.75) Acc@5 75.00 ( 89.63) resnet18 [500/625] Time 0.009 ( 0.037) Acc@1 75.00 ( 70.33) Acc@5 87.50 ( 89.25) resnet18 [525/625] Time 0.010 ( 0.037) Acc@1 62.50 ( 69.94) Acc@5 87.50 ( 89.02) resnet18 [550/625] Time 0.051 ( 0.037) Acc@1 100.00 ( 69.67) Acc@5 100.00 ( 88.77) resnet18 [575/625] Time 0.063 ( 0.037) Acc@1 75.00 ( 69.31) Acc@5 87.50 ( 88.54) resnet18 [600/625] Time 0.010 ( 0.037) Acc@1 37.50 ( 69.68) Acc@5 87.50 ( 88.71) resnet18 * Acc@1 69.740 Expected 69.760 Delta -0.020 * Acc@5 88.760 Expected 89.080 Delta -0.320 * Images/sec 217.796 * PASS --------------------------------------------- -- Summary --------------------------------------------- resnet18 * Acc@1 69.740 Expected 69.760 Delta -0.020 * Acc@5 88.760 Expected 89.080 Delta -0.320 * Images/sec 217.796 * PASS Model tests passing: 1 / 1 torchvision OK -- Building container text-generation-webui:r36.4.3-huggingface_hub DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-huggingface_hub \ --file /home/nataraj/aidata/jetson-containers/packages/llm/huggingface_hub/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-torchvision \ /home/nataraj/aidata/jetson-containers/packages/llm/huggingface_hub \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-huggingface_hub.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. 
Sending build context to Docker daemon 16.9kB Step 1/6 : ARG BASE_IMAGE Step 2/6 : FROM ${BASE_IMAGE} ---> ade6c648be32 Step 3/6 : ENV TRANSFORMERS_CACHE=/data/models/huggingface HUGGINGFACE_HUB_CACHE=/data/models/huggingface HF_HOME=/data/models/huggingface ---> Using cache ---> 892f10ea016b Step 4/6 : COPY huggingface-downloader /usr/local/bin/ ---> Using cache ---> 0e92b04ff751 Step 5/6 : COPY huggingface-downloader.py /usr/local/bin/_huggingface-downloader.py ---> Using cache ---> cea400a57620 Step 6/6 : RUN set -ex && pip3 install --no-cache-dir --verbose huggingface_hub[cli] dataclasses && huggingface-cli --help && huggingface-downloader --help && pip3 show huggingface_hub && python3 -c 'import huggingface_hub; print(huggingface_hub.__version__)' && apt-get update && rm -rf /var/lib/apt/lists/* && apt-get clean ---> Using cache ---> 1a7940238054 Successfully built 1a7940238054 Successfully tagged text-generation-webui:r36.4.3-huggingface_hub -- Testing container text-generation-webui:r36.4.3-huggingface_hub (huggingface_hub/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/llm/huggingface_hub:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-huggingface_hub \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-huggingface_hub_test.py.txt; exit ${PIPESTATUS[0]} testing huggingface_hub... 
huggingface_hub version: 0.28.1 huggingface_hub OK -- Building container text-generation-webui:r36.4.3-rust DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-rust \ --file /home/nataraj/aidata/jetson-containers/packages/build/rust/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-huggingface_hub \ /home/nataraj/aidata/jetson-containers/packages/build/rust \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-rust.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 9.216kB Step 1/5 : ARG BASE_IMAGE Step 2/5 : FROM ${BASE_IMAGE} ---> 1a7940238054 Step 3/5 : RUN curl https://sh.rustup.rs -sSf | sh -s -- -y ---> Using cache ---> 3c572456e60a Step 4/5 : ENV PATH="/root/.cargo/bin:${PATH}" ---> Using cache ---> 3c7bbb7bb78b Step 5/5 : RUN rustc --version && pip3 install --no-cache-dir --verbose setuptools-rust ---> Using cache ---> a8293aeca21d Successfully built a8293aeca21d Successfully tagged text-generation-webui:r36.4.3-rust -- Building container text-generation-webui:r36.4.3-transformers DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-transformers \ --file /home/nataraj/aidata/jetson-containers/packages/llm/transformers/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-rust \ --build-arg TRANSFORMERS_PACKAGE="transformers==4.48.3" \ --build-arg TRANSFORMERS_VERSION="4.48.3" \ /home/nataraj/aidata/jetson-containers/packages/llm/transformers \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-transformers.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. 
BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 35.84kB Step 1/6 : ARG BASE_IMAGE Step 2/6 : FROM ${BASE_IMAGE} ---> a8293aeca21d Step 3/6 : ARG TRANSFORMERS_PACKAGE=transformers TRANSFORMERS_VERSION ---> Using cache ---> 5bc292403bae Step 4/6 : RUN pip3 install --no-cache-dir --verbose accelerate && pip3 install --no-cache-dir --verbose sentencepiece && pip3 install --no-cache-dir --verbose optimum && pip3 uninstall -y transformers && echo "Installing tranformers $TRANSFORMERS_VERSION (from $TRANSFORMERS_PACKAGE)" && pip3 install --no-cache-dir --verbose ${TRANSFORMERS_PACKAGE} && PYTHON_ROOT=`pip3 show transformers | grep Location: | cut -d' ' -f2` && sed -i -e 's|torch.distributed.is_initialized|torch.distributed.is_available|g' ${PYTHON_ROOT}/transformers/modeling_utils.py ---> Using cache ---> d40d08a49e9f Step 5/6 : COPY huggingface-benchmark.py /usr/local/bin ---> Using cache ---> aa889112a1b1 Step 6/6 : RUN pip3 show transformers && python3 -c 'import transformers; print(transformers.__version__)' ---> Using cache ---> 04d5e9dcdfd0 Successfully built 04d5e9dcdfd0 Successfully tagged text-generation-webui:r36.4.3-transformers -- Testing container text-generation-webui:r36.4.3-transformers (transformers:4.48.3/test_version.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/llm/transformers:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-transformers \ /bin/bash -c 'python3 test_version.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-transformers_test_version.py.txt; exit ${PIPESTATUS[0]} /usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:128: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. 
warnings.warn( transformers version: 4.48.3 -- Testing container text-generation-webui:r36.4.3-transformers (transformers:4.48.3/huggingface-benchmark.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/llm/transformers:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-transformers \ /bin/bash -c 'python3 huggingface-benchmark.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-transformers_huggingface-benchmark.py.txt; exit ${PIPESTATUS[0]} /usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:128: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( Namespace(model='distilgpt2', prompt='Once upon a time,', precision=None, tokens=[128], token='', runs=2, warmup=2, save='') Running on device cuda Input tokens: tensor([[7454, 2402, 257, 640, 11]], device='cuda:0') shape: torch.Size([1, 5]) Loading model distilgpt2 (None) Generating 128 tokens with distilgpt2 on prompt: Once upon a time, The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. 
Once upon a time, the world was a little more like a place where you could go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and WARMUP 0 = 2.4295 seconds, 52.7 tokens/sec (None) The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. Once upon a time, the world was a little more like a place where you could go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and WARMUP 1 = 1.9503 seconds, 65.6 tokens/sec (None) The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. 
Once upon a time, the world was a little more like a place where you could go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and RUN 2 = 1.9586 seconds, 65.4 tokens/sec (None) The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation. Once upon a time, the world was a little more like a place where you could go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and go and RUN 3 = 1.9617 seconds, 65.2 tokens/sec (None) AVG = 1.9602 seconds, 65.3 tokens/sec memory=948.27 MB (--model=distilgpt2 --precision=None --tokens=128) -- Building container text-generation-webui:r36.4.3-auto_gptq DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-auto_gptq \ --file /home/nataraj/aidata/jetson-containers/packages/llm/auto_gptq/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-transformers \ --build-arg AUTOGPTQ_VERSION="0.8.0" \ --build-arg AUTOGPTQ_BRANCH="0.8.0" \ /home/nataraj/aidata/jetson-containers/packages/llm/auto_gptq \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-auto_gptq.txt; exit 
${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 16.38kB Step 1/5 : ARG BASE_IMAGE Step 2/5 : FROM ${BASE_IMAGE} ---> 04d5e9dcdfd0 Step 3/5 : ARG AUTOGPTQ_VERSION AUTOGPTQ_BRANCH FORCE_BUILD="off" ---> Using cache ---> 824e64c0c53a Step 4/5 : COPY build.sh /tmp/build_auto_gptq.sh ---> Using cache ---> f9d73063e482 Step 5/5 : RUN pip3 install --no-cache-dir gekko && pip3 install --no-cache-dir --verbose auto-gptq==${AUTOGPTQ_VERSION} || /tmp/build_auto_gptq.sh ---> Using cache ---> 78cc3377302e Successfully built 78cc3377302e Successfully tagged text-generation-webui:r36.4.3-auto_gptq -- Testing container text-generation-webui:r36.4.3-auto_gptq (auto_gptq:0.8.0/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/llm/auto_gptq:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-auto_gptq \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-auto_gptq_test.py.txt; exit ${PIPESTATUS[0]} testing AutoGPTQ... /usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:128: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING - AutoGPTQ has stopped development. Please transition to GPTQModel: https://github.com/ModelCoud/GPTQModel GPTQModel has been merged into Transformers/Optimum and full deprecation of AutoGPTQ within HF frameworks is planned in the near-future. 
AutoGPTQ OK -- Building container text-generation-webui:r36.4.3-flash-attention DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-flash-attention \ --file /home/nataraj/aidata/jetson-containers/packages/llm/flash-attention/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-auto_gptq \ --build-arg FLASH_ATTENTION_VERSION="2.7.2.post1" \ /home/nataraj/aidata/jetson-containers/packages/llm/flash-attention \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-flash-attention.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 34.3kB Step 1/6 : ARG BASE_IMAGE Step 2/6 : FROM ${BASE_IMAGE} ---> 78cc3377302e Step 3/6 : ARG FLASH_ATTENTION_VERSION FORCE_BUILD=off ---> Using cache ---> a5d1c81f6cb8 Step 4/6 : COPY build.sh install.sh /tmp/flash-attention/ ---> Using cache ---> 3a42a8c1870c Step 5/6 : COPY patches/${FLASH_ATTENTION_VERSION}.diff /tmp/flash-attention/patch.diff ---> Using cache ---> c02b8801af67 Step 6/6 : RUN /tmp/flash-attention/install.sh || /tmp/flash-attention/build.sh ---> Using cache ---> a1d4c6aa0e57 Successfully built a1d4c6aa0e57 Successfully tagged text-generation-webui:r36.4.3-flash-attention -- Testing container text-generation-webui:r36.4.3-flash-attention (flash-attention:2.7.2.post1/test.py) docker run -t --rm --runtime=nvidia --network=host \ --volume /home/nataraj/aidata/jetson-containers/packages/llm/flash-attention:/test \ --volume /home/nataraj/aidata/jetson-containers/data:/data \ --workdir /test \ text-generation-webui:r36.4.3-flash-attention \ /bin/bash -c 'python3 test.py' \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/test/text-generation-webui_r36.4.3-flash-attention_test.py.txt; exit ${PIPESTATUS[0]} 
FlashAttention version 2.7.2.post1 -- Building container text-generation-webui:r36.4.3-exllama DOCKER_BUILDKIT=0 docker build --network=host --tag text-generation-webui:r36.4.3-exllama \ --file /home/nataraj/aidata/jetson-containers/packages/llm/exllama/Dockerfile \ --build-arg BASE_IMAGE=text-generation-webui:r36.4.3-flash-attention \ --build-arg EXLLAMA_VERSION="0.2.7" \ --build-arg EXLLAMA_BRANCH="0.2.7" \ --build-arg FORCE_BUILD="off" \ /home/nataraj/aidata/jetson-containers/packages/llm/exllama \ 2>&1 | tee /home/nataraj/aidata/jetson-containers/logs/20250210_030831/build/text-generation-webui_r36.4.3-exllama.txt; exit ${PIPESTATUS[0]} DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 19.46kB Step 1/5 : ARG BASE_IMAGE Step 2/5 : FROM ${BASE_IMAGE} ---> a1d4c6aa0e57 Step 3/5 : ARG EXLLAMA_VERSION EXLLAMA_BRANCH FORCE_BUILD=off ---> Using cache ---> 400ea6b78243 Step 4/5 : COPY install.sh build.sh /tmp/exllama/ ---> Using cache ---> a53b53f5d65c Step 5/5 : RUN /tmp/exllama/install.sh || /tmp/exllama/build.sh ---> Running in b647b6d7feca + git clone --branch=v0.2.7 --depth=1 --recursive https://github.com/turboderp/exllamav2 /opt/exllamav2 Cloning into '/opt/exllamav2'... Note: switching to 'ae241a9af586f0ebd6bb418aa0e2416b81bf4215'. You are in 'detached HEAD' state. You can look around, make experimental changes and commit them, and you can discard any commits you make in this state without impacting any branches by switching back to a branch. If you want to create a new branch to retain commits you create, you may do so (now or later) by using -c with the switch command. 
Example: git switch -c <new-branch-name> Or undo this operation with: git switch - Turn off this advice by setting config variable advice.detachedHead to false + '[' off == on ']' + pip3 install --no-cache-dir --verbose exllamav2==0.2.7 Using pip 25.0.1 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10) Looking in indexes: https://pypi.jetson-ai-lab.dev/jp6/cu126 Collecting exllamav2==0.2.7 Downloading https://pypi.jetson-ai-lab.dev/root/pypi/%2Bf/410/d1a69fed329f6/exllamav2-0.2.7-py3-none-any.whl (1.5 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.5/1.5 MB 49.9 MB/s eta 0:00:00 Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (2.2.3) Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (1.11.1.3) Collecting fastparquet (from exllamav2==0.2.7) Downloading https://pypi.jetson-ai-lab.dev/root/pypi/%2Bf/0b7/4333914f45434/fastparquet-2024.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (1.7 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 117.9 MB/s eta 0:00:00 Requirement already satisfied: torch>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (2.5.0) Requirement already satisfied: safetensors>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (0.5.2) Requirement already satisfied: sentencepiece>=0.1.97 in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (0.2.0) Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (2.19.1) Collecting websockets (from exllamav2==0.2.7) Downloading https://pypi.jetson-ai-lab.dev/root/pypi/%2Bf/f2e/53c72052f2596/websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (169 kB) Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (2024.11.6) Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from
exllamav2==0.2.7) (1.26.4) Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (13.9.4) Requirement already satisfied: pillow>=9.1.0 in /usr/local/lib/python3.10/dist-packages (from exllamav2==0.2.7) (11.1.0) Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=2.2.0->exllamav2==0.2.7) (3.17.0) Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=2.2.0->exllamav2==0.2.7) (4.12.2) Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=2.2.0->exllamav2==0.2.7) (3.4.2) Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2.2.0->exllamav2==0.2.7) (3.1.5) Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=2.2.0->exllamav2==0.2.7) (2024.9.0) Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=2.2.0->exllamav2==0.2.7) (1.13.1) Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=2.2.0->exllamav2==0.2.7) (1.3.0) Collecting cramjam>=2.3 (from fastparquet->exllamav2==0.2.7) Downloading https://pypi.jetson-ai-lab.dev/root/pypi/%2Bf/8e0/c5d98a4e791f0/cramjam-2.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.1 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 114.8 MB/s eta 0:00:00 Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from fastparquet->exllamav2==0.2.7) (24.2) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->exllamav2==0.2.7) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->exllamav2==0.2.7) (2025.1) Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages 
(from pandas->exllamav2==0.2.7) (2025.1) Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->exllamav2==0.2.7) (3.0.0) Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->exllamav2==0.2.7) (0.1.2) Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas->exllamav2==0.2.7) (1.16.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2.2.0->exllamav2==0.2.7) (3.0.2) Installing collected packages: websockets, cramjam, fastparquet, exllamav2 Successfully installed cramjam-2.9.1 exllamav2-0.2.7 fastparquet-2024.11.0 websockets-14.2 + python3 -c 'import exllamav2; print(exllamav2.__version__);' Loading exllamav2_ext extension (JIT)...