Print debug information about the CUDA devices and library installations.
Source code in darts/src/darts/utils/cuda.py
def debug_info():
    """Print debug information about the CUDA devices and library installations.

    All output goes to the module-level ``logger``; nothing is returned.
    The optional GPU libraries (pynvml, cupy, numba.cuda, cucim) are imported
    lazily, and a debug message is logged for each one that cannot be imported.
    """
    import os

    import torch
    from xrspatial.utils import has_cuda_and_cupy

    logger.debug("=== CUDA DEBUG INFO ===")
    logger.debug(f"PyTorch version: {torch.__version__}")
    logger.debug(f"PyTorch CUDA available: {torch.cuda.is_available()}")
    logger.debug(f"Cupy+Numba CUDA available: {has_cuda_and_cupy()}")
    logger.debug(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}")

    _log_nvml_info()

    try:
        import cupy  # type: ignore

        logger.debug(f"Cupy version: {cupy.__version__}")
        # This is the version which is installed (dynamically linked via PATH or LD_LIBRARY_PATH) in the environment
        env_runtime_version = cupy.cuda.get_local_runtime_version()
        # This is the version which is used by cupy (statically linked)
        cupy_runtime_version = cupy.cuda.runtime.runtimeGetVersion()
        if env_runtime_version != cupy_runtime_version:
            logger.warning(
                "Cupy CUDA runtime versions don't match!\n"
                f"Got {env_runtime_version} as local (dynamically linked) runtime version.\n"
                f"Got {cupy_runtime_version} as by cupy statically linked runtime version.\n"
                "Cupy will use the statically linked runtime version!"
            )
        else:
            logger.debug(f"Cupy CUDA runtime version: {cupy_runtime_version}")
        logger.debug(f"Cupy CUDA driver version: {cupy.cuda.runtime.driverGetVersion()}")
    except ImportError:
        logger.debug("Module 'cupy' not found, darts is probably installed without CUDA support.")

    try:
        import numba.cuda

        cuda_available = numba.cuda.is_available()
        logger.debug(f"Numba CUDA is available: {cuda_available}")
        if cuda_available:
            logger.debug(f"Numba CUDA runtime: {numba.cuda.runtime.get_version()}")
            # logger.debug(f"Numba CUDA has supported devices: {numba.cuda.detect()}")
    except ImportError:
        logger.debug("Module 'numba.cuda' not found, darts is probably installed without CUDA support.")

    try:
        import cucim  # type: ignore

        logger.debug(f"Cucim version: {cucim.__version__}")
    except ImportError:
        logger.debug("Module 'cucim' not found, darts is probably installed without CUDA support.")


def _log_nvml_info():
    """Log the driver versions and per-device memory usage reported by NVML.

    Does nothing beyond logging a debug message when pynvml is not installed
    or when NVML itself reports an error (e.g. it cannot be initialised).
    """
    try:
        from pynvml import (  # type: ignore
            NVMLError,
            nvmlDeviceGetCount,
            nvmlDeviceGetHandleByIndex,
            nvmlDeviceGetMemoryInfo,
            nvmlDeviceGetName,
            nvmlInit,
            nvmlShutdown,
            nvmlSystemGetCudaDriverVersion_v2,
            nvmlSystemGetDriverVersion,
        )
    except ImportError:
        logger.debug("Module 'pynvml' not found, darts is probably installed without CUDA support.")
        return

    def _as_text(value):
        # Depending on the installed pynvml release, string results arrive
        # either as bytes or already decoded as str; accept both.
        return value.decode() if isinstance(value, bytes) else value

    try:
        nvmlInit()
    except NVMLError as e:
        # A diagnostic helper should report the problem rather than crash.
        logger.debug(f"NVML could not be initialised: {e}")
        return

    try:
        driver_version = _as_text(nvmlSystemGetDriverVersion())
        logger.debug(f"CUDA driver version: {driver_version}")
        # NOTE(review): going by the function name, this value is the CUDA
        # version of the driver; the log label is kept as-is on purpose.
        cuda_driver_version = nvmlSystemGetCudaDriverVersion_v2()
        logger.debug(f"CUDA runtime version: {cuda_driver_version}")
        ndevices = nvmlDeviceGetCount()
        logger.debug(f"Number of CUDA devices: {ndevices}")

        for i in range(ndevices):
            handle = nvmlDeviceGetHandleByIndex(i)
            device_name = _as_text(nvmlDeviceGetName(handle))
            meminfo = nvmlDeviceGetMemoryInfo(handle)
            logger.debug(f"Device {i} ({device_name}): {meminfo.used / meminfo.total:.2%} memory usage.")
    except NVMLError as e:
        logger.debug(f"NVML query failed: {e}")
    finally:
        # Always pair nvmlInit() with nvmlShutdown(), even if a query raised.
        nvmlShutdown()