Source code for spikingjelly.activation_based.cuda_utils

import logging
import torch
import time
import numpy as np
from .. import configure
from typing import Callable, Union
try:
    import cupy
except BaseException as e:
    logging.info(f'spikingjelly.activation_based.cuda_utils: {e}')
    cupy = None

def cpu_timer(f: Callable, *args, **kwargs):
    """
    Returns the time used by calling ``f(*args, **kwargs)`` on the CPU.

    :param f: a function
    :type f: Callable
    :return: used time in milliseconds
    :rtype: float
    """
    start = time.perf_counter()
    f(*args, **kwargs)
    # perf_counter() measures seconds; convert to milliseconds to match cuda_timer
    return (time.perf_counter() - start) * 1000.
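
# A minimal usage sketch for cpu_timer (the tensor and shapes below are
# illustrative, not part of this module):
#
#     x = torch.rand(1024, 1024)
#     t = cpu_timer(torch.mm, x, x)  # time one CPU matmul
#     print(f'one CPU matmul took {t:.3f} ms')
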
def cuda_timer(device: Union[torch.device, int], f: Callable, *args, **kwargs):
    """
    Returns the time used by calling ``f(*args, **kwargs)`` on a CUDA device.

    :param device: the CUDA device on which ``f`` runs
    :type device: Union[torch.device, int]
    :param f: a function
    :type f: Callable
    :return: used time in milliseconds
    :rtype: float
    """
    torch.cuda.set_device(device)
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    f(*args, **kwargs)
    end.record()
    torch.cuda.synchronize(device)
    return start.elapsed_time(end)
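
# A minimal usage sketch for cuda_timer (the device index and tensor are
# illustrative assumptions):
#
#     x = torch.rand(1024, 1024, device='cuda:0')
#     t = cuda_timer(0, torch.mm, x, x)  # time one matmul on device 0
#     print(f'one CUDA matmul took {t:.3f} ms')
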
def cal_fun_t(n: int, device: Union[str, torch.device, int], f: Callable, *args, **kwargs):
    """
    Returns the average time used by calling ``f(*args, **kwargs)`` over ``n`` times on ``device``.

    .. admonition:: Note
        :class: note

        If ``n > 1``, this function calls ``f`` ``2n + 1`` times in total (one
        warm-up call followed by ``2n`` timed calls) and returns the average of
        the last ``n`` timed calls to reduce measurement error.

    :param n: repeat times
    :type n: int
    :param device: the device on which ``f`` runs. It can be 'cpu' or a CUDA device
    :type device: Union[str, torch.device, int]
    :param f: a function
    :type f: Callable
    :return: used time in milliseconds
    :rtype: float
    """
    if n == 1:
        if device == 'cpu':
            return cpu_timer(f, *args, **kwargs)
        else:
            return cuda_timer(device, f, *args, **kwargs)

    # warm up
    if device == 'cpu':
        cpu_timer(f, *args, **kwargs)
    else:
        cuda_timer(device, f, *args, **kwargs)

    t_list = []
    for _ in range(n * 2):
        if device == 'cpu':
            ti = cpu_timer(f, *args, **kwargs)
        else:
            ti = cuda_timer(device, f, *args, **kwargs)
        t_list.append(ti)

    # average only the last n measurements to reduce measurement error
    t_list = np.asarray(t_list)
    return t_list[n:].mean()
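
# A minimal usage sketch for cal_fun_t, averaging 64 timed runs (the device
# index, repeat count, and tensor are illustrative assumptions):
#
#     x = torch.rand(1024, 1024, device='cuda:0')
#     t = cal_fun_t(64, 0, torch.mm, x, x)
#     print(f'CUDA matmul: {t:.3f} ms on average')
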
def cal_blocks(numel: int, threads: int = -1):
    """
    Returns the number of blocks needed to call :class:`cupy.RawKernel` by
    ``kernel((blocks,), (threads,), ...)``.

    :param numel: the number of CUDA threads to launch in parallel, typically
        the number of elements to process
    :type numel: int
    :param threads: the number of threads in each CUDA block. The default value
        is -1, indicating to use ``configure.cuda_threads``
    :type threads: int
    :return: the number of blocks
    :rtype: int
    """
    if threads == -1:
        threads = configure.cuda_threads
    # ceiling division: enough blocks to cover all numel elements
    return (numel + threads - 1) // threads
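
# A minimal usage sketch for cal_blocks (assuming the default
# configure.cuda_threads of 1024, 5000 elements need ceil(5000 / 1024) == 5
# blocks):
#
#     blocks = cal_blocks(5000)
#     # kernel((blocks,), (configure.cuda_threads,), ...)
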
def get_contiguous(*args):
    """
    Makes every ``torch.Tensor`` or ``cupy.ndarray`` in ``*args`` contiguous.

    .. admonition:: Note
        :class: note

        Making an array contiguous cannot be done in-place. Hence, this
        function returns a new list.

    :return: a list of contiguous ``torch.Tensor`` or ``cupy.ndarray``
    :rtype: list
    """
    ret_list = []
    for item in args:
        if isinstance(item, torch.Tensor):
            ret_list.append(item.contiguous())
        elif cupy is not None and isinstance(item, cupy.ndarray):
            ret_list.append(cupy.ascontiguousarray(item))
        else:
            raise TypeError(type(item))
    return ret_list
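
# A minimal usage sketch for get_contiguous: a transposed tensor is a
# non-contiguous view, and the returned copy is contiguous (the tensor below
# is illustrative):
#
#     x = torch.rand(4, 8).t()   # x.is_contiguous() is False
#     y, = get_contiguous(x)     # y.is_contiguous() is True
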
def wrap_args_to_raw_kernel(device: int, *args):
    """
    Wraps the ``torch.Tensor`` or ``cupy.ndarray`` in ``*args`` into the
    ``args`` of :class:`cupy.RawKernel.__call__`.

    :param device: the CUDA device on which the raw kernel will run
    :type device: int
    :return: a ``tuple`` that contains the args to call :class:`cupy.RawKernel`
    :rtype: tuple
    """
    # note that the input must be contiguous
    # check device and get data_ptr from tensor
    ret_list = []
    for item in args:
        if isinstance(item, torch.Tensor):
            assert item.get_device() == device
            assert item.is_contiguous()
            ret_list.append(item.data_ptr())
        elif cupy is not None and isinstance(item, cupy.ndarray):
            assert item.device.id == device
            assert item.flags['C_CONTIGUOUS']
            ret_list.append(item)
        else:
            raise TypeError(type(item))
    return tuple(ret_list)
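
# A sketch of a full cupy.RawKernel launch using the helpers in this module
# (the kernel source, shapes, and device index are illustrative assumptions,
# not part of this module):
#
#     import cupy
#
#     kernel = cupy.RawKernel(r'''
#     extern "C" __global__
#     void scale_by_two(const float* x, float* y, const int numel)
#     {
#         const int i = blockIdx.x * blockDim.x + threadIdx.x;
#         if (i < numel)
#         {
#             y[i] = 2.0f * x[i];
#         }
#     }
#     ''', 'scale_by_two')
#
#     device = 0
#     x = torch.rand(5000, device='cuda:0')
#     y = torch.zeros_like(x)
#     x, y = get_contiguous(x, y)
#     blocks = cal_blocks(x.numel())
#     with DeviceEnvironment(device):
#         kernel((blocks,), (configure.cuda_threads,),
#                wrap_args_to_raw_kernel(device, x, y) + (cupy.int32(x.numel()),))
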
class DeviceEnvironment:
    def __init__(self, device: int):
        """
        :param device: the CUDA device
        :type device: int

        This module is used as a context to make CuPy use the specific device,
        and avoids ``torch.cuda.current_device()`` being accidentally changed
        by CuPy ( https://github.com/cupy/cupy/issues/6569 ).

        Example:

        .. code-block:: python

            with DeviceEnvironment(device):
                kernel((blocks,), (configure.cuda_threads,), ...)
        """
        self.device = device
        self.previous_device = None

    def __enter__(self):
        current_device = torch.cuda.current_device()
        if current_device != self.device:
            torch.cuda.set_device(self.device)
            self.previous_device = current_device

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.previous_device is not None:
            torch.cuda.set_device(self.previous_device)
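
# A minimal sketch of what DeviceEnvironment guarantees (the device indices
# are illustrative and require at least two GPUs):
#
#     torch.cuda.set_device(0)
#     with DeviceEnvironment(1):
#         pass  # CuPy calls in here see device 1
#     assert torch.cuda.current_device() == 0  # the previous device is restored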