NTT
Example
use icicle_bn254::curve::{ScalarCfg, ScalarField};
use icicle_core::{ntt::{self, NTT}, traits::GenerateRandom};
use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostOrDeviceSlice, stream::CudaStream};
fn main() {
let size = 1 << 12; // Define the size of your input, e.g., 2^10
let icicle_omega = <Bn254Fr as FftField>::get_root_of_unity(
size.try_into()
.unwrap(),
)
// Generate random inputs
println!("Generating random inputs...");
let scalars = HostOrDeviceSlice::Host(ScalarCfg::generate_random(size));
// Allocate memory on CUDA device for NTT results
let mut ntt_results: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::cuda_malloc(size).expect("Failed to allocate CUDA memory");
// Create a CUDA stream
let stream = CudaStream::create().expect("Failed to create CUDA stream");
let ctx = DeviceContext::default(); // Assuming default device context
ScalarCfg::initialize_domain(ScalarField::from_ark(icicle_omega), &ctx, true).unwrap();
// Configure NTT
let mut cfg = ntt::NTTConfig::default();
cfg.ctx.stream = &stream;
cfg.is_async = true; // Set to true for asynchronous execution
// Execute NTT on device
println!("Executing NTT on device...");
ntt::ntt(&scalars, ntt::NTTDir::kForward, &cfg, &mut ntt_results).expect("Failed to execute NTT");
// Synchronize CUDA stream to ensure completion
stream.synchronize().expect("Failed to synchronize CUDA stream");
// Optionally, move results to host for further processing or verification
println!("NTT execution complete.");
}
NTT API overview
pub fn ntt<F>(
input: &HostOrDeviceSlice<F>,
dir: NTTDir,
cfg: &NTTConfig<F>,
output: &mut HostOrDeviceSlice<F>,
) -> IcicleResult<()>
ntt:ntt
expects:
input
- buffer to read the inputs of the NTT from.dir
- whether to compute forward or inverse NTT.cfg
- config used to specify extra arguments of the NTT.output
- buffer to write the NTT outputs into. Must be of the same size as input.
The input
and output
buffers can be on device or on host. Being on host means that they will be transferred to device during runtime.
NTT Config
pub struct NTTConfig<'a, S> {
pub ctx: DeviceContext<'a>,
pub coset_gen: S,
pub batch_size: i32,
pub columns_batch: bool,
pub ordering: Ordering,
are_inputs_on_device: bool,
are_outputs_on_device: bool,
pub is_async: bool,
pub ntt_algorithm: NttAlgorithm,
}
The NTTConfig
struct is a configuration object used to specify parameters for an NTT instance.
Fields
-
ctx: DeviceContext<'a>
: Specifies the device context, including the device ID and the stream ID. -
coset_gen: S
: Defines the coset generator used for coset (i)NTTs. By default, this is set toS::one()
, indicating that no coset is being used. -
batch_size: i32
: Determines the number of NTTs to compute in a single batch. The default value is 1, meaning that operations are performed on individual inputs without batching. Batch processing can significantly improve performance by leveraging parallelism in GPU computations. -
columns_batch
: If true the function will compute the NTTs over the columns of the input matrix and not over the rows. Defaults tofalse
. -
ordering: Ordering
: Controls the ordering of inputs and outputs for the NTT operation. This field can be used to specify decimation strategies (in time or in frequency) and the type of butterfly algorithm (Cooley-Tukey or Gentleman-Sande). The ordering is crucial for compatibility with various algorithmic approaches and can impact the efficiency of the NTT. -
are_inputs_on_device: bool
: Indicates whether the input data has been preloaded on the device memory. Iffalse
inputs will be copied from host to device. -
are_outputs_on_device: bool
: Indicates whether the output data is preloaded in device memory. Iffalse
outputs will be copied from host to device. If the inputs and outputs are the same pointer NTT will be computed in place. -
is_async: bool
: Specifies whether the NTT operation should be performed asynchronously. When set totrue
, the NTT function will not block the CPU, allowing other operations to proceed concurrently. Asynchronous execution requires careful synchronization to ensure data integrity and correctness. -
ntt_algorithm: NttAlgorithm
: Can be one ofAuto
,Radix2
,MixedRadix
.Auto
will selectRadix 2
orMixed Radix
algorithm based on heuristics.Radix2
andMixedRadix
will force the use of an algorithm regardless of the input size or other considerations. You should use one of these options when you know for sure that you want to