/* |
File: DemonstrateFFT2D.c |
Abstract: Demonstration of vDSP two-dimemsional FFT routines. |
Version: 1.2 |
|
Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple |
Inc. ("Apple") in consideration of your agreement to the following |
terms, and your use, installation, modification or redistribution of |
this Apple software constitutes acceptance of these terms. If you do |
not agree with these terms, please do not use, install, modify or |
redistribute this Apple software. |
|
In consideration of your agreement to abide by the following terms, and |
subject to these terms, Apple grants you a personal, non-exclusive |
license, under Apple's copyrights in this original Apple software (the |
"Apple Software"), to use, reproduce, modify and redistribute the Apple |
Software, with or without modifications, in source and/or binary forms; |
provided that if you redistribute the Apple Software in its entirety and |
without modifications, you must retain this notice and the following |
text and disclaimers in all such redistributions of the Apple Software. |
Neither the name, trademarks, service marks or logos of Apple Inc. may |
be used to endorse or promote products derived from the Apple Software |
without specific prior written permission from Apple. Except as |
expressly stated in this notice, no other rights or licenses, express or |
implied, are granted by Apple herein, including but not limited to any |
patent rights that may be infringed by your derivative works or by other |
works in which the Apple Software may be incorporated. |
|
The Apple Software is provided by Apple on an "AS IS" basis. APPLE |
MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION |
THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS |
FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND |
OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS. |
|
IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL |
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, |
MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED |
AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE), |
STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE |
POSSIBILITY OF SUCH DAMAGE. |
|
Copyright (C) 2012 Apple Inc. All Rights Reserved. |
|
|
This is a sample module to illustrate the use of vDSP's two-dimensional |
FFT functions. This module also times the functions. |
|
Copyright (C) 2007 Apple Inc. All rights reserved. |
*/ |
|
|
#include <math.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <string.h> |
|
#include <Accelerate/Accelerate.h> |
|
#include "Demonstrate.h" |
|
|
#define Iterations 10000 // Number of iterations used in the timing loop. |
|
#define Log2R 5u // Base-two logarithm of number of rows. |
#define Log2C 6u // Base-two logarithm of number of columns. |
#define R (1u<<Log2R) // Number of rows. |
#define C (1u<<Log2C) // Number of columns. |
#define N (R*C) // Total number of elements. |
|
|
static const float_t TwoPi = 0x3.243f6a8885a308d313198a2e03707344ap1; |
|
|
// Return the maximum of two numbers. |
#define max(a, b) ((a) < (b) ? (b) : (a)) |
|
|
/* Compare two complex vectors and report the relative error between them. |
(The vectors must have unit strides; other strides are not supported.) |
*/ |
static void CompareComplexVectors( |
DSPSplitComplex Expected, DSPSplitComplex Observed, vDSP_Length Length) |
{ |
double_t Error = 0, Magnitude = 0; |
|
int i; |
for (i = 0; i < Length; ++i) |
{ |
double_t re, im; |
|
// Accumulate square of magnitude of elements. |
re = Expected.realp[i]; |
im = Expected.imagp[i]; |
Magnitude += re*re + im*im; |
|
// Accumulate square of error. |
re = Expected.realp[i] - Observed.realp[i]; |
im = Expected.imagp[i] - Observed.imagp[i]; |
Error += re*re + im*im; |
} |
|
printf("\tRelative error in observed result is %g.\n", |
sqrt(Error / Magnitude)); |
} |
|
|
/* Demonstrate the real-to-complex two-dimensional in-place FFT, |
vDSP_fft2d_zrip. |
|
The in-place FFT writes results into the same array that contains the |
input data. |
|
Applications may need to rearrange data before calling the |
real-to-complex FFT. This is because the vDSP FFT routines currently |
use a separated-data complex format, in which real components and |
imaginary components are stored in different arrays. For the |
real-to-complex FFT, real data passed using the same arrangements used |
for complex data. (This is largely due to the nature of the algorithm |
used in performing in the real-to-complex FFT.) |
|
With a one-dimensional array, the mapping puts even-indexed elements of |
the real data in real components of the complex data and odd-indexed |
elements of the real data in imaginary components of the complex-data. |
With two-dimensional arrays stored in row-major order (which C uses and |
FORTRAN does not), the even-indexed elements within each row are mapped |
to the real components, and the odd-indexed elements are mapped to |
imaginary components. Thus, an element in row 13 and column 24 would |
be mapped to an element in row 13 and column 12 of the array containing |
real components. An element in row 13 and column 25 would be mapped to |
an element in row 13 and column 12 of the array containing imaginary |
components. |
|
(It is possible to improve this situation by implementing |
interleaved-data complex format. If you would benefit from such |
routines, please enter an enhancement request at |
http://developer.apple.com/bugreporter.) |
|
If an application's real data is stored sequentially in an array (as is |
common) and the design cannot be altered to provide data in the |
even-odd split configuration, then the data can be moved using the |
routine vDSP_ctoz. |
|
As noted in the one-dimensional FFT code and the vDSP Library manual, |
the output of the real-to-complex FFT with N real inputs contains only |
about N/2 complex numbers (actually N/2-1 complex numbers and two real |
numbers). The two-dimensional real-to-complex FFT has similar |
symmetries, and some of its mathematical outputs are packed to fit into |
memory with the same dimensions as the input. This packing results in |
some awkward arrangements in the first column (index 0) of the output. |
It is described in the vDSP Library manual and not further addressed |
here. |
*/ |
static void DemonstratevDSP_fft2d_zrip(FFTSetup Setup) |
{ |
/* Define a stride for the array be passed to the FFT. In many |
applications, the stride is one and is passed to the vDSP |
routine as a constant. |
*/ |
const vDSP_Stride Stride = 1; |
|
// Define variables for loop iterators. |
vDSP_Length i, r, c; |
|
// Define some variables used to time the routine. |
ClockData t0, t1; |
double Time; |
|
printf("\n\tTwo-dimensional real FFT of %lu*%lu elements.\n", |
(unsigned long) R, (unsigned long) C); |
|
// Allocate memory for the arrays. |
float *Signal = malloc(N * Stride * sizeof Signal); |
float *ObservedMemory = malloc(N * sizeof *ObservedMemory); |
|
if (ObservedMemory == NULL || Signal == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
// Assign half of ObservedMemory to reals and half to imaginaries. |
DSPSplitComplex Observed = { ObservedMemory, ObservedMemory + N/2 }; |
|
/* Generate an input signal. In a real application, data would of |
course be provided from an image file, sensors, or other |
source. |
|
We inject two cosine waves, each with a frequency in the row |
dimension, a frequency in the column dimension, and a phase. |
*/ |
const float |
Frequency0R = 9, Frequency1R = 6, |
Frequency0C = 13, Frequency1C = 11, |
Phase0 = 0, Phase1 = .25; |
for (r = 0; r < R; ++r) |
for (c = 0; c < C; ++c) |
Signal[(r*C + c) * Stride] = |
cos((r*Frequency0R/R + c*Frequency0C/C + Phase0) * TwoPi) |
+ cos((r*Frequency1R/R + c*Frequency1C/C + Phase1) * TwoPi); |
|
/* Reinterpret the real signal as an interleaved-data complex |
vector and use vDSP_ctoz to move the data to a separated-data |
complex vector. |
|
Note that we pass vDSP_ctoz two times Signal's normal stride, |
because ctoz skips through a complex vector from real to real, |
skipping imaginary elements. Considering this as a stride of |
two real-sized elements rather than one complex element is a |
legacy use. |
|
Also, we assume here the rows of Signal are contiguous, that |
there is no additional stride between rows. When this is the |
case, we can use a single call to vDSP_ctoz to copy the whole |
array. If it were not the case, we would need to use a |
separate vDSP_ctoz call for each row. |
|
In the destination array, a stride of one is used regardless of |
the source stride. Since the destination is a buffer allocated |
just for this purpose, there is no point in replicating the |
source stride. |
*/ |
vDSP_ctoz((DSPComplex *) Signal, 2*Stride, &Observed, 1, N/2); |
|
/* Here and elsewhere we treat Signal and Observed as a |
one-dimensional array, even though they contain a |
two-dimensional signal. That is fine because they are |
laid out in memory as consecutive elements, so they are |
one-dimensional arrays for purposes of copying data, |
which do not care about the interpretation of the |
data. When we want to act on the two-dimensional |
signal, we do the necessary subscript arithmetic to |
convert indices in the two-dimensional signal to an |
index in the one-dimensional array. |
|
It is also possible to cast the one-dimensional array |
(or a pointer to it) to a two-dimensional array (or a |
pointer to it). The resulting behavior is not defined |
by the C standard. Many compilers produce code that |
behaves in the obvious way, but it is possible to get |
undesirable behavior, particularly when optimization is |
involved. |
*/ |
|
// Perform a real-to-complex FFT. |
vDSP_fft2d_zrip(Setup, &Observed, 1, 0, Log2C, Log2R, FFT_FORWARD); |
|
/* Observe that zero is passed as the row stride. |
Normally the row stride is number of elements from the |
start of one row to the start of the next. Zero is a |
special value that means to use the number of columns. |
This is the normal case when an array is not embedded |
in a larger array. If, for example, you were taking |
the DFT of a 16*16 array embedded inside a 1024*1024 |
array, you would pass 1024 as the row stride, because |
the rows of the 16*16 array begin 1024 elements apart |
in memory. |
*/ |
|
/* Prepare expected results based on analytical transformation of |
the input signal. |
*/ |
float *ExpectedMemory = malloc(N * sizeof *ExpectedMemory); |
if (ExpectedMemory == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
// Assign half of ExpectedMemory to reals and half to imaginaries. |
DSPSplitComplex Expected = { ExpectedMemory, ExpectedMemory + N/2 }; |
|
for (r = 0; r < R; ++r) |
for (c = 0; c < C/2; ++c) |
Expected.realp[r*C/2 + c] |
= Expected.imagp[r*C/2 + c] = 0; |
|
// Add the frequencies in the signal to the expected results. |
r = Frequency0R; |
c = Frequency0C; |
Expected.realp[r*C/2 + c] = N * cos(Phase0 * TwoPi); |
Expected.imagp[r*C/2 + c] = N * sin(Phase0 * TwoPi); |
|
r = Frequency1R; |
c = Frequency1C; |
Expected.realp[r*C/2 + c] = N * cos(Phase1 * TwoPi); |
Expected.imagp[r*C/2 + c] = N * sin(Phase1 * TwoPi); |
|
// Compare the observed results to the expected results. |
CompareComplexVectors(Expected, Observed, N/2); |
|
// Release memory. |
free(ExpectedMemory); |
|
/* The above shows how to use the vDSP_fft2d_zrip routine. Now we |
will see how fast it is. |
*/ |
|
/* Zero the signal before timing because repeated FFTs on non-zero |
data can cause abnormalities such as infinities, NaNs, and |
subnormal numbers. |
*/ |
for (r = 0; r < R; ++r) |
for (c = 0; c < C; ++c) |
Signal[r*C + c] = 0; |
vDSP_ctoz((DSPComplex *) Signal, 2*Stride, &Observed, 1, N/2); |
|
// Time vDSP_fft2d_zrip by itself. |
|
t0 = Clock(); |
|
for (i = 0; i < Iterations; ++i) |
vDSP_fft2d_zrip(Setup, &Observed, 1, 0, Log2C, Log2R, |
FFT_FORWARD); |
|
t1 = Clock(); |
|
// Average the time over all the loop iterations. |
Time = ClockToSeconds(t1, t0) / Iterations; |
|
printf("\tvDSP_fft2d_zrip on %lu*%lu elements takes %g microseconds.\n", |
(unsigned long) R, (unsigned long) C, Time * 1e6); |
|
/* Time vDSP_fft2d_zrip with the vDSP_ctoz and vDSP_ztoc |
transformations. |
*/ |
|
t0 = Clock(); |
|
for (i = 0; i < Iterations; ++i) |
{ |
vDSP_ctoz((DSPComplex *) Signal, 2*Stride, &Observed, 1, N/2); |
vDSP_fft2d_zrip(Setup, &Observed, 1, 0, Log2C, Log2R, |
FFT_FORWARD); |
vDSP_ztoc(&Observed, 1, (DSPComplex *) Signal, 2*Stride, N/2); |
} |
|
t1 = Clock(); |
|
// Average the time over all the loop iterations. |
Time = ClockToSeconds(t1, t0) / Iterations; |
|
printf( |
"\tvDSP_fft2d_zrip with vDSP_ctoz and vDSP_ztoc takes " |
"%g microseconds.\n", |
Time * 1e6); |
|
// Release resources. |
free(ObservedMemory); |
free(Signal); |
} |
|
|
/* Demonstrate the real-to-complex two-dimensional out-of-place FFT, |
vDSP_fft2d_zrop. |
|
The out-of-place FFT writes results into a different array than the |
input. If you are using vDSP_ctoz to reformat the input, you do not |
need vDSP_fft2d_zrop because you move the data from an input array to |
an output array when you call vDSP_ctoz. vDSP_fft2d_zrop may be useful |
when incoming data arrives in the format used by vDSP_fft2d_zrop, and |
you want to simultaneously perform an FFT and store the results in |
another array. |
*/ |
static void DemonstratevDSP_fft2d_zrop(FFTSetup Setup) |
{ |
/* Define a stride for the array be passed to the FFT. In many |
applications, the stride is one and is passed to the vDSP |
routine as a constant. |
*/ |
const vDSP_Stride Stride = 1; |
|
// Define variables for loop iterators. |
vDSP_Length i, r, c; |
|
// Define some variables used to time the routine. |
ClockData t0, t1; |
double Time; |
|
printf("\n\tTwo-dimensional real FFT of %lu*%lu elements.\n", |
(unsigned long) R, (unsigned long) C); |
|
// Allocate memory for the arrays. |
float *Signal = malloc(N * Stride * sizeof Signal); |
float *BufferMemory = malloc(N * sizeof *BufferMemory); |
float *ObservedMemory = malloc(N * sizeof *ObservedMemory); |
|
if (ObservedMemory == NULL || BufferMemory == NULL || Signal == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
// Assign half of BufferMemory to reals and half to imaginaries. |
DSPSplitComplex Buffer = { BufferMemory, BufferMemory + N/2 }; |
|
// Assign half of ObservedMemory to reals and half to imaginaries. |
DSPSplitComplex Observed = { ObservedMemory, ObservedMemory + N/2 }; |
|
/* Generate an input signal. In a real application, data would of |
course be provided from an image file, sensors, or other |
source. |
|
We inject two cosine waves, each with a frequency in the row |
dimension, a frequency in the column dimension, and a phase. |
*/ |
const float |
Frequency0R = 25 , Frequency1R = 8, |
Frequency0C = 17 , Frequency1C = 14, |
Phase0 = .71, Phase1 = .46; |
for (r = 0; r < R; ++r) |
for (c = 0; c < C; ++c) |
Signal[(r*C + c) * Stride] = |
cos((r*Frequency0R/R + c*Frequency0C/C + Phase0) * TwoPi) |
+ cos((r*Frequency1R/R + c*Frequency1C/C + Phase1) * TwoPi); |
|
/* Reinterpret the real signal as an interleaved-data complex |
vector and use vDSP_ctoz to move the data to a separated-data |
complex vector. |
|
Note that we pass vDSP_ctoz two times Signal's normal stride, |
because ctoz skips through a complex vector from real to real, |
skipping imaginary elements. Considering this as a stride of |
two real-sized elements rather than one complex element is a |
legacy use. |
|
Also, we assume here the rows of Signal are contiguous, that |
there is no additional stride between rows. When this is the |
case, we can use a single call to vDSP_ctoz to copy the whole |
array. If it were not the case, we would need to use a |
separate vDSP_ctoz call for each row. |
|
In the destination array, a stride of one is used regardless of |
the source stride. Since the destination is a buffer allocated |
just for this purpose, there is no point in replicating the |
source stride. |
*/ |
vDSP_ctoz((DSPComplex *) Signal, 2*Stride, &Buffer, 1, N/2); |
|
/* Here and elsewhere we treat Signal and Observed as a |
one-dimensional array, even though they contain a |
two-dimensional signal. That is fine because they are |
laid out in memory as consecutive elements, so they are |
one-dimensional arrays for purposes of copying data, |
which do not care about the interpretation of the |
data. When we want to act on the two-dimensional |
signal, we do the necessary subscript arithmetic to |
convert indices in the two-dimensional signal to an |
index in the one-dimensional array. |
|
It is also possible to cast the one-dimensional array |
(or a pointer to it) to a two-dimensional array (or a |
pointer to it). The resulting behavior is not defined |
by the C standard. Many compilers produce code that |
behaves in the obvious way, but it is possible to get |
undesirable behavior, particularly when optimization is |
involved. |
*/ |
|
// Perform a real-to-complex FFT. |
vDSP_fft2d_zrop(Setup, &Buffer, 1, 0, &Observed, 1, 0, |
Log2C, Log2R, FFT_FORWARD); |
|
/* Observe that zero is passed as the row stride. |
Normally the row |
stride is number of elements from the start of one row |
to the start of the next. Zero is a special value that |
means to use the number of columns. This is the normal |
case when an array is not embedded in a larger array. |
If, for example, you were taking the DFT of a 16*16 |
array embedded inside a 1024*1024 array, you would pass |
1024 as the row stride, because the rows of the 16*16 |
array begin 1024 elements apart in memory. |
*/ |
|
/* Prepare expected results based on analytical transformation of |
the input signal. |
*/ |
float *ExpectedMemory = malloc(N * sizeof *ExpectedMemory); |
if (ExpectedMemory == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
// Assign half of ExpectedMemory to reals and half to imaginaries. |
DSPSplitComplex Expected = { ExpectedMemory, ExpectedMemory + N/2 }; |
|
for (r = 0; r < R; ++r) |
for (c = 0; c < C/2; ++c) |
Expected.realp[r*C/2 + c] |
= Expected.imagp[r*C/2 + c] = 0; |
|
// Add the frequencies in the signal to the expected results. |
r = Frequency0R; |
c = Frequency0C; |
Expected.realp[r*C/2 + c] = N * cos(Phase0 * TwoPi); |
Expected.imagp[r*C/2 + c] = N * sin(Phase0 * TwoPi); |
|
r = Frequency1R; |
c = Frequency1C; |
Expected.realp[r*C/2 + c] = N * cos(Phase1 * TwoPi); |
Expected.imagp[r*C/2 + c] = N * sin(Phase1 * TwoPi); |
|
// Compare the observed results to the expected results. |
CompareComplexVectors(Expected, Observed, N/2); |
|
// Release memory. |
free(ExpectedMemory); |
|
/* The above shows how to use the vDSP_fft2d_zrop routine. Now we |
will see how fast it is. |
*/ |
|
// Time vDSP_fft2d_zrop by itself. |
|
t0 = Clock(); |
|
for (i = 0; i < Iterations; ++i) |
vDSP_fft2d_zrop(Setup, &Buffer, 1, 0, &Observed, 1, 0, |
Log2C, Log2R, FFT_FORWARD); |
|
t1 = Clock(); |
|
// Average the time over all the loop iterations. |
Time = ClockToSeconds(t1, t0) / Iterations; |
|
printf("\tvDSP_fft2d_zrop on %lu*%lu elements takes %g microseconds.\n", |
(unsigned long) R, (unsigned long) C, Time * 1e6); |
|
/* Unlike the vDSP_fft2d_zrip example, we do not time |
vDSP_fft2d_zrop in conjunction with vDSP_ctoz and vDSP_ztoc. |
If your data arrangement requires you to use vDSP_ctoz, then |
you are already making a copy of the input data. So you would |
just do the FFT in-place in that copy; you would call |
vDSP_fft2d_zrip and not vDSP_fft2d_zrop. |
*/ |
|
// Release resources. |
free(ObservedMemory); |
free(BufferMemory); |
free(Signal); |
} |
|
|
/* Demonstrate the complex two-dimensional in-place FFT, vDSP_fft2d_zip. |
|
The in-place FFT writes results into the same array that contains the |
input data. This may be faster than an out-of-place routine because it |
uses less memory (so there is less data to load from memory and a |
greater chance of keeping data in cache). |
*/ |
static void DemonstratevDSP_fft2d_zip(FFTSetup Setup) |
{ |
/* Define a stride for the array be passed to the FFT. In many |
applications, the stride is one and is passed to the vDSP |
routine as a constant. |
*/ |
const vDSP_Stride SignalStride = 1; |
|
// Define variables for loop iterators. |
vDSP_Length i, r, c; |
|
// Define some variables used to time the routine. |
ClockData t0, t1; |
double Time; |
|
printf("\n\tTwo-dimensional complex FFT of %lu*%lu elements.\n", |
(unsigned long) R, (unsigned long) C); |
|
// Allocate memory for the arrays. |
DSPSplitComplex Signal; |
Signal.realp = malloc(N * SignalStride * sizeof Signal.realp); |
Signal.imagp = malloc(N * SignalStride * sizeof Signal.imagp); |
|
if (Signal.realp == NULL || Signal.imagp == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
/* Generate an input signal. In a real application, data would of |
course be provided from an image file, sensors, or other |
source. |
|
We inject two cosine waves, each with a frequency in the row |
dimension, a frequency in the column dimension, and a phase. |
*/ |
const float |
Frequency0R = 18 , Frequency1R = 3, |
Frequency0C = 22 , Frequency1C = 17, |
Phase0 = .14, Phase1 = .67; |
for (r = 0; r < R; ++r) |
for (c = 0; c < C; ++c) |
{ |
Signal.realp[(r*C + c) * SignalStride] = |
cos((r*Frequency0R/R + c*Frequency0C/C + Phase0) * TwoPi) |
+ cos((r*Frequency1R/R + c*Frequency1C/C + Phase1) * TwoPi); |
Signal.imagp[(r*C + c) * SignalStride] = |
sin((r*Frequency0R/R + c*Frequency0C/C + Phase0) * TwoPi) |
+ sin((r*Frequency1R/R + c*Frequency1C/C + Phase1) * TwoPi); |
} |
|
// Perform an FFT. |
vDSP_fft2d_zip(Setup, &Signal, SignalStride, 0, Log2C, Log2R, |
FFT_FORWARD); |
|
/* Prepare expected results based on analytical transformation of |
the input signal. |
*/ |
DSPSplitComplex Expected; |
Expected.realp = malloc(N * sizeof Expected.realp); |
Expected.imagp = malloc(N * sizeof Expected.imagp); |
|
if (Expected.realp == NULL || Expected.imagp == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
for (i = 0; i < N; ++i) |
Expected.realp[i] = Expected.imagp[i] = 0; |
|
// Add the frequencies in the signal to the expected results. |
r = Frequency0R; |
c = Frequency0C; |
Expected.realp[r*C + c] = N * cos(Phase0 * TwoPi); |
Expected.imagp[r*C + c] = N * sin(Phase0 * TwoPi); |
|
r = Frequency1R; |
c = Frequency1C; |
Expected.realp[r*C + c] = N * cos(Phase1 * TwoPi); |
Expected.imagp[r*C + c] = N * sin(Phase1 * TwoPi); |
|
// Compare the observed results to the expected results. |
CompareComplexVectors(Expected, Signal, N); |
|
// Release memory. |
free(Expected.realp); |
free(Expected.imagp); |
|
/* The above shows how to use the vDSP_fft2d_zip routine. Now we |
will see how fast it is. |
*/ |
|
/* Zero the signal before timing because repeated FFTs on non-zero |
data can cause abnormalities such as infinities, NaNs, and |
subnormal numbers. |
*/ |
for (i = 0; i < N; ++i) |
Signal.realp[i] = Signal.imagp[i] = 0; |
|
// Time vDSP_fft2d_zip by itself. |
|
t0 = Clock(); |
|
for (i = 0; i < Iterations; ++i) |
vDSP_fft2d_zip(Setup, &Signal, SignalStride, 0, |
Log2C, Log2R, FFT_FORWARD); |
|
t1 = Clock(); |
|
// Average the time over all the loop iterations. |
Time = ClockToSeconds(t1, t0) / Iterations; |
|
printf("\tvDSP_fft2d_zip on %lu*%lu elements takes %g microseconds.\n", |
(unsigned long) R, (unsigned long) C, Time * 1e6); |
|
// Release resources. |
free(Signal.realp); |
free(Signal.imagp); |
} |
|
|
/* Demonstrate the complex two-dimensional out-of-place FFT, |
vDSP_fft2d_zop. |
|
The out-of-place FFT writes results into a different array than the |
input. |
*/ |
static void DemonstratevDSP_fft2d_zop(FFTSetup Setup) |
{ |
/* Define strides for the arrays be passed to the FFT. In many |
applications, the strides are one and are passed to the vDSP |
routine as constants. |
*/ |
const vDSP_Stride SignalStride = 1, ObservedStride = 1; |
|
// Define variables for loop iterators. |
vDSP_Length i, r, c; |
|
// Define some variables used to time the routine. |
ClockData t0, t1; |
double Time; |
|
printf("\n\tTwo-dimensional complex FFT of %lu*%lu elements.\n", |
(unsigned long) R, (unsigned long) C); |
|
// Allocate memory for the arrays. |
DSPSplitComplex Signal, Observed; |
Signal.realp = malloc(N * SignalStride * sizeof Signal.realp); |
Signal.imagp = malloc(N * SignalStride * sizeof Signal.imagp); |
Observed.realp = malloc(N * ObservedStride * sizeof Observed.realp); |
Observed.imagp = malloc(N * ObservedStride * sizeof Observed.imagp); |
|
if (Signal.realp == NULL || Signal.imagp == NULL |
|| Observed.realp == NULL || Observed.imagp == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
/* Generate an input signal. In a real application, data would of |
course be provided from an image file, sensors, or other |
source. |
|
We inject two cosine waves, each with a frequency in the row |
dimension, a frequency in the column dimension, and a phase. |
*/ |
const float |
Frequency0R = 25 , Frequency1R = 6, |
Frequency0C = 15 , Frequency1C = 18, |
Phase0 = .45, Phase1 = .37; |
for (r = 0; r < R; ++r) |
for (c = 0; c < C; ++c) |
{ |
Signal.realp[(r*C + c) * SignalStride] = |
cos((r*Frequency0R/R + c*Frequency0C/C + Phase0) * TwoPi) |
+ cos((r*Frequency1R/R + c*Frequency1C/C + Phase1) * TwoPi); |
Signal.imagp[(r*C + c) * SignalStride] = |
sin((r*Frequency0R/R + c*Frequency0C/C + Phase0) * TwoPi) |
+ sin((r*Frequency1R/R + c*Frequency1C/C + Phase1) * TwoPi); |
} |
|
// Perform an FFT. |
vDSP_fft2d_zop(Setup, &Signal, SignalStride, 0, |
&Observed, ObservedStride, 0, Log2C, Log2R, FFT_FORWARD); |
|
/* Prepare expected results based on analytical transformation of |
the input signal. |
*/ |
DSPSplitComplex Expected; |
Expected.realp = malloc(N * sizeof Expected.realp); |
Expected.imagp = malloc(N * sizeof Expected.imagp); |
|
if (Expected.realp == NULL || Expected.imagp == NULL) |
{ |
fprintf(stderr, "Error, failed to allocate memory.\n"); |
exit(EXIT_FAILURE); |
} |
|
for (i = 0; i < N; ++i) |
Expected.realp[i] = Expected.imagp[i] = 0; |
|
// Add the frequencies in the signal to the expected results. |
r = Frequency0R; |
c = Frequency0C; |
Expected.realp[r*C + c] = N * cos(Phase0 * TwoPi); |
Expected.imagp[r*C + c] = N * sin(Phase0 * TwoPi); |
|
r = Frequency1R; |
c = Frequency1C; |
Expected.realp[r*C + c] = N * cos(Phase1 * TwoPi); |
Expected.imagp[r*C + c] = N * sin(Phase1 * TwoPi); |
|
// Compare the observed results to the expected results. |
CompareComplexVectors(Expected, Observed, N); |
|
// Release memory. |
free(Expected.realp); |
free(Expected.imagp); |
|
/* The above shows how to use the vDSP_fft2d_zop routine. Now we |
will see how fast it is. |
*/ |
|
// Time vDSP_fft2d_zop by itself. |
|
t0 = Clock(); |
|
for (i = 0; i < Iterations; ++i) |
vDSP_fft2d_zop(Setup, &Signal, SignalStride, 0, |
&Observed, ObservedStride, 0, Log2C, Log2R, |
FFT_FORWARD); |
|
t1 = Clock(); |
|
// Average the time over all the loop iterations. |
Time = ClockToSeconds(t1, t0) / Iterations; |
|
printf("\tvDSP_fft2d_zop on %lu*%lu elements takes %g microseconds.\n", |
(unsigned long) R, (unsigned long) C, Time * 1e6); |
|
// Release resources. |
free(Signal.realp); |
free(Signal.imagp); |
free(Observed.realp); |
free(Observed.imagp); |
} |
|
|
// Demonstrate vDSP FFT functions. |
void DemonstrateFFT2D(void) |
{ |
printf("Begin %s.\n", __func__); |
|
/* Initialize data for the FFT routines. Note that the longest |
length we will use, in either dimension, is passed to the |
setup. |
*/ |
FFTSetup Setup = vDSP_create_fftsetup(max(Log2R, Log2C), FFT_RADIX2); |
if (Setup == NULL) |
{ |
fprintf(stderr, "Error, vDSP_create_fftsetup failed.\n"); |
exit (EXIT_FAILURE); |
} |
|
DemonstratevDSP_fft2d_zrip(Setup); |
DemonstratevDSP_fft2d_zrop(Setup); |
DemonstratevDSP_fft2d_zip(Setup); |
DemonstratevDSP_fft2d_zop(Setup); |
|
vDSP_destroy_fftsetup(Setup); |
|
printf("\nEnd %s.\n\n\n", __func__); |
} |