! File: rf16.f90 ! Public domain 2005 James Van Buskirk ! PURPOSE: Calculates real-half complex FFTs of order 2**n. ! Prompts the user for the exponent of the problem, and if the ! user wants to compute FFTs of order 2**n, he enters n at the ! prompt. He is then prompted for to choose whether to test ! rf16, rfs16, or rf64. Here, rf16 is a radix-16 DIT FFT ! without scaling, rfs16 is a radix-16 DIT FFT with scaling ! between stages, and rf64 is a radix-64 DIT FFT with ! internal scaling. The latter two alogrithms require ! asyptotically fewer operations than the split-radix ! algorithm. ! Four columns of data are produced: ! 1) Tells which input vector is being processed. The input ! is x(k) = delta(k,column_1) ! 2) Is the maximum absolute deviation of any output vector ! element from its corresponding theoretical output vector ! element. ! 3) Is the number of real additions required to compute the ! real-half complex DFT. ! 4) Is the number of real multiplication required to compute ! the real-half complex DFT. ! The last line of output is the maximum of column 2 above. ! *************************************************************** ! module mykinds ! Sets up kind type parameter for double precision and ! (if available) quadruple precision. If there is no ! quad precision type, double precision will be used. ! Also sets up kind type parameters for word (ik2) and ! quadword (ik8) integers. ! *************************************************************** module mykinds implicit none integer, parameter :: dp = selected_real_kind(15,300) integer, parameter :: qp_preferred = selected_real_kind(30,1000) integer, parameter :: qp = (1+sign(1,qp_preferred))/2*qp_preferred+ & (1-sign(1,qp_preferred))/2*dp integer, parameter :: ik2 = selected_int_kind(4) integer, parameter :: ik8 = selected_int_kind(18) end module mykinds ! *************************************************************** ! *************************************************************** ! module auxiliary_routines ! Provides subroutines to permute an array in bit-reversed order ! and to reverse the order of the sine half of the output. ! *************************************************************** module auxiliary_routines use mykinds, wp=>dp implicit none private wp contains !................................................................ ! subroutine reverse_table(table,k) ! Prepares a bit reversal table. The table required for a bit ! reversal on 2**k elements only needs to have 2**(k/2)-1 entries. ! Also note that the generated table is a 16-bit integer to save ! space. !................................................................ subroutine reverse_table(table,k) integer, intent(in) :: k ! Size of the problem will be 2**k integer(ik2), intent(out) :: table(0:2**(k/2)-1) ! Helper table ! Needed by subroutine bit_reverse integer j ! Current recursion step if(size(table) == 0) return table(0) = 0 j = 1 do while(2*j <= size(table)) table(0:j-1) = ishft(table(0:j-1),1) table(j:2*j-1) = ior(table(0:j-1),int(1,ik2)) j = 2*j end do end subroutine reverse_table !................................................................ !................................................................ ! subroutine bit_reverse(x,k,table) ! Carries out a bit-reversal permutation on REAL(wp) array ! x(0:2**k-1). Needs 16-bit integer table as generated by ! subroutine reverse_table. Follows the recipe in Big ! Jimmy Walker's book. !................................................................ subroutine bit_reverse(x,k,table) integer, intent(in) :: k ! Size of the problem will be 2**k real(wp), intent(inout) :: x(0:2**k-1) ! Array to be permuted integer(ik2), intent(in) :: table(0:2**(k/2)-1) ! Helper table integer Lo, mid, hi ! Low, middle, and high bits of index integer ind1, ind2 ! Indices of array elements to be swapped integer lim2 ! 2**(k/2) integer shift_count ! Distance to shift high bits integer ind1_start, ind2_start ! Bits known in outer loop real(wp) temp ! Temporarily holds swapped value lim2 = ishft(1,iand(k/2,31)) shift_count = (k+1)/2 mid = 0 do Lo = 0, lim2-2 ind1_start = Lo+mid ind2_start = mid+ishft(int(table(Lo)),iand(shift_count,31)) do hi = Lo+1, lim2-1 ind1 = ind1_start+ishft(int(table(hi)),iand(shift_count,31)) ind2 = hi+ind2_start temp = x(ind1) x(ind1) = x(ind2) x(ind2) = temp end do end do if(iand(k,1) /= 0) then mid = lim2 do Lo = 0, lim2-2 ind1_start = Lo+mid ind2_start = mid+ishft(int(table(Lo)),iand(shift_count,31)) do hi = Lo+1, lim2-1 ind1 = ind1_start+ishft(int(table(hi)),iand(shift_count,31)) ind2 = hi+ind2_start temp = x(ind1) x(ind1) = x(ind2) x(ind2) = temp end do end do end if end subroutine bit_reverse !................................................................ !................................................................ ! subroutine reverse_sins(x,k) ! Reverses the order of the sine outputs of the FFT algorithm. ! Necessary because we are doing real-half complex DFTs two ! abreast instead of four abreast in this file. Order of the ! problem is 2**k. !................................................................ subroutine reverse_sins(x,k) integer, intent(in) :: k ! Order of the problem = 2**k real(wp), intent(inout) :: x(0:2**k-1) ! Transformed vector integer i ! Low index of pair to be swapped real(wp) temp ! Temporary holding variable for swap do i = 1, 2**(k-2)-1 temp = x(2**(k-1)+i) x(2**(k-1)+i) = x(2**k-i) x(2**k-i) = temp end do end subroutine reverse_sins !................................................................ end module auxiliary_routines ! *************************************************************** ! *************************************************************** ! module rf16_routines ! Along with module auxiliary_routines contains everything you need ! to do a radix-16 real-half complex decimation in time DFT with ! bit-reversed inputs and sine-reversed outputs. First you should ! call subroutine rf16_sizes to get the sizes of the arrays that are ! required for the bit-reversal helper table and the table of cosines. ! Then call reverse_table to create the bit-reversal helper table and ! cos_table to make up the table of cosines. Then rf16 can be used ! to compute real-half complex DFTs. ! *************************************************************** module rf16_routines use mykinds, wp=>dp use auxiliary_routines implicit none private wp private step_2, step_4, step_8, step_16 contains !................................................................ ! subroutine cos_table(table, k) ! Computes a table of fourth-, first-, and second-quadrant cosines ! required by subroutine rf16. Given that it is desired to compute ! a DFT of order 2**k, one should first call subroutine rf16_sizes to ! determine how big the cosine table has to be. Then cos_table can ! be invoked to create the table. !................................................................ subroutine cos_table(table, k) integer, intent(in) :: k ! Size of the problem is 2**k real(wp), intent(out) :: table(-(2**(k-2)-1):2**(k-1)-1) ! Table of cosines integer j ! Multiple of fundamental angle integer N ! 2**k real(wp) pi ! Key lime, I hope. N = 2**k pi = 4*atan(1.0_wp) do j = lbound(table,1),ubound(table,1) table(j) = cos(2*pi*j/N) end do end subroutine cos_table !................................................................ !................................................................ ! subroutine step_2(x,N_new) ! Computes the first stage of radix-16 real-half complex DFT on ! 2**k elements if mod(k,4) == 1. !................................................................ subroutine step_2(x,N_new) integer, intent(in) :: N_new ! 2**k/2 real(wp), intent(inout) :: x(0:2*N_new-1) ! Array to be ! transformed integer n1 ! Current elements are x(2*n1) ! and x(2*n1+1) real(wp) x00 ! Holds intermediate result do n1 = 0, N_new-1 x00 = x(2*n1)+x(2*n1+1) x(2*n1+1) = x(2*n1)-x(2*n1+1) x(2*n1) = x00 end do end subroutine step_2 !................................................................ !................................................................ ! subroutine step_4(x,N_new) ! Computes the first stage of radix-16 real-half complex DFT on ! 2**k elements if mod(k,4) == 2. !................................................................ subroutine step_4(x,N_new) integer, intent(in) :: N_new ! 2**k/4 real(wp), intent(inout) :: x(0:4*N_new-1) ! Array to be ! transformed integer n1 ! Current elements are x(4*n1:4*n1+3) real(wp) x00, x01 ! Hold temporary results do n1 = 0, N_new-1 x00 = x(4*n1)+x(4*n1+1) x01 = x(4*n1+2)+x(4*n1+3) x(4*n1+1) = x(4*n1)-x(4*n1+1) x(4*n1+3) = x(4*n1+2)-x(4*n1+3) x(4*n1) = x00+x01 x(4*n1+2) = x00-x01 end do end subroutine step_4 !................................................................ !................................................................ ! subroutine step_8(x,N_new) ! Computes the first stage of radix-16 real-half complex DFT on ! 2**k elements if mod(k,4) == 3. !................................................................ subroutine step_8(x,N_new) integer, intent(in) :: N_new ! 2**k/8 real(wp), intent(inout) :: x(0:8*N_new-1) ! Array to be ! transformed ! Define parameter for cos(pi/4) real(wp), parameter :: c4 = 0.70710678118654752440084436210485_wp integer n1 ! Current elements are x(8*n1:8*n1+7) integer i0 ! 8*n1 real(wp) x00, x01, x02, x03 ! Hold temporary results real(wp) x04, x05, x06, x07 ! Hold temporary results real(wp) y00, y01 ! Hold temporary results do n1 = 0, N_new-1 i0 = 8*n1 ! Compute RF(8) x00 = x(i0)+x(i0+1) x01 = x(i0+4)+x(i0+5) x02 = x(i0+2)+x(i0+3) x03 = x(i0+6)+x(i0+7) x04 = x(i0)-x(i0+1) x05 = x(i0+4)-x(i0+5) x06 = x(i0+2)-x(i0+3) x07 = x(i0+6)-x(i0+7) y00 = x00+x02 y01 = x01+x03 x(i0) = y00+y01 x(i0+4) = y00-y01 x(i0+2) = x00-x02 x(i0+6) = x01-x03 y00 = c4*(x05-x07) y01 = c4*(x05+x07) x(i0+1) = x04+y00 x(i0+3) = x04-y00 ! Sin components output in backwards order x(i0+5) = y01-x06 x(i0+7) = y01+x06 end do end subroutine step_8 !................................................................ !................................................................ ! subroutine step_16(x,N_new,K_old,cosines) ! The main workhorse of radix-16 real-half complex DFT on 2**k ! elements without scaling. !................................................................ subroutine step_16(x,N_new,K_old,cosines) integer, intent(in) :: N_new ! Order of DFT to be completed later integer, intent(in) :: K_old ! Order of DFT completed previously real(wp), intent(inout) :: x(0:16*N_new*K_old-1) ! Vector to be ! transformed (bit-reversed input) real(wp), intent(in) :: cosines(-(4*N_new*K_old-1):8*N_new*K_old-1) ! Table of constants ! Define parameters for cos(pi/4), cos(pi/8), and sin(pi/8) real(wp), parameter :: cos4 = 0.70710678118654752440084436210485_wp real(wp), parameter :: cos8 = 0.92387953251128675612818318939679_wp real(wp), parameter :: sin8 = 0.38268343236508977172845998403040_wp ! Define parameters for cos(pi/16), cos(3*pi/15), sin(pi/16), and sin(3*pi/16) real(wp), parameter :: c16 = 0.98078528040323044912618223613424_wp real(wp), parameter :: c316 = 0.83146961230254523707878837761791_wp real(wp), parameter :: s16 = 0.19509032201612826784828486847702_wp real(wp), parameter :: s316 = 0.55557023301960222474283081394853_wp integer n1 ! Time index for remaining transform (bit-reversed) integer k2, k3 ! Low- and high-frequency parameters integer i0, i2 ! Offsets for first and second transform sets integer K_old_2 ! K_old/2 integer sin_org ! Origin for sines in table ! Temporaries for first transform set real(wp) x00,x01,x02,x03,x04,x05,x06,x07 real(wp) x08,x09,x0a,x0b,x0c,x0d,x0e,x0f ! Temporaries for second transform set real(wp) x10,x11,x12,x13,x14,x15,x16,x17 real(wp) x18,x19,x1a,x1b,x1c,x1d,x1e,x1f ! Temporaries for first transform set real(wp) y00,y01,y02,y03,y04,y05,y06,y07 ! Temporaries for second transform set real(wp) y10,y11,y12,y13,y14,y15,y16,y17 ! Givens plane rotation matrix elements for first and second transform sets real(wp) C1, S1, C2, S2, C3, S3, C4, S4, C5, S5, C6, S6, C7, S7, C8, S8 real(wp) C9, S9, C10, S10, C11, S11, C12, S12, C13, S13, C14, S14, C15, S15 ! k2 = 0 case (DC terms) do n1 = 0, N_new-1 i0 = 16*K_old*n1 ! Compute RF(16) x00 = x(i0)+x(i0+K_old) x01 = x(i0+8*K_old)+x(i0+9*K_old) x02 = x(i0+4*K_old)+x(i0+5*K_old) x03 = x(i0+12*K_old)+x(i0+13*K_old) x04 = x(i0+2*K_old)+x(i0+3*K_old) x05 = x(i0+10*K_old)+x(i0+11*K_old) x06 = x(i0+6*K_old)+x(i0+7*K_old) x07 = x(i0+14*K_old)+x(i0+15*K_old) x08 = x(i0)-x(i0+K_old) x09 = x(i0+8*K_old)-x(i0+9*K_old) x0a = x(i0+4*K_old)-x(i0+5*K_old) x0b = x(i0+12*K_old)-x(i0+13*K_old) x0c = x(i0+2*K_old)-x(i0+3*K_old) x0d = x(i0+10*K_old)-x(i0+11*K_old) x0e = x(i0+6*K_old)-x(i0+7*K_old) x0f = x(i0+14*K_old)-x(i0+15*K_old) ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = y00+y02 y05 = y01+y03 x(i0) = y04+y05 x(i0+8*K_old) = y04-y05 x(i0+4*K_old) = y00-y02 x(i0+12*K_old) = y01-y03 y00 = x00-x04 y01 = x01-x05 y02 = x02-x06 y03 = x03-x07 y04 = cos4*(y01-y03) y05 = cos4*(y01+y03) x(i0+2*K_old) = y00+y04 x(i0+6*K_old) = y00-y04 x(i0+10*K_old) = y05-y02 x(i0+14*K_old) = y05+y02 ! End of internal RF(8) ! Start of internal OT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4*(x0a-x0e) y05 = cos4*(x0a+x0e) x0a = x08+y04 x0b = x08-y04 x0d = y05-x0c x0c = y05+x0c x08 = cos8*y00-sin8*y01 x09 = sin8*y00+cos8*y01 x0e = sin8*y02+cos8*y03 x0f = cos8*y02-sin8*y03 x(i0+K_old) = x0a+x08 x(i0+3*K_old) = x0b+x09 x(i0+5*K_old) = x0b-x09 x(i0+7*K_old) = x0a-x08 x(i0+9*K_old) = x0e-x0c x(i0+11*K_old) = x0f-x0d x(i0+13*K_old) = x0f+x0d x(i0+15*K_old) = x0e+x0c ! End of internal OT(8) end do if(K_old <= 1) return K_old_2 = ishft(K_old,-1) ! k2 = K_old/2 case (Nyquist frequency terms) do n1 = 0, N_new-1 i0 = 16*K_old*n1+K_old_2 ! Compute OT(16) x08 = x(i0+8*K_old)-x(i0+15*K_old) x09 = x(i0+10*K_old)-x(i0+13*K_old) x0a = x(i0+9*K_old)-x(i0+14*K_old) x0b = x(i0+11*K_old)-x(i0+12*K_old) x0c = x(i0+8*K_old)+x(i0+15*K_old) x0d = x(i0+10*K_old)+x(i0+13*K_old) x0e = x(i0+9*K_old)+x(i0+14*K_old) x0f = x(i0+11*K_old)+x(i0+12*K_old) y04 = cos4*(x09-x0b) y05 = cos4*(x09+x0b) y00 = x08+y04 y01 = x08-y04 y02 = y05+x0a y03 = y05-x0a x00 = c16*y00-s16*y02 x01 = c316*y01-s316*y03 x02 = s316*y01+c316*y03 x03 = s16*y00+c16*y02 y04 = cos4*(x0d-x0f) y05 = cos4*(x0d+x0f) y00 = x0c+y04 y01 = x0c-y04 y02 = y05+x0e y03 = y05-x0e x0c = s16*y00+c16*y02 x0d = s316*y01+c316*y03 x0e = c316*y01-s316*y03 x0f = c16*y00-s16*y02 ! Start of internal OT(8) x08 = x(i0+4*K_old)-x(i0+7*K_old) x09 = x(i0+5*K_old)-x(i0+6*K_old) y00 = x(i0+4*K_old)+x(i0+7*K_old) y01 = x(i0+5*K_old)+x(i0+6*K_old) x04 = cos8*x08-sin8*x09 x05 = sin8*x08+cos8*x09 x0a = sin8*y00+cos8*y01 y06 = cos8*y00-sin8*y01 y04 = cos4*(x(i0+2*K_old)-x(i0+3*K_old)) y05 = cos4*(x(i0+2*K_old)+x(i0+3*K_old)) y00 = x(i0)+y04 y01 = x(i0)-y04 y02 = y05+x(i0+K_old) y03 = y05-x(i0+K_old) x06 = y01-x05 x07 = y00-x04 x04 = y00+x04 x05 = y01+x05 x08 = x0a+y02 x09 = y06+y03 x0b = x0a-y02 x0a = y06-y03 ! End of internal OT(8) x(i0) = x04+x00 x(i0+K_old) = x05+x01 x(i0+2*K_old) = x06+x02 x(i0+3*K_Old) = x07+x03 x(i0+4*K_Old) = x07-x03 x(i0+5*K_old) = x06-x02 x(i0+6*K_old) = x05-x01 x(i0+7*K_old) = x04-x00 x(i0+8*K_old) = x0c-x08 x(i0+9*K_old) = x0d-x09 x(i0+10*K_old) = x0e-x0a x(i0+11*K_old) = x0f-x0b x(i0+12*K_old) = x0f+x0b x(i0+13*K_old) = x0e+x0a x(i0+14*K_old) = x0d+x09 x(i0+15*K_old) = x0c+x08 end do if(K_old <= 2) return sin_org = 4*N_new*K_old ! k2 = general case do k2 = 1, K_old/2-1 k3 = K_old-k2 ! Look up phase factors C1 = cosines(N_new*k2) S1 = cosines(sin_org-N_new*k2) C2 = cosines(2*N_new*k2) S2 = cosines(sin_org-2*N_new*k2) C3 = cosines(3*N_new*k2) S3 = cosines(sin_org-3*N_new*k2) C4 = cosines(4*N_new*k2) S4 = cosines(sin_org-4*N_new*k2) C5 = cosines(5*N_new*k2) S5 = cosines(sin_org-5*N_new*k2) C6 = cosines(6*N_new*k2) S6 = cosines(sin_org-6*N_new*k2) C7 = cosines(7*N_new*k2) S7 = cosines(sin_org-7*N_new*k2) C8 = cosines(8*N_new*k2) S8 = cosines(sin_org-8*N_new*k2) C9 = cosines(9*N_new*k2) S9 = cosines(sin_org-9*N_new*k2) C10 = cosines(10*N_new*k2) S10 = cosines(sin_org-10*N_new*k2) C11 = cosines(11*N_new*k2) S11 = cosines(sin_org-11*N_new*k2) C12 = cosines(12*N_new*k2) S12 = cosines(sin_org-12*N_new*k2) C13 = cosines(13*N_new*k2) S13 = cosines(sin_org-13*N_new*k2) C14 = cosines(14*N_new*k2) S14 = cosines(sin_org-14*N_new*k2) C15 = cosines(15*N_new*k2) S15 = cosines(sin_org-15*N_new*k2) do n1 = 0, N_new-1 i0 = 16*K_old*n1+k2 i2 = 16*K_old*n1+k3 ! Apply phase factors, mixed with first addition in RF(16) y00 = C8*x(i0+K_old)-S8*x(i2+K_old) y10 = S8*x(i0+K_old)+C8*x(i2+K_old) x00 = x(i0)+y00 x08 = x(i0)-y00 x10 = x(i2)+y10 x18 = x(i2)-y10 y00 = C1*x(i0+8*K_old)-S1*x(i2+8*K_old) y10 = S1*x(i0+8*K_old)+C1*x(i2+8*K_old) y01 = C9*x(i0+9*K_old)-S9*x(i2+9*K_old) y11 = S9*x(i0+9*K_old)+C9*x(i2+9*K_old) x01 = y00+y01 x09 = y00-y01 x11 = y10+y11 x19 = y10-y11 y00 = C2*x(i0+4*K_old)-S2*x(i2+4*K_old) y10 = S2*x(i0+4*K_old)+C2*x(i2+4*K_old) y01 = C10*x(i0+5*K_old)-S10*x(i2+5*K_old) y11 = S10*x(i0+5*K_old)+C10*x(i2+5*K_old) x02 = y00+y01 x0a = y00-y01 x12 = y10+y11 x1a = y10-y11 y00 = C3*x(i0+12*K_old)-S3*x(i2+12*K_old) y10 = S3*x(i0+12*K_old)+C3*x(i2+12*K_old) y01 = C11*x(i0+13*K_old)-S11*x(i2+13*K_old) y11 = S11*x(i0+13*K_old)+C11*x(i2+13*K_old) x03 = y00+y01 x0b = y00-y01 x13 = y10+y11 x1b = y10-y11 y00 = C4*x(i0+2*K_old)-S4*x(i2+2*K_old) y10 = S4*x(i0+2*K_old)+C4*x(i2+2*K_old) y01 = C12*x(i0+3*K_old)-S12*x(i2+3*K_old) y11 = S12*x(i0+3*K_old)+C12*x(i2+3*K_old) x04 = y00+y01 x0c = y00-y01 x14 = y10+y11 x1c = y10-y11 y00 = C5*x(i0+10*K_old)-S5*x(i2+10*K_old) y10 = S5*x(i0+10*K_old)+C5*x(i2+10*K_old) y01 = C13*x(i0+11*K_old)-S13*x(i2+11*K_old) y11 = S13*x(i0+11*K_old)+C13*x(i2+11*K_old) x05 = y00+y01 x0d = y00-y01 x15 = y10+y11 x1d = y10-y11 y00 = C6*x(i0+6*K_old)-S6*x(i2+6*K_old) y10 = S6*x(i0+6*K_old)+C6*x(i2+6*K_old) y01 = C14*x(i0+7*K_old)-S14*x(i2+7*K_old) y11 = S14*x(i0+7*K_old)+C14*x(i2+7*K_old) x06 = y00+y01 x0e = y00-y01 x16 = y10+y11 x1e = y10-y11 y00 = C7*x(i0+14*K_old)-S7*x(i2+14*K_old) y10 = S7*x(i0+14*K_old)+C7*x(i2+14*K_old) y01 = C15*x(i0+15*K_old)-S15*x(i2+15*K_old) y11 = S15*x(i0+15*K_old)+C15*x(i2+15*K_old) x07 = y00+y01 x0f = y00-y01 x17 = y10+y11 x1f = y10-y11 ! Complete RF(16), mixed with trig identity additions ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = x00-x04 y05 = x01-x05 y06 = x02-x06 y07 = x03-x07 x01 = y00-y02 x03 = y01-y03 y00 = y00+y02 y01 = y01+y03 x00 = y00+y01 x02 = y00-y01 y00 = cos4*(y05-y07) y01 = cos4*(y05+y07) x04 = y04+y00 x05 = y04-y00 x06 = y01+y06 x07 = y01-y06 ! End of internal RF(8) ! Start of internal OT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4*(x0a-x0e) y05 = cos4*(x0a+x0e) x0b = x08-y04 y04 = x08+y04 x0d = y05-x0c y05 = y05+x0c x08 = cos8*y00-sin8*y01 y01 = sin8*y00+cos8*y01 x0e = sin8*y02+cos8*y03 y02 = cos8*y02-sin8*y03 x09 = x0b+y01 x0a = x0b-y01 x0b = y04-x08 x08 = y04+x08 x0c = x0e+y05 x0f = x0e-y05 x0e = y02-x0d x0d = y02+x0d ! End of internal OT(8) ! Start of internal RF(8) y10 = x10+x14 y11 = x11+x15 y12 = x12+x16 y13 = x13+x17 y14 = x10-x14 y15 = x11-x15 y16 = x12-x16 y17 = x13-x17 x11 = y10-y12 x13 = y11-y13 y10 = y10+y12 y11 = y11+y13 x10 = y10+y11 x12 = y11-y10 ! Because we need its additive inverse y10 = cos4*(y15-y17) y11 = cos4*(y15+y17) x14 = y14+y10 x15 = y14-y10 x16 = y11+y16 x17 = y11-y16 ! End of internal RF(8) ! Start of internal OT(8) y10 = x19-x1f y11 = x1d-x1b y12 = x19+x1f y13 = x1d+x1b y14 = cos4*(x1a-x1e) y15 = cos4*(x1a+x1e) x1b = x18-y14 y14 = x18+y14 x1d = y15-x1c y15 = y15+x1c x18 = cos8*y10-sin8*y11 y11 = sin8*y10+cos8*y11 x1e = sin8*y12+cos8*y13 y12 = cos8*y12-sin8*y13 x19 = x1b+y11 x1a = x1b-y11 x1b = y14-x18 x18 = y14+x18 x1c = x1e+y15 x1f = x1e-y15 x1e = y12-x1d x1d = y12+x1d ! End of internal OT(8) x(i2+7*K_old) = x02 x(i0+K_old) = x08-x1c x(i0+2*K_old) = x04-x16 x(i0+3*K_old) = x09-x1d x(i0+4*K_old) = x01-x13 x(i0+5*K_old) = x0a-x1e x(i0+6*K_old) = x05-x17 x(i0+7*K_old) = x0b-x1f x(i2) = x08+x1c x(i2+K_old) = x04+x16 x(i2+2*K_old) = x09+x1d x(i2+3*K_old) = x01+x13 x(i2+4*K_old) = x0a+x1e x(i2+5*K_old) = x05+x17 x(i2+6*K_old) = x0b+x1f x(i0) = x00 x(i2+15*K_old) = x10 x(i0+15*K_old) = x0c-x18 x(i2+14*K_old) = x0c+x18 x(i0+14*K_old) = x06-x14 x(i2+13*K_old) = x06+x14 x(i0+13*K_old) = x0d-x19 x(i2+12*K_old) = x0d+x19 x(i0+12*K_old) = x03-x11 x(i2+11*K_old) = x03+x11 x(i0+11*K_old) = x0e-x1a x(i2+10*K_old) = x0e+x1a x(i0+10*K_old) = x07-x15 x(i2+9*K_old) = x07+x15 x(i0+9*K_old) = x0f-x1b x(i2+8*K_old) = x0f+x1b x(i0+8*K_old) = x12 end do end do end subroutine step_16 !................................................................ ! subroutine rf16(x,k,cosines,table) ! Driver subroutine for radix-16 DIT real-half complex DFT on ! 2**k elements. We need to first invoke reverse_table and ! cosine_table to get the tables of constants, cosines and table, ! that will be required. Before that, invoke rf16_sizes to get the ! sizes of these tables !................................................................ subroutine rf16(x,k,cosines,table) integer, intent(in) :: k ! Order of the problem is 2**k real(wp), intent(inout) :: x(0:2**k-1) ! Vector to be ! transformed real(wp), intent(in) :: cosines(-(2**(k-2)-1):2**(k-1)-1) ! Trig function ! table integer(ik2), intent(in) :: table(0:2**(k/2)-1) ! Bit-reversal ! table integer N_new ! Order of transform yet to be performed integer K_old ! Order of transform completed ! Put inputs in bit-reversed order call bit_reverse(x,k,table) ! Handle non-power of 16 step if necessary if(mod(k,4) == 1) then N_new = 2**(k-1) call step_2(x,N_new) K_old = 2 else if(mod(k,4) == 2) then N_new = 2**(k-2) call step_4(x,N_new) K_old = 4 else if(mod(k,4) == 3) then N_new = 2**(k-3) call step_8(x,N_new) K_old = 8 else N_new = 2**k K_old = 1 end if ! Do power of 16 steps do while(N_new > 1) N_new = N_new/16 call step_16(x,N_new,K_old,cosines) K_old = K_old*16 end do ! Put sines in normal order call reverse_sins(x,k) end subroutine rf16 !................................................................ !................................................................ ! subroutine rf16_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) ! Compute sizes of data tables required by rf16 !................................................................ subroutine rf16_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) integer, intent(in) :: k ! Order of the problem = 2**k integer, intent(out) :: cosine_lbound ! Lower bound of cosine table integer, intent(out) :: cosine_ubound ! Upper bound of cosine table integer, intent(out) :: bitrev_size ! Size of bit-reversal ! table if(k < 0) then write(*,'(a)') ' Error: negative exponent in rf16_sizes' stop end if cosine_lbound = -(2**(k-2)-1) cosine_ubound = 2**(k-1)-1 bitrev_size = 2**(k/2) end subroutine rf16_sizes !................................................................ end module rf16_routines ! *************************************************************** ! *************************************************************** ! module rfs16_routines ! Along with module auxiliary_routines contains everything you need ! to do a radix-16 real-half complex decimation in time DFT with ! bit-reversed inputs, sine-reversed outputs, and intermediate scaling. ! First you should call subroutine rfs16_sizes to get the sizes of the ! arrays that are required for the bit-reversal helper table and the ! tables of cosines. Then call reverse_table to create the bit-reversal ! helper table and cos_table_s to make up the tables of cosines. Then ! rfs16 can be used to compute real-half complex DFTs. ! *************************************************************** module rfs16_routines use mykinds, wp=>dp use auxiliary_routines implicit none private wp private step_2, step_4, step_8, step_16 contains !................................................................ ! subroutine cos_table_s(table, table_s, k) ! Computes tables of fourth-, first-, and second-quadrant cosines ! required by subroutine rfs16. Given that it is desired to compute ! a DFT of order 2**k, one should first call subroutine rfs16_sizes to ! determine how big the cosine tables have to be. Then cos_table_s ! can be invoked to create the tables. !................................................................ subroutine cos_table_s(table, table_s, k) integer, intent(in) :: k ! Size of the problem is 2**k real(wp), intent(out) :: table(-(2**(k-2)-1):2**(k-1)-1) ! Table of cosines real(wp), intent(out) :: table_s(-(2**(k-2)-1):2**(k-1)-1) ! Scaled table of cosines integer j ! Multiple of fundamental angle integer N ! 2**k real(wp) pi ! Key lime, I hope. N = 2**k pi = 4*atan(1.0_wp) do j = lbound(table,1),ubound(table,1) table(j) = cos(2*pi*j/N) end do do j = lbound(table,1),ubound(table,1) table_s(j) = cos(2*pi*j/N)*cos(2*pi/16) end do end subroutine cos_table_s !................................................................ !................................................................ ! subroutine step_2(x,N_new) ! Computes the first stage of radix-16 real-half complex DFT on ! 2**k elements with intermediate scaling if mod(k,4) == 1. !................................................................ subroutine step_2(x,N_new) integer, intent(in) :: N_new ! 2**k/2 real(wp), intent(inout) :: x(0:2*N_new-1) ! Array to be ! transformed integer n1 ! Current elements are x(2*n1) ! and x(2*n1+1) real(wp) x00 ! Holds intermediate result do n1 = 0, N_new-1 x00 = x(2*n1)+x(2*n1+1) x(2*n1+1) = x(2*n1)-x(2*n1+1) x(2*n1) = x00 end do end subroutine step_2 !................................................................ !................................................................ ! subroutine step_4(x,N_new) ! Computes the first stage of radix-16 real-half complex DFT on ! 2**k elements with intermediate scaling if mod(k,4) == 2. !................................................................ subroutine step_4(x,N_new) integer, intent(in) :: N_new ! 2**k/4 real(wp), intent(inout) :: x(0:4*N_new-1) ! Array to be ! transformed integer n1 ! Current elements are x(4*n1:4*n1+3) real(wp) x00, x01 ! Hold temporary results do n1 = 0, N_new-1 x00 = x(4*n1)+x(4*n1+1) x01 = x(4*n1+2)+x(4*n1+3) x(4*n1+1) = x(4*n1)-x(4*n1+1) x(4*n1+3) = x(4*n1+2)-x(4*n1+3) x(4*n1) = x00+x01 x(4*n1+2) = x00-x01 end do end subroutine step_4 !................................................................ !................................................................ ! subroutine step_8(x,N_new) ! Computes the first stage of radix-16 real-half complex DFT on ! 2**k elements with intermediate scaling if mod(k,4) == 3. !................................................................ subroutine step_8(x,N_new) integer, intent(in) :: N_new ! 2**k/8 real(wp), intent(inout) :: x(0:8*N_new-1) ! Array to be ! transformed ! Define parameter for cos(pi/4) real(wp), parameter :: c4 = 0.70710678118654752440084436210485_wp integer n1 ! Current elements are x(8*n1:8*n1+7) integer i0 ! 8*n1 real(wp) x00, x01, x02, x03 ! Hold temporary results real(wp) x04, x05, x06, x07 ! Hold temporary results real(wp) y00, y01 ! Hold temporary results do n1 = 0, N_new-1 i0 = 8*n1 ! Compute RF(8) x00 = x(i0)+x(i0+1) x01 = x(i0+4)+x(i0+5) x02 = x(i0+2)+x(i0+3) x03 = x(i0+6)+x(i0+7) x04 = x(i0)-x(i0+1) x05 = x(i0+4)-x(i0+5) x06 = x(i0+2)-x(i0+3) x07 = x(i0+6)-x(i0+7) y00 = x00+x02 y01 = x01+x03 x(i0) = y00+y01 x(i0+4) = y00-y01 x(i0+2) = x00-x02 x(i0+6) = x01-x03 y00 = c4*(x05-x07) y01 = c4*(x05+x07) x(i0+1) = x04+y00 x(i0+3) = x04-y00 ! Sin components output in backwards order x(i0+5) = y01-x06 x(i0+7) = y01+x06 end do end subroutine step_8 !................................................................ !................................................................ ! subroutine step_16(x,N_new,K_old,cosines,cosines_s) ! The main workhorse of radix-16 real-half complex DFT on 2**k ! elements with intermediate scaling. !................................................................ subroutine step_16(x,N_new,K_old,cosines,cosines_s) integer, intent(in) :: N_new ! Order of DFT to be completed later integer, intent(in) :: K_old ! Order of DFT completed previously real(wp), intent(inout) :: x(0:16*N_new*K_old-1) ! Vector to be ! transformed (bit-reversed input) ! Table of constants (unscaled) real(wp), intent(in) :: cosines(-(4*N_new*K_old-1):8*N_new*K_old-1) ! Table of constants (scaled) real(wp), intent(in) :: cosines_s(-(4*N_new*K_old-1):8*N_new*K_old-1) ! Define parameters for cos(pi/4), cos(pi/8), and sin(pi/8) real(wp), parameter :: cos4 = 0.70710678118654752440084436210485_wp real(wp), parameter :: cos8 = 0.92387953251128675612818318939679_wp real(wp), parameter :: sin8 = 0.38268343236508977172845998403040_wp ! Define parameters for cos(pi/16), cos(3*pi/15), sin(pi/16), and sin(3*pi/16) real(wp), parameter :: c16 = 0.98078528040323044912618223613424_wp real(wp), parameter :: c316 = 0.83146961230254523707878837761791_wp real(wp), parameter :: s16 = 0.19509032201612826784828486847702_wp real(wp), parameter :: s316 = 0.55557023301960222474283081394853_wp ! Define parameter for cos(pi/4)/cos(pi/8) real(wp), parameter :: cos4_s = 0.7653668647301795434569199680608_wp ! Define parameter for 1/cos(pi/8) real(wp), parameter :: sec8 = 1.0823922002923939687994464107328_wp ! Define parameter for sin(pi/8)/cos(pi/8) real(wp), parameter :: tan8 = 0.4142135623730950488016887242097_wp ! Define parameter for cos(pi/16)/cos(pi/8) real(wp), parameter :: c16_s = 1.0615943376700451936138549313266_wp ! Define parameter for sin(pi/16)/cos(pi/8) real(wp), parameter :: s16_s = 0.21116424290278874484715208695518_wp ! Define parameter for cos(3*pi/16)/cos(pi/8) real(wp), parameter :: c316_s = 0.89997622313641570463850954094189_wp ! Define parameter for sin(3*pi/16)/cos(pi/8) real(wp), parameter :: s316_s = 0.60134488693504528054372182390922_wp integer n1 ! Time index for remaining transform (bit-reversed) integer n16 ! Like n1, but in steps of 16 integer k2, k3 ! Low- and high-frequency parameters integer i0, i2 ! Offsets for first and second transform sets integer K_old_2 ! K_old/2 integer sin_org ! Origin for sines in tables ! Temporaries for first transform set real(wp) x00,x01,x02,x03,x04,x05,x06,x07 real(wp) x08,x09,x0a,x0b,x0c,x0d,x0e,x0f ! Temporaries for second transform set real(wp) x10,x11,x12,x13,x14,x15,x16,x17 real(wp) x18,x19,x1a,x1b,x1c,x1d,x1e,x1f ! Temporaries for first transform set real(wp) y00,y01,y02,y03,y04,y05,y06,y07 ! Temporaries for second transform set real(wp) y10,y11,y12,y13,y14,y15,y16,y17 ! Givens plane rotation matrix elements for first and second transform sets real(wp) C1, S1, C2, S2, C3, S3, C4, S4, C5, S5, C6, S6, C7, S7, C8, S8 real(wp) C9, S9, C10, S10, C11, S11, C12, S12, C13, S13, C14, S14, C15, S15 integer k_test_min ! Minimum value of scaled range integer k_test_max ! Maximum value of scaled range integer K_old_8 ! K_old/8 integer k_test ! mod(K_old, K_old_8) ! k2 = 0 case (DC terms) do n16 = 0, N_new-1, 16 n1 = n16 i0 = 16*K_old*n1 ! Compute RF(16) x00 = x(i0)+x(i0+K_old) x01 = x(i0+8*K_old)+x(i0+9*K_old) x02 = x(i0+4*K_old)+x(i0+5*K_old) x03 = x(i0+12*K_old)+x(i0+13*K_old) x04 = x(i0+2*K_old)+x(i0+3*K_old) x05 = x(i0+10*K_old)+x(i0+11*K_old) x06 = x(i0+6*K_old)+x(i0+7*K_old) x07 = x(i0+14*K_old)+x(i0+15*K_old) x08 = x(i0)-x(i0+K_old) x09 = x(i0+8*K_old)-x(i0+9*K_old) x0a = x(i0+4*K_old)-x(i0+5*K_old) x0b = x(i0+12*K_old)-x(i0+13*K_old) x0c = x(i0+2*K_old)-x(i0+3*K_old) x0d = x(i0+10*K_old)-x(i0+11*K_old) x0e = x(i0+6*K_old)-x(i0+7*K_old) x0f = x(i0+14*K_old)-x(i0+15*K_old) ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = y00+y02 y05 = y01+y03 x(i0) = y04+y05 x(i0+8*K_old) = y04-y05 x(i0+4*K_old) = y00-y02 x(i0+12*K_old) = y01-y03 y00 = x00-x04 y01 = x01-x05 y02 = x02-x06 y03 = x03-x07 y04 = cos4*(y01-y03) y05 = cos4*(y01+y03) x(i0+2*K_old) = y00+y04 x(i0+6*K_old) = y00-y04 x(i0+10*K_old) = y05-y02 x(i0+14*K_old) = y05+y02 ! End of internal RF(8) ! Start of internal OT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4*(x0a-x0e) y05 = cos4*(x0a+x0e) x0a = x08+y04 x0b = x08-y04 x0d = y05-x0c x0c = y05+x0c x08 = cos8*y00-sin8*y01 x09 = sin8*y00+cos8*y01 x0e = sin8*y02+cos8*y03 x0f = cos8*y02-sin8*y03 x(i0+K_old) = x0a+x08 x(i0+3*K_old) = x0b+x09 x(i0+5*K_old) = x0b-x09 x(i0+7*K_old) = x0a-x08 x(i0+9*K_old) = x0e-x0c x(i0+11*K_old) = x0f-x0d x(i0+13*K_old) = x0f+x0d x(i0+15*K_old) = x0e+x0c ! End of internal OT(8) do n1 = n16+1, min(n16+15,N_new-1) i0 = 16*K_old*n1 ! Compute RF(16) with SOT(8) x00 = x(i0)+x(i0+K_old) x01 = x(i0+8*K_old)+x(i0+9*K_old) x02 = x(i0+4*K_old)+x(i0+5*K_old) x03 = x(i0+12*K_old)+x(i0+13*K_old) x04 = x(i0+2*K_old)+x(i0+3*K_old) x05 = x(i0+10*K_old)+x(i0+11*K_old) x06 = x(i0+6*K_old)+x(i0+7*K_old) x07 = x(i0+14*K_old)+x(i0+15*K_old) ! Need scaling for internal SOT(8) x08 = sec8*(x(i0)-x(i0+K_old)) x09 = x(i0+8*K_old)-x(i0+9*K_old) x0a = x(i0+4*K_old)-x(i0+5*K_old) x0b = x(i0+12*K_old)-x(i0+13*K_old) ! Need scaling for internal SOT(8) x0c = sec8*(x(i0+2*K_old)-x(i0+3*K_old)) x0d = x(i0+10*K_old)-x(i0+11*K_old) x0e = x(i0+6*K_old)-x(i0+7*K_old) x0f = x(i0+14*K_old)-x(i0+15*K_old) ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = y00+y02 y05 = y01+y03 x(i0) = y04+y05 x(i0+8*K_old) = y04-y05 x(i0+4*K_old) = y00-y02 x(i0+12*K_old) = y01-y03 y00 = x00-x04 y01 = x01-x05 y02 = x02-x06 y03 = x03-x07 y04 = cos4*(y01-y03) y05 = cos4*(y01+y03) x(i0+2*K_old) = y00+y04 x(i0+6*K_old) = y00-y04 x(i0+10*K_old) = y05-y02 x(i0+14*K_old) = y05+y02 ! End of internal RF(8) ! Start of internal SOT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4_s*(x0a-x0e) y05 = cos4_s*(x0a+x0e) x0a = x08+y04 x0b = x08-y04 x0d = y05-x0c x0c = y05+x0c x08 = y00-tan8*y01 x09 = tan8*y00+y01 x0e = tan8*y02+y03 x0f = y02-tan8*y03 x(i0+K_old) = x0a+x08 x(i0+3*K_old) = x0b+x09 x(i0+5*K_old) = x0b-x09 x(i0+7*K_old) = x0a-x08 x(i0+9*K_old) = x0e-x0c x(i0+11*K_old) = x0f-x0d x(i0+13*K_old) = x0f+x0d x(i0+15*K_old) = x0e+x0c ! End of internal OT(8) end do end do if(K_old <= 1) return K_old_2 = ishft(K_old,-1) ! k2 = K_old/2 case (Nyquist frequency terms) do n16 = 0, N_new-1, 16 n1 = n16 i0 = 16*K_old*n1+K_old_2 ! Compute OT(16) x08 = x(i0+8*K_old)-x(i0+15*K_old) x09 = x(i0+10*K_old)-x(i0+13*K_old) x0a = x(i0+9*K_old)-x(i0+14*K_old) x0b = x(i0+11*K_old)-x(i0+12*K_old) x0c = x(i0+8*K_old)+x(i0+15*K_old) x0d = x(i0+10*K_old)+x(i0+13*K_old) x0e = x(i0+9*K_old)+x(i0+14*K_old) x0f = x(i0+11*K_old)+x(i0+12*K_old) y04 = cos4*(x09-x0b) y05 = cos4*(x09+x0b) y00 = x08+y04 y01 = x08-y04 y02 = y05+x0a y03 = y05-x0a x00 = c16*y00-s16*y02 x01 = c316*y01-s316*y03 x02 = s316*y01+c316*y03 x03 = s16*y00+c16*y02 y04 = cos4*(x0d-x0f) y05 = cos4*(x0d+x0f) y00 = x0c+y04 y01 = x0c-y04 y02 = y05+x0e y03 = y05-x0e x0c = s16*y00+c16*y02 x0d = s316*y01+c316*y03 x0e = c316*y01-s316*y03 x0f = c16*y00-s16*y02 ! Start of internal OT(8) x08 = x(i0+4*K_old)-x(i0+7*K_old) x09 = x(i0+5*K_old)-x(i0+6*K_old) y00 = x(i0+4*K_old)+x(i0+7*K_old) y01 = x(i0+5*K_old)+x(i0+6*K_old) x04 = cos8*x08-sin8*x09 x05 = sin8*x08+cos8*x09 x0a = sin8*y00+cos8*y01 y06 = cos8*y00-sin8*y01 y04 = cos4*(x(i0+2*K_old)-x(i0+3*K_old)) y05 = cos4*(x(i0+2*K_old)+x(i0+3*K_old)) y00 = x(i0)+y04 y01 = x(i0)-y04 y02 = y05+x(i0+K_old) y03 = y05-x(i0+K_old) x06 = y01-x05 x07 = y00-x04 x04 = y00+x04 x05 = y01+x05 x08 = x0a+y02 x09 = y06+y03 x0b = x0a-y02 x0a = y06-y03 ! End of internal OT(8) x(i0) = x04+x00 x(i0+K_old) = x05+x01 x(i0+2*K_old) = x06+x02 x(i0+3*K_Old) = x07+x03 x(i0+4*K_Old) = x07-x03 x(i0+5*K_old) = x06-x02 x(i0+6*K_old) = x05-x01 x(i0+7*K_old) = x04-x00 x(i0+8*K_old) = x0c-x08 x(i0+9*K_old) = x0d-x09 x(i0+10*K_old) = x0e-x0a x(i0+11*K_old) = x0f-x0b x(i0+12*K_old) = x0f+x0b x(i0+13*K_old) = x0e+x0a x(i0+14*K_old) = x0d+x09 x(i0+15*K_old) = x0c+x08 do n1 = n16+1, min(n16+15,N_new-1) i0 = 16*K_old*n1+K_old_2 ! Compute OT(16) with SOT(8) x08 = x(i0+8*K_old)-x(i0+15*K_old) x09 = x(i0+10*K_old)-x(i0+13*K_old) x0a = x(i0+9*K_old)-x(i0+14*K_old) x0b = x(i0+11*K_old)-x(i0+12*K_old) x0c = x(i0+8*K_old)+x(i0+15*K_old) x0d = x(i0+10*K_old)+x(i0+13*K_old) x0e = x(i0+9*K_old)+x(i0+14*K_old) x0f = x(i0+11*K_old)+x(i0+12*K_old) y04 = cos4*(x09-x0b) y05 = cos4*(x09+x0b) y00 = x08+y04 y01 = x08-y04 y02 = y05+x0a y03 = y05-x0a x00 = c16_s*y00-s16_s*y02 x01 = c316_s*y01-s316_s*y03 x02 = s316_s*y01+c316_s*y03 x03 = s16_s*y00+c16_s*y02 y04 = cos4*(x0d-x0f) y05 = cos4*(x0d+x0f) y00 = x0c+y04 y01 = x0c-y04 y02 = y05+x0e y03 = y05-x0e x0c = s16_s*y00+c16_s*y02 x0d = s316_s*y01+c316_s*y03 x0e = c316_s*y01-s316_s*y03 x0f = c16_s*y00-s16_s*y02 ! Start of internal SOT(8) x08 = x(i0+4*K_old)-x(i0+7*K_old) x09 = x(i0+5*K_old)-x(i0+6*K_old) y00 = x(i0+4*K_old)+x(i0+7*K_old) y01 = x(i0+5*K_old)+x(i0+6*K_old) x04 = x08-tan8*x09 x05 = tan8*x08+x09 x0a = tan8*y00+y01 y06 = y00-tan8*y01 y04 = cos4_s*(x(i0+2*K_old)-x(i0+3*K_old)) y05 = cos4_s*(x(i0+2*K_old)+x(i0+3*K_old)) x0b = sec8*x(i0) y00 = x0b+y04 y01 = x0b-y04 y07 = sec8*x(i0+K_old) y02 = y05+y07 y03 = y05-y07 x06 = y01-x05 x07 = y00-x04 x04 = y00+x04 x05 = y01+x05 x08 = x0a+y02 x09 = y06+y03 x0b = x0a-y02 x0a = y06-y03 ! End of internal SOT(8) x(i0) = x04+x00 x(i0+K_old) = x05+x01 x(i0+2*K_old) = x06+x02 x(i0+3*K_Old) = x07+x03 x(i0+4*K_Old) = x07-x03 x(i0+5*K_old) = x06-x02 x(i0+6*K_old) = x05-x01 x(i0+7*K_old) = x04-x00 x(i0+8*K_old) = x0c-x08 x(i0+9*K_old) = x0d-x09 x(i0+10*K_old) = x0e-x0a x(i0+11*K_old) = x0f-x0b x(i0+12*K_old) = x0f+x0b x(i0+13*K_old) = x0e+x0a x(i0+14*K_old) = x0d+x09 x(i0+15*K_old) = x0c+x08 end do end do if(K_old <= 2) return sin_org = 4*N_new*K_old ! k2 = general case k_test_min = max(1,K_old/32) k_test_max = 3*k_test_min K_old_8 = K_old/8 k_test = 0 do k2 = 1, K_old/2-1 k3 = K_old-k2 ! Look up phase factors ! Decide whether scale factors are required k_test = k_test+1 if(k_test >= K_old_8) k_test = 0 if(k_test < k_test_min .OR. k_test_max < k_test) then ! Load unscaled phase factors C1 = cosines(N_new*k2) S1 = cosines(sin_org-N_new*k2) C2 = cosines(2*N_new*k2) S2 = cosines(sin_org-2*N_new*k2) C3 = cosines(3*N_new*k2) S3 = cosines(sin_org-3*N_new*k2) C4 = cosines(4*N_new*k2) S4 = cosines(sin_org-4*N_new*k2) C5 = cosines(5*N_new*k2) S5 = cosines(sin_org-5*N_new*k2) C6 = cosines(6*N_new*k2) S6 = cosines(sin_org-6*N_new*k2) C7 = cosines(7*N_new*k2) S7 = cosines(sin_org-7*N_new*k2) C8 = cosines(8*N_new*k2) S8 = cosines(sin_org-8*N_new*k2) C9 = cosines(9*N_new*k2) S9 = cosines(sin_org-9*N_new*k2) C10 = cosines(10*N_new*k2) S10 = cosines(sin_org-10*N_new*k2) C11 = cosines(11*N_new*k2) S11 = cosines(sin_org-11*N_new*k2) C12 = cosines(12*N_new*k2) S12 = cosines(sin_org-12*N_new*k2) C13 = cosines(13*N_new*k2) S13 = cosines(sin_org-13*N_new*k2) C14 = cosines(14*N_new*k2) S14 = cosines(sin_org-14*N_new*k2) C15 = cosines(15*N_new*k2) S15 = cosines(sin_org-15*N_new*k2) else ! Load scaled phase factors C1 = cosines_s(N_new*k2) S1 = cosines_s(sin_org-N_new*k2) C2 = cosines_s(2*N_new*k2) S2 = cosines_s(sin_org-2*N_new*k2) C3 = cosines_s(3*N_new*k2) S3 = cosines_s(sin_org-3*N_new*k2) C4 = cosines_s(4*N_new*k2) S4 = cosines_s(sin_org-4*N_new*k2) C5 = cosines_s(5*N_new*k2) S5 = cosines_s(sin_org-5*N_new*k2) C6 = cosines_s(6*N_new*k2) S6 = cosines_s(sin_org-6*N_new*k2) C7 = cosines_s(7*N_new*k2) S7 = cosines_s(sin_org-7*N_new*k2) C8 = cosines_s(8*N_new*k2) S8 = cosines_s(sin_org-8*N_new*k2) C9 = cosines_s(9*N_new*k2) S9 = cosines_s(sin_org-9*N_new*k2) C10 = cosines_s(10*N_new*k2) S10 = cosines_s(sin_org-10*N_new*k2) C11 = cosines_s(11*N_new*k2) S11 = cosines_s(sin_org-11*N_new*k2) C12 = cosines_s(12*N_new*k2) S12 = cosines_s(sin_org-12*N_new*k2) C13 = cosines_s(13*N_new*k2) S13 = cosines_s(sin_org-13*N_new*k2) C14 = cosines_s(14*N_new*k2) S14 = cosines_s(sin_org-14*N_new*k2) C15 = cosines_s(15*N_new*k2) S15 = cosines_s(sin_org-15*N_new*k2) end if do n16 = 0, N_new-1, 16 n1 = n16 i0 = 16*K_old*n1+k2 i2 = 16*K_old*n1+k3 ! Apply phase factors, mixed with first addition in RF(16) y00 = C8*x(i0+K_old)-S8*x(i2+K_old) y10 = S8*x(i0+K_old)+C8*x(i2+K_old) x00 = x(i0)+y00 x08 = x(i0)-y00 x10 = x(i2)+y10 x18 = x(i2)-y10 y00 = C1*x(i0+8*K_old)-S1*x(i2+8*K_old) y10 = S1*x(i0+8*K_old)+C1*x(i2+8*K_old) y01 = C9*x(i0+9*K_old)-S9*x(i2+9*K_old) y11 = S9*x(i0+9*K_old)+C9*x(i2+9*K_old) x01 = y00+y01 x09 = y00-y01 x11 = y10+y11 x19 = y10-y11 y00 = C2*x(i0+4*K_old)-S2*x(i2+4*K_old) y10 = S2*x(i0+4*K_old)+C2*x(i2+4*K_old) y01 = C10*x(i0+5*K_old)-S10*x(i2+5*K_old) y11 = S10*x(i0+5*K_old)+C10*x(i2+5*K_old) x02 = y00+y01 x0a = y00-y01 x12 = y10+y11 x1a = y10-y11 y00 = C3*x(i0+12*K_old)-S3*x(i2+12*K_old) y10 = S3*x(i0+12*K_old)+C3*x(i2+12*K_old) y01 = C11*x(i0+13*K_old)-S11*x(i2+13*K_old) y11 = S11*x(i0+13*K_old)+C11*x(i2+13*K_old) x03 = y00+y01 x0b = y00-y01 x13 = y10+y11 x1b = y10-y11 y00 = C4*x(i0+2*K_old)-S4*x(i2+2*K_old) y10 = S4*x(i0+2*K_old)+C4*x(i2+2*K_old) y01 = C12*x(i0+3*K_old)-S12*x(i2+3*K_old) y11 = S12*x(i0+3*K_old)+C12*x(i2+3*K_old) x04 = y00+y01 x0c = y00-y01 x14 = y10+y11 x1c = y10-y11 y00 = C5*x(i0+10*K_old)-S5*x(i2+10*K_old) y10 = S5*x(i0+10*K_old)+C5*x(i2+10*K_old) y01 = C13*x(i0+11*K_old)-S13*x(i2+11*K_old) y11 = S13*x(i0+11*K_old)+C13*x(i2+11*K_old) x05 = y00+y01 x0d = y00-y01 x15 = y10+y11 x1d = y10-y11 y00 = C6*x(i0+6*K_old)-S6*x(i2+6*K_old) y10 = S6*x(i0+6*K_old)+C6*x(i2+6*K_old) y01 = C14*x(i0+7*K_old)-S14*x(i2+7*K_old) y11 = S14*x(i0+7*K_old)+C14*x(i2+7*K_old) x06 = y00+y01 x0e = y00-y01 x16 = y10+y11 x1e = y10-y11 y00 = C7*x(i0+14*K_old)-S7*x(i2+14*K_old) y10 = S7*x(i0+14*K_old)+C7*x(i2+14*K_old) y01 = C15*x(i0+15*K_old)-S15*x(i2+15*K_old) y11 = S15*x(i0+15*K_old)+C15*x(i2+15*K_old) x07 = y00+y01 x0f = y00-y01 x17 = y10+y11 x1f = y10-y11 ! Complete RF(16), mixed with trig identity additions ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = x00-x04 y05 = x01-x05 y06 = x02-x06 y07 = x03-x07 x01 = y00-y02 x03 = y01-y03 y00 = y00+y02 y01 = y01+y03 x00 = y00+y01 x02 = y00-y01 y00 = cos4*(y05-y07) y01 = cos4*(y05+y07) x04 = y04+y00 x05 = y04-y00 x06 = y01+y06 x07 = y01-y06 ! End of internal RF(8) ! Start of internal OT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4*(x0a-x0e) y05 = cos4*(x0a+x0e) x0b = x08-y04 y04 = x08+y04 x0d = y05-x0c y05 = y05+x0c x08 = cos8*y00-sin8*y01 y01 = sin8*y00+cos8*y01 x0e = sin8*y02+cos8*y03 y02 = cos8*y02-sin8*y03 x09 = x0b+y01 x0a = x0b-y01 x0b = y04-x08 x08 = y04+x08 x0c = x0e+y05 x0f = x0e-y05 x0e = y02-x0d x0d = y02+x0d ! End of internal OT(8) ! Start of internal RF(8) y10 = x10+x14 y11 = x11+x15 y12 = x12+x16 y13 = x13+x17 y14 = x10-x14 y15 = x11-x15 y16 = x12-x16 y17 = x13-x17 x11 = y10-y12 x13 = y11-y13 y10 = y10+y12 y11 = y11+y13 x10 = y10+y11 x12 = y11-y10 ! Because we need its additive inverse y10 = cos4*(y15-y17) y11 = cos4*(y15+y17) x14 = y14+y10 x15 = y14-y10 x16 = y11+y16 x17 = y11-y16 ! End of internal RF(8) ! Start of internal OT(8) y10 = x19-x1f y11 = x1d-x1b y12 = x19+x1f y13 = x1d+x1b y14 = cos4*(x1a-x1e) y15 = cos4*(x1a+x1e) x1b = x18-y14 y14 = x18+y14 x1d = y15-x1c y15 = y15+x1c x18 = cos8*y10-sin8*y11 y11 = sin8*y10+cos8*y11 x1e = sin8*y12+cos8*y13 y12 = cos8*y12-sin8*y13 x19 = x1b+y11 x1a = x1b-y11 x1b = y14-x18 x18 = y14+x18 x1c = x1e+y15 x1f = x1e-y15 x1e = y12-x1d x1d = y12+x1d ! End of internal OT(8) x(i2+7*K_old) = x02 x(i0+K_old) = x08-x1c x(i0+2*K_old) = x04-x16 x(i0+3*K_old) = x09-x1d x(i0+4*K_old) = x01-x13 x(i0+5*K_old) = x0a-x1e x(i0+6*K_old) = x05-x17 x(i0+7*K_old) = x0b-x1f x(i2) = x08+x1c x(i2+K_old) = x04+x16 x(i2+2*K_old) = x09+x1d x(i2+3*K_old) = x01+x13 x(i2+4*K_old) = x0a+x1e x(i2+5*K_old) = x05+x17 x(i2+6*K_old) = x0b+x1f x(i0) = x00 x(i2+15*K_old) = x10 x(i0+15*K_old) = x0c-x18 x(i2+14*K_old) = x0c+x18 x(i0+14*K_old) = x06-x14 x(i2+13*K_old) = x06+x14 x(i0+13*K_old) = x0d-x19 x(i2+12*K_old) = x0d+x19 x(i0+12*K_old) = x03-x11 x(i2+11*K_old) = x03+x11 x(i0+11*K_old) = x0e-x1a x(i2+10*K_old) = x0e+x1a x(i0+10*K_old) = x07-x15 x(i2+9*K_old) = x07+x15 x(i0+9*K_old) = x0f-x1b x(i2+8*K_old) = x0f+x1b x(i0+8*K_old) = x12 do n1 = n16+1, min(n16+15,N_new-1) i0 = 16*K_old*n1+k2 i2 = 16*K_old*n1+k3 ! Apply phase factors, mixed with first addition in RF(16) y00 = C8*x(i0+K_old)-S8*x(i2+K_old) y10 = S8*x(i0+K_old)+C8*x(i2+K_old) x00 = x(i0)+y00 ! Need scaling for internal SOT(8) x08 = sec8*(x(i0)-y00) x10 = x(i2)+y10 ! Need scaling for internal SOT(8) x18 = sec8*(x(i2)-y10) y00 = C1*x(i0+8*K_old)-S1*x(i2+8*K_old) y10 = S1*x(i0+8*K_old)+C1*x(i2+8*K_old) y01 = C9*x(i0+9*K_old)-S9*x(i2+9*K_old) y11 = S9*x(i0+9*K_old)+C9*x(i2+9*K_old) x01 = y00+y01 x09 = y00-y01 x11 = y10+y11 x19 = y10-y11 y00 = C2*x(i0+4*K_old)-S2*x(i2+4*K_old) y10 = S2*x(i0+4*K_old)+C2*x(i2+4*K_old) y01 = C10*x(i0+5*K_old)-S10*x(i2+5*K_old) y11 = S10*x(i0+5*K_old)+C10*x(i2+5*K_old) x02 = y00+y01 x0a = y00-y01 x12 = y10+y11 x1a = y10-y11 y00 = C3*x(i0+12*K_old)-S3*x(i2+12*K_old) y10 = S3*x(i0+12*K_old)+C3*x(i2+12*K_old) y01 = C11*x(i0+13*K_old)-S11*x(i2+13*K_old) y11 = S11*x(i0+13*K_old)+C11*x(i2+13*K_old) x03 = y00+y01 x0b = y00-y01 x13 = y10+y11 x1b = y10-y11 y00 = C4*x(i0+2*K_old)-S4*x(i2+2*K_old) y10 = S4*x(i0+2*K_old)+C4*x(i2+2*K_old) y01 = C12*x(i0+3*K_old)-S12*x(i2+3*K_old) y11 = S12*x(i0+3*K_old)+C12*x(i2+3*K_old) x04 = y00+y01 ! Need scaling for internal SOT(8) x0c = sec8*(y00-y01) x14 = y10+y11 ! Need scaling for internal SOT(8) x1c = sec8*(y10-y11) y00 = C5*x(i0+10*K_old)-S5*x(i2+10*K_old) y10 = S5*x(i0+10*K_old)+C5*x(i2+10*K_old) y01 = C13*x(i0+11*K_old)-S13*x(i2+11*K_old) y11 = S13*x(i0+11*K_old)+C13*x(i2+11*K_old) x05 = y00+y01 x0d = y00-y01 x15 = y10+y11 x1d = y10-y11 y00 = C6*x(i0+6*K_old)-S6*x(i2+6*K_old) y10 = S6*x(i0+6*K_old)+C6*x(i2+6*K_old) y01 = C14*x(i0+7*K_old)-S14*x(i2+7*K_old) y11 = S14*x(i0+7*K_old)+C14*x(i2+7*K_old) x06 = y00+y01 x0e = y00-y01 x16 = y10+y11 x1e = y10-y11 y00 = C7*x(i0+14*K_old)-S7*x(i2+14*K_old) y10 = S7*x(i0+14*K_old)+C7*x(i2+14*K_old) y01 = C15*x(i0+15*K_old)-S15*x(i2+15*K_old) y11 = S15*x(i0+15*K_old)+C15*x(i2+15*K_old) x07 = y00+y01 x0f = y00-y01 x17 = y10+y11 x1f = y10-y11 ! Complete RF(16), mixed with trig identity additions ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = x00-x04 y05 = x01-x05 y06 = x02-x06 y07 = x03-x07 x01 = y00-y02 x03 = y01-y03 y00 = y00+y02 y01 = y01+y03 x00 = y00+y01 x02 = y00-y01 y00 = cos4*(y05-y07) y01 = cos4*(y05+y07) x04 = y04+y00 x05 = y04-y00 x06 = y01+y06 x07 = y01-y06 ! End of internal RF(8) ! Start of internal SOT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4_s*(x0a-x0e) y05 = cos4_s*(x0a+x0e) x0b = x08-y04 y04 = x08+y04 x0d = y05-x0c y05 = y05+x0c x08 = y00-tan8*y01 y01 = tan8*y00+y01 x0e = tan8*y02+y03 y02 = y02-tan8*y03 x09 = x0b+y01 x0a = x0b-y01 x0b = y04-x08 x08 = y04+x08 x0c = x0e+y05 x0f = x0e-y05 x0e = y02-x0d x0d = y02+x0d ! End of internal SOT(8) ! Start of internal RF(8) y10 = x10+x14 y11 = x11+x15 y12 = x12+x16 y13 = x13+x17 y14 = x10-x14 y15 = x11-x15 y16 = x12-x16 y17 = x13-x17 x11 = y10-y12 x13 = y11-y13 y10 = y10+y12 y11 = y11+y13 x10 = y10+y11 x12 = y11-y10 ! Because we need its additive inverse y10 = cos4*(y15-y17) y11 = cos4*(y15+y17) x14 = y14+y10 x15 = y14-y10 x16 = y11+y16 x17 = y11-y16 ! End of internal RF(8) ! Start of internal SOT(8) y10 = x19-x1f y11 = x1d-x1b y12 = x19+x1f y13 = x1d+x1b y14 = cos4_s*(x1a-x1e) y15 = cos4_s*(x1a+x1e) x1b = x18-y14 y14 = x18+y14 x1d = y15-x1c y15 = y15+x1c x18 = y10-tan8*y11 y11 = tan8*y10+y11 x1e = tan8*y12+y13 y12 = y12-tan8*y13 x19 = x1b+y11 x1a = x1b-y11 x1b = y14-x18 x18 = y14+x18 x1c = x1e+y15 x1f = x1e-y15 x1e = y12-x1d x1d = y12+x1d ! End of internal SOT(8) x(i2+7*K_old) = x02 x(i0+K_old) = x08-x1c x(i0+2*K_old) = x04-x16 x(i0+3*K_old) = x09-x1d x(i0+4*K_old) = x01-x13 x(i0+5*K_old) = x0a-x1e x(i0+6*K_old) = x05-x17 x(i0+7*K_old) = x0b-x1f x(i2) = x08+x1c x(i2+K_old) = x04+x16 x(i2+2*K_old) = x09+x1d x(i2+3*K_old) = x01+x13 x(i2+4*K_old) = x0a+x1e x(i2+5*K_old) = x05+x17 x(i2+6*K_old) = x0b+x1f x(i0) = x00 x(i2+15*K_old) = x10 x(i0+15*K_old) = x0c-x18 x(i2+14*K_old) = x0c+x18 x(i0+14*K_old) = x06-x14 x(i2+13*K_old) = x06+x14 x(i0+13*K_old) = x0d-x19 x(i2+12*K_old) = x0d+x19 x(i0+12*K_old) = x03-x11 x(i2+11*K_old) = x03+x11 x(i0+11*K_old) = x0e-x1a x(i2+10*K_old) = x0e+x1a x(i0+10*K_old) = x07-x15 x(i2+9*K_old) = x07+x15 x(i0+9*K_old) = x0f-x1b x(i2+8*K_old) = x0f+x1b x(i0+8*K_old) = x12 end do end do end do end subroutine step_16 !................................................................ ! subroutine rfs16(x,k,cosines,cosines_s,table) ! Driver subroutine for radix-16 DIT real-half complex DFT on ! 2**k elements with intermediate scaling. We need to first invoke ! reverse_table and cosine_table to get the tables of constants, ! cosines and cosines_s and table, that will be required. Before ! that, invoke rfs16_sizes to get the sizes of these tables. !................................................................ subroutine rfs16(x,k,cosines,cosines_s,table) integer, intent(in) :: k ! Order of the problem is 2**k real(wp), intent(inout) :: x(0:2**k-1) ! Vector to be ! transformed ! Trig function table real(wp), intent(in) :: cosines(-(2**(k-2)-1):2**(k-1)-1) ! Scaled trig function table real(wp), intent(in) :: cosines_s(-(2**(k-2)-1):2**(k-1)-1) integer(ik2), intent(in) :: table(0:2**(k/2)-1) ! Bit-reversal ! table integer N_new ! Order of transform yet to be performed integer K_old ! Order of transform completed ! Put inputs in bit-reversed order call bit_reverse(x,k,table) ! Handle non-power of 16 step if necessary if(mod(k,4) == 1) then N_new = 2**(k-1) call step_2(x,N_new) K_old = 2 else if(mod(k,4) == 2) then N_new = 2**(k-2) call step_4(x,N_new) K_old = 4 else if(mod(k,4) == 3) then N_new = 2**(k-3) call step_8(x,N_new) K_old = 8 else N_new = 2**k K_old = 1 end if ! Do power of 16 steps do while(N_new > 1) N_new = N_new/16 call step_16(x,N_new,K_old,cosines,cosines_s) K_old = K_old*16 end do ! Put sines in normal order call reverse_sins(x,k) end subroutine rfs16 !................................................................ !................................................................ ! subroutine rfs16_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) ! Compute sizes of data tables required by rf16 !................................................................ subroutine rfs16_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) integer, intent(in) :: k ! Order of the problem = 2**k integer, intent(out) :: cosine_lbound ! Lower bound of cosine tables integer, intent(out) :: cosine_ubound ! Upper bound of cosine tables integer, intent(out) :: bitrev_size ! Size of bit-reversal ! table if(k < 0) then write(*,'(a)') ' Error: negative exponent in rfs16_sizes' stop end if cosine_lbound = -(2**(k-2)-1) cosine_ubound = 2**(k-1)-1 bitrev_size = 2**(k/2) end subroutine rfs16_sizes !................................................................ end module rfs16_routines ! *************************************************************** ! *************************************************************** ! module rf64_routines ! Along with module auxiliary_routines contains everything you need ! to do a radix-64 real-half complex decimation in time DFT with ! bit-reversed inputs, sine-reversed outputs, and internal scaling. ! First you should call subroutine rf64_sizes to get the sizes of the ! arrays that are required for the bit-reversal helper table and the ! table of cosines. Then call reverse_table to create the bit-reversal ! helper table and cos_table to make up the table of cosines. ! Then rf64 can be used to compute real-half complex DFTs. ! *************************************************************** module rf64_routines use mykinds, wp=>dp use auxiliary_routines implicit none private wp private step_2, step_4, step_8, step_16, step_32, step_64 contains !................................................................ ! subroutine cos_table_64(table, k) ! Computes a table of fourth-, first-, and second-quadrant cosines ! required by subroutine rf64. Given that it is desired to compute ! a DFT of order 2**k, one should first call subroutine rf64_sizes to ! determine how big the cosine table has to be. Then cos_table can ! be invoked to create the table. !................................................................ subroutine cos_table_64(table, k) integer, intent(in) :: k ! Size of the problem is 2**k real(wp), intent(out) :: table(-(2**(k-2)-1):2**(k-1)-1) ! Table of cosines integer j ! Multiple of fundamental angle integer N ! 2**k real(wp) pi ! Key lime, I hope. N = 2**k pi = 4*atan(1.0_wp) do j = lbound(table,1),ubound(table,1) table(j) = cos(2*pi*j/N) end do end subroutine cos_table_64 !................................................................ !................................................................ ! subroutine step_2(x,N_new) ! Computes the first stage of radix-64 real-half complex DFT on ! 2**k elements if mod(k,6) == 1. !................................................................ subroutine step_2(x,N_new) integer, intent(in) :: N_new ! 2**k/2 real(wp), intent(inout) :: x(0:2*N_new-1) ! Array to be ! transformed integer n1 ! Current elements are x(2*n1) ! and x(2*n1+1) real(wp) x00 ! Holds intermediate result do n1 = 0, N_new-1 x00 = x(2*n1)+x(2*n1+1) x(2*n1+1) = x(2*n1)-x(2*n1+1) x(2*n1) = x00 end do end subroutine step_2 !................................................................ !................................................................ ! subroutine step_4(x,N_new) ! Computes the first stage of radix-64 real-half complex DFT on ! 2**k elements if mod(k,6) == 2. !................................................................ subroutine step_4(x,N_new) integer, intent(in) :: N_new ! 2**k/4 real(wp), intent(inout) :: x(0:4*N_new-1) ! Array to be ! transformed integer n1 ! Current elements are x(4*n1:4*n1+3) real(wp) x00, x01 ! Hold temporary results do n1 = 0, N_new-1 x00 = x(4*n1)+x(4*n1+1) x01 = x(4*n1+2)+x(4*n1+3) x(4*n1+1) = x(4*n1)-x(4*n1+1) x(4*n1+3) = x(4*n1+2)-x(4*n1+3) x(4*n1) = x00+x01 x(4*n1+2) = x00-x01 end do end subroutine step_4 !................................................................ !................................................................ ! subroutine step_8(x,N_new) ! Computes the first stage of radix-64 real-half complex DFT on ! 2**k elements if mod(k,6) == 3. !................................................................ subroutine step_8(x,N_new) integer, intent(in) :: N_new ! 2**k/8 real(wp), intent(inout) :: x(0:8*N_new-1) ! Array to be ! transformed ! Define parameter for cos(pi/4) real(wp), parameter :: c4 = 0.70710678118654752440084436210485_wp integer n1 ! Current elements are x(8*n1:8*n1+7) integer i0 ! 8*n1 real(wp) x00, x01, x02, x03 ! Hold temporary results real(wp) x04, x05, x06, x07 ! Hold temporary results real(wp) y00, y01 ! Hold temporary results do n1 = 0, N_new-1 i0 = 8*n1 ! Compute RF(8) x00 = x(i0)+x(i0+1) x01 = x(i0+4)+x(i0+5) x02 = x(i0+2)+x(i0+3) x03 = x(i0+6)+x(i0+7) x04 = x(i0)-x(i0+1) x05 = x(i0+4)-x(i0+5) x06 = x(i0+2)-x(i0+3) x07 = x(i0+6)-x(i0+7) y00 = x00+x02 y01 = x01+x03 x(i0) = y00+y01 x(i0+4) = y00-y01 x(i0+2) = x00-x02 x(i0+6) = x01-x03 y00 = c4*(x05-x07) y01 = c4*(x05+x07) x(i0+1) = x04+y00 x(i0+3) = x04-y00 ! Sin components output in backwards order x(i0+5) = y01-x06 x(i0+7) = y01+x06 end do end subroutine step_8 !................................................................ !................................................................ ! subroutine step_16(x,N_new) ! Computes the first stage of radix-64 real-half complex DFT on ! 2**k elements if mod(k,6) == 4. !................................................................ subroutine step_16(x,N_new) integer, intent(in) :: N_new ! 2**k/16 real(wp), intent(inout) :: x(0:16*N_new-1) ! Array to be ! transformed ! Define parameters for cos(pi/4), cos(pi/8), and sin(pi/8) real(wp), parameter :: cos4 = 0.70710678118654752440084436210485_wp real(wp), parameter :: cos8 = 0.92387953251128675612818318939679_wp real(wp), parameter :: sin8 = 0.38268343236508977172845998403040_wp integer n1 ! Current elements are x(16*n1:16*n1+15) integer i0 ! 16*n1 real(wp) x00, x01, x02, x03 ! Hold temporary results real(wp) x04, x05, x06, x07 ! Hold temporary results real(wp) x08, x09, x0a, x0b ! Hold temporary results real(wp) x0c, x0d, x0e, x0f ! Hold temporary results real(wp) y00, y01, y02, y03 ! Hold temporary results real(wp) y04, y05 ! Hold temporary results do n1 = 0, N_new-1 i0 = 16*n1 ! Compute RF(16) x00 = x(i0)+x(i0+1) x01 = x(i0+8)+x(i0+9) x02 = x(i0+4)+x(i0+5) x03 = x(i0+12)+x(i0+13) x04 = x(i0+2)+x(i0+3) x05 = x(i0+10)+x(i0+11) x06 = x(i0+6)+x(i0+7) x07 = x(i0+14)+x(i0+15) x08 = x(i0)-x(i0+1) x09 = x(i0+8)-x(i0+9) x0a = x(i0+4)-x(i0+5) x0b = x(i0+12)-x(i0+13) x0c = x(i0+2)-x(i0+3) x0d = x(i0+10)-x(i0+11) x0e = x(i0+6)-x(i0+7) x0f = x(i0+14)-x(i0+15) ! Start of internal RF(8) y00 = x00+x04 y01 = x01+x05 y02 = x02+x06 y03 = x03+x07 y04 = y00+y02 y05 = y01+y03 x(i0) = y04+y05 x(i0+8) = y04-y05 x(i0+4) = y00-y02 x(i0+12) = y01-y03 y00 = x00-x04 y01 = x01-x05 y02 = x02-x06 y03 = x03-x07 y04 = cos4*(y01-y03) y05 = cos4*(y01+y03) x(i0+2) = y00+y04 x(i0+6) = y00-y04 x(i0+10) = y05-y02 x(i0+14) = y05+y02 ! End of internal RF(8) ! Start of internal OT(8) y00 = x09-x0f y01 = x0d-x0b y02 = x09+x0f y03 = x0d+x0b y04 = cos4*(x0a-x0e) y05 = cos4*(x0a+x0e) x0a = x08+y04 x0b = x08-y04 x0d = y05-x0c x0c = y05+x0c x08 = cos8*y00-sin8*y01 x09 = sin8*y00+cos8*y01 x0e = sin8*y02+cos8*y03 x0f = cos8*y02-sin8*y03 x(i0+1) = x0a+x08 x(i0+3) = x0b+x09 x(i0+5) = x0b-x09 x(i0+7) = x0a-x08 x(i0+9) = x0e-x0c x(i0+11) = x0f-x0d x(i0+13) = x0f+x0d x(i0+15) = x0e+x0c ! End of internal OT(8) end do end subroutine step_16 !................................................................ !................................................................ ! subroutine step_32(x,N_new) ! Computes the first stage of radix-64 real-half complex DFT on ! 2**k elements if mod(k,6) == 5. !................................................................ subroutine step_32(x,N_new) integer, intent(in) :: N_new ! 2**k/32 real(wp), intent(inout) :: x(0:32*N_new-1) ! Array to be ! transformed ! Define parameters for cos(pi/4), cos(pi/8), and sin(pi/8) real(wp), parameter :: cos4 = 0.70710678118654752440084436210485_wp real(wp), parameter :: cos8 = 0.92387953251128675612818318939679_wp real(wp), parameter :: sin8 = 0.38268343236508977172845998403040_wp ! Define parameters for cos(pi/16), cos(3*pi/15), sin(pi/16), and sin(3*pi/16) real(wp), parameter :: c16 = 0.98078528040323044912618223613424_wp real(wp), parameter :: c316 = 0.83146961230254523707878837761791_wp real(wp), parameter :: s16 = 0.19509032201612826784828486847702_wp real(wp), parameter :: s316 = 0.55557023301960222474283081394853_wp integer n1 ! Current elements are x(32*n1:32*n1+31) integer i0 ! 32*n1 real(wp) x0_0, x0_1, x0_2, x0_3 ! Hold temporary results real(wp) x0_4, x0_5, x0_6, x0_7 ! Hold temporary results real(wp) x0_8, x0_9, x0_10, x0_11 ! Hold temporary results real(wp) x0_12, x0_13, x0_14, x0_15 ! Hold temporary results real(wp) x0_16, x0_17, x0_18, x0_19 ! Hold temporary results real(wp) x0_20, x0_21, x0_22, x0_23 ! Hold temporary results real(wp) x0_24, x0_25, x0_26, x0_27 ! Hold temporary results real(wp) x0_28, x0_29, x0_30, x0_31 ! Hold temporary results real(wp) y0_0, y0_1, y0_2, y0_3 ! Hold temporary results real(wp) y0_4, y0_5, y0_6, y0_7 ! Hold temporary results real(wp) y0_8, y0_9, y0_10, y0_11 ! Hold temporary results real(wp) y0_12, y0_13, y0_14, y0_15 ! Hold temporary results real(wp) z0_0, z0_1, z0_2, z0_3 ! Hold temporary results real(wp) z0_4, z0_5, z0_6, z0_7 ! Hold temporary results do n1 = 0, N_new-1 i0 = 32*n1 ! Compute RF(32) x0_0 = x(i0)+x(i0+1) x0_1 = x(i0+16)+x(i0+17) x0_2 = x(i0+8)+x(i0+9) x0_3 = x(i0+24)+x(i0+25) x0_4 = x(i0+4)+x(i0+5) x0_5 = x(i0+20)+x(i0+21) x0_6 = x(i0+12)+x(i0+13) x0_7 = x(i0+28)+x(i0+29) x0_8 = x(i0+2)+x(i0+3) x0_9 = x(i0+18)+x(i0+19) x0_10 = x(i0+10)+x(i0+11) x0_11 = x(i0+26)+x(i0+27) x0_12 = x(i0+6)+x(i0+7) x0_13 = x(i0+22)+x(i0+23) x0_14 = x(i0+14)+x(i0+15) x0_15 = x(i0+30)+x(i0+31) x0_16 = x(i0)-x(i0+1) x0_17 = x(i0+16)-x(i0+17) x0_18 = x(i0+8)-x(i0+9) x0_19 = x(i0+24)-x(i0+25) x0_20 = x(i0+4)-x(i0+5) x0_21 = x(i0+20)-x(i0+21) x0_22 = x(i0+12)-x(i0+13) x0_23 = x(i0+28)-x(i0+29) x0_24 = x(i0+2)-x(i0+3) x0_25 = x(i0+18)-x(i0+19) x0_26 = x(i0+10)-x(i0+11) x0_27 = x(i0+26)-x(i0+27) x0_28 = x(i0+6)-x(i0+7) x0_29 = x(i0+22)-x(i0+23) x0_30 = x(i0+14)-x(i0+15) x0_31 = x(i0+30)-x(i0+31) ! Start of internal RF(16) y0_0 = x0_0+x0_8 y0_1 = x0_1+x0_9 y0_2 = x0_2+x0_10 y0_3 = x0_3+x0_11 y0_4 = x0_4+x0_12 y0_5 = x0_5+x0_13 y0_6 = x0_6+x0_14 y0_7 = x0_7+x0_15 y0_8 = x0_0-x0_8 y0_9 = x0_1-x0_9 y0_10 = x0_2-x0_10 y0_11 = x0_3-x0_11 y0_12 = x0_4-x0_12 y0_13 = x0_5-x0_13 y0_14 = x0_6-x0_14 y0_15 = x0_7-x0_15 ! Start of internal RF(8) z0_0 = y0_0+y0_4 z0_1 = y0_1+y0_5 z0_2 = y0_2+y0_6 z0_3 = y0_3+y0_7 z0_4 = y0_0-y0_4 z0_5 = y0_1-y0_5 z0_6 = y0_2-y0_6 z0_7 = y0_3-y0_7 x(i0+8) = z0_0-z0_2 x(i0+24) = z0_1-z0_3 z0_0 = z0_0+z0_2 z0_1 = z0_1+z0_3 x(i0) = z0_0+z0_1 x(i0+16) = z0_0-z0_1 z0_0 = cos4*(z0_5-z0_7) z0_1 = cos4*(z0_5+z0_7) x(i0+4) = z0_4+z0_0 x(i0+12) = z0_4-z0_0 x(i0+28) = z0_1+z0_6 x(i0+20) = z0_1-z0_6 ! End of internal RF(8) ! Start of internal OT(8) z0_0 = y0_9-y0_15 z0_1 = y0_13-y0_11 z0_2 = y0_9+y0_15 z0_3 = y0_13+y0_11 y0_0 = cos4*(y0_10-y0_14) y0_1 = cos4*(y0_10+y0_14) y0_2 = y0_8+y0_0 y0_3 = y0_8-y0_0 y0_4 = y0_1+y0_12 y0_5 = y0_1-y0_12 y0_0 = cos8*z0_0-sin8*z0_1 y0_1 = sin8*z0_0+cos8*z0_1 y0_6 = sin8*z0_2+cos8*z0_3 y0_7 = cos8*z0_2-sin8*z0_3 x(i0+2) = y0_2+y0_0 x(i0+6) = y0_3+y0_1 x(i0+10) = y0_3-y0_1 x(i0+14) = y0_2-y0_0 x(i0+30) = y0_6+y0_4 x(i0+26) = y0_7+y0_5 x(i0+22) = y0_7-y0_5 x(i0+18) = y0_6-y0_4 ! End of internal OT(8) ! End of internal RF(16) ! Start of internal OT(16) y0_8 = x0_17-x0_31 y0_9 = x0_21-x0_27 y0_10 = x0_25-x0_23 y0_11 = x0_29-x0_19 y0_12 = x0_17+x0_31 y0_13 = x0_21+x0_27 y0_14 = x0_25+x0_23 y0_15 = x0_29+x0_19 ! Start of internal OT(8) z0_0 = x0_18-x0_30 z0_1 = x0_26-x0_22 z0_2 = x0_18+x0_30 z0_3 = x0_26+x0_22 y0_0 = cos4*(x0_20-x0_28) y0_1 = cos4*(x0_20+x0_28) y0_2 = x0_16+y0_0 y0_3 = x0_16-y0_0 y0_4 = y0_1+x0_24 y0_5 = y0_1-x0_24 y0_0 = cos8*z0_0-sin8*z0_1 y0_1 = sin8*z0_0+cos8*z0_1 y0_6 = sin8*z0_2+cos8*z0_3 y0_7 = cos8*z0_2-sin8*z0_3 x0_4 = y0_2+y0_0 x0_5 = y0_3+y0_1 x0_6 = y0_3-y0_1 x0_7 = y0_2-y0_0 x0_8 = y0_6+y0_4 x0_9 = y0_7+y0_5 x0_10 = y0_7-y0_5 x0_11 = y0_6-y0_4 ! End of internal OT(8) y0_0 = cos4*(y0_9-y0_11) y0_1 = cos4*(y0_9+y0_11) z0_0 = y0_8+y0_0 z0_1 = y0_8-y0_0 z0_2 = y0_1+y0_10 z0_3 = y0_1-y0_10 x0_0 = c16*z0_0-s16*z0_2 x0_1 = c316*z0_1-s316*z0_3 x0_2 = s316*z0_1+c316*z0_3 x0_3 = s16*z0_0+c16*z0_2 y0_0 = cos4*(y0_13-y0_15) y0_1 = cos4*(y0_13+y0_15) z0_0 = y0_12+y0_0 z0_1 = y0_12-y0_0 z0_2 = y0_1+y0_14 z0_3 = y0_1-y0_14 x0_12 = s16*z0_0+c16*z0_2 x0_13 = s316*z0_1+c316*z0_3 x0_14 = c316*z0_1-s316*z0_3 x0_15 = c16*z0_0-s16*z0_2 x(i0+1) = x0_4+x0_0 x(i0+3) = x0_5+x0_1 x(i0+5) = x0_6+x0_2 x(i0+7) = x0_7+x0_3 x(i0+9) = x0_7-x0_3 x(i0+11) = x0_6-x0_2 x(i0+13) = x0_5-x0_1 x(i0+15) = x0_4-x0_0 x(i0+31) = x0_12+x0_8 x(i0+29) = x0_13+x0_9 x(i0+27) = x0_14+x0_10 x(i0+25) = x0_15+x0_11 x(i0+23) = x0_15-x0_11 x(i0+21) = x0_14-x0_10 x(i0+19) = x0_13-x0_9 x(i0+17) = x0_12-x0_8 ! End of internal OT(16) end do end subroutine step_32 !................................................................ !................................................................ ! subroutine step_64(x,N_new,K_old,cosines) ! The main workhorse of radix-64 real-half complex DFT on 2**k ! elements with internal scaling. !................................................................ subroutine step_64(x,N_new,K_old,cosines) integer, intent(in) :: N_new ! Order of DFT to be completed later integer, intent(in) :: K_old ! Order of DFT completed previously real(wp), intent(inout) :: x(0:64*N_new*K_old-1) ! Vector to be ! transformed (bit-reversed input) real(wp), intent(in) :: cosines(-(16*N_new*K_old-1):32*N_new*K_old-1) ! Table of constants ! Define parameters for cos(pi/4), cos(pi/8), and sin(pi/8) real(wp), parameter :: cos4 = 0.70710678118654752440084436210485_wp real(wp), parameter :: cos8 = 0.92387953251128675612818318939679_wp real(wp), parameter :: sin8 = 0.38268343236508977172845998403040_wp ! Define parameters for cos(pi/16), cos(3*pi/15), sin(pi/16), and sin(3*pi/16) real(wp), parameter :: cos16 = 0.98078528040323044912618223613424_wp real(wp), parameter :: cos316 = 0.83146961230254523707878837761791_wp real(wp), parameter :: sin16 = 0.19509032201612826784828486847702_wp real(wp), parameter :: sin316 = 0.55557023301960222474283081394853_wp ! Define parameters for cos(pi/32), cos(3*pi/32), cos(5*pi/32), and cos(7*pi/32), ! all times cos(pi/8) real(wp), parameter :: cos32_s = 0.91943080004028194732427737532033_wp real(wp), parameter :: cos332_s = 0.88409759001746692352787178143397_wp real(wp), parameter :: cos532_s = 0.81478900541792119223000428316749_wp real(wp), parameter :: cos732_s = 0.71416853627910325674609275644276_wp ! Define parameters for sin(pi/32), sin(3*pi/32), sin(5*pi/32), and sin(7*pi/32), ! all times cos(pi/8) real(wp), parameter :: sin32_s = 0.09055602978576764046009762504393_wp real(wp), parameter :: sin332_s = 0.26818807191704244811048802466843_wp real(wp), parameter :: sin532_s = 0.43551379684614878140255108682356_wp real(wp), parameter :: sin732_s = 0.58610297080140869867447461973889_wp ! Define parameter for cos(pi/4)/cos(pi/8) real(wp), parameter :: cos4_s = 0.7653668647301795434569199680608_wp ! Define parameter for 1/cos(pi/8) real(wp), parameter :: sec8 = 1.0823922002923939687994464107328_wp ! Define parameter for sin(pi/8)/cos(pi/8) real(wp), parameter :: tan8 = 0.4142135623730950488016887242097_wp ! Define parameter for cos(pi/16)/cos(pi/8) real(wp), parameter :: c16_s = 1.0615943376700451936138549313266_wp ! Define parameter for sin(pi/16)/cos(pi/8) real(wp), parameter :: s16_s = 0.21116424290278874484715208695518_wp ! Define parameter for cos(3*pi/16)/cos(pi/8) real(wp), parameter :: c316_s = 0.89997622313641570463850954094189_wp ! Define parameter for sin(3*pi/16)/cos(pi/8) real(wp), parameter :: s316_s = 0.60134488693504528054372182390922_wp ! Define parameters for cos(pi/64), cos(3*pi/64), cos(5*pi/64), cos(7*pi/64), ! cos(9*pi/64), cos(11*pi/64), cos(13*pi/64), and cos(15*pi/64), ! all times cos(pi/8) real(wp), parameter :: cos64_s = 0.92276667915323205499935484991502_wp real(wp), parameter :: cos364_s = 0.91387993159740803125312709578544_wp real(wp), parameter :: cos564_s = 0.89619202072271294162917512548969_wp real(wp), parameter :: cos764_s = 0.86987329078006574194519425112713_wp real(wp), parameter :: cos964_s = 0.83517720552609539667007422759326_wp real(wp), parameter :: cos1164_s = 0.79243790722860715845935513342311_wp real(wp), parameter :: cos1364_s = 0.74206699869388285959883902312746_wp real(wp), parameter :: cos1564_s = 0.68454957930665143636673812974415_wp ! Define parameters for sin(pi/64), sin(3*pi/64), sin(5*pi/64), sin(7*pi/64), ! sin(9*pi/64), sin(11*pi/64), sin(13*pi/64), and sin(15*pi/64), ! all times cos(pi/8) real(wp), parameter :: sin64_s = 0.045332620019031021815856822134825_wp real(wp), parameter :: sin364_s = 0.13556128214497891832270983844567_wp real(wp), parameter :: sin564_s = 0.22448441501853579590409319959163_wp real(wp), parameter :: sin764_s = 0.31124564025980019318521093674237_wp real(wp), parameter :: sin964_s = 0.39500939984118855271528593721892_wp real(wp), parameter :: sin1164_s = 0.47496900296800333073276332130493_wp real(wp), parameter :: sin1364_s = 0.55035439495176797992527207318112_wp real(wp), parameter :: sin1564_s = 0.62043957325783169113772675492484_wp integer n1 ! Time index for remaining transform (bit-reversed) integer k2, k3 ! Low- and high-frequency parameters integer i0, i2 ! Offsets for first and second transform sets integer K_old_2 ! K_old/2 integer sin_org ! Origin for sines in table ! Temporaries for first transform set real(wp) x0_0,x0_1,x0_2,x0_3,x0_4,x0_5,x0_6,x0_7 real(wp) x0_8,x0_9,x0_10,x0_11,x0_12,x0_13,x0_14,x0_15 real(wp) x0_16,x0_17,x0_18,x0_19,x0_20,x0_21,x0_22,x0_23 real(wp) x0_24,x0_25,x0_26,x0_27,x0_28,x0_29,x0_30,x0_31 real(wp) x0_32,x0_33,x0_34,x0_35,x0_36,x0_37,x0_38,x0_39 real(wp) x0_40,x0_41,x0_42,x0_43,x0_44,x0_45,x0_46,x0_47 real(wp) x0_48,x0_49,x0_50,x0_51,x0_52,x0_53,x0_54,x0_55 real(wp) x0_56,x0_57,x0_58,x0_59,x0_60,x0_61,x0_62,x0_63 ! Temporaries for second transform set real(wp) x1_0,x1_1,x1_2,x1_3,x1_4,x1_5,x1_6,x1_7 real(wp) x1_8,x1_9,x1_10,x1_11,x1_12,x1_13,x1_14,x1_15 real(wp) x1_16,x1_17,x1_18,x1_19,x1_20,x1_21,x1_22,x1_23 real(wp) x1_24,x1_25,x1_26,x1_27,x1_28,x1_29,x1_30,x1_31 real(wp) x1_32,x1_33,x1_34,x1_35,x1_36,x1_37,x1_38,x1_39 real(wp) x1_40,x1_41,x1_42,x1_43,x1_44,x1_45,x1_46,x1_47 real(wp) x1_48,x1_49,x1_50,x1_51,x1_52,x1_53,x1_54,x1_55 real(wp) x1_56,x1_57,x1_58,x1_59,x1_60,x1_61,x1_62,x1_63 ! Temporaries for first transform set real(wp) y0_0,y0_1,y0_2,y0_3,y0_4,y0_5,y0_6,y0_7 real(wp) y0_8,y0_9,y0_10,y0_11,y0_12,y0_13,y0_14,y0_15 real(wp) y0_16,y0_17,y0_18,y0_19,y0_20,y0_21,y0_22,y0_23 real(wp) y0_24,y0_25,y0_26,y0_27,y0_28,y0_29,y0_30,y0_31 ! Temporaries for second transform set real(wp) y1_0,y1_1,y1_2,y1_3,y1_4,y1_5,y1_6,y1_7 real(wp) y1_8,y1_9,y1_10,y1_11,y1_12,y1_13,y1_14,y1_15 real(wp) y1_16,y1_17,y1_18,y1_19,y1_20,y1_21,y1_22,y1_23 real(wp) y1_24,y1_25,y1_26,y1_27,y1_28,y1_29,y1_30,y1_31 ! Temporaries for first transform set real(wp) z0_0,z0_1,z0_2,z0_3,z0_4,z0_5,z0_6,z0_7 real(wp) z0_8,z0_9,z0_10,z0_11,z0_12,z0_13,z0_14,z0_15 ! Temporaries for second transform set real(wp) z1_0,z1_1,z1_2,z1_3,z1_4,z1_5,z1_6,z1_7 real(wp) z1_8,z1_9,z1_10,z1_11,z1_12,z1_13,z1_14,z1_15 ! Temporaries for first transform set real(wp) w0_0,w0_1,w0_2,w0_3,w0_4,w0_5,w0_6,w0_7 ! Temporaries for second transform set real(wp) w1_0,w1_1,w1_2,w1_3,w1_4,w1_5,w1_6,w1_7 ! Givens plane rotation matrix elements for first and second transform sets real(wp) C1, S1, C2, S2, C3, S3, C4, S4, C5, S5, C6, S6, C7, S7 real(wp) C8, S8, C9, S9, C10, S10, C11, S11, C12, S12, C13, S13 real(wp) C14, S14, C15, S15, C16, S16, C17, S17, C18, S18, C19, S19 real(wp) C20, S20, C21, S21, C22, S22, C23, S23, C24, S24, C25, S25 real(wp) C26, S26, C27, S27, C28, S28, C29, S29, C30, S30, C31, S31 real(wp) C32, S32, C33, S33, C34, S34, C35, S35, C36, S36, C37, S37 real(wp) C38, S38, C39, S39, C40, S40, C41, S41, C42, S42, C43, S43 real(wp) C44, S44, C45, S45, C46, S46, C47, S47, C48, S48, C49, S49 real(wp) C50, S50, C51, S51, C52, S52, C53, S53, C54, S54, C55, S55 real(wp) C56, S56, C57, S57, C58, S58, C59, S59, C60, S60, C61, S61 real(wp) C62, S62, C63, S63, C64, S64, C65, S65 ! k2 = 0 case (DC terms) do n1 = 0, N_new-1 i0 = 64*K_old*n1 ! Compute RF(64) x0_0 = x(i0)+x(i0+K_old) x0_1 = x(i0+32*K_old)+x(i0+33*K_old) x0_2 = x(i0+16*K_old)+x(i0+17*K_old) x0_3 = x(i0+48*K_old)+x(i0+49*K_old) x0_4 = x(i0+8*K_old)+x(i0+9*K_old) x0_5 = x(i0+40*K_old)+x(i0+41*K_old) x0_6 = x(i0+24*K_old)+x(i0+25*K_old) x0_7 = x(i0+56*K_old)+x(i0+57*K_old) x0_8 = x(i0+4*K_old)+x(i0+5*K_old) x0_9 = x(i0+36*K_old)+x(i0+37*K_old) x0_10 = x(i0+20*K_old)+x(i0+21*K_old) x0_11 = x(i0+52*K_old)+x(i0+53*K_old) x0_12 = x(i0+12*K_old)+x(i0+13*K_old) x0_13 = x(i0+44*K_old)+x(i0+45*K_old) x0_14 = x(i0+28*K_old)+x(i0+29*K_old) x0_15 = x(i0+60*K_old)+x(i0+61*K_old) x0_16 = x(i0+2*K_old)+x(i0+3*K_old) x0_17 = x(i0+34*K_old)+x(i0+35*K_old) x0_18 = x(i0+18*K_old)+x(i0+19*K_old) x0_19 = x(i0+50*K_old)+x(i0+51*K_old) x0_20 = x(i0+10*K_old)+x(i0+11*K_old) x0_21 = x(i0+42*K_old)+x(i0+43*K_old) x0_22 = x(i0+26*K_old)+x(i0+27*K_old) x0_23 = x(i0+58*K_old)+x(i0+59*K_old) x0_24 = x(i0+6*K_old)+x(i0+7*K_old) x0_25 = x(i0+38*K_old)+x(i0+39*K_old) x0_26 = x(i0+22*K_old)+x(i0+23*K_old) x0_27 = x(i0+54*K_old)+x(i0+55*K_old) x0_28 = x(i0+14*K_old)+x(i0+15*K_old) x0_29 = x(i0+46*K_old)+x(i0+47*K_old) x0_30 = x(i0+30*K_old)+x(i0+31*K_old) x0_31 = x(i0+62*K_old)+x(i0+63*K_old) x0_32 = x(i0)-x(i0+K_old) x0_33 = x(i0+32*K_old)-x(i0+33*K_old) x0_34 = x(i0+16*K_old)-x(i0+17*K_old) x0_35 = x(i0+48*K_old)-x(i0+49*K_old) x0_36 = x(i0+8*K_old)-x(i0+9*K_old) x0_37 = x(i0+40*K_old)-x(i0+41*K_old) x0_38 = x(i0+24*K_old)-x(i0+25*K_old) x0_39 = x(i0+56*K_old)-x(i0+57*K_old) x0_40 = x(i0+4*K_old)-x(i0+5*K_old) x0_41 = x(i0+36*K_old)-x(i0+37*K_old) x0_42 = x(i0+20*K_old)-x(i0+21*K_old) x0_43 = x(i0+52*K_old)-x(i0+53*K_old) x0_44 = x(i0+12*K_old)-x(i0+13*K_old) x0_45 = x(i0+44*K_old)-x(i0+45*K_old) x0_46 = x(i0+28*K_old)-x(i0+29*K_old) x0_47 = x(i0+60*K_old)-x(i0+61*K_old) x0_48 = x(i0+2*K_old)-x(i0+3*K_old) x0_49 = x(i0+34*K_old)-x(i0+35*K_old) x0_50 = x(i0+18*K_old)-x(i0+19*K_old) x0_51 = x(i0+50*K_old)-x(i0+51*K_old) x0_52 = x(i0+10*K_old)-x(i0+11*K_old) x0_53 = x(i0+42*K_old)-x(i0+43*K_old) x0_54 = x(i0+26*K_old)-x(i0+27*K_old) x0_55 = x(i0+58*K_old)-x(i0+59*K_old) x0_56 = x(i0+6*K_old)-x(i0+7*K_old) x0_57 = x(i0+38*K_old)-x(i0+39*K_old) x0_58 = x(i0+22*K_old)-x(i0+23*K_old) x0_59 = x(i0+54*K_old)-x(i0+55*K_old) x0_60 = x(i0+14*K_old)-x(i0+15*K_old) x0_61 = x(i0+46*K_old)-x(i0+47*K_old) x0_62 = x(i0+30*K_old)-x(i0+31*K_old) x0_63 = x(i0+62*K_old)-x(i0+63*K_old) ! Start of internal RF(32) y0_0 = x0_0+x0_16 y0_1 = x0_1+x0_17 y0_2 = x0_2+x0_18 y0_3 = x0_3+x0_19 y0_4 = x0_4+x0_20 y0_5 = x0_5+x0_21 y0_6 = x0_6+x0_22 y0_7 = x0_7+x0_23 y0_8 = x0_8+x0_24 y0_9 = x0_9+x0_25 y0_10 = x0_10+x0_26 y0_11 = x0_11+x0_27 y0_12 = x0_12+x0_28 y0_13 = x0_13+x0_29 y0_14 = x0_14+x0_30 y0_15 = x0_15+x0_31 y0_16 = x0_0-x0_16 y0_17 = x0_1-x0_17 y0_18 = x0_2-x0_18 y0_19 = x0_3-x0_19 y0_20 = x0_4-x0_20 y0_21 = x0_5-x0_21 y0_22 = x0_6-x0_22 y0_23 = x0_7-x0_23 y0_24 = x0_8-x0_24 y0_25 = x0_9-x0_25 y0_26 = x0_10-x0_26 y0_27 = x0_11-x0_27 y0_28 = x0_12-x0_28 y0_29 = x0_13-x0_29 y0_30 = x0_14-x0_30 y0_31 = x0_15-x0_31 ! Start of internal RF(16) z0_0 = y0_0+y0_8 z0_1 = y0_1+y0_9 z0_2 = y0_2+y0_10 z0_3 = y0_3+y0_11 z0_4 = y0_4+y0_12 z0_5 = y0_5+y0_13 z0_6 = y0_6+y0_14 z0_7 = y0_7+y0_15 z0_8 = y0_0-y0_8 z0_9 = y0_1-y0_9 z0_10 = y0_2-y0_10 z0_11 = y0_3-y0_11 z0_12 = y0_4-y0_12 z0_13 = y0_5-y0_13 z0_14 = y0_6-y0_14 z0_15 = y0_7-y0_15 ! Start of internal RF(8) w0_0 = z0_0+z0_4 w0_1 = z0_1+z0_5 w0_2 = z0_2+z0_6 w0_3 = z0_3+z0_7 w0_4 = z0_0-z0_4 w0_5 = z0_1-z0_5 w0_6 = z0_2-z0_6 w0_7 = z0_3-z0_7 x(i0+16*K_old) = w0_0-w0_2 x(i0+48*K_old) = w0_1-w0_3 w0_0 = w0_0+w0_2 w0_1 = w0_1+w0_3 x(i0) = w0_0+w0_1 x(i0+32*K_old) = w0_0-w0_1 w0_0 = cos4*(w0_5-w0_7) w0_1 = cos4*(w0_5+w0_7) x(i0+8*K_old) = w0_4+w0_0 x(i0+24*K_old) = w0_4-w0_0 x(i0+56*K_old) = w0_1+w0_6 x(i0+40*K_old) = w0_1-w0_6 ! End of internal RF(8) ! Start of internal OT(8) w0_0 = z0_9-z0_15 w0_1 = z0_13-z0_11 w0_2 = z0_9+z0_15 w0_3 = z0_13+z0_11 z0_0 = cos4*(z0_10-z0_14) z0_1 = cos4*(z0_10+z0_14) z0_2 = z0_8+z0_0 z0_3 = z0_8-z0_0 z0_4 = z0_1+z0_12 z0_5 = z0_1-z0_12 z0_0 = cos8*w0_0-sin8*w0_1 z0_1 = sin8*w0_0+cos8*w0_1 z0_6 = sin8*w0_2+cos8*w0_3 z0_7 = cos8*w0_2-sin8*w0_3 x(i0+4*K_old) = z0_2+z0_0 x(i0+12*K_old) = z0_3+z0_1 x(i0+20*K_old) = z0_3-z0_1 x(i0+28*K_old) = z0_2-z0_0 x(i0+60*K_old) = z0_6+z0_4 x(i0+52*K_old) = z0_7+z0_5 x(i0+44*K_old) = z0_7-z0_5 x(i0+36*K_old) = z0_6-z0_4 ! End of internal OT(8) ! End of internal RF(16) ! Start of internal OT(16) z0_0 = y0_17-y0_31 z0_1 = y0_21-y0_27 z0_2 = y0_25-y0_23 z0_3 = y0_29-y0_19 z0_4 = y0_17+y0_31 z0_5 = y0_21+y0_27 z0_6 = y0_25+y0_23 z0_7 = y0_29+y0_19 ! Start of internal OT(8) w0_0 = y0_18-y0_30 w0_1 = y0_26-y0_22 w0_2 = y0_18+y0_30 w0_3 = y0_26+y0_22 z0_8 = cos4*(y0_20-y0_28) z0_9 = cos4*(y0_20+y0_28) z0_10 = y0_16+z0_8 z0_11 = y0_16-z0_8 z0_12 = z0_9+y0_24 z0_13 = z0_9-y0_24 z0_8 = cos8*w0_0-sin8*w0_1 z0_9 = sin8*w0_0+cos8*w0_1 z0_14 = sin8*w0_2+cos8*w0_3 z0_15 = cos8*w0_2-sin8*w0_3 y0_4 = z0_10+z0_8 y0_5 = z0_11+z0_9 y0_6 = z0_11-z0_9 y0_7 = z0_10-z0_8 y0_8 = z0_14+z0_12 y0_9 = z0_15+z0_13 y0_10 = z0_15-z0_13 y0_11 = z0_14-z0_12 ! End of internal OT(8) w0_4 = cos4*(z0_1-z0_3) w0_5 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_4 w0_1 = z0_0-w0_4 w0_2 = w0_5+z0_2 w0_3 = w0_5-z0_2 y0_0 = cos16*w0_0-sin16*w0_2 y0_1 = cos316*w0_1-sin316*w0_3 y0_2 = sin316*w0_1+cos316*w0_3 y0_3 = sin16*w0_0+cos16*w0_2 w0_4 = cos4*(z0_5-z0_7) w0_5 = cos4*(z0_5+z0_7) w0_0 = z0_4+w0_4 w0_1 = z0_4-w0_4 w0_2 = w0_5+z0_6 w0_3 = w0_5-z0_6 y0_12 = sin16*w0_0+cos16*w0_2 y0_13 = sin316*w0_1+cos316*w0_3 y0_14 = cos316*w0_1-sin316*w0_3 y0_15 = cos16*w0_0-sin16*w0_2 x(i0+2*K_old) = y0_4+y0_0 x(i0+6*K_old) = y0_5+y0_1 x(i0+10*K_old) = y0_6+y0_2 x(i0+14*K_old) = y0_7+y0_3 x(i0+18*K_old) = y0_7-y0_3 x(i0+22*K_old) = y0_6-y0_2 x(i0+26*K_old) = y0_5-y0_1 x(i0+30*K_old) = y0_4-y0_0 x(i0+62*K_old) = y0_12+y0_8 x(i0+58*K_old) = y0_13+y0_9 x(i0+54*K_old) = y0_14+y0_10 x(i0+50*K_old) = y0_15+y0_11 x(i0+46*K_old) = y0_15-y0_11 x(i0+42*K_old) = y0_14-y0_10 x(i0+38*K_old) = y0_13-y0_9 x(i0+34*K_old) = y0_12-y0_8 ! End of internal OT(16) ! End of internal RF(32) ! Start of internal OT(32) ! Need scaling for internal SOT(8) y0_16 = sec8*(x0_33-x0_63) y0_17 = x0_37-x0_59 y0_18 = x0_41-x0_55 y0_19 = x0_45-x0_51 ! Need scaling for internal SOT(8) y0_20 = sec8*(x0_49-x0_47) y0_21 = x0_53-x0_43 y0_22 = x0_57-x0_39 y0_23 = x0_61-x0_35 ! Need scaling for internal SOT(8) y0_24 = sec8*(x0_33+x0_63) y0_25 = x0_37+x0_59 y0_26 = x0_41+x0_55 y0_27 = x0_45+x0_51 ! Need scaling for internal SOT(8) y0_28 = sec8*(x0_49+x0_47) y0_29 = x0_53+x0_43 y0_30 = x0_57+x0_39 y0_31 = x0_61+x0_35 ! Start of internal OT(16) z0_0 = x0_34-x0_62 z0_1 = x0_42-x0_54 z0_2 = x0_50-x0_46 z0_3 = x0_58-x0_38 z0_4 = x0_34+x0_62 z0_5 = x0_42+x0_54 z0_6 = x0_50+x0_46 z0_7 = x0_58+x0_38 ! Start of internal OT(8) w0_0 = x0_36-x0_60 w0_1 = x0_52-x0_44 w0_2 = x0_36+x0_60 w0_3 = x0_52+x0_44 z0_8 = cos4*(x0_40-x0_56) z0_9 = cos4*(x0_40+x0_56) z0_10 = x0_32+z0_8 z0_11 = x0_32-z0_8 z0_12 = z0_9+x0_48 z0_13 = z0_9-x0_48 z0_8 = cos8*w0_0-sin8*w0_1 z0_9 = sin8*w0_0+cos8*w0_1 z0_14 = sin8*w0_2+cos8*w0_3 z0_15 = cos8*w0_2-sin8*w0_3 y0_4 = z0_10+z0_8 y0_5 = z0_11+z0_9 y0_6 = z0_11-z0_9 y0_7 = z0_10-z0_8 y0_8 = z0_14+z0_12 y0_9 = z0_15+z0_13 y0_10 = z0_15-z0_13 y0_11 = z0_14-z0_12 ! End of internal OT(8) w0_4 = cos4*(z0_1-z0_3) w0_5 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_4 w0_1 = z0_0-w0_4 w0_2 = w0_5+z0_2 w0_3 = w0_5-z0_2 y0_0 = cos16*w0_0-sin16*w0_2 y0_1 = cos316*w0_1-sin316*w0_3 y0_2 = sin316*w0_1+cos316*w0_3 y0_3 = sin16*w0_0+cos16*w0_2 w0_4 = cos4*(z0_5-z0_7) w0_5 = cos4*(z0_5+z0_7) w0_0 = z0_4+w0_4 w0_1 = z0_4-w0_4 w0_2 = w0_5+z0_6 w0_3 = w0_5-z0_6 y0_12 = sin16*w0_0+cos16*w0_2 y0_13 = sin316*w0_1+cos316*w0_3 y0_14 = cos316*w0_1-sin316*w0_3 y0_15 = cos16*w0_0-sin16*w0_2 x0_8 = y0_4+y0_0 x0_9 = y0_5+y0_1 x0_10 = y0_6+y0_2 x0_11 = y0_7+y0_3 x0_12 = y0_7-y0_3 x0_13 = y0_6-y0_2 x0_14 = y0_5-y0_1 x0_15 = y0_4-y0_0 x0_16 = y0_12+y0_8 x0_17 = y0_13+y0_9 x0_18 = y0_14+y0_10 x0_19 = y0_15+y0_11 x0_20 = y0_15-y0_11 x0_21 = y0_14-y0_10 x0_22 = y0_13-y0_9 x0_23 = y0_12-y0_8 ! End of internal OT(16) ! Start of internal SOT(8) w0_0 = y0_17-y0_23 w0_1 = y0_21-y0_19 w0_2 = y0_17+y0_23 w0_3 = y0_21+y0_19 z0_0 = cos4_s*(y0_18-y0_22) z0_1 = cos4_s*(y0_18+y0_22) z0_2 = y0_16+z0_0 z0_3 = y0_16-z0_0 z0_4 = z0_1+y0_20 z0_5 = z0_1-y0_20 z0_0 = w0_0-tan8*w0_1 z0_1 = tan8*w0_0+w0_1 z0_6 = tan8*w0_2+w0_3 z0_7 = w0_2-tan8*w0_3 z0_8 = z0_2+z0_0 z0_9 = z0_3+z0_1 z0_10 = z0_3-z0_1 z0_11 = z0_2-z0_0 z0_12 = z0_6+z0_4 z0_13 = z0_7+z0_5 z0_14 = z0_7-z0_5 z0_15 = z0_6-z0_4 ! End of internal SOT(8) x0_0 = cos32_s*z0_8-sin32_s*z0_12 x0_1 = cos332_s*z0_9-sin332_s*z0_13 x0_2 = cos532_s*z0_10-sin532_s*z0_14 x0_3 = cos732_s*z0_11-sin732_s*z0_15 x0_4 = sin732_s*z0_11+cos732_s*z0_15 x0_5 = sin532_s*z0_10+cos532_s*z0_14 x0_6 = sin332_s*z0_9+cos332_s*z0_13 x0_7 = sin32_s*z0_8+cos32_s*z0_12 ! Start of internal SOT(8) w0_0 = y0_25-y0_31 w0_1 = y0_29-y0_27 w0_2 = y0_25+y0_31 w0_3 = y0_29+y0_27 z0_0 = cos4_s*(y0_26-y0_30) z0_1 = cos4_s*(y0_26+y0_30) z0_2 = y0_24+z0_0 z0_3 = y0_24-z0_0 z0_4 = z0_1+y0_28 z0_5 = z0_1-y0_28 z0_0 = w0_0-tan8*w0_1 z0_1 = tan8*w0_0+w0_1 z0_6 = tan8*w0_2+w0_3 z0_7 = w0_2-tan8*w0_3 z0_8 = z0_2+z0_0 z0_9 = z0_3+z0_1 z0_10 = z0_3-z0_1 z0_11 = z0_2-z0_0 z0_12 = z0_6+z0_4 z0_13 = z0_7+z0_5 z0_14 = z0_7-z0_5 z0_15 = z0_6-z0_4 ! End of internal SOT(8) x0_24 = sin32_s*z0_8+cos32_s*z0_12 x0_25 = sin332_s*z0_9+cos332_s*z0_13 x0_26 = sin532_s*z0_10+cos532_s*z0_14 x0_27 = sin732_s*z0_11+cos732_s*z0_15 x0_28 = cos732_s*z0_11-sin732_s*z0_15 x0_29 = cos532_s*z0_10-sin532_s*z0_14 x0_30 = cos332_s*z0_9-sin332_s*z0_13 x0_31 = cos32_s*z0_8-sin32_s*z0_12 x(i0+K_old) = x0_8+x0_0 x(i0+3*K_old) = x0_9+x0_1 x(i0+5*K_old) = x0_10+x0_2 x(i0+7*K_old) = x0_11+x0_3 x(i0+9*K_old) = x0_12+x0_4 x(i0+11*K_old) = x0_13+x0_5 x(i0+13*K_old) = x0_14+x0_6 x(i0+15*K_old) = x0_15+x0_7 x(i0+17*K_old) = x0_15-x0_7 x(i0+19*K_old) = x0_14-x0_6 x(i0+21*K_old) = x0_13-x0_5 x(i0+23*K_old) = x0_12-x0_4 x(i0+25*K_old) = x0_11-x0_3 x(i0+27*K_old) = x0_10-x0_2 x(i0+29*K_old) = x0_9-x0_1 x(i0+31*K_old) = x0_8-x0_0 x(i0+63*K_old) = x0_24+x0_16 x(i0+61*K_old) = x0_25+x0_17 x(i0+59*K_old) = x0_26+x0_18 x(i0+57*K_old) = x0_27+x0_19 x(i0+55*K_old) = x0_28+x0_20 x(i0+53*K_old) = x0_29+x0_21 x(i0+51*K_old) = x0_30+x0_22 x(i0+49*K_old) = x0_31+x0_23 x(i0+47*K_old) = x0_31-x0_23 x(i0+45*K_old) = x0_30-x0_22 x(i0+43*K_old) = x0_29-x0_21 x(i0+41*K_old) = x0_28-x0_20 x(i0+39*K_old) = x0_27-x0_19 x(i0+37*K_old) = x0_26-x0_18 x(i0+35*K_old) = x0_25-x0_17 x(i0+33*K_old) = x0_24-x0_16 ! End of internal OT(32) end do if(K_old <= 1) return K_old_2 = ishft(K_old,-1) ! k2 = K_old/2 case (Nyquist frequency terms) do n1 = 0, N_new-1 i0 = 16*K_old*n1+K_old_2 ! Compute OT(64) ! Need scaling for internal OT(8) (with scaling) x0_0 = sec8*(x(i0+32*K_old)-x(i0+63*K_old)) x0_1 = x(i0+40*K_old)-x(i0+55*K_old) x0_2 = x(i0+36*K_old)-x(i0+59*K_old) x0_3 = x(i0+44*K_old)-x(i0+51*K_old) x0_4 = x(i0+34*K_old)-x(i0+61*K_old) x0_5 = x(i0+42*K_old)-x(i0+53*K_old) x0_6 = x(i0+38*K_old)-x(i0+57*K_old) x0_7 = x(i0+46*K_old)-x(i0+49*K_old) ! Need scaling for internal OT(8) (with scaling) x0_8 = sec8*(x(i0+33*K_old)-x(i0+62*K_old)) x0_9 = x(i0+41*K_old)-x(i0+54*K_old) x0_10 = x(i0+37*K_old)-x(i0+58*K_old) x0_11 = x(i0+45*K_old)-x(i0+50*K_old) x0_12 = x(i0+35*K_old)-x(i0+60*K_old) x0_13 = x(i0+43*K_old)-x(i0+52*K_old) x0_14 = x(i0+39*K_old)-x(i0+56*K_old) x0_15 = x(i0+47*K_old)-x(i0+48*K_old) ! Need scaling for internal OT(8) (with scaling) x0_48 = sec8*(x(i0+32*K_old)+x(i0+63*K_old)) x0_49 = x(i0+40*K_old)+x(i0+55*K_old) x0_50 = x(i0+36*K_old)+x(i0+59*K_old) x0_51 = x(i0+44*K_old)+x(i0+51*K_old) x0_52 = x(i0+34*K_old)+x(i0+61*K_old) x0_53 = x(i0+42*K_old)+x(i0+53*K_old) x0_54 = x(i0+38*K_old)+x(i0+57*K_old) x0_55 = x(i0+46*K_old)+x(i0+49*K_old) ! Need scaling for internal OT(8) (with scaling) x0_56 = sec8*(x(i0+33*K_old)+x(i0+62*K_old)) x0_57 = x(i0+41*K_old)+x(i0+54*K_old) x0_58 = x(i0+37*K_old)+x(i0+58*K_old) x0_59 = x(i0+45*K_old)+x(i0+50*K_old) x0_60 = x(i0+35*K_old)+x(i0+60*K_old) x0_61 = x(i0+43*K_old)+x(i0+52*K_old) x0_62 = x(i0+39*K_old)+x(i0+56*K_old) x0_63 = x(i0+47*K_old)+x(i0+48*K_old) ! Start of internal OT(32) ! Need scaling for internal SOT(8) y0_0 = sec8*(x(i0+16*K_old)-x(i0+31*K_old)) y0_1 = x(i0+20*K_old)-x(i0+27*K_old) y0_2 = x(i0+18*K_old)-x(i0+29*K_old) y0_3 = x(i0+22*K_old)-x(i0+25*K_old) ! Need scaling for internal SOT(8) y0_4 = sec8*(x(i0+17*K_old)-x(i0+30*K_old)) y0_5 = x(i0+21*K_old)-x(i0+26*K_old) y0_6 = x(i0+19*K_old)-x(i0+28*K_old) y0_7 = x(i0+23*K_old)-x(i0+24*K_old) ! Need scaling for internal SOT(8) y0_24 = sec8*(x(i0+16*K_old)+x(i0+31*K_old)) y0_25 = x(i0+20*K_old)+x(i0+27*K_old) y0_26 = x(i0+18*K_old)+x(i0+29*K_old) y0_27 = x(i0+22*K_old)+x(i0+25*K_old) ! Need scaling for internal SOT(8) y0_28 = sec8*(x(i0+17*K_old)+x(i0+30*K_old)) y0_29 = x(i0+21*K_old)+x(i0+26*K_old) y0_30 = x(i0+19*K_old)+x(i0+28*K_old) y0_31 = x(i0+23*K_old)+x(i0+24*K_old) ! Start of internal OT(16) z0_0 = x(i0+8*K_old)-x(i0+15*K_old) z0_1 = x(i0+10*K_old)-x(i0+13*K_old) z0_2 = x(i0+9*K_old)-x(i0+14*K_old) z0_3 = x(i0+11*K_old)-x(i0+12*K_old) z0_12 = x(i0+8*K_old)+x(i0+15*K_old) z0_13 = x(i0+10*K_old)+x(i0+13*K_old) z0_14 = x(i0+9*K_old)+x(i0+14*K_old) z0_15 = x(i0+11*K_old)+x(i0+12*K_old) ! Start of internal OT(8) w0_0 = x(i0+4*K_old)-x(i0+7*K_old) w0_1 = x(i0+5*K_old)-x(i0+6*K_old) w0_2 = x(i0+4*K_old)+x(i0+7*K_old) w0_3 = x(i0+5*K_old)+x(i0+6*K_old) x0_16 = cos4*(x(i0+2*K_old)-x(i0+3*K_old)) x0_17 = cos4*(x(i0+2*K_old)+x(i0+3*K_old)) x0_18 = x(i0)+x0_16 x0_19 = x(i0)-x0_16 x0_20 = x0_17+x(i0+K_old) x0_21 = x0_17-x(i0+K_old) x0_16 = cos8*w0_0-sin8*w0_1 x0_17 = sin8*w0_0+cos8*w0_1 x0_22 = sin8*w0_2+cos8*w0_3 x0_23 = cos8*w0_2-sin8*w0_3 z0_4 = x0_18+x0_16 z0_5 = x0_19+x0_17 z0_6 = x0_19-x0_17 z0_7 = x0_18-x0_16 z0_8 = x0_22+x0_20 z0_9 = x0_23+x0_21 z0_10 = x0_23-x0_21 z0_11 = x0_22-x0_20 ! End of internal OT(8) w0_1 = cos4*(z0_1-z0_3) w0_3 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_1 w0_1 = z0_0-w0_1 w0_2 = w0_3+z0_2 w0_3 = w0_3-z0_2 z0_0 = cos16*w0_0-sin16*w0_2 z0_1 = cos316*w0_1-sin316*w0_3 z0_2 = sin316*w0_1+cos316*w0_3 z0_3 = sin16*w0_0+cos16*w0_2 w0_1 = cos4*(z0_13-z0_15) w0_3 = cos4*(z0_13+z0_15) w0_0 = z0_12+w0_1 w0_1 = z0_12-w0_1 w0_2 = w0_3+z0_14 w0_3 = w0_3-z0_14 z0_12 = sin16*w0_0+cos16*w0_2 z0_13 = sin316*w0_1+cos316*w0_3 z0_14 = cos316*w0_1-sin316*w0_3 z0_15 = cos16*w0_0-sin16*w0_2 y0_8 = z0_4+z0_0 y0_9 = z0_5+z0_1 y0_10 = z0_6+z0_2 y0_11 = z0_7+z0_3 y0_12 = z0_7-z0_3 y0_13 = z0_6-z0_2 y0_14 = z0_5-z0_1 y0_15 = z0_4-z0_0 y0_16 = z0_12+z0_8 y0_17 = z0_13+z0_9 y0_18 = z0_14+z0_10 y0_19 = z0_15+z0_11 y0_20 = z0_15-z0_11 y0_21 = z0_14-z0_10 y0_22 = z0_13-z0_9 y0_23 = z0_12-z0_8 ! End of internal OT(16) ! Start of internal SOT(8) w0_0 = y0_1-y0_7 w0_1 = y0_5-y0_3 w0_2 = y0_1+y0_7 w0_3 = y0_5+y0_3 x0_16 = cos4_s*(y0_2-y0_6) x0_17 = cos4_s*(y0_2+y0_6) x0_18 = y0_0+x0_16 x0_19 = y0_0-x0_16 x0_20 = x0_17+y0_4 x0_21 = x0_17-y0_4 x0_16 = w0_0-tan8*w0_1 x0_17 = tan8*w0_0+w0_1 x0_22 = tan8*w0_2+w0_3 x0_23 = w0_2-tan8*w0_3 z0_4 = x0_18+x0_16 z0_5 = x0_19+x0_17 z0_6 = x0_19-x0_17 z0_7 = x0_18-x0_16 z0_8 = x0_22+x0_20 z0_9 = x0_23+x0_21 z0_10 = x0_23-x0_21 z0_11 = x0_22-x0_20 ! End of internal SOT(8) y0_0 = cos32_s*z0_4-sin32_s*z0_8 y0_1 = cos332_s*z0_5-sin332_s*z0_9 y0_2 = cos532_s*z0_6-sin532_s*z0_10 y0_3 = cos732_s*z0_7-sin732_s*z0_11 y0_4 = sin732_s*z0_7+cos732_s*z0_11 y0_5 = sin532_s*z0_6+cos532_s*z0_10 y0_6 = sin332_s*z0_5+cos332_s*z0_9 y0_7 = sin32_s*z0_4+cos32_s*z0_8 ! Start of internal SOT(8) w0_0 = y0_25-y0_31 w0_1 = y0_29-y0_27 w0_2 = y0_25+y0_31 w0_3 = y0_29+y0_27 x0_16 = cos4_s*(y0_26-y0_30) x0_17 = cos4_s*(y0_26+y0_30) x0_18 = y0_24+x0_16 x0_19 = y0_24-x0_16 x0_20 = x0_17+y0_28 x0_21 = x0_17-y0_28 x0_16 = w0_0-tan8*w0_1 x0_17 = tan8*w0_0+w0_1 x0_22 = tan8*w0_2+w0_3 x0_23 = w0_2-tan8*w0_3 z0_4 = x0_18+x0_16 z0_5 = x0_19+x0_17 z0_6 = x0_19-x0_17 z0_7 = x0_18-x0_16 z0_8 = x0_22+x0_20 z0_9 = x0_23+x0_21 z0_10 = x0_23-x0_21 z0_11 = x0_22-x0_20 ! End of internal SOT(8) y0_24 = sin32_s*z0_4+cos32_s*z0_8 y0_25 = sin332_s*z0_5+cos332_s*z0_9 y0_26 = sin532_s*z0_6+cos532_s*z0_10 y0_27 = sin732_s*z0_7+cos732_s*z0_11 y0_28 = cos732_s*z0_7-sin732_s*z0_11 y0_29 = cos532_s*z0_6-sin532_s*z0_10 y0_30 = cos332_s*z0_5-sin332_s*z0_9 y0_31 = cos32_s*z0_4-sin32_s*z0_8 x0_16 = y0_8+y0_0 x0_17 = y0_9+y0_1 x0_18 = y0_10+y0_2 x0_19 = y0_11+y0_3 x0_20 = y0_12+y0_4 x0_21 = y0_13+y0_5 x0_22 = y0_14+y0_6 x0_23 = y0_15+y0_7 x0_24 = y0_15-y0_7 x0_25 = y0_14-y0_6 x0_26 = y0_13-y0_5 x0_27 = y0_12-y0_4 x0_28 = y0_11-y0_3 x0_29 = y0_10-y0_2 x0_30 = y0_9-y0_1 x0_31 = y0_8-y0_0 x0_32 = y0_24+y0_16 x0_33 = y0_25+y0_17 x0_34 = y0_26+y0_18 x0_35 = y0_27+y0_19 x0_36 = y0_28+y0_20 x0_37 = y0_29+y0_21 x0_38 = y0_30+y0_22 x0_39 = y0_31+y0_23 x0_40 = y0_31-y0_23 x0_41 = y0_30-y0_22 x0_42 = y0_29-y0_21 x0_43 = y0_28-y0_20 x0_44 = y0_27-y0_19 x0_45 = y0_26-y0_18 x0_46 = y0_25-y0_17 x0_47 = y0_24-y0_16 ! End of internal OT(32) ! Start of internal OT(16) (with scaling) z0_0 = x0_1-x0_15 z0_1 = x0_5-x0_11 z0_2 = x0_9-x0_7 z0_3 = x0_13-x0_3 z0_12 = x0_1+x0_15 z0_13 = x0_5+x0_11 z0_14 = x0_9+x0_7 z0_15 = x0_13+x0_3 ! Start of internal SOT(8) w0_0 = x0_2-x0_14 w0_1 = x0_10-x0_6 w0_2 = x0_2+x0_14 w0_3 = x0_10+x0_6 y0_16 = cos4_s*(x0_4-x0_12) y0_17 = cos4_s*(x0_4+x0_12) y0_18 = x0_0+y0_16 y0_19 = x0_0-y0_16 y0_20 = y0_17+x0_8 y0_21 = y0_17-x0_8 y0_16 = w0_0-tan8*w0_1 y0_17 = tan8*w0_0+w0_1 y0_22 = tan8*w0_2+w0_3 y0_23 = w0_2-tan8*w0_3 z0_4 = y0_18+y0_16 z0_5 = y0_19+y0_17 z0_6 = y0_19-y0_17 z0_7 = y0_18-y0_16 z0_8 = y0_22+y0_20 z0_9 = y0_23+y0_21 z0_10 = y0_23-y0_21 z0_11 = y0_22-y0_20 ! End of internal SOT(8) w0_1 = cos4*(z0_1-z0_3) w0_3 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_1 w0_1 = z0_0-w0_1 w0_2 = w0_3+z0_2 w0_3 = w0_3-z0_2 z0_0 = c16_s*w0_0-s16_s*w0_2 z0_1 = c316_s*w0_1-s316_s*w0_3 z0_2 = s316_s*w0_1+c316_s*w0_3 z0_3 = s16_s*w0_0+c16_s*w0_2 w0_1 = cos4*(z0_13-z0_15) w0_3 = cos4*(z0_13+z0_15) w0_0 = z0_12+w0_1 w0_1 = z0_12-w0_1 w0_2 = w0_3+z0_14 w0_3 = w0_3-z0_14 z0_12 = s16_s*w0_0+c16_s*w0_2 z0_13 = s316_s*w0_1+c316_s*w0_3 z0_14 = c316_s*w0_1-s316_s*w0_3 z0_15 = c16_s*w0_0-s16_s*w0_2 y0_8 = z0_4+z0_0 y0_9 = z0_5+z0_1 y0_10 = z0_6+z0_2 y0_11 = z0_7+z0_3 y0_12 = z0_7-z0_3 y0_13 = z0_6-z0_2 y0_14 = z0_5-z0_1 y0_15 = z0_4-z0_0 y0_16 = z0_12+z0_8 y0_17 = z0_13+z0_9 y0_18 = z0_14+z0_10 y0_19 = z0_15+z0_11 y0_20 = z0_15-z0_11 y0_21 = z0_14-z0_10 y0_22 = z0_13-z0_9 y0_23 = z0_12-z0_8 ! End of internal OT(16) (with scaling) x0_0 = cos64_s*y0_8-sin64_s*y0_16 x0_1 = cos364_s*y0_9-sin364_s*y0_17 x0_2 = cos564_s*y0_10-sin564_s*y0_18 x0_3 = cos764_s*y0_11-sin764_s*y0_19 x0_4 = cos964_s*y0_12-sin964_s*y0_20 x0_5 = cos1164_s*y0_13-sin1164_s*y0_21 x0_6 = cos1364_s*y0_14-sin1364_s*y0_22 x0_7 = cos1564_s*y0_15-sin1564_s*y0_23 x0_8 = sin1564_s*y0_15+cos1564_s*y0_23 x0_9 = sin1364_s*y0_14+cos1364_s*y0_22 x0_10 = sin1164_s*y0_13+cos1164_s*y0_21 x0_11 = sin964_s*y0_12+cos964_s*y0_20 x0_12 = sin764_s*y0_11+cos764_s*y0_19 x0_13 = sin564_s*y0_10+cos564_s*y0_18 x0_14 = sin364_s*y0_9+cos364_s*y0_17 x0_15 = sin64_s*y0_8+cos64_s*y0_16 ! Start of internal OT(16) (with scaling) z0_0 = x0_49-x0_63 z0_1 = x0_53-x0_59 z0_2 = x0_57-x0_55 z0_3 = x0_61-x0_51 z0_12 = x0_49+x0_63 z0_13 = x0_53+x0_59 z0_14 = x0_57+x0_55 z0_15 = x0_61+x0_51 ! Start of internal SOT(8) w0_0 = x0_50-x0_62 w0_1 = x0_58-x0_54 w0_2 = x0_50+x0_62 w0_3 = x0_58+x0_54 y0_16 = cos4_s*(x0_52-x0_60) y0_17 = cos4_s*(x0_52+x0_60) y0_18 = x0_48+y0_16 y0_19 = x0_48-y0_16 y0_20 = y0_17+x0_56 y0_21 = y0_17-x0_56 y0_16 = w0_0-tan8*w0_1 y0_17 = tan8*w0_0+w0_1 y0_22 = tan8*w0_2+w0_3 y0_23 = w0_2-tan8*w0_3 z0_4 = y0_18+y0_16 z0_5 = y0_19+y0_17 z0_6 = y0_19-y0_17 z0_7 = y0_18-y0_16 z0_8 = y0_22+y0_20 z0_9 = y0_23+y0_21 z0_10 = y0_23-y0_21 z0_11 = y0_22-y0_20 ! End of internal SOT(8) w0_1 = cos4*(z0_1-z0_3) w0_3 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_1 w0_1 = z0_0-w0_1 w0_2 = w0_3+z0_2 w0_3 = w0_3-z0_2 z0_0 = c16_s*w0_0-s16_s*w0_2 z0_1 = c316_s*w0_1-s316_s*w0_3 z0_2 = s316_s*w0_1+c316_s*w0_3 z0_3 = s16_s*w0_0+c16_s*w0_2 w0_1 = cos4*(z0_13-z0_15) w0_3 = cos4*(z0_13+z0_15) w0_0 = z0_12+w0_1 w0_1 = z0_12-w0_1 w0_2 = w0_3+z0_14 w0_3 = w0_3-z0_14 z0_12 = s16_s*w0_0+c16_s*w0_2 z0_13 = s316_s*w0_1+c316_s*w0_3 z0_14 = c316_s*w0_1-s316_s*w0_3 z0_15 = c16_s*w0_0-s16_s*w0_2 y0_8 = z0_4+z0_0 y0_9 = z0_5+z0_1 y0_10 = z0_6+z0_2 y0_11 = z0_7+z0_3 y0_12 = z0_7-z0_3 y0_13 = z0_6-z0_2 y0_14 = z0_5-z0_1 y0_15 = z0_4-z0_0 y0_16 = z0_12+z0_8 y0_17 = z0_13+z0_9 y0_18 = z0_14+z0_10 y0_19 = z0_15+z0_11 y0_20 = z0_15-z0_11 y0_21 = z0_14-z0_10 y0_22 = z0_13-z0_9 y0_23 = z0_12-z0_8 ! End of internal OT(16) (with scaling) x0_48 = sin64_s*y0_8+cos64_s*y0_16 x0_49 = sin364_s*y0_9+cos364_s*y0_17 x0_50 = sin564_s*y0_10+cos564_s*y0_18 x0_51 = sin764_s*y0_11+cos764_s*y0_19 x0_52 = sin964_s*y0_12+cos964_s*y0_20 x0_53 = sin1164_s*y0_13+cos1164_s*y0_21 x0_54 = sin1364_s*y0_14+cos1364_s*y0_22 x0_55 = sin1564_s*y0_15+cos1564_s*y0_23 x0_56 = cos1564_s*y0_15-sin1564_s*y0_23 x0_57 = cos1364_s*y0_14-sin1364_s*y0_22 x0_58 = cos1164_s*y0_13-sin1164_s*y0_21 x0_59 = cos964_s*y0_12-sin964_s*y0_20 x0_60 = cos764_s*y0_11-sin764_s*y0_19 x0_61 = cos564_s*y0_10-sin564_s*y0_18 x0_62 = cos364_s*y0_9-sin364_s*y0_17 x0_63 = cos64_s*y0_8-sin64_s*y0_16 x(i0) = x0_16+x0_0 x(i0+K_old) = x0_17+x0_1 x(i0+2*K_old) = x0_18+x0_2 x(i0+3*K_old) = x0_19+x0_3 x(i0+4*K_old) = x0_20+x0_4 x(i0+5*K_old) = x0_21+x0_5 x(i0+6*K_old) = x0_22+x0_6 x(i0+7*K_old) = x0_23+x0_7 x(i0+8*K_old) = x0_24+x0_8 x(i0+9*K_old) = x0_25+x0_9 x(i0+10*K_old) = x0_26+x0_10 x(i0+11*K_old) = x0_27+x0_11 x(i0+12*K_old) = x0_28+x0_12 x(i0+13*K_old) = x0_29+x0_13 x(i0+14*K_old) = x0_30+x0_14 x(i0+15*K_old) = x0_31+x0_15 x(i0+16*K_old) = x0_31-x0_15 x(i0+17*K_old) = x0_30-x0_14 x(i0+18*K_old) = x0_29-x0_13 x(i0+19*K_old) = x0_28-x0_12 x(i0+20*K_old) = x0_27-x0_11 x(i0+21*K_old) = x0_26-x0_10 x(i0+22*K_old) = x0_25-x0_9 x(i0+23*K_old) = x0_24-x0_8 x(i0+24*K_old) = x0_23-x0_7 x(i0+25*K_old) = x0_22-x0_6 x(i0+26*K_old) = x0_21-x0_5 x(i0+27*K_old) = x0_20-x0_4 x(i0+28*K_old) = x0_19-x0_3 x(i0+29*K_old) = x0_18-x0_2 x(i0+30*K_old) = x0_17-x0_1 x(i0+31*K_old) = x0_16-x0_0 x(i0+63*K_old) = x0_48+x0_32 x(i0+62*K_old) = x0_49+x0_33 x(i0+61*K_old) = x0_50+x0_34 x(i0+60*K_old) = x0_51+x0_35 x(i0+59*K_old) = x0_52+x0_36 x(i0+58*K_old) = x0_53+x0_37 x(i0+57*K_old) = x0_54+x0_38 x(i0+56*K_old) = x0_55+x0_39 x(i0+55*K_old) = x0_56+x0_40 x(i0+54*K_old) = x0_57+x0_41 x(i0+53*K_old) = x0_58+x0_42 x(i0+52*K_old) = x0_59+x0_43 x(i0+51*K_old) = x0_60+x0_44 x(i0+50*K_old) = x0_61+x0_45 x(i0+49*K_old) = x0_62+x0_46 x(i0+48*K_old) = x0_63+x0_47 x(i0+47*K_old) = x0_63-x0_47 x(i0+46*K_old) = x0_62-x0_46 x(i0+45*K_old) = x0_61-x0_45 x(i0+44*K_old) = x0_60-x0_44 x(i0+43*K_old) = x0_59-x0_43 x(i0+42*K_old) = x0_58-x0_42 x(i0+41*K_old) = x0_57-x0_41 x(i0+40*K_old) = x0_56-x0_40 x(i0+39*K_old) = x0_55-x0_39 x(i0+38*K_old) = x0_54-x0_38 x(i0+37*K_old) = x0_53-x0_37 x(i0+36*K_old) = x0_52-x0_36 x(i0+35*K_old) = x0_51-x0_35 x(i0+34*K_old) = x0_50-x0_34 x(i0+33*K_old) = x0_49-x0_33 x(i0+32*K_old) = x0_48-x0_32 end do if(K_old <= 2) return sin_org = 16*N_new*K_old ! k2 = general case do k2 = 1, K_old/2-1 k3 = K_old-k2 ! Look up phase factors C1 = cosines(N_new*k2) S1 = cosines(sin_org-N_new*k2) C2 = cosines(2*N_new*k2) S2 = cosines(sin_org-2*N_new*k2) C3 = cosines(3*N_new*k2) S3 = cosines(sin_org-3*N_new*k2) C4 = cosines(4*N_new*k2) S4 = cosines(sin_org-4*N_new*k2) C5 = cosines(5*N_new*k2) S5 = cosines(sin_org-5*N_new*k2) C6 = cosines(6*N_new*k2) S6 = cosines(sin_org-6*N_new*k2) C7 = cosines(7*N_new*k2) S7 = cosines(sin_org-7*N_new*k2) C8 = cosines(8*N_new*k2) S8 = cosines(sin_org-8*N_new*k2) C9 = cosines(9*N_new*k2) S9 = cosines(sin_org-9*N_new*k2) C10 = cosines(10*N_new*k2) S10 = cosines(sin_org-10*N_new*k2) C11 = cosines(11*N_new*k2) S11 = cosines(sin_org-11*N_new*k2) C12 = cosines(12*N_new*k2) S12 = cosines(sin_org-12*N_new*k2) C13 = cosines(13*N_new*k2) S13 = cosines(sin_org-13*N_new*k2) C14 = cosines(14*N_new*k2) S14 = cosines(sin_org-14*N_new*k2) C15 = cosines(15*N_new*k2) S15 = cosines(sin_org-15*N_new*k2) C16 = cosines(16*N_new*k2) S16 = cosines(sin_org-16*N_new*k2) C17 = cosines(17*N_new*k2) S17 = cosines(sin_org-17*N_new*k2) C18 = cosines(18*N_new*k2) S18 = cosines(sin_org-18*N_new*k2) C19 = cosines(19*N_new*k2) S19 = cosines(sin_org-19*N_new*k2) C20 = cosines(20*N_new*k2) S20 = cosines(sin_org-20*N_new*k2) C21 = cosines(21*N_new*k2) S21 = cosines(sin_org-21*N_new*k2) C22 = cosines(22*N_new*k2) S22 = cosines(sin_org-22*N_new*k2) C23 = cosines(23*N_new*k2) S23 = cosines(sin_org-23*N_new*k2) C24 = cosines(24*N_new*k2) S24 = cosines(sin_org-24*N_new*k2) C25 = cosines(25*N_new*k2) S25 = cosines(sin_org-25*N_new*k2) C26 = cosines(26*N_new*k2) S26 = cosines(sin_org-26*N_new*k2) C27 = cosines(27*N_new*k2) S27 = cosines(sin_org-27*N_new*k2) C28 = cosines(28*N_new*k2) S28 = cosines(sin_org-28*N_new*k2) C29 = cosines(29*N_new*k2) S29 = cosines(sin_org-29*N_new*k2) C30 = cosines(30*N_new*k2) S30 = cosines(sin_org-30*N_new*k2) C31 = cosines(31*N_new*k2) S31 = cosines(sin_org-31*N_new*k2) C32 = cosines(32*N_new*k2) S32 = cosines(sin_org-32*N_new*k2) C33 = cosines(33*N_new*k2) S33 = cosines(sin_org-33*N_new*k2) C34 = cosines(34*N_new*k2) S34 = cosines(sin_org-34*N_new*k2) C35 = cosines(35*N_new*k2) S35 = cosines(sin_org-35*N_new*k2) C36 = cosines(36*N_new*k2) S36 = cosines(sin_org-36*N_new*k2) C37 = cosines(37*N_new*k2) S37 = cosines(sin_org-37*N_new*k2) C38 = cosines(38*N_new*k2) S38 = cosines(sin_org-38*N_new*k2) C39 = cosines(39*N_new*k2) S39 = cosines(sin_org-39*N_new*k2) C40 = cosines(40*N_new*k2) S40 = cosines(sin_org-40*N_new*k2) C41 = cosines(41*N_new*k2) S41 = cosines(sin_org-41*N_new*k2) C42 = cosines(42*N_new*k2) S42 = cosines(sin_org-42*N_new*k2) C43 = cosines(43*N_new*k2) S43 = cosines(sin_org-43*N_new*k2) C44 = cosines(44*N_new*k2) S44 = cosines(sin_org-44*N_new*k2) C45 = cosines(45*N_new*k2) S45 = cosines(sin_org-45*N_new*k2) C46 = cosines(46*N_new*k2) S46 = cosines(sin_org-46*N_new*k2) C47 = cosines(47*N_new*k2) S47 = cosines(sin_org-47*N_new*k2) C48 = cosines(48*N_new*k2) S48 = cosines(sin_org-48*N_new*k2) C49 = cosines(49*N_new*k2) S49 = cosines(sin_org-49*N_new*k2) C50 = cosines(50*N_new*k2) S50 = cosines(sin_org-50*N_new*k2) C51 = cosines(51*N_new*k2) S51 = cosines(sin_org-51*N_new*k2) C52 = cosines(52*N_new*k2) S52 = cosines(sin_org-52*N_new*k2) C53 = cosines(53*N_new*k2) S53 = cosines(sin_org-53*N_new*k2) C54 = cosines(54*N_new*k2) S54 = cosines(sin_org-54*N_new*k2) C55 = cosines(55*N_new*k2) S55 = cosines(sin_org-55*N_new*k2) C56 = cosines(56*N_new*k2) S56 = cosines(sin_org-56*N_new*k2) C57 = cosines(57*N_new*k2) S57 = cosines(sin_org-57*N_new*k2) C58 = cosines(58*N_new*k2) S58 = cosines(sin_org-58*N_new*k2) C59 = cosines(59*N_new*k2) S59 = cosines(sin_org-59*N_new*k2) C60 = cosines(60*N_new*k2) S60 = cosines(sin_org-60*N_new*k2) C61 = cosines(61*N_new*k2) S61 = cosines(sin_org-61*N_new*k2) C62 = cosines(62*N_new*k2) S62 = cosines(sin_org-62*N_new*k2) C63 = cosines(63*N_new*k2) S63 = cosines(sin_org-63*N_new*k2) do n1 = 0, N_new-1 i0 = 64*K_old*n1+k2 i2 = 64*K_old*n1+k3 ! Apply phase factors, mixed with first addition in RF(64) y0_0 = C32*x(i0+K_old)-S32*x(i2+K_old) y1_0 = S32*x(i0+K_old)+C32*x(i2+K_old) x0_0 = x(i0)+y0_0 x0_32 = x(i0)-y0_0 x1_0 = x(i2)+y1_0 x1_32 = x(i2)-y1_0 y0_0 = C1*x(i0+32*K_old)-S1*x(i2+32*K_old) y1_0 = S1*x(i0+32*K_old)+C1*x(i2+32*K_old) y0_1 = C33*x(i0+33*K_old)-S33*x(i2+33*K_old) y1_1 = S33*x(i0+33*K_old)+C33*x(i2+33*K_old) x0_1 = y0_0+y0_1 x0_33 = y0_0-y0_1 x1_1 = y1_0+y1_1 x1_33 = y1_0-y1_1 y0_0 = C2*x(i0+16*K_old)-S2*x(i2+16*K_old) y1_0 = S2*x(i0+16*K_old)+C2*x(i2+16*K_old) y0_1 = C34*x(i0+17*K_old)-S34*x(i2+17*K_old) y1_1 = S34*x(i0+17*K_old)+C34*x(i2+17*K_old) x0_2 = y0_0+y0_1 x0_34 = y0_0-y0_1 x1_2 = y1_0+y1_1 x1_34 = y1_0-y1_1 y0_0 = C3*x(i0+48*K_old)-S3*x(i2+48*K_old) y1_0 = S3*x(i0+48*K_old)+C3*x(i2+48*K_old) y0_1 = C35*x(i0+49*K_old)-S35*x(i2+49*K_old) y1_1 = S35*x(i0+49*K_old)+C35*x(i2+49*K_old) x0_3 = y0_0+y0_1 x0_35 = y0_0-y0_1 x1_3 = y1_0+y1_1 x1_35 = y1_0-y1_1 y0_0 = C4*x(i0+8*K_old)-S4*x(i2+8*K_old) y1_0 = S4*x(i0+8*K_old)+C4*x(i2+8*K_old) y0_1 = C36*x(i0+9*K_old)-S36*x(i2+9*K_old) y1_1 = S36*x(i0+9*K_old)+C36*x(i2+9*K_old) x0_4 = y0_0+y0_1 x0_36 = y0_0-y0_1 x1_4 = y1_0+y1_1 x1_36 = y1_0-y1_1 y0_0 = C5*x(i0+40*K_old)-S5*x(i2+40*K_old) y1_0 = S5*x(i0+40*K_old)+C5*x(i2+40*K_old) y0_1 = C37*x(i0+41*K_old)-S37*x(i2+41*K_old) y1_1 = S37*x(i0+41*K_old)+C37*x(i2+41*K_old) x0_5 = y0_0+y0_1 x0_37 = y0_0-y0_1 x1_5 = y1_0+y1_1 x1_37 = y1_0-y1_1 y0_0 = C6*x(i0+24*K_old)-S6*x(i2+24*K_old) y1_0 = S6*x(i0+24*K_old)+C6*x(i2+24*K_old) y0_1 = C38*x(i0+25*K_old)-S38*x(i2+25*K_old) y1_1 = S38*x(i0+25*K_old)+C38*x(i2+25*K_old) x0_6 = y0_0+y0_1 x0_38 = y0_0-y0_1 x1_6 = y1_0+y1_1 x1_38 = y1_0-y1_1 y0_0 = C7*x(i0+56*K_old)-S7*x(i2+56*K_old) y1_0 = S7*x(i0+56*K_old)+C7*x(i2+56*K_old) y0_1 = C39*x(i0+57*K_old)-S39*x(i2+57*K_old) y1_1 = S39*x(i0+57*K_old)+C39*x(i2+57*K_old) x0_7 = y0_0+y0_1 x0_39 = y0_0-y0_1 x1_7 = y1_0+y1_1 x1_39 = y1_0-y1_1 y0_0 = C8*x(i0+4*K_old)-S8*x(i2+4*K_old) y1_0 = S8*x(i0+4*K_old)+C8*x(i2+4*K_old) y0_1 = C40*x(i0+5*K_old)-S40*x(i2+5*K_old) y1_1 = S40*x(i0+5*K_old)+C40*x(i2+5*K_old) x0_8 = y0_0+y0_1 x0_40 = y0_0-y0_1 x1_8 = y1_0+y1_1 x1_40 = y1_0-y1_1 y0_0 = C9*x(i0+36*K_old)-S9*x(i2+36*K_old) y1_0 = S9*x(i0+36*K_old)+C9*x(i2+36*K_old) y0_1 = C41*x(i0+37*K_old)-S41*x(i2+37*K_old) y1_1 = S41*x(i0+37*K_old)+C41*x(i2+37*K_old) x0_9 = y0_0+y0_1 x0_41 = y0_0-y0_1 x1_9 = y1_0+y1_1 x1_41 = y1_0-y1_1 y0_0 = C10*x(i0+20*K_old)-S10*x(i2+20*K_old) y1_0 = S10*x(i0+20*K_old)+C10*x(i2+20*K_old) y0_1 = C42*x(i0+21*K_old)-S42*x(i2+21*K_old) y1_1 = S42*x(i0+21*K_old)+C42*x(i2+21*K_old) x0_10 = y0_0+y0_1 x0_42 = y0_0-y0_1 x1_10 = y1_0+y1_1 x1_42 = y1_0-y1_1 y0_0 = C11*x(i0+52*K_old)-S11*x(i2+52*K_old) y1_0 = S11*x(i0+52*K_old)+C11*x(i2+52*K_old) y0_1 = C43*x(i0+53*K_old)-S43*x(i2+53*K_old) y1_1 = S43*x(i0+53*K_old)+C43*x(i2+53*K_old) x0_11 = y0_0+y0_1 x0_43 = y0_0-y0_1 x1_11 = y1_0+y1_1 x1_43 = y1_0-y1_1 y0_0 = C12*x(i0+12*K_old)-S12*x(i2+12*K_old) y1_0 = S12*x(i0+12*K_old)+C12*x(i2+12*K_old) y0_1 = C44*x(i0+13*K_old)-S44*x(i2+13*K_old) y1_1 = S44*x(i0+13*K_old)+C44*x(i2+13*K_old) x0_12 = y0_0+y0_1 x0_44 = y0_0-y0_1 x1_12 = y1_0+y1_1 x1_44 = y1_0-y1_1 y0_0 = C13*x(i0+44*K_old)-S13*x(i2+44*K_old) y1_0 = S13*x(i0+44*K_old)+C13*x(i2+44*K_old) y0_1 = C45*x(i0+45*K_old)-S45*x(i2+45*K_old) y1_1 = S45*x(i0+45*K_old)+C45*x(i2+45*K_old) x0_13 = y0_0+y0_1 x0_45 = y0_0-y0_1 x1_13 = y1_0+y1_1 x1_45 = y1_0-y1_1 y0_0 = C14*x(i0+28*K_old)-S14*x(i2+28*K_old) y1_0 = S14*x(i0+28*K_old)+C14*x(i2+28*K_old) y0_1 = C46*x(i0+29*K_old)-S46*x(i2+29*K_old) y1_1 = S46*x(i0+29*K_old)+C46*x(i2+29*K_old) x0_14 = y0_0+y0_1 x0_46 = y0_0-y0_1 x1_14 = y1_0+y1_1 x1_46 = y1_0-y1_1 y0_0 = C15*x(i0+60*K_old)-S15*x(i2+60*K_old) y1_0 = S15*x(i0+60*K_old)+C15*x(i2+60*K_old) y0_1 = C47*x(i0+61*K_old)-S47*x(i2+61*K_old) y1_1 = S47*x(i0+61*K_old)+C47*x(i2+61*K_old) x0_15 = y0_0+y0_1 x0_47 = y0_0-y0_1 x1_15 = y1_0+y1_1 x1_47 = y1_0-y1_1 y0_0 = C16*x(i0+2*K_old)-S16*x(i2+2*K_old) y1_0 = S16*x(i0+2*K_old)+C16*x(i2+2*K_old) y0_1 = C48*x(i0+3*K_old)-S48*x(i2+3*K_old) y1_1 = S48*x(i0+3*K_old)+C48*x(i2+3*K_old) x0_16 = y0_0+y0_1 x0_48 = y0_0-y0_1 x1_16 = y1_0+y1_1 x1_48 = y1_0-y1_1 y0_0 = C17*x(i0+34*K_old)-S17*x(i2+34*K_old) y1_0 = S17*x(i0+34*K_old)+C17*x(i2+34*K_old) y0_1 = C49*x(i0+35*K_old)-S49*x(i2+35*K_old) y1_1 = S49*x(i0+35*K_old)+C49*x(i2+35*K_old) x0_17 = y0_0+y0_1 x0_49 = y0_0-y0_1 x1_17 = y1_0+y1_1 x1_49 = y1_0-y1_1 y0_0 = C18*x(i0+18*K_old)-S18*x(i2+18*K_old) y1_0 = S18*x(i0+18*K_old)+C18*x(i2+18*K_old) y0_1 = C50*x(i0+19*K_old)-S50*x(i2+19*K_old) y1_1 = S50*x(i0+19*K_old)+C50*x(i2+19*K_old) x0_18 = y0_0+y0_1 x0_50 = y0_0-y0_1 x1_18 = y1_0+y1_1 x1_50 = y1_0-y1_1 y0_0 = C19*x(i0+50*K_old)-S19*x(i2+50*K_old) y1_0 = S19*x(i0+50*K_old)+C19*x(i2+50*K_old) y0_1 = C51*x(i0+51*K_old)-S51*x(i2+51*K_old) y1_1 = S51*x(i0+51*K_old)+C51*x(i2+51*K_old) x0_19 = y0_0+y0_1 x0_51 = y0_0-y0_1 x1_19 = y1_0+y1_1 x1_51 = y1_0-y1_1 y0_0 = C20*x(i0+10*K_old)-S20*x(i2+10*K_old) y1_0 = S20*x(i0+10*K_old)+C20*x(i2+10*K_old) y0_1 = C52*x(i0+11*K_old)-S52*x(i2+11*K_old) y1_1 = S52*x(i0+11*K_old)+C52*x(i2+11*K_old) x0_20 = y0_0+y0_1 x0_52 = y0_0-y0_1 x1_20 = y1_0+y1_1 x1_52 = y1_0-y1_1 y0_0 = C21*x(i0+42*K_old)-S21*x(i2+42*K_old) y1_0 = S21*x(i0+42*K_old)+C21*x(i2+42*K_old) y0_1 = C53*x(i0+43*K_old)-S53*x(i2+43*K_old) y1_1 = S53*x(i0+43*K_old)+C53*x(i2+43*K_old) x0_21 = y0_0+y0_1 x0_53 = y0_0-y0_1 x1_21 = y1_0+y1_1 x1_53 = y1_0-y1_1 y0_0 = C22*x(i0+26*K_old)-S22*x(i2+26*K_old) y1_0 = S22*x(i0+26*K_old)+C22*x(i2+26*K_old) y0_1 = C54*x(i0+27*K_old)-S54*x(i2+27*K_old) y1_1 = S54*x(i0+27*K_old)+C54*x(i2+27*K_old) x0_22 = y0_0+y0_1 x0_54 = y0_0-y0_1 x1_22 = y1_0+y1_1 x1_54 = y1_0-y1_1 y0_0 = C23*x(i0+58*K_old)-S23*x(i2+58*K_old) y1_0 = S23*x(i0+58*K_old)+C23*x(i2+58*K_old) y0_1 = C55*x(i0+59*K_old)-S55*x(i2+59*K_old) y1_1 = S55*x(i0+59*K_old)+C55*x(i2+59*K_old) x0_23 = y0_0+y0_1 x0_55 = y0_0-y0_1 x1_23 = y1_0+y1_1 x1_55 = y1_0-y1_1 y0_0 = C24*x(i0+6*K_old)-S24*x(i2+6*K_old) y1_0 = S24*x(i0+6*K_old)+C24*x(i2+6*K_old) y0_1 = C56*x(i0+7*K_old)-S56*x(i2+7*K_old) y1_1 = S56*x(i0+7*K_old)+C56*x(i2+7*K_old) x0_24 = y0_0+y0_1 x0_56 = y0_0-y0_1 x1_24 = y1_0+y1_1 x1_56 = y1_0-y1_1 y0_0 = C25*x(i0+38*K_old)-S25*x(i2+38*K_old) y1_0 = S25*x(i0+38*K_old)+C25*x(i2+38*K_old) y0_1 = C57*x(i0+39*K_old)-S57*x(i2+39*K_old) y1_1 = S57*x(i0+39*K_old)+C57*x(i2+39*K_old) x0_25 = y0_0+y0_1 x0_57 = y0_0-y0_1 x1_25 = y1_0+y1_1 x1_57 = y1_0-y1_1 y0_0 = C26*x(i0+22*K_old)-S26*x(i2+22*K_old) y1_0 = S26*x(i0+22*K_old)+C26*x(i2+22*K_old) y0_1 = C58*x(i0+23*K_old)-S58*x(i2+23*K_old) y1_1 = S58*x(i0+23*K_old)+C58*x(i2+23*K_old) x0_26 = y0_0+y0_1 x0_58 = y0_0-y0_1 x1_26 = y1_0+y1_1 x1_58 = y1_0-y1_1 y0_0 = C27*x(i0+54*K_old)-S27*x(i2+54*K_old) y1_0 = S27*x(i0+54*K_old)+C27*x(i2+54*K_old) y0_1 = C59*x(i0+55*K_old)-S59*x(i2+55*K_old) y1_1 = S59*x(i0+55*K_old)+C59*x(i2+55*K_old) x0_27 = y0_0+y0_1 x0_59 = y0_0-y0_1 x1_27 = y1_0+y1_1 x1_59 = y1_0-y1_1 y0_0 = C28*x(i0+14*K_old)-S28*x(i2+14*K_old) y1_0 = S28*x(i0+14*K_old)+C28*x(i2+14*K_old) y0_1 = C60*x(i0+15*K_old)-S60*x(i2+15*K_old) y1_1 = S60*x(i0+15*K_old)+C60*x(i2+15*K_old) x0_28 = y0_0+y0_1 x0_60 = y0_0-y0_1 x1_28 = y1_0+y1_1 x1_60 = y1_0-y1_1 y0_0 = C29*x(i0+46*K_old)-S29*x(i2+46*K_old) y1_0 = S29*x(i0+46*K_old)+C29*x(i2+46*K_old) y0_1 = C61*x(i0+47*K_old)-S61*x(i2+47*K_old) y1_1 = S61*x(i0+47*K_old)+C61*x(i2+47*K_old) x0_29 = y0_0+y0_1 x0_61 = y0_0-y0_1 x1_29 = y1_0+y1_1 x1_61 = y1_0-y1_1 y0_0 = C30*x(i0+30*K_old)-S30*x(i2+30*K_old) y1_0 = S30*x(i0+30*K_old)+C30*x(i2+30*K_old) y0_1 = C62*x(i0+31*K_old)-S62*x(i2+31*K_old) y1_1 = S62*x(i0+31*K_old)+C62*x(i2+31*K_old) x0_30 = y0_0+y0_1 x0_62 = y0_0-y0_1 x1_30 = y1_0+y1_1 x1_62 = y1_0-y1_1 y0_0 = C31*x(i0+62*K_old)-S31*x(i2+62*K_old) y1_0 = S31*x(i0+62*K_old)+C31*x(i2+62*K_old) y0_1 = C63*x(i0+63*K_old)-S63*x(i2+63*K_old) y1_1 = S63*x(i0+63*K_old)+C63*x(i2+63*K_old) x0_31 = y0_0+y0_1 x0_63 = y0_0-y0_1 x1_31 = y1_0+y1_1 x1_63 = y1_0-y1_1 ! Complete RF(64), mixed with trig identity additions ! Start of internal RF(32) -- first set y0_0 = x0_0+x0_16 y0_1 = x0_1+x0_17 y0_2 = x0_2+x0_18 y0_3 = x0_3+x0_19 y0_4 = x0_4+x0_20 y0_5 = x0_5+x0_21 y0_6 = x0_6+x0_22 y0_7 = x0_7+x0_23 y0_8 = x0_8+x0_24 y0_9 = x0_9+x0_25 y0_10 = x0_10+x0_26 y0_11 = x0_11+x0_27 y0_12 = x0_12+x0_28 y0_13 = x0_13+x0_29 y0_14 = x0_14+x0_30 y0_15 = x0_15+x0_31 y0_16 = x0_0-x0_16 y0_17 = x0_1-x0_17 y0_18 = x0_2-x0_18 y0_19 = x0_3-x0_19 y0_20 = x0_4-x0_20 y0_21 = x0_5-x0_21 y0_22 = x0_6-x0_22 y0_23 = x0_7-x0_23 y0_24 = x0_8-x0_24 y0_25 = x0_9-x0_25 y0_26 = x0_10-x0_26 y0_27 = x0_11-x0_27 y0_28 = x0_12-x0_28 y0_29 = x0_13-x0_29 y0_30 = x0_14-x0_30 y0_31 = x0_15-x0_31 ! Start of internal RF(16) -- first set z0_0 = y0_0+y0_8 z0_1 = y0_1+y0_9 z0_2 = y0_2+y0_10 z0_3 = y0_3+y0_11 z0_4 = y0_4+y0_12 z0_5 = y0_5+y0_13 z0_6 = y0_6+y0_14 z0_7 = y0_7+y0_15 z0_8 = y0_0-y0_8 z0_9 = y0_1-y0_9 z0_10 = y0_2-y0_10 z0_11 = y0_3-y0_11 z0_12 = y0_4-y0_12 z0_13 = y0_5-y0_13 z0_14 = y0_6-y0_14 z0_15 = y0_7-y0_15 ! Start internal RF(8) -- first set w0_0 = z0_0+z0_4 w0_1 = z0_1+z0_5 w0_2 = z0_2+z0_6 w0_3 = z0_3+z0_7 w0_4 = z0_0-z0_4 w0_5 = z0_1-z0_5 w0_6 = z0_2-z0_6 w0_7 = z0_3-z0_7 ! Start internal RF(32) -- second set y1_0 = x1_0+x1_16 y1_1 = x1_1+x1_17 y1_2 = x1_2+x1_18 y1_3 = x1_3+x1_19 y1_4 = x1_4+x1_20 y1_5 = x1_5+x1_21 y1_6 = x1_6+x1_22 y1_7 = x1_7+x1_23 y1_8 = x1_8+x1_24 y1_9 = x1_9+x1_25 y1_10 = x1_10+x1_26 y1_11 = x1_11+x1_27 y1_12 = x1_12+x1_28 y1_13 = x1_13+x1_29 y1_14 = x1_14+x1_30 y1_15 = x1_15+x1_31 y1_16 = x1_0-x1_16 y1_17 = x1_1-x1_17 y1_18 = x1_2-x1_18 y1_19 = x1_3-x1_19 y1_20 = x1_4-x1_20 y1_21 = x1_5-x1_21 y1_22 = x1_6-x1_22 y1_23 = x1_7-x1_23 y1_24 = x1_8-x1_24 y1_25 = x1_9-x1_25 y1_26 = x1_10-x1_26 y1_27 = x1_11-x1_27 y1_28 = x1_12-x1_28 y1_29 = x1_13-x1_29 y1_30 = x1_14-x1_30 y1_31 = x1_15-x1_31 ! Start of internal RF(16) -- second set z1_0 = y1_0+y1_8 z1_1 = y1_1+y1_9 z1_2 = y1_2+y1_10 z1_3 = y1_3+y1_11 z1_4 = y1_4+y1_12 z1_5 = y1_5+y1_13 z1_6 = y1_6+y1_14 z1_7 = y1_7+y1_15 z1_8 = y1_0-y1_8 z1_9 = y1_1-y1_9 z1_10 = y1_2-y1_10 z1_11 = y1_3-y1_11 z1_12 = y1_4-y1_12 z1_13 = y1_5-y1_13 z1_14 = y1_6-y1_14 z1_15 = y1_7-y1_15 ! Start of internal RF(8) -- second set w1_0 = z1_0+z1_4 w1_1 = z1_1+z1_5 w1_2 = z1_2+z1_6 w1_3 = z1_3+z1_7 w1_4 = z1_0-z1_4 w1_5 = z1_1-z1_5 w1_6 = z1_2-z1_6 w1_7 = z1_3-z1_7 z0_0 = w0_0-w0_2 ! gcc(16) z0_1 = w0_1-w0_3 ! gsc(16) z1_0 = w1_0-w1_2 ! gcs(16) z1_1 = w1_1-w1_3 ! gss(16) x(i0+16*K_old) = z0_0-z1_1 x(i2+47*K_old) = z0_1+z1_0 x(i2+15*K_old) = z0_0+z1_1 x(i0+48*K_old) = z0_1-z1_0 w0_0 = w0_0+w0_2 w0_1 = w0_1+w0_3 w1_0 = w1_0+w1_2 w1_1 = w1_1+w1_3 x(i0) = w0_0+w0_1 ! gcc(0) x(i2+31*K_old) = w0_0-w0_1 ! gcc(32) x(i2+63*K_old) = w1_0+w1_1 ! gcs(0) x(i0+32*K_old) = w1_1-w1_0 ! gcs(32) w0_0 = cos4*(w0_5-w0_7) w0_1 = cos4*(w0_5+w0_7) w1_0 = cos4*(w1_5-w1_7) w1_1 = cos4*(w1_5+w1_7) z0_0 = w0_4+w0_0 ! gcc(8) z0_1 = w0_4-w0_0 ! gcc(24) z0_2 = w0_1+w0_6 ! gsc(8) z0_3 = w0_1-w0_6 ! gsc(24) ! End of internal RF(8) -- first set z1_0 = w1_4+w1_0 ! gcs(8) z1_1 = w1_4-w1_0 ! gcs(24) z1_2 = w1_1+w1_6 ! gss(8) z1_3 = w1_1-w1_6 ! gss(24) ! End of internal RF(8) -- second set x(i0+8*K_old) = z0_0-z1_2 x(i0+24*K_old) = z0_1-z1_3 x(i2+55*K_old) = z0_2+z1_0 x(i2+39*K_old) = z0_3+z1_1 x(i2+7*K_old) = z0_0+z1_2 x(i2+23*K_old) = z0_1+z1_3 x(i0+56*K_old) = z0_2-z1_0 x(i0+40*K_old) = z0_3-z1_1 ! Start of internal OT(8) -- first set w0_0 = z0_9-z0_15 w0_1 = z0_13-z0_11 w0_2 = z0_9+z0_15 w0_3 = z0_13+z0_11 z0_0 = cos4*(z0_10-z0_14) z0_1 = cos4*(z0_10+z0_14) z0_2 = z0_8+z0_0 z0_3 = z0_8-z0_0 z0_4 = z0_1+z0_12 z0_5 = z0_1-z0_12 z0_0 = cos8*w0_0-sin8*w0_1 z0_1 = sin8*w0_0+cos8*w0_1 z0_6 = sin8*w0_2+cos8*w0_3 z0_7 = cos8*w0_2-sin8*w0_3 ! Start of internal OT(8) -- second set w1_0 = z1_9-z1_15 w1_1 = z1_13-z1_11 w1_2 = z1_9+z1_15 w1_3 = z1_13+z1_11 z1_0 = cos4*(z1_10-z1_14) z1_1 = cos4*(z1_10+z1_14) z1_2 = z1_8+z1_0 z1_3 = z1_8-z1_0 z1_4 = z1_1+z1_12 z1_5 = z1_1-z1_12 z1_0 = cos8*w1_0-sin8*w1_1 z1_1 = sin8*w1_0+cos8*w1_1 z1_6 = sin8*w1_2+cos8*w1_3 z1_7 = cos8*w1_2-sin8*w1_3 w0_0 = z0_2+z0_0 ! gcc(4) w0_1 = z0_3+z0_1 ! gcc(12) w0_2 = z0_3-z0_1 ! gcc(20) w0_3 = z0_2-z0_0 ! gcc(28) w0_4 = z0_6+z0_4 ! gsc(4) w0_5 = z0_7+z0_5 ! gsc(12) w0_6 = z0_7-z0_5 ! gsc(20) w0_7 = z0_6-z0_4 ! gsc(28) ! End of internal OT(8) -- first set ! End of internal RF(16) -- first set w1_0 = z1_2+z1_0 ! gcs(4) w1_1 = z1_3+z1_1 ! gcs(12) w1_2 = z1_3-z1_1 ! gcs(20) w1_3 = z1_2-z1_0 ! gcs(28) w1_4 = z1_6+z1_4 ! gss(4) w1_5 = z1_7+z1_5 ! gss(12) w1_6 = z1_7-z1_5 ! gss(20) w1_7 = z1_6-z1_4 ! gss(28) ! End of internal OT(8) -- second set ! End of internal RF(16) -- second set x(i0+4*K_old) = w0_0-w1_4 x(i0+12*K_old) = w0_1-w1_5 x(i0+20*K_old) = w0_2-w1_6 x(i0+28*K_old) = w0_3-w1_7 x(i2+59*K_old) = w0_4+w1_0 x(i2+51*K_old) = w0_5+w1_1 x(i2+43*K_old) = w0_6+w1_2 x(i2+35*K_old) = w0_7+w1_3 x(i2+3*K_old) = w0_0+w1_4 x(i2+11*K_old) = w0_1+w1_5 x(i2+19*K_old) = w0_2+w1_6 x(i2+27*K_old) = w0_3+w1_7 x(i0+60*K_old) = w0_4-w1_0 x(i0+52*K_old) = w0_5-w1_1 x(i0+44*K_old) = w0_6-w1_2 x(i0+36*K_old) = w0_7-w1_3 ! Start of internal OT(16) -- first set z0_0 = y0_17-y0_31 z0_1 = y0_21-y0_27 z0_2 = y0_25-y0_23 z0_3 = y0_29-y0_19 z0_4 = y0_17+y0_31 z0_5 = y0_21+y0_27 z0_6 = y0_25+y0_23 z0_7 = y0_29+y0_19 ! Start of internal OT(8) -- first set w0_0 = y0_18-y0_30 w0_1 = y0_26-y0_22 w0_2 = y0_18+y0_30 w0_3 = y0_26+y0_22 z0_8 = cos4*(y0_20-y0_28) z0_9 = cos4*(y0_20+y0_28) z0_10 = y0_16+z0_8 z0_11 = y0_16-z0_8 z0_12 = z0_9+y0_24 z0_13 = z0_9-y0_24 z0_8 = cos8*w0_0-sin8*w0_1 z0_9 = sin8*w0_0+cos8*w0_1 z0_14 = sin8*w0_2+cos8*w0_3 z0_15 = cos8*w0_2-sin8*w0_3 y0_4 = z0_10+z0_8 y0_5 = z0_11+z0_9 y0_6 = z0_11-z0_9 y0_7 = z0_10-z0_8 y0_8 = z0_14+z0_12 y0_9 = z0_15+z0_13 y0_10 = z0_15-z0_13 y0_11 = z0_14-z0_12 ! End of internal OT(8) -- first set w0_4 = cos4*(z0_1-z0_3) w0_5 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_4 w0_1 = z0_0-w0_4 w0_2 = w0_5+z0_2 w0_3 = w0_5-z0_2 y0_0 = cos16*w0_0-sin16*w0_2 y0_1 = cos316*w0_1-sin316*w0_3 y0_2 = sin316*w0_1+cos316*w0_3 y0_3 = sin16*w0_0+cos16*w0_2 w0_4 = cos4*(z0_5-z0_7) w0_5 = cos4*(z0_5+z0_7) w0_0 = z0_4+w0_4 w0_1 = z0_4-w0_4 w0_2 = w0_5+z0_6 w0_3 = w0_5-z0_6 y0_12 = sin16*w0_0+cos16*w0_2 y0_13 = sin316*w0_1+cos316*w0_3 y0_14 = cos316*w0_1-sin316*w0_3 y0_15 = cos16*w0_0-sin16*w0_2 ! Start of internal OT(16) -- second set z1_0 = y1_17-y1_31 z1_1 = y1_21-y1_27 z1_2 = y1_25-y1_23 z1_3 = y1_29-y1_19 z1_4 = y1_17+y1_31 z1_5 = y1_21+y1_27 z1_6 = y1_25+y1_23 z1_7 = y1_29+y1_19 ! Start of internal OT(8) -- second set w1_0 = y1_18-y1_30 w1_1 = y1_26-y1_22 w1_2 = y1_18+y1_30 w1_3 = y1_26+y1_22 z1_8 = cos4*(y1_20-y1_28) z1_9 = cos4*(y1_20+y1_28) z1_10 = y1_16+z1_8 z1_11 = y1_16-z1_8 z1_12 = z1_9+y1_24 z1_13 = z1_9-y1_24 z1_8 = cos8*w1_0-sin8*w1_1 z1_9 = sin8*w1_0+cos8*w1_1 z1_14 = sin8*w1_2+cos8*w1_3 z1_15 = cos8*w1_2-sin8*w1_3 y1_4 = z1_10+z1_8 y1_5 = z1_11+z1_9 y1_6 = z1_11-z1_9 y1_7 = z1_10-z1_8 y1_8 = z1_14+z1_12 y1_9 = z1_15+z1_13 y1_10 = z1_15-z1_13 y1_11 = z1_14-z1_12 ! End of internal OT(8) -- second set w1_4 = cos4*(z1_1-z1_3) w1_5 = cos4*(z1_1+z1_3) w1_0 = z1_0+w1_4 w1_1 = z1_0-w1_4 w1_2 = w1_5+z1_2 w1_3 = w1_5-z1_2 y1_0 = cos16*w1_0-sin16*w1_2 y1_1 = cos316*w1_1-sin316*w1_3 y1_2 = sin316*w1_1+cos316*w1_3 y1_3 = sin16*w1_0+cos16*w1_2 w1_4 = cos4*(z1_5-z1_7) w1_5 = cos4*(z1_5+z1_7) w1_0 = z1_4+w1_4 w1_1 = z1_4-w1_4 w1_2 = w1_5+z1_6 w1_3 = w1_5-z1_6 y1_12 = sin16*w1_0+cos16*w1_2 y1_13 = sin316*w1_1+cos316*w1_3 y1_14 = cos316*w1_1-sin316*w1_3 y1_15 = cos16*w1_0-sin16*w1_2 z0_0 = y0_4+y0_0 ! gcc(2) z0_1 = y0_5+y0_1 ! gcc(6) z0_2 = y0_6+y0_2 ! gcc(10) z0_3 = y0_7+y0_3 ! gcc(14) z0_4 = y0_7-y0_3 ! gcc(18) z0_5 = y0_6-y0_2 ! gcc(22) z0_6 = y0_5-y0_1 ! gcc(26) z0_7 = y0_4-y0_0 ! gcc(30) z0_8 = y0_12+y0_8 ! gsc(2) z0_9 = y0_13+y0_9 ! gsc(6) z0_10 = y0_14+y0_10 ! gsc(10) z0_11 = y0_15+y0_11 ! gsc(14) z0_12 = y0_15-y0_11 ! gsc(18) z0_13 = y0_14-y0_10 ! gsc(22) z0_14 = y0_13-y0_9 ! gsc(26) z0_15 = y0_12-y0_8 ! gsc(30) ! End of internal OT(16) -- first set ! End of internal RF(32) -- first set z1_0 = y1_4+y1_0 ! gcs(2) z1_1 = y1_5+y1_1 ! gcs(6) z1_2 = y1_6+y1_2 ! gcs(10) z1_3 = y1_7+y1_3 ! gcs(14) z1_4 = y1_7-y1_3 ! gcs(18) z1_5 = y1_6-y1_2 ! gcs(22) z1_6 = y1_5-y1_1 ! gcs(26) z1_7 = y1_4-y1_0 ! gcs(30) z1_8 = y1_12+y1_8 ! gss(2) z1_9 = y1_13+y1_9 ! gss(6) z1_10 = y1_14+y1_10 ! gss(10) z1_11 = y1_15+y1_11 ! gss(14) z1_12 = y1_15-y1_11 ! gss(18) z1_13 = y1_14-y1_10 ! gss(22) z1_14 = y1_13-y1_9 ! gss(26) z1_15 = y1_12-y1_8 ! gss(30) ! End of internal OT(16) -- second set ! End of internal RF(32) -- second set x(i0+2*K_old) = z0_0-z1_8 x(i0+6*K_old) = z0_1-z1_9 x(i0+10*K_old) = z0_2-z1_10 x(i0+14*K_old) = z0_3-z1_11 x(i0+18*K_old) = z0_4-z1_12 x(i0+22*K_old) = z0_5-z1_13 x(i0+26*K_old) = z0_6-z1_14 x(i0+30*K_old) = z0_7-z1_15 x(i2+61*K_old) = z0_8+z1_0 x(i2+57*K_old) = z0_9+z1_1 x(i2+53*K_old) = z0_10+z1_2 x(i2+49*K_old) = z0_11+z1_3 x(i2+45*K_old) = z0_12+z1_4 x(i2+41*K_old) = z0_13+z1_5 x(i2+37*K_old) = z0_14+z1_6 x(i2+33*K_old) = z0_15+z1_7 x(i2+K_old) = z0_0+z1_8 x(i2+5*K_old) = z0_1+z1_9 x(i2+9*K_old) = z0_2+z1_10 x(i2+13*K_old) = z0_3+z1_11 x(i2+17*K_old) = z0_4+z1_12 x(i2+21*K_old) = z0_5+z1_13 x(i2+25*K_old) = z0_6+z1_14 x(i2+29*K_old) = z0_7+z1_15 x(i0+62*K_old) = z0_8-z1_0 x(i0+58*K_old) = z0_9-z1_1 x(i0+54*K_old) = z0_10-z1_2 x(i0+50*K_old) = z0_11-z1_3 x(i0+46*K_old) = z0_12-z1_4 x(i0+42*K_old) = z0_13-z1_5 x(i0+38*K_old) = z0_14-z1_6 x(i0+34*K_old) = z0_15-z1_7 ! Start of internal OT(32) -- first set ! Need scaling for internal SOT(8) y0_16 = sec8*(x0_33-x0_63) y0_17 = x0_37-x0_59 y0_18 = x0_41-x0_55 y0_19 = x0_45-x0_51 ! Need scaling for internal SOT(8) y0_20 = sec8*(x0_49-x0_47) y0_21 = x0_53-x0_43 y0_22 = x0_57-x0_39 y0_23 = x0_61-x0_35 ! Need scaling for internal SOT(8) y0_24 = sec8*(x0_33+x0_63) y0_25 = x0_37+x0_59 y0_26 = x0_41+x0_55 y0_27 = x0_45+x0_51 ! Need scaling for internal SOT(8) y0_28 = sec8*(x0_49+x0_47) y0_29 = x0_53+x0_43 y0_30 = x0_57+x0_39 y0_31 = x0_61+x0_35 ! Start of internal OT(16) -- first set z0_0 = x0_34-x0_62 z0_1 = x0_42-x0_54 z0_2 = x0_50-x0_46 z0_3 = x0_58-x0_38 z0_4 = x0_34+x0_62 z0_5 = x0_42+x0_54 z0_6 = x0_50+x0_46 z0_7 = x0_58+x0_38 ! Start of internal OT(8) -- first set w0_0 = x0_36-x0_60 w0_1 = x0_52-x0_44 w0_2 = x0_36+x0_60 w0_3 = x0_52+x0_44 z0_8 = cos4*(x0_40-x0_56) z0_9 = cos4*(x0_40+x0_56) z0_10 = x0_32+z0_8 z0_11 = x0_32-z0_8 z0_12 = z0_9+x0_48 z0_13 = z0_9-x0_48 z0_8 = cos8*w0_0-sin8*w0_1 z0_9 = sin8*w0_0+cos8*w0_1 z0_14 = sin8*w0_2+cos8*w0_3 z0_15 = cos8*w0_2-sin8*w0_3 y0_4 = z0_10+z0_8 y0_5 = z0_11+z0_9 y0_6 = z0_11-z0_9 y0_7 = z0_10-z0_8 y0_8 = z0_14+z0_12 y0_9 = z0_15+z0_13 y0_10 = z0_15-z0_13 y0_11 = z0_14-z0_12 ! End of internal OT(8) -- first set w0_4 = cos4*(z0_1-z0_3) w0_5 = cos4*(z0_1+z0_3) w0_0 = z0_0+w0_4 w0_1 = z0_0-w0_4 w0_2 = w0_5+z0_2 w0_3 = w0_5-z0_2 y0_0 = cos16*w0_0-sin16*w0_2 y0_1 = cos316*w0_1-sin316*w0_3 y0_2 = sin316*w0_1+cos316*w0_3 y0_3 = sin16*w0_0+cos16*w0_2 w0_4 = cos4*(z0_5-z0_7) w0_5 = cos4*(z0_5+z0_7) w0_0 = z0_4+w0_4 w0_1 = z0_4-w0_4 w0_2 = w0_5+z0_6 w0_3 = w0_5-z0_6 y0_12 = sin16*w0_0+cos16*w0_2 y0_13 = sin316*w0_1+cos316*w0_3 y0_14 = cos316*w0_1-sin316*w0_3 y0_15 = cos16*w0_0-sin16*w0_2 x0_8 = y0_4+y0_0 x0_9 = y0_5+y0_1 x0_10 = y0_6+y0_2 x0_11 = y0_7+y0_3 x0_12 = y0_7-y0_3 x0_13 = y0_6-y0_2 x0_14 = y0_5-y0_1 x0_15 = y0_4-y0_0 x0_16 = y0_12+y0_8 x0_17 = y0_13+y0_9 x0_18 = y0_14+y0_10 x0_19 = y0_15+y0_11 x0_20 = y0_15-y0_11 x0_21 = y0_14-y0_10 x0_22 = y0_13-y0_9 x0_23 = y0_12-y0_8 ! End of internal OT(16) -- first set ! Start of internal SOT(8) -- first set w0_0 = y0_17-y0_23 w0_1 = y0_21-y0_19 w0_2 = y0_17+y0_23 w0_3 = y0_21+y0_19 z0_0 = cos4_s*(y0_18-y0_22) z0_1 = cos4_s*(y0_18+y0_22) z0_2 = y0_16+z0_0 z0_3 = y0_16-z0_0 z0_4 = z0_1+y0_20 z0_5 = z0_1-y0_20 z0_0 = w0_0-tan8*w0_1 z0_1 = tan8*w0_0+w0_1 z0_6 = tan8*w0_2+w0_3 z0_7 = w0_2-tan8*w0_3 z0_8 = z0_2+z0_0 z0_9 = z0_3+z0_1 z0_10 = z0_3-z0_1 z0_11 = z0_2-z0_0 z0_12 = z0_6+z0_4 z0_13 = z0_7+z0_5 z0_14 = z0_7-z0_5 z0_15 = z0_6-z0_4 ! End of internal SOT(8) -- first set x0_0 = cos32_s*z0_8-sin32_s*z0_12 x0_1 = cos332_s*z0_9-sin332_s*z0_13 x0_2 = cos532_s*z0_10-sin532_s*z0_14 x0_3 = cos732_s*z0_11-sin732_s*z0_15 x0_4 = sin732_s*z0_11+cos732_s*z0_15 x0_5 = sin532_s*z0_10+cos532_s*z0_14 x0_6 = sin332_s*z0_9+cos332_s*z0_13 x0_7 = sin32_s*z0_8+cos32_s*z0_12 ! Start of internal SOT(8) -- first set w0_0 = y0_25-y0_31 w0_1 = y0_29-y0_27 w0_2 = y0_25+y0_31 w0_3 = y0_29+y0_27 z0_0 = cos4_s*(y0_26-y0_30) z0_1 = cos4_s*(y0_26+y0_30) z0_2 = y0_24+z0_0 z0_3 = y0_24-z0_0 z0_4 = z0_1+y0_28 z0_5 = z0_1-y0_28 z0_0 = w0_0-tan8*w0_1 z0_1 = tan8*w0_0+w0_1 z0_6 = tan8*w0_2+w0_3 z0_7 = w0_2-tan8*w0_3 z0_8 = z0_2+z0_0 z0_9 = z0_3+z0_1 z0_10 = z0_3-z0_1 z0_11 = z0_2-z0_0 z0_12 = z0_6+z0_4 z0_13 = z0_7+z0_5 z0_14 = z0_7-z0_5 z0_15 = z0_6-z0_4 ! End of internal SOT(8) -- first set x0_24 = sin32_s*z0_8+cos32_s*z0_12 x0_25 = sin332_s*z0_9+cos332_s*z0_13 x0_26 = sin532_s*z0_10+cos532_s*z0_14 x0_27 = sin732_s*z0_11+cos732_s*z0_15 x0_28 = cos732_s*z0_11-sin732_s*z0_15 x0_29 = cos532_s*z0_10-sin532_s*z0_14 x0_30 = cos332_s*z0_9-sin332_s*z0_13 x0_31 = cos32_s*z0_8-sin32_s*z0_12 ! Start of internal OT(32) -- second set ! Need scaling for internal SOT(8) y1_16 = sec8*(x1_33-x1_63) y1_17 = x1_37-x1_59 y1_18 = x1_41-x1_55 y1_19 = x1_45-x1_51 ! Need scaling for internal SOT(8) y1_20 = sec8*(x1_49-x1_47) y1_21 = x1_53-x1_43 y1_22 = x1_57-x1_39 y1_23 = x1_61-x1_35 ! Need scaling for internal SOT(8) y1_24 = sec8*(x1_33+x1_63) y1_25 = x1_37+x1_59 y1_26 = x1_41+x1_55 y1_27 = x1_45+x1_51 ! Need scaling for internal SOT(8) y1_28 = sec8*(x1_49+x1_47) y1_29 = x1_53+x1_43 y1_30 = x1_57+x1_39 y1_31 = x1_61+x1_35 ! Start of internal OT(16) -- second set z1_0 = x1_34-x1_62 z1_1 = x1_42-x1_54 z1_2 = x1_50-x1_46 z1_3 = x1_58-x1_38 z1_4 = x1_34+x1_62 z1_5 = x1_42+x1_54 z1_6 = x1_50+x1_46 z1_7 = x1_58+x1_38 ! Start of internal OT(8) -- second set w1_0 = x1_36-x1_60 w1_1 = x1_52-x1_44 w1_2 = x1_36+x1_60 w1_3 = x1_52+x1_44 z1_8 = cos4*(x1_40-x1_56) z1_9 = cos4*(x1_40+x1_56) z1_10 = x1_32+z1_8 z1_11 = x1_32-z1_8 z1_12 = z1_9+x1_48 z1_13 = z1_9-x1_48 z1_8 = cos8*w1_0-sin8*w1_1 z1_9 = sin8*w1_0+cos8*w1_1 z1_14 = sin8*w1_2+cos8*w1_3 z1_15 = cos8*w1_2-sin8*w1_3 y1_4 = z1_10+z1_8 y1_5 = z1_11+z1_9 y1_6 = z1_11-z1_9 y1_7 = z1_10-z1_8 y1_8 = z1_14+z1_12 y1_9 = z1_15+z1_13 y1_10 = z1_15-z1_13 y1_11 = z1_14-z1_12 ! End of internal OT(8) -- second set w1_4 = cos4*(z1_1-z1_3) w1_5 = cos4*(z1_1+z1_3) w1_0 = z1_0+w1_4 w1_1 = z1_0-w1_4 w1_2 = w1_5+z1_2 w1_3 = w1_5-z1_2 y1_0 = cos16*w1_0-sin16*w1_2 y1_1 = cos316*w1_1-sin316*w1_3 y1_2 = sin316*w1_1+cos316*w1_3 y1_3 = sin16*w1_0+cos16*w1_2 w1_4 = cos4*(z1_5-z1_7) w1_5 = cos4*(z1_5+z1_7) w1_0 = z1_4+w1_4 w1_1 = z1_4-w1_4 w1_2 = w1_5+z1_6 w1_3 = w1_5-z1_6 y1_12 = sin16*w1_0+cos16*w1_2 y1_13 = sin316*w1_1+cos316*w1_3 y1_14 = cos316*w1_1-sin316*w1_3 y1_15 = cos16*w1_0-sin16*w1_2 x1_8 = y1_4+y1_0 x1_9 = y1_5+y1_1 x1_10 = y1_6+y1_2 x1_11 = y1_7+y1_3 x1_12 = y1_7-y1_3 x1_13 = y1_6-y1_2 x1_14 = y1_5-y1_1 x1_15 = y1_4-y1_0 x1_16 = y1_12+y1_8 x1_17 = y1_13+y1_9 x1_18 = y1_14+y1_10 x1_19 = y1_15+y1_11 x1_20 = y1_15-y1_11 x1_21 = y1_14-y1_10 x1_22 = y1_13-y1_9 x1_23 = y1_12-y1_8 ! End of internal OT(16) -- second set ! Start of internal SOT(8) -- second set w1_0 = y1_17-y1_23 w1_1 = y1_21-y1_19 w1_2 = y1_17+y1_23 w1_3 = y1_21+y1_19 z1_0 = cos4_s*(y1_18-y1_22) z1_1 = cos4_s*(y1_18+y1_22) z1_2 = y1_16+z1_0 z1_3 = y1_16-z1_0 z1_4 = z1_1+y1_20 z1_5 = z1_1-y1_20 z1_0 = w1_0-tan8*w1_1 z1_1 = tan8*w1_0+w1_1 z1_6 = tan8*w1_2+w1_3 z1_7 = w1_2-tan8*w1_3 z1_8 = z1_2+z1_0 z1_9 = z1_3+z1_1 z1_10 = z1_3-z1_1 z1_11 = z1_2-z1_0 z1_12 = z1_6+z1_4 z1_13 = z1_7+z1_5 z1_14 = z1_7-z1_5 z1_15 = z1_6-z1_4 ! End of internal SOT(8) -- second set x1_0 = cos32_s*z1_8-sin32_s*z1_12 x1_1 = cos332_s*z1_9-sin332_s*z1_13 x1_2 = cos532_s*z1_10-sin532_s*z1_14 x1_3 = cos732_s*z1_11-sin732_s*z1_15 x1_4 = sin732_s*z1_11+cos732_s*z1_15 x1_5 = sin532_s*z1_10+cos532_s*z1_14 x1_6 = sin332_s*z1_9+cos332_s*z1_13 x1_7 = sin32_s*z1_8+cos32_s*z1_12 ! Start of internal SOT(8) -- second set w1_0 = y1_25-y1_31 w1_1 = y1_29-y1_27 w1_2 = y1_25+y1_31 w1_3 = y1_29+y1_27 z1_0 = cos4_s*(y1_26-y1_30) z1_1 = cos4_s*(y1_26+y1_30) z1_2 = y1_24+z1_0 z1_3 = y1_24-z1_0 z1_4 = z1_1+y1_28 z1_5 = z1_1-y1_28 z1_0 = w1_0-tan8*w1_1 z1_1 = tan8*w1_0+w1_1 z1_6 = tan8*w1_2+w1_3 z1_7 = w1_2-tan8*w1_3 z1_8 = z1_2+z1_0 z1_9 = z1_3+z1_1 z1_10 = z1_3-z1_1 z1_11 = z1_2-z1_0 z1_12 = z1_6+z1_4 z1_13 = z1_7+z1_5 z1_14 = z1_7-z1_5 z1_15 = z1_6-z1_4 ! End of internal SOT(8) -- second set x1_24 = sin32_s*z1_8+cos32_s*z1_12 x1_25 = sin332_s*z1_9+cos332_s*z1_13 x1_26 = sin532_s*z1_10+cos532_s*z1_14 x1_27 = sin732_s*z1_11+cos732_s*z1_15 x1_28 = cos732_s*z1_11-sin732_s*z1_15 x1_29 = cos532_s*z1_10-sin532_s*z1_14 x1_30 = cos332_s*z1_9-sin332_s*z1_13 x1_31 = cos32_s*z1_8-sin32_s*z1_12 y0_0 = x0_8+x0_0 ! gcc(1) y0_1 = x0_9+x0_1 ! gcc(3) y0_2 = x0_10+x0_2 ! gcc(5) y0_3 = x0_11+x0_3 ! gcc(7) y0_4 = x0_12+x0_4 ! gcc(9) y0_5 = x0_13+x0_5 ! gcc(11) y0_6 = x0_14+x0_6 ! gcc(13) y0_7 = x0_15+x0_7 ! gcc(15) y0_8 = x0_15-x0_7 ! gcc(17) y0_9 = x0_14-x0_6 ! gcc(19) y0_10 = x0_13-x0_5 ! gcc(21) y0_11 = x0_12-x0_4 ! gcc(23) y0_12 = x0_11-x0_3 ! gcc(25) y0_13 = x0_10-x0_2 ! gcc(27) y0_14 = x0_9-x0_1 ! gcc(29) y0_15 = x0_8-x0_0 ! gcc(31) y0_16 = x0_24+x0_16 ! gsc(1) y0_17 = x0_25+x0_17 ! gsc(3) y0_18 = x0_26+x0_18 ! gsc(5) y0_19 = x0_27+x0_19 ! gsc(7) y0_20 = x0_28+x0_20 ! gsc(9) y0_21 = x0_29+x0_21 ! gsc(11) y0_22 = x0_30+x0_22 ! gsc(13) y0_23 = x0_31+x0_23 ! gsc(15) y0_24 = x0_31-x0_23 ! gsc(17) y0_25 = x0_30-x0_22 ! gsc(19) y0_26 = x0_29-x0_21 ! gsc(21) y0_27 = x0_28-x0_20 ! gsc(23) y0_28 = x0_27-x0_19 ! gsc(25) y0_29 = x0_26-x0_18 ! gsc(27) y0_30 = x0_25-x0_17 ! gsc(29) y0_31 = x0_24-x0_16 ! gsc(31) ! End of internal OT(32) -- first set y1_0 = x1_8+x1_0 ! gcs(1) y1_1 = x1_9+x1_1 ! gcs(3) y1_2 = x1_10+x1_2 ! gcs(5) y1_3 = x1_11+x1_3 ! gcs(7) y1_4 = x1_12+x1_4 ! gcs(9) y1_5 = x1_13+x1_5 ! gcs(11) y1_6 = x1_14+x1_6 ! gcs(13) y1_7 = x1_15+x1_7 ! gcs(15) y1_8 = x1_15-x1_7 ! gcs(17) y1_9 = x1_14-x1_6 ! gcs(19) y1_10 = x1_13-x1_5 ! gcs(21) y1_11 = x1_12-x1_4 ! gcs(23) y1_12 = x1_11-x1_3 ! gcs(25) y1_13 = x1_10-x1_2 ! gcs(27) y1_14 = x1_9-x1_1 ! gcs(29) y1_15 = x1_8-x1_0 ! gcs(31) y1_16 = x1_24+x1_16 ! gss(1) y1_17 = x1_25+x1_17 ! gss(3) y1_18 = x1_26+x1_18 ! gss(5) y1_19 = x1_27+x1_19 ! gss(7) y1_20 = x1_28+x1_20 ! gss(9) y1_21 = x1_29+x1_21 ! gss(11) y1_22 = x1_30+x1_22 ! gss(13) y1_23 = x1_31+x1_23 ! gss(15) y1_24 = x1_31-x1_23 ! gss(17) y1_25 = x1_30-x1_22 ! gss(19) y1_26 = x1_29-x1_21 ! gss(21) y1_27 = x1_28-x1_20 ! gss(23) y1_28 = x1_27-x1_19 ! gss(25) y1_29 = x1_26-x1_18 ! gss(27) y1_30 = x1_25-x1_17 ! gss(29) y1_31 = x1_24-x1_16 ! gss(31) ! End of internal OT(32) -- first set x(i0+K_old) = y0_0-y1_16 x(i0+3*K_old) = y0_1-y1_17 x(i0+5*K_old) = y0_2-y1_18 x(i0+7*K_old) = y0_3-y1_19 x(i0+9*K_old) = y0_4-y1_20 x(i0+11*K_old) = y0_5-y1_21 x(i0+13*K_old) = y0_6-y1_22 x(i0+15*K_old) = y0_7-y1_23 x(i0+17*K_old) = y0_8-y1_24 x(i0+19*K_old) = y0_9-y1_25 x(i0+21*K_old) = y0_10-y1_26 x(i0+23*K_old) = y0_11-y1_27 x(i0+25*K_old) = y0_12-y1_28 x(i0+27*K_old) = y0_13-y1_29 x(i0+29*K_old) = y0_14-y1_30 x(i0+31*K_old) = y0_15-y1_31 x(i2+62*K_old) = y0_16+y1_0 x(i2+60*K_old) = y0_17+y1_1 x(i2+58*K_old) = y0_18+y1_2 x(i2+56*K_old) = y0_19+y1_3 x(i2+54*K_old) = y0_20+y1_4 x(i2+52*K_old) = y0_21+y1_5 x(i2+50*K_old) = y0_22+y1_6 x(i2+48*K_old) = y0_23+y1_7 x(i2+46*K_old) = y0_24+y1_8 x(i2+44*K_old) = y0_25+y1_9 x(i2+42*K_old) = y0_26+y1_10 x(i2+40*K_old) = y0_27+y1_11 x(i2+38*K_old) = y0_28+y1_12 x(i2+36*K_old) = y0_29+y1_13 x(i2+34*K_old) = y0_30+y1_14 x(i2+32*K_old) = y0_31+y1_15 x(i2) = y0_0+y1_16 x(i2+2*K_old) = y0_1+y1_17 x(i2+4*K_old) = y0_2+y1_18 x(i2+6*K_old) = y0_3+y1_19 x(i2+8*K_old) = y0_4+y1_20 x(i2+10*K_old) = y0_5+y1_21 x(i2+12*K_old) = y0_6+y1_22 x(i2+14*K_old) = y0_7+y1_23 x(i2+16*K_old) = y0_8+y1_24 x(i2+18*K_old) = y0_9+y1_25 x(i2+20*K_old) = y0_10+y1_26 x(i2+22*K_old) = y0_11+y1_27 x(i2+24*K_old) = y0_12+y1_28 x(i2+26*K_old) = y0_13+y1_29 x(i2+28*K_old) = y0_14+y1_30 x(i2+30*K_old) = y0_15+y1_31 x(i0+63*K_old) = y0_16-y1_0 x(i0+61*K_old) = y0_17-y1_1 x(i0+59*K_old) = y0_18-y1_2 x(i0+57*K_old) = y0_19-y1_3 x(i0+55*K_old) = y0_20-y1_4 x(i0+53*K_old) = y0_21-y1_5 x(i0+51*K_old) = y0_22-y1_6 x(i0+49*K_old) = y0_23-y1_7 x(i0+47*K_old) = y0_24-y1_8 x(i0+45*K_old) = y0_25-y1_9 x(i0+43*K_old) = y0_26-y1_10 x(i0+41*K_old) = y0_27-y1_11 x(i0+39*K_old) = y0_28-y1_12 x(i0+37*K_old) = y0_29-y1_13 x(i0+35*K_old) = y0_30-y1_14 x(i0+33*K_old) = y0_31-y1_15 end do end do end subroutine step_64 !................................................................ ! subroutine rf64(x,k,cosines,table) ! Driver subroutine for radix-64 DIT real-half complex DFT on ! 2**k elements with internal scaling. We need to first invoke ! reverse_table and cosine_table_64 to get the tables of constants, ! cosines and table, that will be required. Before that, invoke ! rf64_sizes to get the sizes of these tables. !................................................................ subroutine rf64(x,k,cosines,table) integer, intent(in) :: k ! Order of the problem is 2**k real(wp), intent(inout) :: x(0:2**k-1) ! Vector to be ! transformed real(wp), intent(in) :: cosines(-(2**(k-2)-1):2**(k-1)-1) ! Trig function ! table integer(ik2), intent(in) :: table(0:2**(k/2)-1) ! Bit-reversal ! table integer N_new ! Order of transform yet to be performed integer K_old ! Order of transform completed ! Put inputs in bit-reversed order call bit_reverse(x,k,table) ! Handle non-power of 64 step if necessary if(mod(k,6) == 1) then N_new = 2**(k-1) call step_2(x,N_new) K_old = 2 else if(mod(k,6) == 2) then N_new = 2**(k-2) call step_4(x,N_new) K_old = 4 else if(mod(k,6) == 3) then N_new = 2**(k-3) call step_8(x,N_new) K_old = 8 else if(mod(k,6) == 4) then N_new = 2**(k-4) call step_16(x,N_new) K_old = 16 else if(mod(k,6) == 5) then N_new = 2**(k-5) call step_32(x,N_new) K_old = 32 else N_new = 2**k K_old = 1 end if ! Do power of 64 steps do while(N_new > 1) N_new = N_new/64 call step_64(x,N_new,K_old,cosines) K_old = K_old*64 end do ! Put sines in normal order call reverse_sins(x,k) end subroutine rf64 !................................................................ !................................................................ ! subroutine rf64_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) ! Compute sizes of data tables required by rf64 !................................................................ subroutine rf64_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) integer, intent(in) :: k ! Order of the problem = 2**k integer, intent(out) :: cosine_lbound ! Lower bound of cosine table integer, intent(out) :: cosine_ubound ! Upper bound of cosine table integer, intent(out) :: bitrev_size ! Size of bit-reversal ! table if(k < 0) then write(*,'(a)') ' Error: negative exponent in rf64_sizes' stop end if cosine_lbound = -(2**(k-2)-1) cosine_ubound = 2**(k-1)-1 bitrev_size = 2**(k/2) end subroutine rf64_sizes !................................................................ end module rf64_routines ! *************************************************************** ! *************************************************************** ! program test16 ! Test program for real-half complex DFT subroutines. Shows how ! to invoke them and enables the user to test them for speed and ! correctness or for accuracy. ! *************************************************************** program test16 use mykinds, wp => dp ! Set working precision to double precision use rf16_routines ! For radix-16 DIT real-half complex DFT use rfs16_routines ! For radix-16 DIT real-half complex DFT (scaled) use rf64_routines ! For radix-64 DIT real-half complex DFT use auxiliary_routines ! Bit-reversal routines needed by all implicit none integer k ! Size of the problem = 2**k integer(ik2), allocatable :: bit_rev(:) ! Helper bit-reversal table real(wp), allocatable :: cosines(:) ! Cosine table needed by all real(wp), allocatable :: cosines_s(:) ! Scaled cosine table needed by rfs16 integer cosine_lbound, cosine_ubound, bitrev_size ! Sizes of helper tables real(wp), allocatable :: x0(:), x1(:) ! Input and output vectors integer i ! Elementary vector index or current iteration number integer j ! Index of element in current subvector real(wp) current_max ! Maximum absolute error in any element for ! current elementary vector real(wp) global_max ! Maximum absolute error in any element for ! all elementary vectors real(wp) pi ! 3.1415926535897932384626433832795 integer Ak, Mk ! Number of floating point additions and ! mutiplications for current algorithm integer choice ! User's choice (input) ! Get the size of the problem from the user. write(*,'(a)',advance='no') ' Enter the value of k:> ' read(*,*) k pi = 4*atan(1.0_wp) ! Propmpt user for which test to run write(*,'(a)') ' You have the following choices:' write(*,'(a)') ' 1) Test rf16 for correctness' write(*,'(a)') ' 2) Test rfs16 for correctness' write(*,'(a)') ' 3) Test rf64 for correctness' write(*,'(a)',advance='no') ' Enter your choice here:> ' read(*,*) choice ! Run selected test select case(choice) !------------------------------------------------------------------------- case(1) ! Test rf16 for correctness ! Calculate number of additions and multiplications required Ak = nint((522.0_wp*k*2**k+1280+2**k*(-777+6*cos(2*pi*modulo(k,4)/4)- & 6*sin(2*pi*modulo(k,4)/4)-5*(-1)**k))/384) Mk = nint((210.0_wp*k*2**k+896+2**k*(-655-6*cos(2*pi*modulo(k,4)/4)+ & 2*sin(2*pi*modulo(k,4)/4)+5*(-1)**k))/320) ! First we get sizes of the trig table and helper bit-reversal ! table needed for order 2**k problem call rf16_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) allocate(bit_rev(bitrev_size)) ! Create helper bit-reversal table call reverse_table(bit_rev,k) allocate(cosines(cosine_lbound:cosine_ubound)) ! Create trig table call cos_table(cosines,k) allocate(x0(0:2**k-1), x1(0:2**k-1)) global_max = 0 write(*,'(a)') ' Accuracy results for radix-16'// & ' real-half complex DFT' write(*,'(1x,a9,1x,a13,2(1x,a9))') 'k','max error','adds','muls' ! Run test over all elementary vectors do i = 0, 2**k-1 ! Analytic result x0 = (/(cos(2*pi*modulo(i*j,2**k)/2**k),j=0,2**(k-1)), & (sin(2*pi*modulo(i*j,2**k)/2**k),j=1,2**(k-1)-1)/) x1 = 0 x1(i) = 1 ! Invoke rf16 to convert x1 into its real-half complex DFT call rf16(x1,k,cosines,bit_rev) current_max = maxval(abs(x1-x0)) write(*,'(1x,i9,1x,es13.6,2(1x,i9))') i, current_max, Ak, Mk global_max = max(global_max, current_max) end do write(*,'(1x,a,1x,es13.6)') 'Maximum overall error = ',global_max !------------------------------------------------------------------------- case(2) ! Test rfs16 for correctness ! Calculate number of additions and multiplications required Ak = nint((522.0_wp*k*2**k+1280+2**k*(-777+6*cos(2*pi*modulo(k,4)/4)- & 6*sin(2*pi*modulo(k,4)/4)-5*(-1)**k))/384) if(k <= 3) then Mk = nint((210.0_wp*k*2**k+896+2**k*(-655-6*cos(2*pi*modulo(k,4)/4)+ & 2*sin(2*pi*modulo(k,4)/4)+5*(-1)**k))/320) else Mk = nint((3210.0_wp*k*2**k+14336+2**k*(-9655-246*cos(2*pi*modulo(k,4)/4)- & 118*sin(2*pi*modulo(k,4)/4)+5*(-1)**k))/5120) end if ! First we get sizes of the trig table and helper bit-reversal ! table needed for order 2**k problem call rfs16_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) allocate(bit_rev(bitrev_size)) ! Create helper bit-reversal table call reverse_table(bit_rev,k) allocate(cosines(cosine_lbound:cosine_ubound)) allocate(cosines_s(cosine_lbound:cosine_ubound)) ! Create trig table call cos_table_s(cosines,cosines_s,k) allocate(x0(0:2**k-1), x1(0:2**k-1)) global_max = 0 write(*,'(a)') ' Accuracy results for radix-16'// & ' real-half complex DFT with intermediate scaling' write(*,'(1x,a9,1x,a13,2(1x,a9))') 'k','max error','adds','muls' ! Run test over all elementary vectors do i = 0, 2**k-1 ! Analytic result x0 = (/(cos(2*pi*modulo(i*j,2**k)/2**k),j=0,2**(k-1)), & (sin(2*pi*modulo(i*j,2**k)/2**k),j=1,2**(k-1)-1)/) x1 = 0 x1(i) = 1 ! Invoke rfs16 to convert x1 into its real-half complex DFT call rfs16(x1,k,cosines,cosines_s,bit_rev) current_max = maxval(abs(x1-x0)) write(*,'(1x,i9,1x,es13.6,2(1x,i9))') i, current_max, Ak, Mk global_max = max(global_max, current_max) end do write(*,'(1x,a,1x,es13.6)') 'Maximum overall error = ',global_max !------------------------------------------------------------------------- case(3) ! Test rf64 for correctness ! Calculate number of additions and multiplications required Ak = nint((1038.0_wp*k*2**k+2560+2**k*(-1537-6*cos(2*pi*modulo(k,6)/6)+ & 10*cos(2*pi*modulo(2*k,6)/6)-7*(-1)**k- & 10*sqrt(3.0_wp)*sin(2*pi*modulo(k,6)/6)+ & 2*sqrt(3.0_wp)*sin(2*pi*modulo(2*k,6)/6)))/768) Mk = nint((1722.0_wp*k*2**k+7424+2**k*(-5369-14*cos(2*pi*modulo(k,6)/6)- & 46*cos(2*pi*modulo(2*k,6)/6)+21*(-1)**k- & 14*sqrt(3.0_wp)*sin(2*pi*modulo(k,6)/6)- & 26*sqrt(3.0_wp)*sin(2*pi*modulo(2*k,6)/6)))/2688) ! First we get sizes of the trig table and helper bit-reversal ! table needed for order 2**k problem call rf64_sizes(k, cosine_lbound, cosine_ubound, bitrev_size) allocate(bit_rev(bitrev_size)) ! Create helper bit-reversal table call reverse_table(bit_rev,k) allocate(cosines(cosine_lbound:cosine_ubound)) ! Create trig table call cos_table_64(cosines,k) allocate(x0(0:2**k-1), x1(0:2**k-1)) global_max = 0 write(*,'(a)') ' Accuracy results for radix-64'// & ' real-half complex DFT with internal scaling' write(*,'(1x,a9,1x,a13,2(1x,a9))') 'k','max error','adds','muls' ! Run test over all elementary vectors do i = 0, 2**k-1 ! Analytic result x0 = (/(cos(2*pi*modulo(i*j,2**k)/2**k),j=0,2**(k-1)), & (sin(2*pi*modulo(i*j,2**k)/2**k),j=1,2**(k-1)-1)/) x1 = 0 x1(i) = 1 ! Invoke rf64 to convert x1 into its real-half complex DFT call rf64(x1,k,cosines,bit_rev) current_max = maxval(abs(x1-x0)) write(*,'(1x,i9,1x,es13.6,2(1x,i9))') i, current_max, Ak, Mk global_max = max(global_max, current_max) end do write(*,'(1x,a,1x,es13.6)') 'Maximum overall error = ',global_max !------------------------------------------------------------------------- case default ! Invalid choice catchall write(*,'(a,i0)') ' Invalid choice: ', choice stop !------------------------------------------------------------------------- end select end program test16 ! ***************************************************************