[Search for users] [Overall Top Noters] [List of all Conferences] [Download this site]

Conference nicctr::dxml

Title:Digital Extended Math Library
Notice:Kit locations: 9.last (UNIX), 10.last (VMS)
Moderator:RTL::CHAOFGREN
Created:Mon Apr 30 1990
Last Modified:Tue Jun 03 1997
Last Successful Update:Fri Jun 06 1997
Number of topics:324
Total number of notes:1402

318.0. "xposted from FORTRAn : on FFT's" by RTOMS::PARETIJ () Mon Mar 31 1997 12:58

      The following code does some sort of fft; there
      are 2 routines fft_ce and FFT_se calling cosft1 (sinft1) and
      realft colling four1. There are no comments, which strains
      my imagination. I am again looking for a way to use dxml.
      May be putting the code out to the forum will generate some
      questions I could refer to the customer.


	subroutine fft_ce(A,Re_,Im_,nx,ny,isign)
	implicit none
	integer nx,ny,isign
	real*8 A(0:nx,0:ny-1),Re_(0:nx,0:ny/2),Im_(0:nx,0:ny/2)
	integer i,j
	real*8 vec_x(0:nx), vec_y(0:ny-1)

	if(isign.eq.1)then

	do j=0,ny-1
	  do i=0,nx
	    vec_x(i)=A(i,j)
	  enddo
	  call cosft1(vec_x,nx)
	  A(0,j)=vec_x(0)/nx
	  do i=1,nx-1
	    A(i,j)=vec_x(i)*2/nx
	  enddo
	  A(nx,j)=vec_x(nx)/nx
	enddo
	do i=0,nx
	   do j=0,ny-1
	    vec_y(j)=A(i,j)
	   enddo
	   call realft(vec_y,ny,isign)
	   Re_(i,0)=vec_y(0)/ny
	   Im_(i,0)=0
	   do j=1,ny/2-1
	      Re_(i,j)=vec_y(2*j)/ny
	      Im_(i,j)=-vec_y(2*j+1)/ny
	   enddo
       Re_(i,ny/2)=0.5*vec_y(1)/ny
	   Im_(i,ny/2)=0
	enddo

	else

	do i=0,nx
		vec_y(0)=2*Re_(i,0)
		vec_y(1)=4*Re_(i,ny/2)
		do j=1,ny/2-1
			vec_y(2*j)=2*Re_(i,j)
			vec_y(2*j+1)=-2*Im_(i,j)
		enddo
		call realft(vec_y,ny,isign)
	    do j=0,ny-1
 			A(i,j)=vec_y(j)
	    enddo
	enddo
	do j=0,ny-1
	  vec_x(0)=2*A(0,j)
	  do i=1,nx-1
	    vec_x(i)=A(i,j)
	  enddo
	  vec_x(nx)=2*A(nx,j)
	  call cosft1(vec_x,nx)
	  do i=0,nx
	    A(i,j)=vec_x(i)
	  enddo
	enddo

	endif

	end
	
	
	
	subroutine fft_se(A,Re_,Im_,nx,ny,isign)
	implicit none
	integer nx,ny,isign
	real*8 A(0:nx,0:ny-1),Re_(0:nx,0:ny/2),Im_(0:nx,0:ny/2)
	integer i,j
	real*8 vec_x(0:nx-1), vec_y(0:ny-1)

	if(isign.eq.1)then

	do j=0,ny-1
	  vec_x(0)=0
	  do i=1,nx-1
	    vec_x(i)=A(i,j)
	  enddo
	  call sinft(vec_x,nx)
	  do i=0,nx-1
	    A(i,j)=vec_x(i)*2/nx
	  enddo
	enddo
	do i=1,nx-1
	   do j=0,ny-1
	    vec_y(j)=A(i,j)
	   enddo
	   call realft(vec_y,ny,isign)
	   Re_(i,0)=vec_y(0)/ny
	   Im_(i,0)=0
	   do j=1,ny/2-1
	      Re_(i,j)=vec_y(2*j)/ny
	      Im_(i,j)=-vec_y(2*j+1)/ny
	   enddo
       Re_(i,ny/2)=0.5*vec_y(1)/ny
	   Im_(i,ny/2)=0
	enddo
	do j=0,ny/2
		Re_(0,j)=0
		Im_(0,j)=0
		Re_(nx,j)=0
		Im_(nx,j)=0
	enddo

	else

	do i=1,nx-1
		vec_y(0)=2*Re_(i,0)
		vec_y(1)=4*Re_(i,ny/2)
		do j=1,ny/2-1
			vec_y(2*j)=2*Re_(i,j)
			vec_y(2*j+1)=-2*Im_(i,j)
		enddo
		call realft(vec_y,ny,isign)
	    do j=0,ny-1
 			A(i,j)=vec_y(j)
	    enddo
	enddo
	do j=0,ny-1
	  vec_x(0)=0
	  do i=1,nx-1
	    vec_x(i)=A(i,j)
	  enddo
	  call sinft(vec_x,nx)
	  do i=0,nx-1
	    A(i,j)=vec_x(i)
	  enddo
	enddo
	do j=0,ny-1
		A(nx,j)=0
	enddo

	endif

	end
	
	
      SUBROUTINE four1(data,nn,isign)
      INTEGER isign,nn
      real*8 data(2*nn)
      INTEGER i,istep,j,m,mmax,n
      real*8 tempi,tempr
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      n=2*nn
      j=1
      do 11 i=1,n,2
        if(j.gt.i)then
          tempr=data(j)
          tempi=data(j+1)
          data(j)=data(i)
          data(j+1)=data(i+1)
          data(i)=tempr
          data(i+1)=tempi
        endif
        m=n/2
1       if ((m.ge.2).and.(j.gt.m)) then
          j=j-m
          m=m/2
        goto 1
        endif
        j=j+m
11    continue
      mmax=2
2     if (n.gt.mmax) then
        istep=2*mmax
        theta=6.28318530717959d0/(isign*mmax)
        wpr=-2.d0*dsin(0.5d0*theta)**2
        wpi=dsin(theta)
        wr=1.d0
        wi=0.d0
        do 13 m=1,mmax,2
          do 12 i=m,n,istep
            j=i+mmax
            tempr=(wr)*data(j)-(wi)*data(j+1)
            tempi=(wr)*data(j+1)+(wi)*data(j)
            data(j)=data(i)-tempr
            data(j+1)=data(i+1)-tempi
            data(i)=data(i)+tempr
            data(i+1)=data(i+1)+tempi
12        continue
          wtemp=wr
          wr=wr*wpr-wi*wpi+wr
          wi=wi*wpr+wtemp*wpi+wi
13      continue
        mmax=istep
      goto 2
      endif
      return
      END
      
      
	SUBROUTINE realft(data,n,isign)
      INTEGER isign,n
      real*8 data(n)
CU    USES four1
      INTEGER i,i1,i2,i3,i4,n2p3
      real*8 c1,c2,h1i,h1r,h2i,h2r,wis,wrs
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/dble(n/2)
      c1=0.5
      if (isign.eq.1) then
        c2=-0.5
        call four1(data,n/2,+1)
      else
        c2=0.5
        theta=-theta
      endif
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      wr=1.0d0+wpr
      wi=wpi
      n2p3=n+3
      do 11 i=2,n/4
        i1=2*i-1
        i2=i1+1
        i3=n2p3-i2
        i4=i3+1
        wrs=(wr)
        wis=(wi)
        h1r=c1*(data(i1)+data(i3))
        h1i=c1*(data(i2)-data(i4))
        h2r=-c2*(data(i2)+data(i4))
        h2i=c2*(data(i1)-data(i3))
        data(i1)=h1r+wrs*h2r-wis*h2i
        data(i2)=h1i+wrs*h2i+wis*h2r
        data(i3)=h1r-wrs*h2r+wis*h2i
        data(i4)=-h1i+wrs*h2i+wis*h2r
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
11    continue
      if (isign.eq.1) then
        h1r=data(1)
        data(1)=h1r+data(2)
        data(2)=h1r-data(2)
      else
        h1r=data(1)
        data(1)=c1*(h1r+data(2))
        data(2)=c1*(h1r-data(2))
        call four1(data,n/2,-1)
      endif
      return
      END


	SUBROUTINE sinft(y,n)
      INTEGER n
      real*8 y(n)
CU    USES realft
      INTEGER j
      real*8 sum,y1,y2
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/dble(n)
      wr=1.0d0
      wi=0.0d0
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      y(1)=0.0
      do 11 j=1,n/2
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
        y1=wi*(y(j+1)+y(n-j+1))
        y2=0.5*(y(j+1)-y(n-j+1))
        y(j+1)=y1+y2
        y(n-j+1)=y1-y2
11    continue
      call realft(y,n,+1)
      sum=0.0
      y(1)=0.5*y(1)
      y(2)=0.0
      do 12 j=1,n-1,2
        sum=sum+y(j)
        y(j)=y(j+1)
        y(j+1)=sum
12    continue
      return
      END

     
	SUBROUTINE cosft1(y,n)
      INTEGER n
      real*8 y(n+1)
CU    USES realft
      INTEGER j
      real*8 sum,y1,y2
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/n
      wr=1.0d0
      wi=0.0d0
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      sum=0.5*(y(1)-y(n+1))
      y(1)=0.5*(y(1)+y(n+1))
      do 11 j=1,n/2-1
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
        y1=0.5*(y(j+1)+y(n-j+1))
        y2=(y(j+1)-y(n-j+1))
        y(j+1)=y1-wi*y2
        y(n-j+1)=y1+wi*y2
        sum=sum+wr*y2
11    continue
      call realft(y,n,+1)
      y(n+1)=y(2)
      y(2)=sum
      do 12 j=4,n,2
        sum=sum+y(j)
        y(j)=sum
12    continue
      return
      END
T.RTitleUserPersonal
Name
DateLines
318.1HPCGRP::MANLEYMon Mar 31 1997 16:2118
>       The following code does some sort of fft; there
>       are 2 routines fft_ce and FFT_se calling cosft1 (sinft1) and
>       realft colling four1. There are no comments, which strains
>       my imagination. I am again looking for a way to use dxml.
>       May be putting the code out to the forum will generate some
>       questions I could refer to the customer.

	The routines cosft1, sinft, realft, and four1, have been copied
	exactly from Numerical Recipes (see pages 501-513 of the second
	edition for Fortran).

	DXML roiutines DFCT, DFST, and DFFT may be substituted for cosft1,
	sinft, and realft. Although fairly straightforward, the substitution
	is not plug and play. I'll make the changes and post them here.

	I assume you only need fft_ce and fft_se???

318.2RTOMS::PARETIJTue Apr 01 1997 10:3052
>I assume you only need fft_ce and fft_se???
Yes. But since I don't really know what they do 
I wrote a test program with reduced dimensions 
to compute only elements 0-1-2 of the arrays
by feeding in the elements 0-1-2 of the input array with their
values at some point in the customer's program
and found DIFFERENT output:

         real  *8  omega(0:2,0:2)
         real  *8 R_omega(0:2,0:2)
         real  *8 I_omega(0:2,0:2)
c
          omega(1,0)=0.234867902391953
          omega(1,1)=36.9781659782871
          omega(1,2)=154.890240058231

          omega(2,0)=0.385906851779590
          omega(2,1)=68.8872388030323
          omega(2,2)=287.391883911758

c
         nx=2
         ny=2
c

      write(*,*)' joe - debug before call - omega '
      do ijoe=0,2
      write(*,*)(omega(ijoe,jjoe),jjoe=0,2)
      end do
      write(*,*)' joe - debug R_omega '
      do ijoe=0,2
      write(*,*)(R_omega(ijoe,jjoe),jjoe=0,2)
      end do
      write(*,*)' joe - debug I_omega '
      do ijoe=0,2
      write(*,*)(I_omega(ijoe,jjoe),jjoe=0,2)
      end do
      call fft_se(omega,R_omega,I_omega,ny,nx,1)
      write(*,*)' joe - debug after fft_se call - omega '
            do ijoe=0,2
      write(*,*)(omega(ijoe,jjoe),jjoe=0,2)
            end do
      write(*,*)' joe - debug R_omega '
      do ijoe=0,2
      write(*,*)(R_omega(ijoe,jjoe),jjoe=0,2)
      end do
      write(*,*)' joe - debug I_omega '
      do ijoe=0,2
      write(*,*)(I_omega(ijoe,jjoe),jjoe=0,2)
      end do
 7215          continue
        end
318.3HPCGRP::MANLEYWed Apr 02 1997 14:1512
Joseph,

For a "+" sign, the columns of "A" are overwritten by output from the cosine
(fft_ce) or sine (fft_se) transforms, but not by the fft that's then done
across the rows of A. Is it possible to perform both cosine (sine) transform
and fft in-place, instead of copying data to Re_ and Im_? (Note: Re_ and Im_
may be defined to overlap A in an interleaved way, thus minimizing the impact
on the rest of the code.)

	- Dwight -

318.4Some code ...HPCGRP::MANLEYThu Apr 03 1997 16:55210
Joseph,

Please try the code below. However, before you do, re-size the A, Re_, and Im_
matrices to match what the new routines expect (a few extra columns and/or 
rows are required. I have boldly assumed the A matrix may be treated as both
input and scratch for forward transforms, and scratch and output for inverse
transforms. If that's not true, this code will not work. Also, if you can,
define Re_ and Im_ to overlap A, and get rid of the copy operations altogether.

The new code is not optimal. Three step FFTs should be used, but are not. You
didn't share much of the application with us ;-), so I don't know how three
step FFTs can best be integrated with rest of the application.

Best Regards,

	- Dwight -

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

	subroutine fft_ce(A,Re_,Im_,nx,ny,isign)
	implicit none
	integer nx,ny,isign
	real*8 A(0:nx+1,0:ny+1),Re_(0:nx+1,0:ny/2),Im_(0:nx+1,0:ny/2)
	integer i,j

	if(isign.eq.1)then

	do j=0,ny-1
	   call dcosft( A(0,j),nx)
	   A(0,j ) = A(0,j)*0.5
	   A(nx,j) = A(nx,j)*0.5
	enddo
	call dfft_grp( 'r','c','f',A,A,ny,nx+1,nx+2,1,1 )
	do i=0,nx
	   Re_(i,0)=A(i,0)*(2.0/(nx*ny))
	   Im_(i,0)=0.0
	enddo
	do j=1,ny/2-1
	   do i=0,nx
	      Re_(i,j)=A(i,2*j+0)*(2.0/(nx*ny))
	      Im_(i,j)=A(i,2*j+1)*(2.0/(nx*ny))
	   enddo
	enddo
	do i=0,nx
           Re_(i,ny/2)=A(i,ny)*(1.0/(nx*ny))
	   Im_(i,ny/2)=0.0
	enddo

	else

	do i=0,nx
	   A(i,0)=ny*Re_(i,0)
           A(i,1)=0.0
	   do j=1,ny/2-1
	      A(i,2*j+0)=ny*Re_(i,j)
	      A(i,2*j+1)=ny*Im_(i,j)
	   enddo
	   A(i,ny+0)=(2.0*ny)*Re_(i,ny/2)
           A(i,ny+1)=0.0
	enddo
        call dfft_grp( 'c','r','b',A,A,ny,nx+1,nx+2,1,1 )
	do j=0,ny-1
	   A(0,j )=2.0*A(0,j )
	   A(nx,j)=2.0*A(nx,j)
	   call dcosft( A(0,j),nx )
	enddo

	endif

	end



	subroutine fft_se(A,Re_,Im_,nx,ny,isign)
	implicit none
	integer nx,ny,isign
	real*8 A(0:nx+1,0:ny+1),Re_(0:nx+1,0:ny/2),Im_(0:nx+1,0:ny/2)
	integer i,j
	real*8 vec_x(0:nx+1), vec_y(0:ny+1)

	if(isign.eq.1)then

	do j=0,ny-1
	  A(0,j) = 0.0
	  call dsinft( A(0,j),nx)
	enddo
        call dfft_grp( 'r','c','f',A(1,0),A(1,0),ny,nx-1,nx+2,1,1 )
        Re_(0,0)=0.0
        Im_(0,0)=0.0
	do i=1,nx-1
	   Re_(i,0)=A(i,0)*(2.0/(nx*ny))
	   Im_(i,0)=0.0
	enddo
	Re_(nx,0)=0.0
	Im_(nx,0)=0.0
	do j=1,ny/2-1
           Re_(0,j)=0.0
           Im_(0,j)=0.0
	   do i=1,nx-1
	      Re_(i,j)=A(i,2*j+0)*(2.0/(nx*ny))
	      Im_(i,j)=A(i,2*j+1)*(2.0/(nx*ny))
	   enddo
           Re_(nx,j)=0.0
	   Im_(nx,j)=0.0
	enddo
        Re_(0,ny/2)=0.0
        Im_(0,ny/2)=0.0
	do i=1,nx-1
           Re_(i,ny/2)=A(i,ny)*(1.0/(nx*ny))
	   Im_(i,ny/2)=0.0
	enddo
	Re_(nx,ny/2)=0.0
	Im_(nx,ny/2)=0.0

	else

	do i=1,nx-1
	   A(i,0)=ny*Re_(i,0)
           A(i,1)=0.0
	enddo
	do j=1,ny/2-1
	   do i=1,nx-1
	      A(i,2*j+0)=ny*Re_(i,j)
	      A(i,2*j+1)=ny*Im_(i,j)
	   enddo
	enddo
	do i=1,nx-1
	   A(i,ny+0)=2.0*ny*Re_(i,ny/2)
           A(i,ny+1)=0.0
	enddo
        call dfft_grp( 'c','r','b',A(1,0),A(1,0),ny,nx-1,nx+2,1,1 )
	do j=0,ny-1
	   A(0,j )=0.0
	   call dsinft( A(0,j),nx )
	   A(nx,j)=0.0
	enddo

	endif

	end

	

      SUBROUTINE dsinft(y,n)
      INTEGER n
      real*8 y(n)
      INTEGER j
      real*8 sum,y1,y2
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/dble(n)
      wr=1.0d0
      wi=0.0d0
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      y(1)=0.0
      do j=1,n/2
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
        y1=wi*(y(j+1)+y(n-j+1))
        y2=0.5*(y(j+1)-y(n-j+1))
        y(j+1)=y1+y2
        y(n-j+1)=y1-y2
      enddo
      call dfft( 'r','c','f',y,y,n,1 )
      sum=0.0
      y(1)=0.5*y(1)
      do j=1,n-1,2
        sum=sum+y(j)
        y(j)=-y(j+1)
        y(j+1)=sum
      enddo
      return
      END


     
      SUBROUTINE dcosft( y,n )
      INTEGER n
      real*8 y(n+1)
      INTEGER j
      real*8 sum,y1,y2
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/n
      wr=1.0d0
      wi=0.0d0
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      sum=0.5*(y(1)-y(n+1))
      y(1)=0.5*(y(1)+y(n+1))
      do j=1,n/2-1
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
        y1=0.5*(y(j+1)+y(n-j+1))
        y2=(y(j+1)-y(n-j+1))
        y(j+1)=y1-wi*y2
        y(n-j+1)=y1+wi*y2
        sum=sum+wr*y2
      enddo
      call dfft( 'r','c','f',y,y,n,1 )
      y(2)=sum
      do j=4,n,2
         sum  = sum - y(j)
         y(j) = sum
      enddo
      return
      END

318.5thanks Dwight !RTOMS::PARETIJSun Apr 06 1997 07:11284
Hi Dwight,
thank you so much for your code using DXML. Here's the
main, using the fourier routines, as well as the include file
with modified dimensions to use your code.
The results are correct. The performance of this new code
is somehow worse than in the original one (like 120 vs. 150 "" )
However I 've not attempted yet the matrices overlap you suggested
Also, could you please post the complete reference to "Numerical REcipes"

Thanks again,
-Joseph

      program NSE_2D

      implicit none
      include 'NSE_2D.inc'

        real*8 FLD(1:DiffDim)
        real*8 T0,T1,alx,tstep

        integer nstep
        integer error
        integer loop
        integer loop1
		  integer i, j
        real*8 t_start, elapsed
        real*8 auxiliary(0:nx,0:ny)

         external  RHS_0

	t_start = secnds(0.0)

         OPEN(Unit=7,FILE='NSE_2D.dat')
         read(7,*)nstep
         read(7,*)FR
         read(7,*)alx
         read(7,*)tstep
         close(unit=7)

	Lx = 2.*pi
	Ly = 2.*pi
        

        open(Unit=7,FILE='state.dat')
        do loop = 1, Diffdim
			read(7,*) fld(loop)
		  end do
        close(unit=7)
        
        call InitializeRungeKutta
        open(Unit=8,FILE='NSE_2D.sim',status='UNKNOWN')

C------ main loop -----------------------------------------------

        do loop=1,nstep
        
                T1=T0+tstep
                write(*,*) 'integration time = ',T1
                CALL  RungeKutta(T0,T1,DiffDim,FLD,RHS_0)
					 write (8,100) (fld(i+63), i = 1, 10)
                T0=T1
        enddo

 100	format (10F12.6)
        close (Unit=8)

C---------------------------------------------------------------
        
        open(Unit = 7, FILE = 'state.dat')
        do loop = 1, Diffdim
        	write(7,*) fld(loop)
        end do
        close (Unit=7)

        do loop=0,ny
           do loop1=0,nx-1
              auxiliary(loop1,loop)=psi(loop,loop1)
           enddo
           auxiliary(nx,loop)=psi(loop,0)
        enddo
       
        open(Unit=7,FILE='stream.dat')
        do i = 0, ny
        	do j = 0, nx-1
			write(7,*) auxiliary(i,j)
	   	end do
		end do
        close(unit=7)
        
	elapsed = secnds(t_start)
	write(*,*)
	write(*,*) 'elapsed time(seconds): ', elapsed   
         
      STOP
      end


      subroutine RHS_0(neq,t0,fld,f_fld)


      implicit none
      include 'NSE_2D.inc'
      integer neq
      real*8 t0
      real*8 fld(1:neq),f_fld(1:neq)
      integer num,loop,loop1 
      do loop=0,ny  
         do loop1=0,nx/2
            R_omega(loop,loop1)=0.0
            I_omega(loop,loop1)=0.0
         enddo
      enddo
      num=0
      loop1=0
      do loop=1,ny-1
         R_omega(loop,loop1)=fld(num+1)
         num=num+1
      enddo

      do loop1=1,nx/2
         do loop=1,ny-1
            R_omega(loop,loop1)=fld(num+1)
            I_omega(loop,loop1)=fld(num+2)
            if(loop1.eq.0) I_omega(loop,loop1)=0.0
            num=num+2
         enddo
      enddo
      call rhs
      num=0
      loop1=0
      do loop=1,ny-1
         f_fld(num+1)=F_R_omega(loop,loop1)
         num=num+1
      enddo
      do loop1=1,nx/2
         do loop=1,ny-1
            f_fld(num+1)=F_R_omega(loop,loop1)
            f_fld(num+2)=F_I_omega(loop,loop1)
            num=num+2
         enddo
      enddo
      return
      end



      subroutine rhs

      implicit none
      include 'NSE_2D.inc'
      
      integer loop,loop1
      real*8 tpi_Lx,pi_Ly
      real*8 kx,ky,ksq
      real*8 v_1_x_omega(0:ny+1,0:nx+1)
      real*8 v_2_x_omega(0:ny+1,0:nx+1)
      real*8 R_v_1_x_omega(0:ny+1,0:nx/2)
      real*8 I_v_1_x_omega(0:ny+1,0:nx/2)
      real*8 R_v_2_x_omega(0:ny+1,0:nx/2)
      real*8 I_v_2_x_omega(0:ny+1,0:nx/2)
      
      tpi_Lx=2.0*pi/Lx
      pi_Ly=pi/Ly


      do loop1=0,nx/2
         do loop=0,ny
            ky=pi_Ly*loop
            kx=tpi_Lx*loop1
            ksq=kx*kx+ky*ky
            if(loop.eq.0.and.loop1.eq.0) then
               R_v_1(loop,loop1)=0.0
               I_v_1(loop,loop1)=0.0
               R_v_2(loop,loop1)=0.0
               I_v_2(loop,loop1)=0.0
            else 
               R_v_1(loop,loop1)=ky*R_omega(loop,loop1)/ksq
               I_v_1(loop,loop1)=ky*I_omega(loop,loop1)/ksq
               R_v_2(loop,loop1)=kx*I_omega(loop,loop1)/ksq
               I_v_2(loop,loop1)=-kx*R_omega(loop,loop1)/ksq
            endif
         enddo
      enddo

      call fft_ce(v_1,R_v_1,I_v_1,ny,nx,-1)
      call fft_se(v_2,R_v_2,I_v_2,ny,nx,-1)
      call fft_se(omega,R_omega,I_omega,ny,nx,-1)

      do loop1=0,nx-1
         do loop=0,ny
            v_1_x_omega(loop,loop1)=v_1(loop,loop1)*omega(loop,loop1)
            v_2_x_omega(loop,loop1)=v_2(loop,loop1)*omega(loop,loop1)
         enddo
      enddo

      call fft_se(v_1_x_omega,R_v_1_x_omega,I_v_1_x_omega,ny,nx,1)
      call fft_ce(v_2_x_omega,R_v_2_x_omega,I_v_2_x_omega,ny,nx,1)

      do loop1=0,nx/2
         kx=tpi_Lx*loop1
         do loop=1,ny-1
         ky=pi_Ly*loop
            ksq=kx*kx+ky*ky
            F_R_omega(loop,loop1)=-ksq*R_omega(loop,loop1)+
     &           kx*I_v_1_x_omega(loop,loop1)+
     &           ky*R_v_2_x_omega(loop,loop1)
            F_I_omega(loop,loop1)=-ksq*I_omega(loop,loop1)-
     &           kx*R_v_1_x_omega(loop,loop1)+
     &           ky*I_v_2_x_omega(loop,loop1)
         enddo
      enddo
      F_I_omega(nf,mf)= F_I_omega(nf,mf)-Fr/2.
      return
      end

















c
	real*8 pi
	parameter(pi=3.141592653589793)
	
C------ nx and ny are the number of grid points
	integer nx, ny
	parameter ( nx = 128, ny = 128 )

C------ Lx and Ly are the length of the container, Fr is the forcing
	real*8 Lx, Ly, Fr

C------ nf number of forced vortices in the y-direction
C------ mf number of forced pairs of vortices in the x-direction
	integer nf, mf
	parameter (nf=8,mf=4)

C------ DiffDim is the dimension of the resulting system of OdE's	
	integer DiffDim
	parameter (DiffDim=2*(ny-1)*nx/2+(ny-1))

C------ omega      : vorticity
C------ psi        : streamfunction
C------ v_1, v_2: components of velocity
C------ R_omega, I_omega: real and imaginary part of omega
C------ R_psi, I_psi: real and imaginary part of the streamfunction
C------ R_v_1 and I_v_1: real and imaginary part of the fist 
C			 component of the velocity
C------ R_v_2 and I_v_2: real and imaginary part of the second 
C			 component of the velocity
      real*8 omega(0:ny+1,0:nx+1),psi(0:ny,0:nx-1),
     1     v_1(0:ny+1,0:nx+1),v_2(0:ny+1,0:nx+1),
     1     R_omega(0:ny+1,0:nx/2),I_omega(0:ny+1,0:nx/2),
     1     R_psi(0:ny,0:nx/2),I_psi(0:ny,0:nx/2),
     1     R_v_1(0:ny+1,0:nx/2),I_v_1(0:ny+1,0:nx/2),
     1     R_v_2(0:ny+1,0:nx/2),I_v_2(0:ny+1,0:nx/2),
     1     F_R_omega(0:ny,0:nx/2),F_I_omega(0:ny,0:nx/2)

	common/fields/ omega,psi,v_1,v_2,
     1     R_omega,I_omega,
     1     R_psi,I_psi,
     1     R_v_1,I_v_2,
     1     F_R_omega,F_I_omega

	common/params/ Lx, Ly, Fr









318.5thanks DwightRTOMS::PARETIJMon Apr 07 1997 09:07285
Hi Dwight,
thank you so much for your code using DXML. Here's the
main, using the fourier routines, as well as the include file
with modified dimensions to use your code.

The performance of this new code
is somehow worse than in the original one (like 120 vs. 150 "" )
However I 've not attempted yet the matrices overlap you suggested
Also, could you please post the complete reference to "Numerical REcipes"

Thanks again,
-Joseph

      program NSE_2D

      implicit none
      include 'NSE_2D.inc'

        real*8 FLD(1:DiffDim)
        real*8 T0,T1,alx,tstep

        integer nstep
        integer error
        integer loop
        integer loop1
		  integer i, j
        real*8 t_start, elapsed
        real*8 auxiliary(0:nx,0:ny)

         external  RHS_0

	t_start = secnds(0.0)

         OPEN(Unit=7,FILE='NSE_2D.dat')
         read(7,*)nstep
         read(7,*)FR
         read(7,*)alx
         read(7,*)tstep
         close(unit=7)

	Lx = 2.*pi
	Ly = 2.*pi
        

        open(Unit=7,FILE='state.dat')
        do loop = 1, Diffdim
			read(7,*) fld(loop)
		  end do
        close(unit=7)
        
        call InitializeRungeKutta
        open(Unit=8,FILE='NSE_2D.sim',status='UNKNOWN')

C------ main loop -----------------------------------------------

        do loop=1,nstep
        
                T1=T0+tstep
                write(*,*) 'integration time = ',T1
                CALL  RungeKutta(T0,T1,DiffDim,FLD,RHS_0)
					 write (8,100) (fld(i+63), i = 1, 10)
                T0=T1
        enddo

 100	format (10F12.6)
        close (Unit=8)

C---------------------------------------------------------------
        
        open(Unit = 7, FILE = 'state.dat')
        do loop = 1, Diffdim
        	write(7,*) fld(loop)
        end do
        close (Unit=7)

        do loop=0,ny
           do loop1=0,nx-1
              auxiliary(loop1,loop)=psi(loop,loop1)
           enddo
           auxiliary(nx,loop)=psi(loop,0)
        enddo
       
        open(Unit=7,FILE='stream.dat')
        do i = 0, ny
        	do j = 0, nx-1
			write(7,*) auxiliary(i,j)
	   	end do
		end do
        close(unit=7)
        
	elapsed = secnds(t_start)
	write(*,*)
	write(*,*) 'elapsed time(seconds): ', elapsed   
         
      STOP
      end


      subroutine RHS_0(neq,t0,fld,f_fld)


      implicit none
      include 'NSE_2D.inc'
      integer neq
      real*8 t0
      real*8 fld(1:neq),f_fld(1:neq)
      integer num,loop,loop1 
      do loop=0,ny  
         do loop1=0,nx/2
            R_omega(loop,loop1)=0.0
            I_omega(loop,loop1)=0.0
         enddo
      enddo
      num=0
      loop1=0
      do loop=1,ny-1
         R_omega(loop,loop1)=fld(num+1)
         num=num+1
      enddo

      do loop1=1,nx/2
         do loop=1,ny-1
            R_omega(loop,loop1)=fld(num+1)
            I_omega(loop,loop1)=fld(num+2)
            if(loop1.eq.0) I_omega(loop,loop1)=0.0
            num=num+2
         enddo
      enddo
      call rhs
      num=0
      loop1=0
      do loop=1,ny-1
         f_fld(num+1)=F_R_omega(loop,loop1)
         num=num+1
      enddo
      do loop1=1,nx/2
         do loop=1,ny-1
            f_fld(num+1)=F_R_omega(loop,loop1)
            f_fld(num+2)=F_I_omega(loop,loop1)
            num=num+2
         enddo
      enddo
      return
      end



      subroutine rhs

      implicit none
      include 'NSE_2D.inc'
      
      integer loop,loop1
      real*8 tpi_Lx,pi_Ly
      real*8 kx,ky,ksq
      real*8 v_1_x_omega(0:ny+1,0:nx+1)
      real*8 v_2_x_omega(0:ny+1,0:nx+1)
      real*8 R_v_1_x_omega(0:ny+1,0:nx/2)
      real*8 I_v_1_x_omega(0:ny+1,0:nx/2)
      real*8 R_v_2_x_omega(0:ny+1,0:nx/2)
      real*8 I_v_2_x_omega(0:ny+1,0:nx/2)
      
      tpi_Lx=2.0*pi/Lx
      pi_Ly=pi/Ly


      do loop1=0,nx/2
         do loop=0,ny
            ky=pi_Ly*loop
            kx=tpi_Lx*loop1
            ksq=kx*kx+ky*ky
            if(loop.eq.0.and.loop1.eq.0) then
               R_v_1(loop,loop1)=0.0
               I_v_1(loop,loop1)=0.0
               R_v_2(loop,loop1)=0.0
               I_v_2(loop,loop1)=0.0
            else 
               R_v_1(loop,loop1)=ky*R_omega(loop,loop1)/ksq
               I_v_1(loop,loop1)=ky*I_omega(loop,loop1)/ksq
               R_v_2(loop,loop1)=kx*I_omega(loop,loop1)/ksq
               I_v_2(loop,loop1)=-kx*R_omega(loop,loop1)/ksq
            endif
         enddo
      enddo

      call fft_ce(v_1,R_v_1,I_v_1,ny,nx,-1)
      call fft_se(v_2,R_v_2,I_v_2,ny,nx,-1)
      call fft_se(omega,R_omega,I_omega,ny,nx,-1)

      do loop1=0,nx-1
         do loop=0,ny
            v_1_x_omega(loop,loop1)=v_1(loop,loop1)*omega(loop,loop1)
            v_2_x_omega(loop,loop1)=v_2(loop,loop1)*omega(loop,loop1)
         enddo
      enddo

      call fft_se(v_1_x_omega,R_v_1_x_omega,I_v_1_x_omega,ny,nx,1)
      call fft_ce(v_2_x_omega,R_v_2_x_omega,I_v_2_x_omega,ny,nx,1)

      do loop1=0,nx/2
         kx=tpi_Lx*loop1
         do loop=1,ny-1
         ky=pi_Ly*loop
            ksq=kx*kx+ky*ky
            F_R_omega(loop,loop1)=-ksq*R_omega(loop,loop1)+
     &           kx*I_v_1_x_omega(loop,loop1)+
     &           ky*R_v_2_x_omega(loop,loop1)
            F_I_omega(loop,loop1)=-ksq*I_omega(loop,loop1)-
     &           kx*R_v_1_x_omega(loop,loop1)+
     &           ky*I_v_2_x_omega(loop,loop1)
         enddo
      enddo
      F_I_omega(nf,mf)= F_I_omega(nf,mf)-Fr/2.
      return
      end

















c
	real*8 pi
	parameter(pi=3.141592653589793)
	
C------ nx and ny are the number of grid points
	integer nx, ny
	parameter ( nx = 128, ny = 128 )

C------ Lx and Ly are the length of the container, Fr is the forcing
	real*8 Lx, Ly, Fr

C------ nf number of forced vortices in the y-direction
C------ mf number of forced pairs of vortices in the x-direction
	integer nf, mf
	parameter (nf=8,mf=4)

C------ DiffDim is the dimension of the resulting system of OdE's	
	integer DiffDim
	parameter (DiffDim=2*(ny-1)*nx/2+(ny-1))

C------ omega      : vorticity
C------ psi        : streamfunction
C------ v_1, v_2: components of velocity
C------ R_omega, I_omega: real and imaginary part of omega
C------ R_psi, I_psi: real and imaginary part of the streamfunction
C------ R_v_1 and I_v_1: real and imaginary part of the fist 
C			 component of the velocity
C------ R_v_2 and I_v_2: real and imaginary part of the second 
C			 component of the velocity
      real*8 omega(0:ny+1,0:nx+1),psi(0:ny,0:nx-1),
     1     v_1(0:ny+1,0:nx+1),v_2(0:ny+1,0:nx+1),
     1     R_omega(0:ny+1,0:nx/2),I_omega(0:ny+1,0:nx/2),
     1     R_psi(0:ny,0:nx/2),I_psi(0:ny,0:nx/2),
     1     R_v_1(0:ny+1,0:nx/2),I_v_1(0:ny+1,0:nx/2),
     1     R_v_2(0:ny+1,0:nx/2),I_v_2(0:ny+1,0:nx/2),
     1     F_R_omega(0:ny,0:nx/2),F_I_omega(0:ny,0:nx/2)

	common/fields/ omega,psi,v_1,v_2,
     1     R_omega,I_omega,
     1     R_psi,I_psi,
     1     R_v_1,I_v_2,
     1     F_R_omega,F_I_omega

	common/params/ Lx, Ly, Fr









318.6HPCGRP::MANLEYMon Apr 07 1997 11:0011
> The performance of this new code
> is somehow worse than in the original one (like 120 vs. 150 "" )

This comes as no surprise, one step ffts are not efficient.

Thank you for the include file. I will fix the program to use the versions of
fftce and fftse I sent you via e-mail last week. I will also change the program
to use three step fast sine transforms, three step fast cosine transforms, and
three step ffts.