From ef2cf6b0aa4c7a7cac33ced1c75415fd7f764dcf Mon Sep 17 00:00:00 2001 From: mruizherrerab Date: Fri, 20 Feb 2026 15:48:11 +0100 Subject: [PATCH] CoLoRe-2LPT: 2LPT velocities added for particles, tracers and beams --- Makefile | 10 +- param_colore_2lpt.cfg | 91 ++++++++++++ param_example.cfg | 5 +- src/beaming.c | 28 +++- src/common.h | 12 ++ src/cosmo.c | 10 +- src/density.c | 336 ++++++++++++++++++++++++++++++++++++++++-- src/fourier.c | 41 ++++++ src/io.c | 91 ++++++++++++ src/srcs.c | 48 ++++-- 10 files changed, 630 insertions(+), 42 deletions(-) create mode 100755 param_colore_2lpt.cfg diff --git a/Makefile b/Makefile index fa7e3ff..22f7129 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ OPTIONS = -Wall -Wno-format-overflow -O3 -std=c99 #Use double precision integer (enable in general) DEFINEFLAGS += -D_LONGIDS #Use normalized bias model -DEFINEFLAGS += -D_BIAS_MODEL_2 +DEFINEFLAGS += -D_BIAS_MODEL_3 #Use linear bias model #DEFINEFLAGS += -D_BIAS_MODEL_3 #Use new lensing method @@ -33,8 +33,8 @@ USE_MPI = yes #GSL #GSL_INC = -I/add/path #GSL_LIB = -L/add/path -GSL_INC = -I/home/alonso/include -GSL_LIB = -L/home/alonso/lib +GSL_INC = +GSL_LIB = #FFTW FFTW_INC = FFTW_LIB = @@ -45,8 +45,8 @@ FITS_LIB = HDF5_INC = HDF5_LIB = #libconfig -CONF_INC = -CONF_LIB = +CONF_INC = -I/home/mruizh/software_libconf/include +CONF_LIB = -L/home/mruizh/software_libconf/lib #healpix HPIX_INC = HPIX_LIB = diff --git a/param_colore_2lpt.cfg b/param_colore_2lpt.cfg new file mode 100755 index 0000000..a17ccf0 --- /dev/null +++ b/param_colore_2lpt.cfg @@ -0,0 +1,91 @@ +global: +{ + #Output prefix. Output will be in prefix_. + prefix_out= "/lustre/home/mruizh/2lpt_paper/codes/CoLoRe-master/new_out/out"; + #Output format. Select HDF5, FITS or ASCII + output_format= "FITS"; + #Output Gaussian overdensity field at z=0? + output_density= false + #Path to power spectrum at z=0. Power spectrum file must + #be in CAMB format: k (h/Mpc), P(k) (Mpc/h)^3. + pk_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Pk_Abacus_Cosmology_z0.txt" + #This redshift range also defines the size of the box + z_min= 1.6 + z_max= 3.79 + #RNG seed note that output will depend on number of nodes, etc not only + #on the RNG seed + seed= 1 + #Set to true if you want to generate the theory prediction for the 3D power spectrum + #of the different tracers. + write_pred=false + #Intervals of redshift at which the prediction will be produced. + pred_dz=0.1 + #If write_pred is true, set this to true if you just want to generate the prediction + #and then exit. This is also useful if you want to inspect the memory requirements + #before a run. + just_write_pred= false +} + +field_par: +{ + #Extra Gaussian smoothing scale [Mpc/h] (set to a + #negative value if you don't want any smoothing) + r_smooth= -1 + #Do you want to smooth the Newtonian potential as well? + smooth_potential= false + #Will use a Cartesian grid with n_grid^3 cells + n_grid= 4096 + #Density field type + # 0-lognormal + # 1-1LPT + # 2-2LPT + dens_type= 2 + #If dens_type==1 or 2, buffer size (fraction per particle) + lpt_buffer_fraction= 0.6 + #If dens_type==1 or 2, scheme to interpolate particle + #positions into a grid + # 0-NGP + # 1-CIC + # 2-TSC + lpt_interp_type= 1 + lpt_vzty= 1 + #Set to 1 if you want to output the LPT particle positions + output_lpt= 0 +} + +cosmo_par: +{ + #Non-relativistic matter + omega_M= 0.3153 + #Dark energy + omega_L= 0.6847 + #Baryons + omega_B= 0.049301692328524445 + #Hubble parameter (in units of 100 km/s/Mpc) + h= 0.6736 + #Dark energy equation of state + w= -1.0 + #Primordial scalar spectral index, used only to extrapolate + #P(k) at low k end (-3 used at high k end) + ns= 0.9649 + #Power spectrum normalization. The input power spectrum will be + #renormalized to this sigma8 + sigma_8= 0.8111 +} + +#For each galaxy population, create a section called srcsX, starting with X=1 +srcs1: +{ + #Path to N(z) file. Should contain two columns + # 1-> z, 2-> dN(z)/dz*dOmega + # with dN/dzdOmega in units of deg^-2 + nz_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Nz_qso_130618_2_colore1_hZs.txt" + #Path to bias file. Should contain two columns + # 1-> z, 2-> b(z) + bias_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Bz_clustering.txt" + threshold_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Tz_clustering.txt" + #Do you want to include shear ellipticities? + include_lensing= false + #Do you want to store line-of-sight skewers for each object? + store_skewers= true +} diff --git a/param_example.cfg b/param_example.cfg index 9935fc5..b5dfa44 100644 --- a/param_example.cfg +++ b/param_example.cfg @@ -48,7 +48,10 @@ field_par: # 1-CIC # 2-TSC lpt_interp_type= 1 - #Set to 1 if you want to output the LPT particle positions + + lpt_vzty= 0 #Set to 1 if you want to use 2LPT velocities with 2LPT densities. If used, only CIC available (for now) for this case! + + #Set to 1 if you want to output the LPT particle positions (if using 2LPT, also velocities will be in the output!) output_lpt= 0 } diff --git a/src/beaming.c b/src/beaming.c index 9850279..4ffb9df 100644 --- a/src/beaming.c +++ b/src/beaming.c @@ -67,14 +67,20 @@ static void get_element(ParamCoLoRe *par,long ix,long iy,long iz, //Get velocity if(flag_return & RETURN_VEL) { - v[0]=par->grid_npot[ix_hi+iy_0+iz_0]-par->grid_npot[ix_lo+iy_0+iz_0]; - v[1]=par->grid_npot[ix_0+iy_hi+iz_0]-par->grid_npot[ix_0+iy_lo+iz_0]; - if(iz==0) - v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->slice_left[ix_0+iy_0]; - else if(iz==par->nz_here-1) - v[2]=par->slice_right[ix_0+iy_0]-par->grid_npot[ix_0+iy_0+iz_lo]; - else - v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->grid_npot[ix_0+iy_0+iz_lo]; + if (par->lpt_vzty){ + v[0]=par->grid_velx[ix_0+iy_0+iz_0]; + v[1]=par->grid_vely[ix_0+iy_0+iz_0]; + v[2]=par->grid_velz[ix_0+iy_0+iz_0]; + } else { + v[0]=par->grid_npot[ix_hi+iy_0+iz_0]-par->grid_npot[ix_lo+iy_0+iz_0]; + v[1]=par->grid_npot[ix_0+iy_hi+iz_0]-par->grid_npot[ix_0+iy_lo+iz_0]; + if(iz==0) + v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->slice_left[ix_0+iy_0]; + else if(iz==par->nz_here-1) + v[2]=par->slice_right[ix_0+iy_0]-par->grid_npot[ix_0+iy_0+iz_lo]; + else + v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->grid_npot[ix_0+iy_0+iz_lo]; + } } //Get ISW @@ -319,6 +325,9 @@ void get_beam_properties(ParamCoLoRe *par) #ifdef _HAVE_MPI long size_slice_npot=(par->nz_max+2)*((long)(2*par->n_grid*(par->n_grid/2+1))); long size_slice_dens=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1))); + long size_slice_velx=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1))); + long size_slice_vely=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1))); + long size_slice_velz=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1))); flouble *buffer_sr=my_malloc(size_slice_npot*sizeof(flouble)); #endif //_HAVE_MPI @@ -330,6 +339,9 @@ void get_beam_properties(ParamCoLoRe *par) #endif //_DEBUG mpi_sendrecv_wrap(par->grid_npot,buffer_sr,size_slice_npot,i); mpi_sendrecv_wrap(par->grid_dens,buffer_sr,size_slice_dens,i); + mpi_sendrecv_wrap(par->grid_velx,buffer_sr,size_slice_velx,i); + mpi_sendrecv_wrap(par->grid_vely,buffer_sr,size_slice_vely,i); + mpi_sendrecv_wrap(par->grid_velz,buffer_sr,size_slice_velz,i); #endif //_HAVE_MPI par->nz_here=par->nz_all[node_i_am_now]; diff --git a/src/common.h b/src/common.h index 38f67fc..b1ce904 100644 --- a/src/common.h +++ b/src/common.h @@ -110,6 +110,8 @@ #define BG_KZ_CSTM 1012 #define BG_BZ_CSTM 1013 #define BG_NORM_CSTM 1014 +#define BG_F1 1015 +#define BG_F2 1016 //Density field parameters #define DZ_SIGMA 0.05 @@ -248,6 +250,8 @@ typedef struct { double growth_d2_arr[NA]; //Array of density growth factors used to compute D_d(r) double growth_v_arr[NA]; //Array of velocity growth factors used to compute D_v(r) double growth_pd_arr[NA]; //Array of potential derivative factors used to compute \dot{\phi} + double growth_f1_arr[NA]; // Array of 1 order growth rates used to compute f_d(r) + double growth_f2_arr[NA]; // Array of 2 order growth rates used to compute f_d(r) double ihub_arr[NA]; //Array of 1/H(z) double glob_idr; //1/dr, where dr is the radial comoving distance interval used in the arrays above // Power spectra @@ -267,6 +271,7 @@ typedef struct { int smooth_potential; //Do we smooth the newtonian potential as well? int dens_type; //Method to produce the density field int lpt_interp_type; + int lpt_vzty; // Use 2LPT velocities (only makes sense if dens_type is 2LPT) double lpt_buffer_fraction; //Fraction of memory saved for buffer particles int output_lpt; unsigned int seed_rng; //RNG seed @@ -284,6 +289,12 @@ typedef struct { dftw_complex *grid_dens_f; //Fourier-space grid for the density field flouble *grid_dens; //Real-space grid for the density field dftw_complex *grid_npot_f; //Fourier-space grid for the Newtonian potential + flouble *grid_velx; //Real-space grid for the x component of the 2lpt velocity field + flouble *grid_vely; //Real-space grid for the y component of the 2lpt velocity field + flouble *grid_velz; //Real-space grid for the z component of the 2lpt velocity field + dftw_complex *grid_velx_f; //Fourier-space grid for the x component of the 2lpt velocity field (not used right?) + dftw_complex *grid_vely_f; //Fourier-space grid for the y component of the 2lpt velocity field (not used right?) + dftw_complex *grid_velz_f; //Fourier-space grid for the z component of the 2lpt velocity field (not used right?) flouble *grid_npot; //Real-space grid for the Newtonian potential flouble *slice_left; //Dummy array to store grid cells coming from the left node flouble *slice_right; //Dummy array to store grid cells coming from the right node @@ -448,6 +459,7 @@ flouble *compute_lensing_spacing(ParamCoLoRe *par); ParamCoLoRe *read_run_params(char *fname,int test_memory); void write_density_grid(ParamCoLoRe *par,char *prefix_dens); void write_lpt(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,flouble *z); +void write_lpt_wvel(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,flouble *z, flouble *vx, flouble *vy, flouble *vz); //Same as above but including 2lpt velocities (merge) void write_srcs(ParamCoLoRe *par); void write_imap(ParamCoLoRe *par); void write_cstm(ParamCoLoRe *par); diff --git a/src/cosmo.c b/src/cosmo.c index 2211849..af414d1 100644 --- a/src/cosmo.c +++ b/src/cosmo.c @@ -47,6 +47,11 @@ double get_bg(ParamCoLoRe *par,double r,int tag,int ipop) return f_of_r_linear(par,r,par->growth_d2_arr,par->growth_d2_arr[0], par->growth_d2_arr[NA-1]); } + else if(tag==BG_F1) + return f_of_r_linear(par,r,par->growth_f1_arr,par->growth_f1_arr[0],par->growth_f1_arr[NA-1]); + else if(tag==BG_F2) { + return f_of_r_linear(par,r,par->growth_f2_arr,par->growth_f2_arr[0],par->growth_f2_arr[NA-1]); + } else if(tag==BG_V1) return f_of_r_linear(par,r,par->growth_v_arr,par->growth_v_arr[0],par->growth_v_arr[NA-1]); else if(tag==BG_PD) { @@ -717,11 +722,14 @@ void cosmo_set(ParamCoLoRe *par) double gz=csm_growth_factor(pars,a)/growth0; double om=csm_omega_m(pars,a); double fz=csm_f_growth(pars,a); + double f2z=2*pow(om, 0.5714285714285714); // Using 2LPTic Paper aproximation for f2(z) consistent with the one for D2(z) double hhz=csm_hubble(pars,a); par->z_arr_r2z[ii]=z; par->r_arr_r2z[ii]=r; par->growth_d_arr[ii]=gz; - par->growth_d2_arr[ii]=-0.42857142857*gz*gz*pow(om,-0.00699300699); + par->growth_d2_arr[ii]=-0.42857142857*gz*gz*pow(om,-0.00699300699); // Using Paper aproximation for D2(z) = -3/7*D1**2*Om**eps; consistent with the one for f2(z) + par->growth_f1_arr[ii]=fz; // Initialize the f1 and f2 arrays + par->growth_f2_arr[ii]=f2z; par->growth_v_arr[ii]=(gz*hhz*fz)/(par->fgrowth_0*par->hubble_0); //This is for the comoving velocity par->growth_pd_arr[ii]=gz*hhz*(fz-1); par->ihub_arr[ii]=1./hhz; diff --git a/src/density.c b/src/density.c index 9aadba5..a502575 100644 --- a/src/density.c +++ b/src/density.c @@ -186,6 +186,95 @@ static void pos_2_dens(ParamCoLoRe *par,unsigned long long np_here, else report_error(1,"Wrong interpolation type\n"); } +//Here I am using a CIC for the 2LPT case including the velocities. TO DO: have a pos_2_dens merged (including velocities) with NGP and TSC too. +static void pos_2_dens_2lpt(ParamCoLoRe *par,unsigned long long np_here, + flouble *x,flouble *y,flouble *z,flouble *dens,flouble *vx, flouble *vy,flouble *vz, flouble *velsx, flouble *velsy, flouble *velsz) +{ + unsigned long long ii; + flouble i_agrid=par->n_grid/par->l_box; + long ngx=2*(par->n_grid/2+1); + + for(ii=0;iin_grid; + if(i1[ax]<0) i1[ax]+=par->n_grid; + if(i0[ax]>=par->n_grid) i0[ax]-=par->n_grid; + if(i1[ax]>=par->n_grid) i1[ax]-=par->n_grid; + } + i0[2]-=par->iz0_here; + i1[2]-=par->iz0_here; + + if((i0[2]>=0) && (i0[2]nz_here)) { + + dens[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]; + dens[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]; + dens[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]; + dens[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]; + // Get velocity field + velsx[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]*vx[ii]; + velsx[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]*vx[ii]; + velsx[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]*vx[ii]; + velsx[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]*vx[ii]; + + velsy[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]*vy[ii]; + velsy[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]*vy[ii]; + velsy[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]*vy[ii]; + velsy[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]*vy[ii]; + + velsz[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]*vz[ii]; + velsz[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]*vz[ii]; + velsz[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]*vz[ii]; + velsz[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]*vz[ii]; + } + if((i1[2]>=0) && (i1[2]nz_here)) { + + dens[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]; + dens[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]; + dens[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]; + dens[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]; + // Get velocity field + velsx[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]*vx[ii]; + velsx[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]*vx[ii]; + velsx[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]*vx[ii]; + velsx[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]*vx[ii]; + + velsy[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]*vy[ii]; + velsy[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]*vy[ii]; + velsy[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]*vy[ii]; + velsy[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]*vy[ii]; + + velsz[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]*vz[ii]; + velsz[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]*vz[ii]; + velsz[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]*vz[ii]; + velsz[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]*vz[ii]; + } + } + // Normalize velocity by density + for(ii=0;iinz_here*par->n_grid*ngx;ii++) { + //if(dens[ii]!=0){ + if(dens[ii]!=0) { + velsx[ii] /= dens[ii]; + velsy[ii] /= dens[ii]; + velsz[ii] /= dens[ii]; + } else { + velsx[ii] = 0; + velsy[ii] = 0; + velsz[ii] = 0; + } + } + +} #ifdef _HAVE_MPI static void share_particles(ParamCoLoRe *par,unsigned long long np_allocated,unsigned long long np_real, @@ -373,6 +462,193 @@ static void share_particles(ParamCoLoRe *par,unsigned long long np_allocated,uns } #endif //_HAVE_MPI +//This is essentially the same as above but including the velocities of the particles for the 2LPT case. TO DO: Merge with the above! +#ifdef _HAVE_MPI +static void share_particles_2lpt(ParamCoLoRe *par,unsigned long long np_allocated,unsigned long long np_real, + flouble *x,flouble *y,flouble *z, + flouble *vx,flouble *vy,flouble *vz, + unsigned long long *np_here) +{ + int inode; + unsigned long long ip; + flouble dx=par->l_box/par->n_grid; + flouble *z_left=my_malloc(NNodes*sizeof(flouble)); + flouble *z_right=my_malloc(NNodes*sizeof(flouble)); + flouble *z_true_left=my_malloc(NNodes*sizeof(flouble)); + flouble *z_true_right=my_malloc(NNodes*sizeof(flouble)); + flouble *z_bleft_left=my_malloc(NNodes*sizeof(flouble)); + flouble *z_bleft_right=my_malloc(NNodes*sizeof(flouble)); + flouble *z_bright_left=my_malloc(NNodes*sizeof(flouble)); + flouble *z_bright_right=my_malloc(NNodes*sizeof(flouble)); + unsigned long long nbuffer=(unsigned long long)(par->lpt_buffer_fraction*par->nz_here* + ((long)(par->n_grid*par->n_grid))); + flouble *x_b=my_malloc(nbuffer*sizeof(flouble)); + flouble *y_b=my_malloc(nbuffer*sizeof(flouble)); + flouble *z_b=my_malloc(nbuffer*sizeof(flouble)); + flouble *vx_b=my_malloc(nbuffer*sizeof(flouble)); + flouble *vy_b=my_malloc(nbuffer*sizeof(flouble)); + flouble *vz_b=my_malloc(nbuffer*sizeof(flouble)); + + for(inode=0;inodeiz0_all[inode]*dx; + z_right[inode]=(par->iz0_all[inode]+par->nz_all[inode])*dx; + z_true_left[inode]=(par->iz0_all[inode]+(par->lpt_interp_type+1)*0.5)*dx; + z_true_right[inode]=(par->iz0_all[inode]+par->nz_all[inode]-(par->lpt_interp_type+1)*0.5)*dx; + z_bleft_left[inode]=(par->iz0_all[inode]-(par->lpt_interp_type+1)*0.5)*dx; + z_bleft_right[inode]=z_left[inode]; + z_bright_left[inode]=z_right[inode]; + z_bright_right[inode]=(par->iz0_all[inode]+par->nz_all[inode]+(par->lpt_interp_type+1)*0.5)*dx; + if(inode==0) { + z_bleft_left[inode]=(par->n_grid-(par->lpt_interp_type+1)*0.5)*dx; + z_bleft_right[inode]=par->n_grid*dx; + } + else if(inode==NNodes-1) { + z_bright_left[inode]=0; + z_bright_right[inode]=(par->lpt_interp_type+1)*0.5*dx; + } + } + + //Figure out which particles need to be moved and which stay + unsigned long long n_inrange=0,n_inbuffer=0; + for(ip=0;ip=z_left[NodeThis]) && (z[ip]=z_bleft_left[NodeThis]) && (z[ip]=z_bright_left[NodeThis]) && (z[ip]=z_true_right[NodeThis])) + send=1; + + if(send) { + if(n_inbuffer>=nbuffer) + report_error(1,"Out of memory, enlarge buffer %llu (%llu,%llu) %llu\n", + np_allocated,n_inbuffer,nbuffer,n_inrange); + x_b[n_inbuffer]=x[ip]; + y_b[n_inbuffer]=y[ip]; + z_b[n_inbuffer]=z[ip]; + vx_b[n_inbuffer]=vx[ip]; + vy_b[n_inbuffer]=vy[ip]; + vz_b[n_inbuffer]=vz[ip]; + n_inbuffer++; + } + + if(include) { + if(n_inrange>=np_allocated) + report_error(1,"Out of memory, enlarge buffer %llu %llu %llu\n",np_allocated,n_inbuffer,n_inrange); + x[n_inrange]=x[ip]; + y[n_inrange]=y[ip]; + z[n_inrange]=z[ip]; + vx[n_inrange]=vx[ip]; + vy[n_inrange]=vy[ip]; + vz[n_inrange]=vz[ip]; + n_inrange++; + } + } + + for(inode=1;inode=z_left[node_to]) && (z_b[i]=z_bleft_left[node_to]) && (z_b[i]=z_bright_left[node_to]) && (z_b[i]=0 && + (((z_b[j]>=z_left[node_to]) && (z_b[j]=z_bleft_left[node_to]) && (z_b[j]=z_bright_left[node_to]) && (z_b[j]np_allocated) + report_error(1,"Not enough memory, enlarge buffer\n"); + +#ifdef _HAVE_MPI + MPI_Sendrecv(x_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag, + x+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status); + MPI_Sendrecv(y_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag, + y+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status); + MPI_Sendrecv(z_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag, + z+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status); + MPI_Sendrecv(vx_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag, + vx+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status); + MPI_Sendrecv(vy_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag, + vy+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status); + MPI_Sendrecv(vz_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag, + vz+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status); +#endif //_HAVE_MPI + + n_inrange+=nrecv; + } + + *np_here=n_inrange; + + free(z_left); + free(z_right); + free(z_bleft_left); + free(z_bleft_right); + free(z_bright_left); + free(z_bright_right); + free(x_b); + free(y_b); + free(z_b); + free(vx_b); + free(vy_b); + free(vz_b); + + return; +} +#endif //_HAVE_MPI + static void lpt_1(ParamCoLoRe *par) { int axis; @@ -496,6 +772,9 @@ static void lpt_1(ParamCoLoRe *par) disp[ax][index]=p; } par->grid_dens[index]=0; + par->grid_velx[index]=0; //Fill the 2LPT velocities with zero not needed (Maybe improve?) + par->grid_vely[index]=0; + par->grid_velz[index]=0; } } } //end omp for @@ -646,22 +925,25 @@ static void lpt_1(ParamCoLoRe *par) static void lpt_2(ParamCoLoRe *par) { int axis; - - dftw_complex *(cdisp[3]),*(cdigrad[6]); - flouble *(disp[3]),*(digrad[6]); + print_info(" 2LPT\n"); + dftw_complex *(cdisp[3]),*(cdigrad[9]); + flouble *(disp[3]),*(digrad[9]); + if (par->lpt_vzty){ + print_info(" with 2LPT velocities\n"); + } else { + print_info(" without 2LPT velocities\n"); + } ptrdiff_t dsize=par->nz_here*((long)(par->n_grid*(par->n_grid/2+1))); ptrdiff_t dsize_buff=(ptrdiff_t)(dsize*(1+par->lpt_buffer_fraction)); - print_info(" 2LPT\n"); - print_info(" - Transforming density field\n"); for(axis=0;axis<2;axis++) { cdisp[axis]=dftw_alloc_complex(dsize_buff); disp[axis]=(flouble *)cdisp[axis]; } - for(axis=0;axis<6;axis++) { - cdigrad[axis]=dftw_alloc_complex(dsize_buff); - digrad[axis]=(flouble *)cdigrad[axis]; + for(axis=0;axis<9;axis++) { + cdigrad[axis]=dftw_alloc_complex(dsize_buff); + digrad[axis]=(flouble *)cdigrad[axis]; } cdisp[2]=par->grid_dens_f; disp[2]=par->grid_dens; @@ -858,12 +1140,20 @@ static void lpt_2(ParamCoLoRe *par) for(ix=0;ixn_grid;ix++) { int ax; double r,dg,d2g; + //Define growth rates and H(z)^-1 + double f1g,f2g,invhz; long index=ix+indexy+indexz; long index_nopad=ix+par->n_grid*((long)(iy+par->n_grid*iz)); xv[0]=(ix+0.0)*dx-par->pos_obs[0]; r=sqrt(xv[0]*xv[0]+xv[1]*xv[1]+xv[2]*xv[2]); dg=get_bg(par,r,BG_D1,0); d2g=get_bg(par,r,BG_D2,0); + if (par->lpt_vzty) { //Calculate growth rates and H(z)^-1 + f1g=get_bg(par,r,BG_F1,0); + f2g=get_bg(par,r,BG_F2,0); + invhz=get_bg(par,r,BG_IH,0); + } + for(ax=0;ax<3;ax++) { #ifdef _DEBUG d_mean_1_thr[ax]+=disp[ax][index]; @@ -872,11 +1162,18 @@ static void lpt_2(ParamCoLoRe *par) d_sigma2_2_thr+=digrad[ax][index]*digrad[ax][index]; #endif //_DEBUG flouble p=xv[ax]+dg*disp[ax][index]+d2g*digrad[ax][index]+par->pos_obs[ax]; + if (par->lpt_vzty) { //Get the particles 2LPT velocities! + flouble vzty = (dg*f1g*disp[ax][index]+d2g*f2g*digrad[ax][index])/invhz; + digrad[6+ax][index_nopad]=vzty; + } if(p<0) p+=par->l_box; if(p>=par->l_box) p-=par->l_box; digrad[3+ax][index_nopad]=p; } par->grid_dens[index]=0; + par->grid_velx[index]=0; // Keep the grids to zero, we will fill them with interpolation scheme (only CIC now; see above) + par->grid_vely[index]=0; + par->grid_velz[index]=0; } } } //end omp for @@ -946,24 +1243,37 @@ static void lpt_2(ParamCoLoRe *par) #endif //_SPREC unsigned long long np_here; - if(par->output_lpt) { + if (par->output_lpt) { np_here=par->nz_here*((long)(par->n_grid*par->n_grid)); print_info(" - Writing LPT positions\n"); - write_lpt(par,np_here,digrad[3],digrad[4],digrad[5]); + if (par->lpt_vzty) { + write_lpt_wvel(par,np_here,digrad[3],digrad[4],digrad[5], digrad[6], digrad[7], digrad[8]); + } else { + write_lpt(par,np_here,digrad[3],digrad[4],digrad[5]); + } } #ifdef _HAVE_MPI print_info(" - Sharing particle positions\n"); - share_particles(par,(unsigned long long)(2*dsize_buff), + if (par->lpt_vzty) { + share_particles_2lpt(par,(unsigned long long)(2*dsize_buff), + (unsigned long long)(par->nz_here*((long)(par->n_grid*par->n_grid))), + digrad[3],digrad[4],digrad[5],digrad[6],digrad[7],digrad[8],&np_here); + } else { + share_particles(par,(unsigned long long)(2*dsize_buff), (unsigned long long)(par->nz_here*((long)(par->n_grid*par->n_grid))), digrad[3],digrad[4],digrad[5],&np_here); + } #else //_HAVE_MPI np_here=par->nz_here*((long)(par->n_grid*par->n_grid)); #endif //_HAVE_MPI print_info(" - Interpolating positions into density field\n"); - pos_2_dens(par,np_here,digrad[3],digrad[4],digrad[5],par->grid_dens); - + if (par->lpt_vzty) { + pos_2_dens_2lpt(par,np_here,digrad[3],digrad[4],digrad[5],par->grid_dens,digrad[6], digrad[7], digrad[8], par->grid_velx, par->grid_vely, par->grid_velz); + } else { + pos_2_dens(par,np_here,digrad[3],digrad[4],digrad[5],par->grid_dens); + } #ifdef _SPREC for(axis=0;axis<3;axis++) fftwf_free(cdigrad[3+axis]); diff --git a/src/fourier.c b/src/fourier.c index aea1cef..5f24cb0 100644 --- a/src/fourier.c +++ b/src/fourier.c @@ -222,6 +222,7 @@ void allocate_fftw(ParamCoLoRe *par) report_error(1,"Ran out of memory\n"); par->grid_dens=(flouble *)(par->grid_dens_f); +// Intialize the needed memory for the 2LPT velocity arrays #ifdef _SPREC par->grid_npot_f=fftwf_alloc_complex(dsize+2*par->n_grid*(par->n_grid/2+1)); #else //_SPREC @@ -231,6 +232,34 @@ void allocate_fftw(ParamCoLoRe *par) report_error(1,"Ran out of memory\n"); par->grid_npot=(flouble *)(par->grid_npot_f); +#ifdef _SPREC + par->grid_velx_f=fftwf_alloc_complex(dsize); +#else //_SPREC + par->grid_velx_f=fftw_alloc_complex(dsize); +#endif //_SPREC + if(par->grid_velx_f==NULL) + report_error(1,"Ran out of memory\n"); + par->grid_velx=(flouble *)(par->grid_velx_f); + +#ifdef _SPREC + par->grid_vely_f=fftwf_alloc_complex(dsize); +#else //_SPREC + par->grid_vely_f=fftw_alloc_complex(dsize); +#endif //_SPREC + if(par->grid_vely_f==NULL) + report_error(1,"Ran out of memory\n"); + par->grid_vely=(flouble *)(par->grid_vely_f); +// + +#ifdef _SPREC + par->grid_velz_f=fftwf_alloc_complex(dsize); +#else //_SPREC + par->grid_velz_f=fftw_alloc_complex(dsize); +#endif //_SPREC + if(par->grid_velz_f==NULL) + report_error(1,"Ran out of memory\n"); + par->grid_velz=(flouble *)(par->grid_velz_f); + #ifdef _HAVE_MPI par->slice_left =&(par->grid_npot[2*dsize]); par->slice_right=&(par->grid_npot[2*(dsize+par->n_grid*(par->n_grid/2+1))]); @@ -242,11 +271,23 @@ void end_fftw(ParamCoLoRe *par) #ifdef _SPREC if(par->grid_dens_f!=NULL) fftwf_free(par->grid_dens_f); + if(par->grid_velx_f!=NULL) + fftwf_free(par->grid_velx_f); + if(par->grid_vely_f!=NULL) + fftwf_free(par->grid_vely_f); + if(par->grid_velz_f!=NULL) + fftwf_free(par->grid_velz_f); if(par->grid_npot_f!=NULL) fftwf_free(par->grid_npot_f); #else //_SPREC if(par->grid_dens_f!=NULL) fftw_free(par->grid_dens_f); + if(par->grid_velx_f!=NULL) + fftw_free(par->grid_velx_f); + if(par->grid_vely_f!=NULL) + fftw_free(par->grid_vely_f); + if(par->grid_velz_f!=NULL) + fftw_free(par->grid_velz_f); if(par->grid_npot_f!=NULL) fftw_free(par->grid_npot_f); #endif //_SPREC diff --git a/src/io.c b/src/io.c index 9ed09ca..addf04d 100644 --- a/src/io.c +++ b/src/io.c @@ -64,6 +64,7 @@ static ParamCoLoRe *param_colore_new(void) par->smooth_potential=0; par->dens_type=DENS_TYPE_LGNR; par->lpt_interp_type=INTERP_CIC; + par->lpt_vzty=0; par->lpt_buffer_fraction=0.2; par->output_lpt=0; par->seed_rng=1234; @@ -80,6 +81,14 @@ static ParamCoLoRe *param_colore_new(void) // Density grids par->grid_dens_f=NULL; par->grid_dens=NULL; + // 2LPT velocity grids + par->grid_velx_f=NULL; + par->grid_vely_f=NULL; + par->grid_velz_f=NULL; + par->grid_velx=NULL; + par->grid_vely=NULL; + par->grid_velz=NULL; + //Pot. grids par->grid_npot_f=NULL; par->grid_npot=NULL; par->sigma2_gauss=0; @@ -295,6 +304,7 @@ ParamCoLoRe *read_run_params(char *fname,int test_memory) conf_read_int(conf,"field_par","dens_type",&(par->dens_type)); conf_read_double(conf,"field_par","lpt_buffer_fraction",&(par->lpt_buffer_fraction)); conf_read_int(conf,"field_par","lpt_interp_type",&(par->lpt_interp_type)); + conf_read_int(conf,"field_par","lpt_vzty",&(par->lpt_vzty)); conf_read_int(conf,"field_par","output_lpt",&(par->output_lpt)); par->seed_rng=i_dum; @@ -710,6 +720,87 @@ void write_lpt(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,f fclose(fo); } +// TO DO: Merge the above and this +void write_lpt_wvel(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,flouble *z, flouble *vx, flouble *vy, flouble *vz) +{ + GadgetHeader header; + FILE *fo; + char fname[256]; + unsigned long long ipart,np_total; + unsigned long long np_total_expected=par->n_grid*((long)(par->n_grid*par->n_grid)); + + sprintf(fname,"%s_lpt_out.%d",par->prefixOut,NodeThis); + fo=fopen(fname,"w"); + if(fo==NULL) error_open_file(fname); + +#ifdef _HAVE_MPI + unsigned long long np_send=npart; + MPI_Reduce(&np_send,&np_total,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,0,MPI_COMM_WORLD); + MPI_Bcast(&np_total,1,MPI_UNSIGNED_LONG_LONG,0,MPI_COMM_WORLD); +#else //_HAVE_MPI + np_total=npart; +#endif //_HAVE_MPI + + if(np_total!=np_total_expected) + report_error(1,"Only %llu particles found, but there should be %ull\n",np_total,np_total_expected); + + double m=27.7455*par->OmegaM*pow(par->l_box,3.)/np_total; + memset(&header,0,sizeof(GadgetHeader)); + + header.np[1]=npart; + header.mass[1]=m; + header.time=1.; + header.redshift=0.; + header.np_total[1]=(unsigned int)np_total; + header.np_total_highword[1]=(unsigned int)(np_total >> 32); + header.num_files=NNodes; + header.boxsize=par->l_box; + header.omega0=par->OmegaM; + header.omega_lambda=par->OmegaL; + header.hubble_param=par->hhub; + header.flag_gadgetformat=1; + + int blklen=sizeof(GadgetHeader); + my_fwrite(&blklen,sizeof(blklen),1,fo); + my_fwrite(&header,sizeof(GadgetHeader),1,fo); + my_fwrite(&blklen,sizeof(blklen),1,fo); + + float x0[3]; + // position + blklen=npart*sizeof(float)*3; + my_fwrite(&blklen,sizeof(blklen),1,fo); + for(ipart=0;ipartiz0_here*((long)(par->n_grid*par->n_grid))); + for(ipart=0;ipartlpt_vzty) { + dz_rsd=par->grid_velx[index]; // This is simply for consistency with the Snapshot Version. Note that the true dz_rsd will be computed in the skewers! + } else { + dz_rsd=rvel*get_bg(par,rr,BG_V1,0); + } for(ip=0;ipcats[ipop]->nsrc;ii++) { - par->cats[ipop]->srcs[ii].dz_rsd=0; + par->cats[ipop]->srcs[ii].dz_rsd=0; //Here is were really dz_rsd is initialize since get_beam_properties calls this first and cleans! par->cats[ipop]->srcs[ii].e1=0; par->cats[ipop]->srcs[ii].e2=0; }//end omp for @@ -499,7 +504,11 @@ static void srcs_get_beam_properties_single(ParamCoLoRe *par,int ipop) //Compute RSD added=interpolate_from_grid(par,xn,NULL,v,NULL,NULL,NULL,RETURN_VEL,INTERP_TYPE_SKW); if(added) { - vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]); + if (par->lpt_vzty) { //This is directly velocity + vr=(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]); + } else { + vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]); + } cat->srcs[ip].dz_rsd+=vr; } @@ -518,7 +527,11 @@ static void srcs_get_beam_properties_single(ParamCoLoRe *par,int ipop) added=interpolate_from_grid(par,xn,&dens,v,NULL,NULL,&gauss,RETURN_DENS | RETURN_VEL,INTERP_TYPE_SKW); } if(added) { - vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]); + if (par->lpt_vzty) { //Same as above + vr=(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]); + } else { + vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]); + } if(cat->skw_gauss) cat->g_skw[offp+i_r]+=gauss; else @@ -642,7 +655,10 @@ static void srcs_beams_postproc_single(ParamCoLoRe *par,int ipop) #endif //_HAVE_OMP { int ii; - double factor_vel=-par->fgrowth_0/(1.5*par->hubble_0*par->OmegaM); + double factor_vel; + if (!par->lpt_vzty){ //2LPT velocities are already normalized! + factor_vel=-par->fgrowth_0/(1.5*par->hubble_0*par->OmegaM); + } #ifdef _USE_FAST_LENSING int ir_s=0; HealpixShellsAdaptive *smap=NULL; @@ -658,9 +674,10 @@ static void srcs_beams_postproc_single(ParamCoLoRe *par,int ipop) double r=r_of_z(par,z); double vg=get_bg(par,r,BG_V1,0); - //RSDs - cat->srcs[ii].dz_rsd*=vg*factor_vel; - + //RSDs //2LPT velocities are already normalized! + if (!par->lpt_vzty){ + cat->srcs[ii].dz_rsd*=vg*factor_vel; + } //Lensing if(cat->has_lensing) { #ifdef _USE_FAST_LENSING @@ -724,12 +741,15 @@ static void srcs_beams_postproc_single(ParamCoLoRe *par,int ipop) } //Skewers - if(cat->has_skw) { - int i_r,i_r_max=MAX((int)(r*cat->idr+0.5),cat->nr-1); - long offp=ii*cat->nr; - for(i_r=0;i_r<=i_r_max;i_r++) { - vg=get_bg(par,(i_r+0.5)*cat->dr,BG_V1,0); - cat->v_skw[offp+i_r]*=vg*factor_vel; + //2LPT Velocities are already normalized! + if (!par->lpt_vzty){ + if(cat->has_skw) { + int i_r,i_r_max=MAX((int)(r*cat->idr+0.5),cat->nr-1); + long offp=ii*cat->nr; + for(i_r=0;i_r<=i_r_max;i_r++) { + vg=get_bg(par,(i_r+0.5)*cat->dr,BG_V1,0); + cat->v_skw[offp+i_r]*=vg*factor_vel; + } } } }//end omp for