From ef2cf6b0aa4c7a7cac33ced1c75415fd7f764dcf Mon Sep 17 00:00:00 2001
From: mruizherrerab <ManuelFrancisco.Ruiz-Herrera@ciemat.es>
Date: Fri, 20 Feb 2026 15:48:11 +0100
Subject: [PATCH] CoLoRe-2LPT: 2LPT velocities added for particles, tracers and
 beams

---
 Makefile              |  10 +-
 param_colore_2lpt.cfg |  91 ++++++++++++
 param_example.cfg     |   5 +-
 src/beaming.c         |  28 +++-
 src/common.h          |  12 ++
 src/cosmo.c           |  10 +-
 src/density.c         | 336 ++++++++++++++++++++++++++++++++++++++++--
 src/fourier.c         |  41 ++++++
 src/io.c              |  91 ++++++++++++
 src/srcs.c            |  48 ++++--
 10 files changed, 630 insertions(+), 42 deletions(-)
 create mode 100755 param_colore_2lpt.cfg
diff --git a/Makefile b/Makefile
index fa7e3ff..22f7129 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ OPTIONS = -Wall -Wno-format-overflow -O3 -std=c99
 #Use double precision integer (enable in general)
 DEFINEFLAGS += -D_LONGIDS
 #Use normalized bias model
-DEFINEFLAGS += -D_BIAS_MODEL_2
+DEFINEFLAGS += -D_BIAS_MODEL_3
 #Use linear bias model
 #DEFINEFLAGS += -D_BIAS_MODEL_3
 #Use new lensing method
@@ -33,8 +33,8 @@ USE_MPI = yes
 #GSL
 #GSL_INC = -I/add/path
 #GSL_LIB = -L/add/path
-GSL_INC = -I/home/alonso/include
-GSL_LIB = -L/home/alonso/lib
+GSL_INC = 
+GSL_LIB = 
 #FFTW
 FFTW_INC =
 FFTW_LIB =
@@ -45,8 +45,8 @@ FITS_LIB =
 HDF5_INC =
 HDF5_LIB =
 #libconfig
-CONF_INC =
-CONF_LIB =
+CONF_INC = -I/home/mruizh/software_libconf/include
+CONF_LIB = -L/home/mruizh/software_libconf/lib
 #healpix
 HPIX_INC =
 HPIX_LIB =
diff --git a/param_colore_2lpt.cfg b/param_colore_2lpt.cfg
new file mode 100755
index 0000000..a17ccf0
--- /dev/null
+++ b/param_colore_2lpt.cfg
@@ -0,0 +1,91 @@
+global:
+{
+  #Output prefix. Output will be in prefix_<node ID>.<fits/txt>
+  prefix_out= "/lustre/home/mruizh/2lpt_paper/codes/CoLoRe-master/new_out/out";
+  #Output format. Select HDF5, FITS or ASCII
+  output_format= "FITS";
+  #Output Gaussian overdensity field at z=0?
+  output_density= false
+  #Path to power spectrum at z=0. Power spectrum file must
+  #be in CAMB format: k (h/Mpc), P(k) (Mpc/h)^3.
+  pk_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Pk_Abacus_Cosmology_z0.txt"
+  #This redshift range also defines the size of the box
+  z_min= 1.6
+  z_max= 3.79
+  #RNG seed note that output will depend on number of nodes, etc not only
+  #on the RNG seed
+  seed= 1
+  #Set to true if you want to generate the theory prediction for the 3D power spectrum
+  #of the different tracers.
+  write_pred=false
+  #Intervals of redshift at which the prediction will be produced.
+  pred_dz=0.1
+  #If write_pred is true, set this to true if you just want to generate the prediction
+  #and then exit. This is also useful if you want to inspect the memory requirements
+  #before a run.
+  just_write_pred= false
+}
+
+field_par:
+{
+  #Extra Gaussian smoothing scale [Mpc/h] (set to a
+  #negative value if you don't want any smoothing)
+  r_smooth= -1
+  #Do you want to smooth the Newtonian potential as well?
+  smooth_potential= false
+  #Will use a Cartesian grid with n_grid^3 cells
+  n_grid= 4096
+  #Density field type
+  # 0-lognormal
+  # 1-1LPT
+  # 2-2LPT
+  dens_type= 2
+  #If dens_type==1 or 2, buffer size (fraction per particle)
+  lpt_buffer_fraction= 0.6
+  #If dens_type==1 or 2, scheme to interpolate particle
+  #positions into a grid
+  # 0-NGP
+  # 1-CIC
+  # 2-TSC
+  lpt_interp_type= 1
+  lpt_vzty= 1
+  #Set to 1 if you want to output the LPT particle positions
+  output_lpt= 0
+}
+
+cosmo_par:
+{
+  #Non-relativistic matter
+  omega_M= 0.3153
+  #Dark energy
+  omega_L= 0.6847
+  #Baryons
+  omega_B= 0.049301692328524445
+  #Hubble parameter (in units of 100 km/s/Mpc)
+  h= 0.6736
+  #Dark energy equation of state
+  w= -1.0
+  #Primordial scalar spectral index, used only to extrapolate
+  #P(k) at low k end (-3 used at high k end)
+  ns= 0.9649
+  #Power spectrum normalization. The input power spectrum will be
+  #renormalized to this sigma8
+  sigma_8= 0.8111
+}
+
+#For each galaxy population, create a section called srcsX, starting with X=1
+srcs1:
+{
+  #Path to N(z) file. Should contain two columns
+  # 1-> z, 2-> dN(z)/dz*dOmega
+  # with dN/dzdOmega in units of deg^-2
+  nz_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Nz_qso_130618_2_colore1_hZs.txt"
+  #Path to bias file. Should contain two columns
+  # 1-> z, 2-> b(z)
+  bias_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Bz_clustering.txt"
+  threshold_filename= "/lustre/home/mruizh/CoLoRe_2LPT/CoLoRe_2LPT/input_files/Tz_clustering.txt"
+  #Do you want to include shear ellipticities?
+  include_lensing= false
+  #Do you want to store line-of-sight skewers for each object?
+  store_skewers= true
+}
diff --git a/param_example.cfg b/param_example.cfg
index 9935fc5..b5dfa44 100644
--- a/param_example.cfg
+++ b/param_example.cfg
@@ -48,7 +48,10 @@ field_par:
   # 1-CIC
   # 2-TSC
   lpt_interp_type= 1
-  #Set to 1 if you want to output the LPT particle positions
+
+  lpt_vzty= 0 #Set to 1 if you want to use 2LPT velocities with 2LPT densities. If used, only CIC available (for now) for this case!
+  
+  #Set to 1 if you want to output the LPT particle positions (if using 2LPT, also velocities will be in the output!)
   output_lpt= 0
 }
 
diff --git a/src/beaming.c b/src/beaming.c
index 9850279..4ffb9df 100644
--- a/src/beaming.c
+++ b/src/beaming.c
@@ -67,14 +67,20 @@ static void get_element(ParamCoLoRe *par,long ix,long iy,long iz,
 
   //Get velocity
   if(flag_return & RETURN_VEL) {
-    v[0]=par->grid_npot[ix_hi+iy_0+iz_0]-par->grid_npot[ix_lo+iy_0+iz_0];
-    v[1]=par->grid_npot[ix_0+iy_hi+iz_0]-par->grid_npot[ix_0+iy_lo+iz_0];
-    if(iz==0)
-      v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->slice_left[ix_0+iy_0];
-    else if(iz==par->nz_here-1)
-      v[2]=par->slice_right[ix_0+iy_0]-par->grid_npot[ix_0+iy_0+iz_lo];
-    else
-      v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->grid_npot[ix_0+iy_0+iz_lo];
+    if (par->lpt_vzty){
+       v[0]=par->grid_velx[ix_0+iy_0+iz_0];
+       v[1]=par->grid_vely[ix_0+iy_0+iz_0];
+       v[2]=par->grid_velz[ix_0+iy_0+iz_0];
+    } else {
+       v[0]=par->grid_npot[ix_hi+iy_0+iz_0]-par->grid_npot[ix_lo+iy_0+iz_0];
+       v[1]=par->grid_npot[ix_0+iy_hi+iz_0]-par->grid_npot[ix_0+iy_lo+iz_0];
+       if(iz==0)
+          v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->slice_left[ix_0+iy_0];
+       else if(iz==par->nz_here-1)
+         v[2]=par->slice_right[ix_0+iy_0]-par->grid_npot[ix_0+iy_0+iz_lo];
+       else
+         v[2]=par->grid_npot[ix_0+iy_0+iz_hi]-par->grid_npot[ix_0+iy_0+iz_lo];
+    }
   }
 
   //Get ISW
@@ -319,6 +325,9 @@ void get_beam_properties(ParamCoLoRe *par)
 #ifdef _HAVE_MPI
   long size_slice_npot=(par->nz_max+2)*((long)(2*par->n_grid*(par->n_grid/2+1)));
   long size_slice_dens=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1)));
+  long size_slice_velx=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1)));
+  long size_slice_vely=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1)));
+  long size_slice_velz=par->nz_max*((long)(2*par->n_grid*(par->n_grid/2+1)));
   flouble *buffer_sr=my_malloc(size_slice_npot*sizeof(flouble));
 #endif //_HAVE_MPI
 
@@ -330,6 +339,9 @@ void get_beam_properties(ParamCoLoRe *par)
 #endif //_DEBUG
     mpi_sendrecv_wrap(par->grid_npot,buffer_sr,size_slice_npot,i);
     mpi_sendrecv_wrap(par->grid_dens,buffer_sr,size_slice_dens,i);
+    mpi_sendrecv_wrap(par->grid_velx,buffer_sr,size_slice_velx,i);
+    mpi_sendrecv_wrap(par->grid_vely,buffer_sr,size_slice_vely,i);
+    mpi_sendrecv_wrap(par->grid_velz,buffer_sr,size_slice_velz,i);
 
 #endif //_HAVE_MPI
     par->nz_here=par->nz_all[node_i_am_now];
diff --git a/src/common.h b/src/common.h
index 38f67fc..b1ce904 100644
--- a/src/common.h
+++ b/src/common.h
@@ -110,6 +110,8 @@
 #define BG_KZ_CSTM 1012
 #define BG_BZ_CSTM 1013
 #define BG_NORM_CSTM 1014
+#define BG_F1 1015
+#define BG_F2 1016
 
 //Density field parameters
 #define DZ_SIGMA 0.05
@@ -248,6 +250,8 @@ typedef struct {
   double growth_d2_arr[NA]; //Array of density growth factors used to compute D_d(r)
   double growth_v_arr[NA]; //Array of velocity growth factors used to compute D_v(r)
   double growth_pd_arr[NA]; //Array of potential derivative factors used to compute \dot{\phi}
+  double growth_f1_arr[NA]; // Array of 1 order growth rates used to compute f_d(r)
+  double growth_f2_arr[NA]; // Array of 2 order growth rates used to compute f_d(r)
   double ihub_arr[NA]; //Array of 1/H(z)
   double glob_idr; //1/dr, where dr is the radial comoving distance interval used in the arrays above
   // Power spectra
@@ -267,6 +271,7 @@ typedef struct {
   int smooth_potential; //Do we smooth the newtonian potential as well?
   int dens_type; //Method to produce the density field
   int lpt_interp_type;
+  int lpt_vzty; // Use 2LPT velocities (only makes sense if dens_type is 2LPT)
   double lpt_buffer_fraction; //Fraction of memory saved for buffer particles
   int output_lpt;
   unsigned int seed_rng; //RNG seed
@@ -284,6 +289,12 @@ typedef struct {
   dftw_complex *grid_dens_f; //Fourier-space grid for the density field
   flouble *grid_dens; //Real-space grid for the density field
   dftw_complex *grid_npot_f; //Fourier-space grid for the Newtonian potential
+  flouble *grid_velx; //Real-space grid for the x component of the 2lpt velocity field
+  flouble *grid_vely; //Real-space grid for the y component of the 2lpt velocity field
+  flouble *grid_velz; //Real-space grid for the z component of the 2lpt velocity field
+  dftw_complex *grid_velx_f; //Fourier-space grid for the x component of the 2lpt velocity field (not used right?)
+  dftw_complex *grid_vely_f; //Fourier-space grid for the y component of the 2lpt velocity field (not used right?)
+  dftw_complex *grid_velz_f; //Fourier-space grid for the z component of the 2lpt velocity field (not used right?)
   flouble *grid_npot; //Real-space grid for the Newtonian potential
   flouble *slice_left; //Dummy array to store grid cells coming from the left node
   flouble *slice_right; //Dummy array to store grid cells coming from the right node
@@ -448,6 +459,7 @@ flouble *compute_lensing_spacing(ParamCoLoRe *par);
 ParamCoLoRe *read_run_params(char *fname,int test_memory);
 void write_density_grid(ParamCoLoRe *par,char *prefix_dens);
 void write_lpt(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,flouble *z);
+void write_lpt_wvel(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,flouble *z, flouble *vx, flouble *vy, flouble *vz); //Same as above but including 2lpt velocities (merge)
 void write_srcs(ParamCoLoRe *par);
 void write_imap(ParamCoLoRe *par);
 void write_cstm(ParamCoLoRe *par);
diff --git a/src/cosmo.c b/src/cosmo.c
index 2211849..af414d1 100644
--- a/src/cosmo.c
+++ b/src/cosmo.c
@@ -47,6 +47,11 @@ double get_bg(ParamCoLoRe *par,double r,int tag,int ipop)
     return f_of_r_linear(par,r,par->growth_d2_arr,par->growth_d2_arr[0],
 			 par->growth_d2_arr[NA-1]);
   }
+  else if(tag==BG_F1)
+    return f_of_r_linear(par,r,par->growth_f1_arr,par->growth_f1_arr[0],par->growth_f1_arr[NA-1]);
+  else if(tag==BG_F2) {
+    return f_of_r_linear(par,r,par->growth_f2_arr,par->growth_f2_arr[0],par->growth_f2_arr[NA-1]);
+  }
   else if(tag==BG_V1)
     return f_of_r_linear(par,r,par->growth_v_arr,par->growth_v_arr[0],par->growth_v_arr[NA-1]);
   else if(tag==BG_PD) {
@@ -717,11 +722,14 @@ void cosmo_set(ParamCoLoRe *par)
     double gz=csm_growth_factor(pars,a)/growth0;
     double om=csm_omega_m(pars,a);
     double fz=csm_f_growth(pars,a);
+    double f2z=2*pow(om, 0.5714285714285714); // Using 2LPTic Paper aproximation for f2(z) consistent with the one for D2(z)
     double hhz=csm_hubble(pars,a);
     par->z_arr_r2z[ii]=z;
     par->r_arr_r2z[ii]=r;
     par->growth_d_arr[ii]=gz;
-    par->growth_d2_arr[ii]=-0.42857142857*gz*gz*pow(om,-0.00699300699);
+    par->growth_d2_arr[ii]=-0.42857142857*gz*gz*pow(om,-0.00699300699); // Using Paper aproximation for D2(z) = -3/7*D1**2*Om**eps; consistent with the one for f2(z)
+    par->growth_f1_arr[ii]=fz; // Initialize the f1 and f2 arrays
+    par->growth_f2_arr[ii]=f2z;
     par->growth_v_arr[ii]=(gz*hhz*fz)/(par->fgrowth_0*par->hubble_0); //This is for the comoving velocity
     par->growth_pd_arr[ii]=gz*hhz*(fz-1);
     par->ihub_arr[ii]=1./hhz;
diff --git a/src/density.c b/src/density.c
index 9aadba5..a502575 100644
--- a/src/density.c
+++ b/src/density.c
@@ -186,6 +186,95 @@ static void pos_2_dens(ParamCoLoRe *par,unsigned long long np_here,
   else
     report_error(1,"Wrong interpolation type\n");
 }
+//Here I am using a CIC for the 2LPT case including the velocities. TO DO: have a pos_2_dens merged (including velocities) with NGP and TSC too. 
+static void pos_2_dens_2lpt(ParamCoLoRe *par,unsigned long long np_here,
+                       flouble *x,flouble *y,flouble *z,flouble *dens,flouble *vx, flouble *vy,flouble *vz, flouble *velsx, flouble *velsy, flouble *velsz)
+{
+  unsigned long long ii;
+  flouble i_agrid=par->n_grid/par->l_box;
+  long ngx=2*(par->n_grid/2+1);
+
+  for(ii=0;ii<np_here;ii++) {
+    int ax,i0[3],i1[3];
+    flouble a0[3],a1[3];
+
+    i0[0]=(int)(x[ii]*i_agrid);
+    i0[1]=(int)(y[ii]*i_agrid);
+    i0[2]=(int)(z[ii]*i_agrid);
+    a1[0]=x[ii]*i_agrid-i0[0];
+    a1[1]=y[ii]*i_agrid-i0[1];
+    a1[2]=z[ii]*i_agrid-i0[2];
+    for(ax=0;ax<3;ax++) {
+      a0[ax]=1-a1[ax];
+      i1[ax]=i0[ax]+1;
+      if(i0[ax]<0) i0[ax]+=par->n_grid;
+      if(i1[ax]<0) i1[ax]+=par->n_grid;
+      if(i0[ax]>=par->n_grid) i0[ax]-=par->n_grid;
+      if(i1[ax]>=par->n_grid) i1[ax]-=par->n_grid;
+    }
+    i0[2]-=par->iz0_here;
+    i1[2]-=par->iz0_here;
+
+    if((i0[2]>=0) && (i0[2]<par->nz_here)) {
+
+      dens[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2];
+      dens[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2];
+      dens[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2];
+      dens[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2];
+      // Get velocity field
+      velsx[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]*vx[ii];
+      velsx[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]*vx[ii];
+      velsx[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]*vx[ii];
+      velsx[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]*vx[ii];
+
+      velsy[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]*vy[ii];
+      velsy[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]*vy[ii];
+      velsy[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]*vy[ii];
+      velsy[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]*vy[ii];
+
+      velsz[i0[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a0[0]*a0[1]*a0[2]*vz[ii];
+      velsz[i1[0]+ngx*(i0[1]+par->n_grid*i0[2])]+=a1[0]*a0[1]*a0[2]*vz[ii];
+      velsz[i0[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a0[0]*a1[1]*a0[2]*vz[ii];
+      velsz[i1[0]+ngx*(i1[1]+par->n_grid*i0[2])]+=a1[0]*a1[1]*a0[2]*vz[ii];
+    }
+    if((i1[2]>=0) && (i1[2]<par->nz_here)) {
+
+      dens[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2];
+      dens[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2];
+      dens[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2];
+      dens[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2];
+      // Get velocity field
+      velsx[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]*vx[ii];
+      velsx[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]*vx[ii];
+      velsx[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]*vx[ii];
+      velsx[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]*vx[ii];
+
+      velsy[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]*vy[ii];
+      velsy[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]*vy[ii];
+      velsy[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]*vy[ii];
+      velsy[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]*vy[ii];
+
+      velsz[i0[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a0[0]*a0[1]*a1[2]*vz[ii];
+      velsz[i1[0]+ngx*(i0[1]+par->n_grid*i1[2])]+=a1[0]*a0[1]*a1[2]*vz[ii];
+      velsz[i0[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a0[0]*a1[1]*a1[2]*vz[ii];
+      velsz[i1[0]+ngx*(i1[1]+par->n_grid*i1[2])]+=a1[0]*a1[1]*a1[2]*vz[ii];
+    }
+  }
+  // Normalize velocity by density 
+  for(ii=0;ii<par->nz_here*par->n_grid*ngx;ii++) {
+    //if(dens[ii]!=0){
+    if(dens[ii]!=0) {
+      velsx[ii] /= dens[ii];
+      velsy[ii] /= dens[ii];
+      velsz[ii] /= dens[ii];
+   } else {
+      velsx[ii] = 0;
+      velsy[ii] = 0;
+      velsz[ii] = 0;
+   }
+  }
+
+}
 
 #ifdef _HAVE_MPI
 static void share_particles(ParamCoLoRe *par,unsigned long long np_allocated,unsigned long long np_real,
@@ -373,6 +462,193 @@ static void share_particles(ParamCoLoRe *par,unsigned long long np_allocated,uns
 }
 #endif //_HAVE_MPI
 
+//This is essentially the same as above but including the velocities of the particles for the 2LPT case. TO DO: Merge with the above!
+#ifdef _HAVE_MPI
+static void share_particles_2lpt(ParamCoLoRe *par,unsigned long long np_allocated,unsigned long long np_real,
+			    flouble *x,flouble *y,flouble *z,
+			    flouble *vx,flouble *vy,flouble *vz,
+			    unsigned long long *np_here)
+{
+  int inode;
+  unsigned long long ip;
+  flouble dx=par->l_box/par->n_grid;
+  flouble *z_left=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_right=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_true_left=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_true_right=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_bleft_left=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_bleft_right=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_bright_left=my_malloc(NNodes*sizeof(flouble));
+  flouble *z_bright_right=my_malloc(NNodes*sizeof(flouble));
+  unsigned long long nbuffer=(unsigned long long)(par->lpt_buffer_fraction*par->nz_here*
+						  ((long)(par->n_grid*par->n_grid)));
+  flouble *x_b=my_malloc(nbuffer*sizeof(flouble));
+  flouble *y_b=my_malloc(nbuffer*sizeof(flouble));
+  flouble *z_b=my_malloc(nbuffer*sizeof(flouble));
+  flouble *vx_b=my_malloc(nbuffer*sizeof(flouble));
+  flouble *vy_b=my_malloc(nbuffer*sizeof(flouble));
+  flouble *vz_b=my_malloc(nbuffer*sizeof(flouble));
+
+  for(inode=0;inode<NNodes;inode++) {
+    z_left[inode]=par->iz0_all[inode]*dx;
+    z_right[inode]=(par->iz0_all[inode]+par->nz_all[inode])*dx;
+    z_true_left[inode]=(par->iz0_all[inode]+(par->lpt_interp_type+1)*0.5)*dx;
+    z_true_right[inode]=(par->iz0_all[inode]+par->nz_all[inode]-(par->lpt_interp_type+1)*0.5)*dx;
+    z_bleft_left[inode]=(par->iz0_all[inode]-(par->lpt_interp_type+1)*0.5)*dx;
+    z_bleft_right[inode]=z_left[inode];
+    z_bright_left[inode]=z_right[inode];
+    z_bright_right[inode]=(par->iz0_all[inode]+par->nz_all[inode]+(par->lpt_interp_type+1)*0.5)*dx;
+    if(inode==0) {
+      z_bleft_left[inode]=(par->n_grid-(par->lpt_interp_type+1)*0.5)*dx;
+      z_bleft_right[inode]=par->n_grid*dx;
+    }
+    else if(inode==NNodes-1) {
+      z_bright_left[inode]=0;
+      z_bright_right[inode]=(par->lpt_interp_type+1)*0.5*dx;
+    }
+  }
+
+  //Figure out which particles need to be moved and which stay
+  unsigned long long n_inrange=0,n_inbuffer=0;
+  for(ip=0;ip<np_real;ip++) {
+    int include=0,send=0;
+    if(((z[ip]>=z_left[NodeThis]) && (z[ip]<z_right[NodeThis])) ||
+       ((z[ip]>=z_bleft_left[NodeThis]) && (z[ip]<z_bleft_right[NodeThis])) ||
+       ((z[ip]>=z_bright_left[NodeThis]) && (z[ip]<z_bright_right[NodeThis])))
+      include=1;
+    if((z[ip]<z_true_left[NodeThis]) || (z[ip]>=z_true_right[NodeThis]))
+      send=1;
+
+    if(send) {
+      if(n_inbuffer>=nbuffer)
+	report_error(1,"Out of memory, enlarge buffer %llu (%llu,%llu) %llu\n",
+		     np_allocated,n_inbuffer,nbuffer,n_inrange);
+      x_b[n_inbuffer]=x[ip];
+      y_b[n_inbuffer]=y[ip];
+      z_b[n_inbuffer]=z[ip];
+      vx_b[n_inbuffer]=vx[ip];
+      vy_b[n_inbuffer]=vy[ip];
+      vz_b[n_inbuffer]=vz[ip];
+      n_inbuffer++;
+    }
+
+    if(include) {
+      if(n_inrange>=np_allocated)
+	report_error(1,"Out of memory, enlarge buffer %llu %llu %llu\n",np_allocated,n_inbuffer,n_inrange);
+      x[n_inrange]=x[ip];
+      y[n_inrange]=y[ip];
+      z[n_inrange]=z[ip];
+      vx[n_inrange]=vx[ip];
+      vy[n_inrange]=vy[ip];
+      vz[n_inrange]=vz[ip];
+      n_inrange++;
+    }
+  }
+
+  for(inode=1;inode<NNodes;inode++) {
+    int node_to=(NodeThis+inode+NNodes)%NNodes;
+    int node_from=(NodeThis-inode+NNodes)%NNodes;
+    int i=0,j=n_inbuffer-1;
+
+    while(1) {
+      while(i<n_inbuffer && 
+	    !(((z_b[i]>=z_left[node_to]) && (z_b[i]<z_right[node_to])) ||
+	      ((z_b[i]>=z_bleft_left[node_to]) && (z_b[i]<z_bleft_right[node_to])) ||
+	      ((z_b[i]>=z_bright_left[node_to]) && (z_b[i]<z_bright_right[node_to]))))
+	i++;
+      while(j>=0 && 
+	    (((z_b[j]>=z_left[node_to]) && (z_b[j]<z_right[node_to])) ||
+	     ((z_b[j]>=z_bleft_left[node_to]) && (z_b[j]<z_bleft_right[node_to])) ||
+	     ((z_b[j]>=z_bright_left[node_to]) && (z_b[j]<z_bright_right[node_to]))))
+	j--;
+
+      if(i<j) {
+	flouble tmp;
+	tmp=x_b[i]; x_b[i]=x_b[j]; x_b[j]=tmp;
+	tmp=y_b[i]; y_b[i]=y_b[j]; y_b[j]=tmp;
+	tmp=z_b[i]; z_b[i]=z_b[j]; z_b[j]=tmp;
+	tmp=vx_b[i]; vx_b[i]=vx_b[j]; vx_b[j]=tmp;
+	tmp=vy_b[i]; vy_b[i]=vy_b[j]; vy_b[j]=tmp;
+	tmp=vz_b[i]; vz_b[i]=vz_b[j]; vz_b[j]=tmp;
+	i++; j--;
+      }
+      else
+	break;
+    }
+
+    int nsend,nrecv,nstay;
+    if(j==-1) {
+      nsend=n_inbuffer;
+      nstay=0;
+    }
+    else if(i==n_inbuffer) {
+      nsend=0;
+      nstay=n_inbuffer;
+    }
+    else {
+      if(j+1!=i)
+	report_error(1,"Error sharing particles\n");
+      nstay=i;
+      nsend=n_inbuffer-i;
+    }
+
+#ifdef _HAVE_MPI
+    flouble *x_send=x_b+nstay;
+    flouble *y_send=y_b+nstay;
+    flouble *z_send=z_b+nstay;
+    flouble *vx_send=vx_b+nstay;
+    flouble *vy_send=vy_b+nstay;
+    flouble *vz_send=vz_b+nstay;
+#endif //_HAVE_MPI
+    int tag=500+5*inode;
+
+#ifdef _HAVE_MPI
+    MPI_Status status;
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Sendrecv(&nsend,1,MPI_INT,node_to,tag,&nrecv,1,MPI_INT,node_from,tag,MPI_COMM_WORLD,&status);
+#endif //_HAVE_MPI
+    tag++;
+    
+    if(n_inrange+nrecv>np_allocated)
+      report_error(1,"Not enough memory, enlarge buffer\n");
+
+#ifdef _HAVE_MPI
+    MPI_Sendrecv(x_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag,
+		 x+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status);
+    MPI_Sendrecv(y_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag,
+		 y+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status);
+    MPI_Sendrecv(z_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag,
+		 z+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status);
+    MPI_Sendrecv(vx_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag,
+		 vx+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status);
+    MPI_Sendrecv(vy_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag,
+		 vy+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status);
+    MPI_Sendrecv(vz_send,nsend*sizeof(flouble),FLOUBLE_MPI,node_to,tag,
+		 vz+n_inrange,nrecv*sizeof(flouble),FLOUBLE_MPI,node_from,tag,MPI_COMM_WORLD,&status);
+#endif //_HAVE_MPI
+
+    n_inrange+=nrecv;
+  }
+
+  *np_here=n_inrange;
+
+  free(z_left);
+  free(z_right);
+  free(z_bleft_left);
+  free(z_bleft_right);
+  free(z_bright_left);
+  free(z_bright_right);
+  free(x_b);
+  free(y_b);
+  free(z_b);
+  free(vx_b);
+  free(vy_b);
+  free(vz_b);
+
+  return;
+}
+#endif //_HAVE_MPI
+
 static void lpt_1(ParamCoLoRe *par)
 {
   int axis;
@@ -496,6 +772,9 @@ static void lpt_1(ParamCoLoRe *par)
 	    disp[ax][index]=p;
 	  }
 	  par->grid_dens[index]=0;
+	 par->grid_velx[index]=0; //Fill the 2LPT velocities with zero not needed (Maybe improve?)
+        par->grid_vely[index]=0;
+        par->grid_velz[index]=0;
 	}
       }
     } //end omp for
@@ -646,22 +925,25 @@ static void lpt_1(ParamCoLoRe *par)
 static void lpt_2(ParamCoLoRe *par)
 {
   int axis;
-
-  dftw_complex *(cdisp[3]),*(cdigrad[6]);
-  flouble *(disp[3]),*(digrad[6]);
+  print_info(" 2LPT\n");
+  dftw_complex *(cdisp[3]),*(cdigrad[9]);
+  flouble *(disp[3]),*(digrad[9]);
+  if (par->lpt_vzty){
+     print_info(" with 2LPT velocities\n");
+  } else {
+     print_info(" without 2LPT velocities\n");
+  }
   ptrdiff_t dsize=par->nz_here*((long)(par->n_grid*(par->n_grid/2+1)));
   ptrdiff_t dsize_buff=(ptrdiff_t)(dsize*(1+par->lpt_buffer_fraction));
 
-  print_info(" 2LPT\n");
-
   print_info(" - Transforming density field\n");
   for(axis=0;axis<2;axis++) {
     cdisp[axis]=dftw_alloc_complex(dsize_buff);
     disp[axis]=(flouble *)cdisp[axis];
   }
-  for(axis=0;axis<6;axis++) {
-    cdigrad[axis]=dftw_alloc_complex(dsize_buff);
-    digrad[axis]=(flouble *)cdigrad[axis];
+  for(axis=0;axis<9;axis++) {
+     cdigrad[axis]=dftw_alloc_complex(dsize_buff);
+     digrad[axis]=(flouble *)cdigrad[axis];
   }
   cdisp[2]=par->grid_dens_f;
   disp[2]=par->grid_dens;
@@ -858,12 +1140,20 @@ static void lpt_2(ParamCoLoRe *par)
 	for(ix=0;ix<par->n_grid;ix++) {
 	  int ax;
 	  double r,dg,d2g;
+	  //Define growth rates and H(z)^-1
+	  double f1g,f2g,invhz;
 	  long index=ix+indexy+indexz;
 	  long index_nopad=ix+par->n_grid*((long)(iy+par->n_grid*iz));
 	  xv[0]=(ix+0.0)*dx-par->pos_obs[0];
 	  r=sqrt(xv[0]*xv[0]+xv[1]*xv[1]+xv[2]*xv[2]);
 	  dg=get_bg(par,r,BG_D1,0);
 	  d2g=get_bg(par,r,BG_D2,0);
+	  if (par->lpt_vzty) { //Calculate growth rates and H(z)^-1
+	     f1g=get_bg(par,r,BG_F1,0);
+	     f2g=get_bg(par,r,BG_F2,0);
+	     invhz=get_bg(par,r,BG_IH,0);
+          }
+          
 	  for(ax=0;ax<3;ax++) {
 #ifdef _DEBUG
 	    d_mean_1_thr[ax]+=disp[ax][index];
@@ -872,11 +1162,18 @@ static void lpt_2(ParamCoLoRe *par)
 	    d_sigma2_2_thr+=digrad[ax][index]*digrad[ax][index];
 #endif //_DEBUG
 	    flouble p=xv[ax]+dg*disp[ax][index]+d2g*digrad[ax][index]+par->pos_obs[ax];
+	    if (par->lpt_vzty) { //Get the particles 2LPT velocities!
+	       flouble vzty = (dg*f1g*disp[ax][index]+d2g*f2g*digrad[ax][index])/invhz;
+	       digrad[6+ax][index_nopad]=vzty;
+            }
 	    if(p<0) p+=par->l_box;
 	    if(p>=par->l_box) p-=par->l_box;
 	    digrad[3+ax][index_nopad]=p;
 	  }
 	  par->grid_dens[index]=0;
+	  par->grid_velx[index]=0; // Keep the grids to zero, we will fill them with interpolation scheme (only CIC now; see above)
+	  par->grid_vely[index]=0;
+          par->grid_velz[index]=0;
 	}
       }
     } //end omp for
@@ -946,24 +1243,37 @@ static void lpt_2(ParamCoLoRe *par)
 #endif //_SPREC
 
   unsigned long long np_here;
-  if(par->output_lpt) {
+  if (par->output_lpt) {
     np_here=par->nz_here*((long)(par->n_grid*par->n_grid));
     print_info(" - Writing LPT positions\n");
-    write_lpt(par,np_here,digrad[3],digrad[4],digrad[5]);
+    if (par->lpt_vzty) {
+       write_lpt_wvel(par,np_here,digrad[3],digrad[4],digrad[5], digrad[6], digrad[7], digrad[8]);
+    } else {
+       write_lpt(par,np_here,digrad[3],digrad[4],digrad[5]);
+    }
   }
 
 #ifdef _HAVE_MPI
   print_info(" - Sharing particle positions\n");
-  share_particles(par,(unsigned long long)(2*dsize_buff),
+  if (par->lpt_vzty) {
+     share_particles_2lpt(par,(unsigned long long)(2*dsize_buff),
+                  (unsigned long long)(par->nz_here*((long)(par->n_grid*par->n_grid))),
+                  digrad[3],digrad[4],digrad[5],digrad[6],digrad[7],digrad[8],&np_here);
+  } else {
+     share_particles(par,(unsigned long long)(2*dsize_buff),
 		  (unsigned long long)(par->nz_here*((long)(par->n_grid*par->n_grid))),
 		  digrad[3],digrad[4],digrad[5],&np_here);
+  }
 #else //_HAVE_MPI
   np_here=par->nz_here*((long)(par->n_grid*par->n_grid));
 #endif //_HAVE_MPI
 
   print_info(" - Interpolating positions into density field\n");
-  pos_2_dens(par,np_here,digrad[3],digrad[4],digrad[5],par->grid_dens);
-
+  if (par->lpt_vzty) {
+     pos_2_dens_2lpt(par,np_here,digrad[3],digrad[4],digrad[5],par->grid_dens,digrad[6], digrad[7], digrad[8], par->grid_velx, par->grid_vely, par->grid_velz);
+  } else {
+     pos_2_dens(par,np_here,digrad[3],digrad[4],digrad[5],par->grid_dens);
+  }
 #ifdef _SPREC
   for(axis=0;axis<3;axis++)
     fftwf_free(cdigrad[3+axis]);
diff --git a/src/fourier.c b/src/fourier.c
index aea1cef..5f24cb0 100644
--- a/src/fourier.c
+++ b/src/fourier.c
@@ -222,6 +222,7 @@ void allocate_fftw(ParamCoLoRe *par)
     report_error(1,"Ran out of memory\n");
   par->grid_dens=(flouble *)(par->grid_dens_f);
 
+// Intialize the needed memory for the 2LPT velocity arrays
 #ifdef _SPREC
   par->grid_npot_f=fftwf_alloc_complex(dsize+2*par->n_grid*(par->n_grid/2+1));
 #else //_SPREC
@@ -231,6 +232,34 @@ void allocate_fftw(ParamCoLoRe *par)
     report_error(1,"Ran out of memory\n");
   par->grid_npot=(flouble *)(par->grid_npot_f);
 
+#ifdef _SPREC
+  par->grid_velx_f=fftwf_alloc_complex(dsize);
+#else //_SPREC
+  par->grid_velx_f=fftw_alloc_complex(dsize);
+#endif //_SPREC
+  if(par->grid_velx_f==NULL)
+    report_error(1,"Ran out of memory\n");
+  par->grid_velx=(flouble *)(par->grid_velx_f);
+
+#ifdef _SPREC
+  par->grid_vely_f=fftwf_alloc_complex(dsize);
+#else //_SPREC
+  par->grid_vely_f=fftw_alloc_complex(dsize);
+#endif //_SPREC
+  if(par->grid_vely_f==NULL)
+    report_error(1,"Ran out of memory\n");
+  par->grid_vely=(flouble *)(par->grid_vely_f);
+//
+
+#ifdef _SPREC
+  par->grid_velz_f=fftwf_alloc_complex(dsize);
+#else //_SPREC
+  par->grid_velz_f=fftw_alloc_complex(dsize);
+#endif //_SPREC
+  if(par->grid_velz_f==NULL)
+    report_error(1,"Ran out of memory\n");
+  par->grid_velz=(flouble *)(par->grid_velz_f);
+
 #ifdef _HAVE_MPI
   par->slice_left =&(par->grid_npot[2*dsize]);
   par->slice_right=&(par->grid_npot[2*(dsize+par->n_grid*(par->n_grid/2+1))]);
@@ -242,11 +271,23 @@ void end_fftw(ParamCoLoRe *par)
 #ifdef _SPREC
   if(par->grid_dens_f!=NULL)
     fftwf_free(par->grid_dens_f);
+  if(par->grid_velx_f!=NULL)
+    fftwf_free(par->grid_velx_f);
+  if(par->grid_vely_f!=NULL)
+    fftwf_free(par->grid_vely_f);
+  if(par->grid_velz_f!=NULL)
+    fftwf_free(par->grid_velz_f);
   if(par->grid_npot_f!=NULL)
     fftwf_free(par->grid_npot_f);
 #else //_SPREC
   if(par->grid_dens_f!=NULL)
     fftw_free(par->grid_dens_f);
+  if(par->grid_velx_f!=NULL)
+    fftw_free(par->grid_velx_f);
+  if(par->grid_vely_f!=NULL)
+    fftw_free(par->grid_vely_f);
+  if(par->grid_velz_f!=NULL)
+    fftw_free(par->grid_velz_f);
   if(par->grid_npot_f!=NULL)
     fftw_free(par->grid_npot_f);
 #endif //_SPREC
diff --git a/src/io.c b/src/io.c
index 9ed09ca..addf04d 100644
--- a/src/io.c
+++ b/src/io.c
@@ -64,6 +64,7 @@ static ParamCoLoRe *param_colore_new(void)
   par->smooth_potential=0;
   par->dens_type=DENS_TYPE_LGNR;
   par->lpt_interp_type=INTERP_CIC;
+  par->lpt_vzty=0;
   par->lpt_buffer_fraction=0.2;
   par->output_lpt=0;
   par->seed_rng=1234;
@@ -80,6 +81,14 @@ static ParamCoLoRe *param_colore_new(void)
   // Density grids
   par->grid_dens_f=NULL;
   par->grid_dens=NULL;
+  // 2LPT velocity grids
+  par->grid_velx_f=NULL;
+  par->grid_vely_f=NULL;
+  par->grid_velz_f=NULL;
+  par->grid_velx=NULL;
+  par->grid_vely=NULL;
+  par->grid_velz=NULL;
+  //Pot. grids
   par->grid_npot_f=NULL;
   par->grid_npot=NULL;
   par->sigma2_gauss=0;
@@ -295,6 +304,7 @@ ParamCoLoRe *read_run_params(char *fname,int test_memory)
   conf_read_int(conf,"field_par","dens_type",&(par->dens_type));
   conf_read_double(conf,"field_par","lpt_buffer_fraction",&(par->lpt_buffer_fraction));
   conf_read_int(conf,"field_par","lpt_interp_type",&(par->lpt_interp_type));
+  conf_read_int(conf,"field_par","lpt_vzty",&(par->lpt_vzty));
   conf_read_int(conf,"field_par","output_lpt",&(par->output_lpt));
 
   par->seed_rng=i_dum;
@@ -710,6 +720,87 @@ void write_lpt(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,f
   fclose(fo);
 }
 
+// TO DO: Merge the above and this
+void write_lpt_wvel(ParamCoLoRe *par,unsigned long long npart,flouble *x,flouble *y,flouble *z, flouble *vx, flouble *vy, flouble *vz)
+{
+  GadgetHeader header;
+  FILE *fo;
+  char fname[256];
+  unsigned long long ipart,np_total;
+  unsigned long long np_total_expected=par->n_grid*((long)(par->n_grid*par->n_grid));
+
+  sprintf(fname,"%s_lpt_out.%d",par->prefixOut,NodeThis);
+  fo=fopen(fname,"w");
+  if(fo==NULL) error_open_file(fname);
+
+#ifdef _HAVE_MPI
+  unsigned long long np_send=npart;
+  MPI_Reduce(&np_send,&np_total,1,MPI_UNSIGNED_LONG_LONG,MPI_SUM,0,MPI_COMM_WORLD);
+  MPI_Bcast(&np_total,1,MPI_UNSIGNED_LONG_LONG,0,MPI_COMM_WORLD);
+#else //_HAVE_MPI
+  np_total=npart;
+#endif //_HAVE_MPI
+
+  if(np_total!=np_total_expected)
+    report_error(1,"Only %llu particles found, but there should be %ull\n",np_total,np_total_expected);
+
+  double m=27.7455*par->OmegaM*pow(par->l_box,3.)/np_total;
+  memset(&header,0,sizeof(GadgetHeader));
+
+  header.np[1]=npart;
+  header.mass[1]=m;
+  header.time=1.;
+  header.redshift=0.;
+  header.np_total[1]=(unsigned int)np_total;
+  header.np_total_highword[1]=(unsigned int)(np_total >> 32);
+  header.num_files=NNodes;
+  header.boxsize=par->l_box;
+  header.omega0=par->OmegaM;
+  header.omega_lambda=par->OmegaL;
+  header.hubble_param=par->hhub;
+  header.flag_gadgetformat=1;
+
+  int blklen=sizeof(GadgetHeader);
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+  my_fwrite(&header,sizeof(GadgetHeader),1,fo);
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+
+  float x0[3];
+  // position
+  blklen=npart*sizeof(float)*3;
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+  for(ipart=0;ipart<npart;ipart++) {
+    x0[0]=x[ipart];
+    x0[1]=y[ipart];
+    x0[2]=z[ipart];
+    my_fwrite(x0,sizeof(float),3,fo);
+  }
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+
+  // velocity
+  blklen=npart*sizeof(float)*3;
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+  for(ipart=0;ipart<npart;ipart++) {
+    x0[0] = vx[ipart];
+    x0[1] = vy[ipart];
+    x0[2] = vz[ipart]; 
+    my_fwrite(x0,sizeof(float),3,fo);
+  }
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+
+  // id
+  blklen=npart*sizeof(unsigned long long);
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+  long long id0=(long long)(par->iz0_here*((long)(par->n_grid*par->n_grid)));
+  for(ipart=0;ipart<npart;ipart++) {
+    unsigned long long id_out=id0+ipart;
+    my_fwrite(&id_out,sizeof(unsigned long long),1,fo);
+  }
+  my_fwrite(&blklen,sizeof(blklen),1,fo);
+
+  fclose(fo);
+}
+
 void write_imap(ParamCoLoRe *par)
 {
   int ipop;
diff --git a/src/srcs.c b/src/srcs.c
index 84c7360..600048f 100644
--- a/src/srcs.c
+++ b/src/srcs.c
@@ -251,7 +251,12 @@ static void srcs_set_cartesian_single(ParamCoLoRe *par,int ipop)
 	    int ip;
 	    double rr=sqrt(x0*x0+y0*y0+z0*z0);
 	    double rvel=factor_vel*get_rvel(par,ix,iy,iz,x0,y0,z0,rr);
-	    double dz_rsd=rvel*get_bg(par,rr,BG_V1,0);
+	    double dz_rsd;
+	    if (par->lpt_vzty) {
+	       dz_rsd=par->grid_velx[index]; // This is simply for consistency with the Snapshot Version. Note that the true dz_rsd will be computed in the skewers!
+	    } else {
+	       dz_rsd=rvel*get_bg(par,rr,BG_V1,0);
+            }
 	    for(ip=0;ip<npp;ip++) {
 	      int ax;
 	      long pix_id_ring,pix_id_nest;
@@ -435,7 +440,7 @@ static void srcs_beams_preproc_single(ParamCoLoRe *par,int ipop)
 #pragma omp for
 #endif //_HAVE_OMP
     for(ii=0;ii<par->cats[ipop]->nsrc;ii++) {
-      par->cats[ipop]->srcs[ii].dz_rsd=0;
+      par->cats[ipop]->srcs[ii].dz_rsd=0; //Here is were really dz_rsd is initialize since get_beam_properties calls this first and cleans!
       par->cats[ipop]->srcs[ii].e1=0;
       par->cats[ipop]->srcs[ii].e2=0;
     }//end omp for
@@ -499,7 +504,11 @@ static void srcs_get_beam_properties_single(ParamCoLoRe *par,int ipop)
       //Compute RSD
       added=interpolate_from_grid(par,xn,NULL,v,NULL,NULL,NULL,RETURN_VEL,INTERP_TYPE_SKW);
       if(added) {
-	vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]);
+	if (par->lpt_vzty) { //This is directly velocity
+           vr=(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]);
+	} else {
+	   vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]);
+        }
 	cat->srcs[ip].dz_rsd+=vr;
       }
 
@@ -518,7 +527,11 @@ static void srcs_get_beam_properties_single(ParamCoLoRe *par,int ipop)
 	    added=interpolate_from_grid(par,xn,&dens,v,NULL,NULL,&gauss,RETURN_DENS | RETURN_VEL,INTERP_TYPE_SKW);
 	  }
       	  if(added) {
-      	    vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]);
+	    if (par->lpt_vzty) { //Same as above
+	      vr=(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]);
+	    } else {
+      	      vr=0.5*idx*(v[0]*u[0]+v[1]*u[1]+v[2]*u[2]);
+            }
             if(cat->skw_gauss)
               cat->g_skw[offp+i_r]+=gauss;
             else
@@ -642,7 +655,10 @@ static void srcs_beams_postproc_single(ParamCoLoRe *par,int ipop)
 #endif //_HAVE_OMP
   {
     int ii;
-    double factor_vel=-par->fgrowth_0/(1.5*par->hubble_0*par->OmegaM);
+    double factor_vel;
+    if (!par->lpt_vzty){ //2LPT velocities are already normalized!
+       factor_vel=-par->fgrowth_0/(1.5*par->hubble_0*par->OmegaM);
+    }
 #ifdef _USE_FAST_LENSING
     int ir_s=0;
     HealpixShellsAdaptive *smap=NULL;
@@ -658,9 +674,10 @@ static void srcs_beams_postproc_single(ParamCoLoRe *par,int ipop)
       double r=r_of_z(par,z);
       double vg=get_bg(par,r,BG_V1,0);
 
-      //RSDs
-      cat->srcs[ii].dz_rsd*=vg*factor_vel;
-
+      //RSDs //2LPT velocities are already normalized!
+      if (!par->lpt_vzty){
+         cat->srcs[ii].dz_rsd*=vg*factor_vel;
+      }
       //Lensing
       if(cat->has_lensing) {
 #ifdef _USE_FAST_LENSING
@@ -724,12 +741,15 @@ static void srcs_beams_postproc_single(ParamCoLoRe *par,int ipop)
       }
 
       //Skewers
-      if(cat->has_skw) {
-	int i_r,i_r_max=MAX((int)(r*cat->idr+0.5),cat->nr-1);
-	long offp=ii*cat->nr;
-	for(i_r=0;i_r<=i_r_max;i_r++) {
-	  vg=get_bg(par,(i_r+0.5)*cat->dr,BG_V1,0);
-	  cat->v_skw[offp+i_r]*=vg*factor_vel;
+      //2LPT Velocities are already normalized!
+      if (!par->lpt_vzty){
+         if(cat->has_skw) {
+	   int i_r,i_r_max=MAX((int)(r*cat->idr+0.5),cat->nr-1);
+	   long offp=ii*cat->nr;
+	   for(i_r=0;i_r<=i_r_max;i_r++) {
+	     vg=get_bg(par,(i_r+0.5)*cat->dr,BG_V1,0);
+	     cat->v_skw[offp+i_r]*=vg*factor_vel;
+	 }
 	}
       }
     }//end omp for