I compiled VASP 5.2.12 on a BlueGene/P (see the makefile below). I did not use ScaLAPACK because I read about problems with its use.
Now, I was running some test calculations to check the performance of the machine, and I found that the same job running in Virtual Node mode (using all 4 cores of each processor) takes 4 times as much wall-clock time as the same job running in Symmetrical Multi-Processing mode (only one task per node). Is this expected? Is it possible to improve the performance of VN mode somehow (e.g. by using different libraries)? Does anyone have experience in setting the NPAR parameter for this kind of machine?
Maybe the answers to those questions are trivial, but I'm a newbie..
Thanks a lot for any help!!

Here is the makefile:

.SUFFIXES: .inc .f .F

# Extension of the preprocessed Fortran files. Every compile rule refers to
# $*$(SUFFIX) / main$(SUFFIX), and $(CPP) below writes that file, so the two
# must agree.
SUFFIX = .f

# fortran compiler and linker
FC=mpixlf90_r -g -qfree=f90
# The link rules invoke $(FCL); link with the same compiler driver.
FCL = $(FC)

# C-preprocessor: define any of the flags given below
#   MPI          generate parallel version
#   NGZhalf      charge density reduced in Z direction
#   wNGZhalf     gamma point only, reduced in Z direction
#   CACHE_SIZE   5001 for SP3 and Power 3
#                32768 for 550,590,3CT
#                8001 595/397 quad word systems
#   scaLAPACK    use scaLAPACK
# Add -DwNGZhalf for gamma point only
#
# CPP is a complete command line: used as a recipe line it preprocesses
# $*.F into $*$(SUFFIX).
CPP = /usr/bin/cpp -P -C -DHOST=\"BlueGene\" -DMPI -DNGZhalf \
      -Duse_collective -Davoidalloc \
      -DCACHE_SIZE=4000 -DMPI_BLOCK=50000 $*.F >$*$(SUFFIX)

# general fortran flag
FFLAGS = -qarch=450d -qtune=450 -qmaxmem=-1 -qsource

# optimization

# OFLAG is applied by the generic compile rules only; the special rules
# pick their own -O level.
OFLAG = -O3 -qstrict
OBJ_HIGH = none
OBJ_NOOPT = none
# DEBUG is appended to FFLAGS when compiling main and makeparam.
DEBUG = -g

# options for linking

LIBLOC = /usr/local/lib

ESSL = -L/bgsys/ibm_essl/sles10/prod/opt/ibmmath/essl/4.4/lib -lesslbg

LIB = -L../../VASP5.2/vasp.5.lib -ldmy -L$(LIBLOC) -llapack_bgp $(ESSL)

# Object files providing the 3d-fft used by VASP; they are linked next to
# $(SOURCE) in every program that needs the FFT.
FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o

# general rules and compile lines
# BASIC is spliced into SOURCE below.
BASIC = symmetry.o symlib.o lattlib.o random.o

# Objects of the main program. The order is kept exactly as given
# (presumably it follows the Fortran module dependencies -- confirm before
# reordering).
SOURCE = base.o mpi.o smart_allocate.o xml.o \
         constant.o jacobi.o main_mpi.o scala.o \
         asa.o lattice.o poscar.o ini.o mgrid.o xclib.o vdw_nl.o xclib_grad.o \
         radial.o pseudo.o gridq.o ebs.o \
         mkpoints.o wave.o wave_mpi.o wave_high.o \
         $(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
         mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
         constrmag.o cl_shift.o relativistic.o LDApU.o \
         paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o paw.o \
         mkpoints_full.o charge.o Lebedev-Laikov.o stockholder.o dipol.o pot.o \
         dos.o elf.o tet.o tetweight.o hamil_rot.o \
         steep.o chain.o dyna.o sphpro.o us.o core_rel.o \
         aedens.o wavpre.o wavpre_noio.o broyden.o \
         dynbr.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
         brent.o stufak.o fileio.o opergrid.o stepver.o \
         chgloc.o fast_aug.o fock.o mkpoints_change.o sym_grad.o \
         mymath.o internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
         hamil_high.o nmr.o pead.o mlwf.o subrot.o subrot_scf.o \
         force.o pwlhf.o gw_model.o optreal.o davidson.o david_inner.o \
         electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
         optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
         hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
         lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
         linear_optics.o linear_response.o \
         setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
         ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o local_field.o \
         ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
         rmm-diis_mlr.o linear_response_NMR.o


# Main executable (first target in the file, hence the default goal).
# The old binary is removed first so a failed link never leaves a stale
# vasp.bgp behind.
vasp.bgp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f vasp.bgp
	$(FCL) -o vasp.bgp main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)
# Auxiliary programs; each is linked with $(FCL) against the same libraries
# as vasp.bgp.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o makeparam $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o zgemmtest $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o dgemmtest $(LINK) dgemmtest.o random.o base.o $(LIB)

ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o ffttest $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o kpoints $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)

# Remove the generated files and touch every .F source so that everything
# is preprocessed and compiled again on the next build. The leading "-"
# keeps make going if the command fails.
.PHONY: clean
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F

# Files compiled from their preprocessed source without $(OFLAG).
# main and makeparam additionally get $(DEBUG); the blank between $(FFLAGS)
# and $(DEBUG) is required, otherwise the last flag of FFLAGS and the first
# flag of DEBUG are glued into one bogus option.
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)
xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)
xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# Prerequisite-only rules: they carry no recipe and merely record which
# .F source each target has to be rebuilt from.
makeparam$(SUFFIX) : makeparam.F main.F
base.o             : base.F
mgrid.o            : mgrid.F
constant.o         : constant.F
lattice.o          : lattice.F
setex.o            : setex.F
pseudo.o           : pseudo.F
poscar.o           : poscar.F
mkpoints.o         : mkpoints.F
wave.o             : wave.F
nonl.o             : nonl.F
nonlr.o            : nonlr.F

# Objects listed in OBJ_NOOPT are compiled without $(OFLAG).
# NOTE(review): the rule header and the $(CPP) step were missing here and
# have been reconstructed; OBJ_NOOPT is the only non-optimized object list
# defined in this makefile -- confirm against the original makefile.
$(OBJ_NOOPT):
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)

# Fortran-77 variant of the FFT library: preprocess the .F file first, then
# compile the generated $*$(SUFFIX) with the F77 compiler and its own flags.
fft3dlib_f77.o: fft3dlib_f77.F
	$(CPP)
	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)

# Generic suffix rules for everything without a special rule.
# NOTE(review): the rule headers and the $(CPP) steps were missing and have
# been reconstructed around the two surviving compile lines -- confirm
# against the original makefile.

# .F -> .o : preprocess, then compile with full optimization
.F.o:
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# .F -> preprocessed source (needed by targets that depend on xxx$(SUFFIX))
.F$(SUFFIX):
	$(CPP)

# preprocessed source -> .o
$(SUFFIX).o:
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# special rules

# Files compiled at a fixed, reduced optimization level instead of $(OFLAG).
# Each rule depends on the .F file, so it has to run $(CPP) itself to
# produce the $*$(SUFFIX) file that is handed to the compiler.

radial.o: radial.F
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)

wave.o: wave.F
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)

metagga.o: metagga.F
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)

nonl.o: nonl.F
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -O -c $*$(SUFFIX)

paw.o: paw.F
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -O1 -c $*$(SUFFIX)

pseudo.o: pseudo.F
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -O1 -c $*$(SUFFIX)
Have you checked the hints posted in the forum some time ago?


