#!/usr/local/bin/bash
#CATEGORY: Visualization and post-processing
#SYNOPSIS: Create an XcrySDen Structure File from a JDFTx output file

function printUsageAndExit()
{
	echo '
	Create an XcrySDen Structure File from a JDFTx output file. Usage:
	
		createXSF <jdftxOutFile> <xsfFile> [Animated | Frames | <var1> <var2> ...]
	
	The simplest usage saves the final structure from the JDFTx output
	<jdftxOutFile> to <xsfFile>. Specifying "Animated" will include
	ionic positions and lattice vectors for all configurations in
	the run (if geometry or lattice optimization was performed).
	
	Specifying Frames will output one XSF file for each configuration.
	In this case, <xsfFile> must contain "%d", which will be substituted
	by the frame number.  For example, if <xsfFile> is test-%d.xsf,
	this will produce output files test-1.xsf, test-2.xsf etc.
	
	Additionally, scalar fields such as the electron density may
	also be saved, where var should name the file suffix used by JDFTx
	for that type eg. n for density, nUp for up-spin density, d_tot
	for total electrostatic potential etc. The full filenames may
	also be specified; this is a must when dump-name includes $STAMP,
	or $ITER, or if the dump was from a <freq>-dependent dump-name.
	Note that XSF is an ascii format (unlike the default binary dump)
	and adding many scalar field variables to it could make it enormous.
	Scalar fields are only supported without animation or frames.
	
	Note: this tool requires awk and octave to be available in path.
	'
	exit $1
}

if [[ "$1" == "-h" || "$1" == "--help" ]]; then printUsageAndExit 0; fi
if [ -z "$2" ]; then printUsageAndExit 1; fi

jdftxOutFile="$1"
xsfFile="$2"

command -v awk >/dev/null 2>&1 || { echo "createXSF requires awk, which was not found in path.  Aborting." >&2; exit 1; }
command -v octave >/dev/null 2>&1 || { echo "createXSF requires octave, which was not found in path.  Aborting." >&2; exit 1; }

if [ ! -f "$jdftxOutFile" ]; then
	echo "Could not open '$jdftxOutFile' for reading"
	exit 1
fi

varNameLcase=$(echo $3 | awk '{print tolower($0)}')
if [ "$varNameLcase" == "frames" ]; then
	xsfA=$(printf "$xsfFile" 1)
	xsfB=$(printf "$xsfFile" 2)
	if [ "$xsfA" == "$xsfB" ]; then
		echo "In Frames mode, <xsfFile> must contain a '%d' (which will be replaced by frame numbers)."
		exit 1
	fi
fi

tmpDir="createXSF.tmp"
mkdir -p "$tmpDir"
if [ ! -d "$tmpDir" ]; then
	echo "Could not create temporary working directory '$tmpDir'"
	exit 1
fi

#Extract relevant information from the output file (and save in separate files in tmpDir)
awk "
	#Header mode
	BEGIN { mode=1; nAtoms=0; nSteps=0; Sprinted=0; }
	mode==1 && \$1==\"Executable\" { fp = \"$tmpDir/inName\"; inPrinted=0;
		for(i=5; i<=NF; i++) if(\$i==\"-i\" || \$i==\"--input-filename\") {	print \$(i+1) > fp; inPrinted=1; }
		if(!inPrinted) print \"stdin\" > fp; close(fp); }
	mode==1 && \$1==\"coords-type\" { posCoords = (\$2==\"lattice\" || \$2==\"Lattice\" ? 0 : 1); }
	mode==1 && \$1==\"dump-name\" { fp=\"$tmpDir/dumpName\"; print \$2  > fp; close(fp) }
	mode==1 && ( \$1==\"ion\" || \$1==\"ion-vel\" ) {
		fpIonNames=\"$tmpDir/ionNames\"; print \$2 > fpIonNames;
		fpIonPos=\"$tmpDir/ionpos.0\"; print \$3, \$4, \$5 > fpIonPos;
		nAtoms++; }
	#Initial lattice vectors
	mode==1 && \$0~/Initializing the Grid/ { mode=2; close(fpIonNames); close(fpIonPos);
		refLine=NR; Sprinted=0; Ssuffix=\"grid\"; fp=\"$tmpDir/lattice.0\" }
	mode==2 && NR>refLine+1 && NR<=refLine+4 { print \$2, \$3, \$4 > fp; }
	mode==2 && NR==refLine+4 { close(fp) }
	mode==2 && !Sprinted && \$0~/Chosen fftbox/ { fp=(\"$tmpDir/S\" Ssuffix); print \$(NF-3), \$(NF-2), \$(NF-1) > fp; close(fp); Sprinted=1 }
	mode==2 && \$0~/Initializing tighter grid/ { Sprinted=0; Ssuffix=\"wfns\" }
	mode==2 && \$0~/Initializing supercell grid/ { Sprinted=0; Ssuffix=\"mlwf\" }
	mode==2 && \$0~/Setting up double-sized grid/ { Sprinted=0; iCenterPrinted=0; Ssuffix=\"double\" }
	mode==2 && Ssuffix==\"double\" && iCenterPrinted==0 && \$1==\"Grid:\" { fp=\"$tmpDir/iCenter\"; print \$(NF-3), \$(NF-2), \$(NF-1) > fp; close(fp); iCenterPrinted=1 }
	#Ionic positions
	/# Ionic positions in / { mode=3; refLine=NR; nSteps++; fp=(\"$tmpDir/ionpos.\" nSteps) }
	mode==3 && NR>refLine && NR<=refLine+nAtoms { print \$3, \$4, \$5 > fp }
	mode==3 && NR==refLine+nAtoms { close(fp) }
	#Lattice vectors
	mode==3 && \$0~/R =/ { mode=4; refLine=NR; fp=(\"$tmpDir/lattice.\" nSteps) }
	mode==4 && NR>refLine && NR<=refLine+3 { print \$2, \$3, \$4 > fp }
	mode==4 && NR==refLine+3 { close(fp) }
	#Supercell combinations:
	/Lattice vector linear combinations in supercell/ { mode=5; refLine=NR; fp=\"$tmpDir/super\" }
	mode==5 && NR>refLine && NR<=refLine+3 { print \$2, \$3, \$4 > fp; }
	mode==5 && NR==refLine+3 { refLine=0; mode=2; close(fp) }
	END { print nSteps, nAtoms, posCoords > \"$tmpDir/dimensions\"; }
" $jdftxOutFile

#Initialize array of variables
argArr=( "${@:3}" );
vars="";
for arg in "${argArr[@]}"; do
	vars="$vars \"$arg\""
done

#Put together into the target XSF file
cat > "$tmpDir/process.m" <<EOF
	#Read dimensions
	dims = load("-ascii", "$tmpDir/dimensions");
	nSteps = dims(1);
	nAtoms = dims(2);
	posCoords = dims(3);
	fp = fopen("$tmpDir/dumpName", "r");
	dumpName = fscanf(fp, "%s");
	fclose(fp);
	fp = fopen("$tmpDir/inName", "r");
	inName = fscanf(fp, "%s");
	lastDot = find(inName=="."); if lastDot; inName = inName(1:lastDot(end)-1); endif; #Remove extension
	lastSlash = find(inName=="/"); if lastSlash; inName = inName(lastSlash(end)+1:end); endif; #Remove path
	fclose(fp);
	
	#Grid sizes:
	Sgrid = load("-ascii", "$tmpDir/Sgrid");                                                                          #charge-density grid
	if exist("$tmpDir/Swfns", "file"); Swfns = load("-ascii", "$tmpDir/Swfns"); else; Swfns = Sgrid; endif            #wave-function grid
	if exist("$tmpDir/Smlwf", "file"); Smlwf = load("-ascii", "$tmpDir/Smlwf"); else; Smlwf = Swfns; endif            #MLWF supercell grid
	if exist("$tmpDir/Sdouble", "file"); Sdouble = load("-ascii", "$tmpDir/Sdouble"); else; Sdouble = Sgrid; endif    #coulomb-truncation double grid
	if exist("$tmpDir/iCenter", "file"); iCenter = load("-ascii", "$tmpDir/iCenter"); else; iCenter = [0 0 0]; endif  #offset for double
	
	#Initialize supercell:
	if norm(Smlwf-Swfns)
		super = load("-ascii", "$tmpDir/super");
		#Find unique set of unit cells inside supercell:
		detSuper = abs(det(super));
		adjSuper = inv(super)*detSuper;
		iMax = sum(sum(abs(super)));
		iRange = [-iMax:iMax];
		[i1grid,i2grid,i3grid] = meshgrid(iRange,iRange,iRange);
		iArr = [reshape(i1grid,[],1) reshape(i2grid,[],1) reshape(i3grid,[],1)];
		xSuper = int64(iArr*adjSuper');
		selSuper = find((min(xSuper,[],2)>=0) & (max(xSuper,[],2)<int64(detSuper)));
		offsets = iArr(selSuper,:);
	else
		#Trivial supercell
		super = eye(3);
		offsets = zeros(1,3);
	endif
	
	#Read and process atom names
	fp = fopen("$tmpDir/ionNames", "r");
	ionNames = textscan(fp, "%s"){1};
	fclose(fp);
	#--- Associative array for mapping symbols to atomic numbers:
	Z.H = 1; Z.He = 2; Z.Li = 3; Z.Be = 4; Z.B = 5; Z.C = 6; Z.N = 7; Z.O = 8; Z.F = 9; Z.Ne = 10; Z.Na = 11; Z.Mg = 12; Z.Al = 13; Z.Si = 14; Z.P = 15; Z.S = 16;
	Z.Cl = 17; Z.Ar = 18; Z.K = 19; Z.Ca = 20; Z.Sc = 21; Z.Ti = 22; Z.V = 23; Z.Cr = 24; Z.Mn = 25; Z.Fe = 26; Z.Co = 27; Z.Ni = 28; Z.Cu = 29; Z.Zn = 30; Z.Ga = 31; Z.Ge = 32;
	Z.As = 33; Z.Se = 34; Z.Br = 35; Z.Kr = 36; Z.Rb = 37; Z.Sr = 38; Z.Y = 39; Z.Zr = 40; Z.Nb = 41; Z.Mo = 42; Z.Tc = 43; Z.Ru = 44; Z.Rh = 45; Z.Pd = 46; Z.Ag = 47; Z.Cd = 48;
	Z.In = 49; Z.Sn = 50; Z.Sb = 51; Z.Te = 52; Z.I = 53; Z.Xe = 54; Z.Cs = 55; Z.Ba = 56; Z.La = 57; Z.Ce = 58; Z.Pr = 59; Z.Nd = 60; Z.Pm = 61; Z.Sm = 62; Z.Eu = 63; Z.Gd = 64;
	Z.Tb = 65; Z.Dy = 66; Z.Ho = 67; Z.Er = 68; Z.Tm = 69; Z.Yb = 70; Z.Lu = 71; Z.Hf = 72; Z.Ta = 73; Z.W = 74; Z.Re = 75; Z.Os = 76; Z.Ir = 77; Z.Pt = 78; Z.Au = 79; Z.Hg = 80;
	Z.Tl = 81; Z.Pb = 82; Z.Bi = 83; Z.Po = 84; Z.At = 85; Z.Rn = 86; Z.Fr = 87; Z.Ra = 88; Z.Ac = 89; Z.Th = 90; Z.Pa = 91; Z.U = 92; Z.Np = 93; Z.Pu = 94; Z.Am = 95; Z.Cm = 96;
	#--- Map atom names to atomic symbols
	for iAtom = 1:nAtoms
		key = tolower(ionNames{iAtom});
		key = [ toupper(key(1)) key(2:end) ];
		if !isfield(Z, key)
			Z = setfield(Z, key, input(sprintf("Enter atomic number for unknown atom type '%s': ", ionNames{iAtom})));
		endif
		atnum(iAtom) = getfield(Z, key);
	endfor
	
	#Initial ionpos and lattice:
	lattice0 = load("-ascii", "$tmpDir/lattice.0");
	ionpos0 = load("-ascii", "$tmpDir/ionpos.0");
	
	#Subsequent ionpos and lattice if any:
	if nSteps > 0
		lattice = repmat(reshape(lattice0, [3,3,1]), [1,1,nSteps]);
		ionpos = zeros(nAtoms,3,nSteps);
		iLatDone=0;
		for iStep = 1:nSteps
			ionpos(:,:,iStep) = load("-ascii", sprintf("$tmpDir/ionpos.%d",iStep));
			latFile = sprintf("$tmpDir/lattice.%d",iStep);
			if exist(latFile, "file")
				lattice(:,:,iLatDone+1:iStep) = repmat(reshape(load("-ascii", latFile),[3,3,1]), [1,1,iStep-iLatDone]);
				iLatDone = iStep;
			endif
		endfor
	else
		lattice = reshape(lattice0, [3,3,1]);
		ionpos = reshape(ionpos0, [nAtoms,3,1]);
		nSteps = 1;
	endif
	
	#Read scalar fields (if any):
	varNames = { $vars };
	vars = {};
	isAnimated = length(varNames) && strcmp(tolower(varNames{1}),"animated");
	isFrames = length(varNames) && strcmp(tolower(varNames{1}),"frames");
	if (isAnimated || isFrames)
		#No scalar fields output when multiple configurtions output
		varNames = {};
	else
	    #Single configuration output, optionally with scalar fields
		nSteps = 1;
		lattice = lattice(:,:,end);
		ionpos = ionpos(:,:,end);
	endif
	for iVar = 1:length(varNames)
		fname = varNames{iVar};
		if index(fname,".")==0 #Assume variable name if no .
			if index(dumpName,"\$STAMP") || index(dumpName,"\$ITER")
				printf("Cannot use variable names when \$STAMP or \$ITER are used in the dump-name; use full file names instead\n");
				exit(1);
			endif
			fname = strrep(dumpName, "\$VAR", fname);
			fname = strrep(fname, "\$INPUT", inName);
		endif
		fp = fopen(fname, "r");
		v = fread(fp, "double");
		fclose(fp);
		#Handle endianness, if necessary:
		[comp,maxSize,endian] = computer();
		if strcmp(endian,"B")
			v = swapbytes(v); #files are always little-endian, so swap bytes in memory to make big-endian
		endif
		switch length(v)
			case prod(Sgrid) #Real scalar field on charge-density grid
				Scur = Sgrid;
			case prod(Swfns) #Real scalar field on wavefunction grid
				Scur = Swfns;
			case prod(Smlwf) #Real scalar field on Wannier supercell grid
				Scur = Smlwf;
			case prod(Sdouble) #Real scalar field on coulomb-truncation double grid
				Scur = Sdouble;
			case 2*prod(Swfns) #Complex scalar field on wavefunction grid
				Scur = Swfns;
				v = reshape(v, 2, []);
				v = v(1,:) + I*v(2,:);
				#Remove phase from complex scalar field
				vSq = reshape(v.^2, [],1);
				vSqAbs = max(abs(vSq), 1e-300);
				weightSum = sum(abs(vSq));
				rMean = sum(real(vSq))/weightSum; rSigma = sqrt(max(0, sum((real(vSq).^2) ./ vSqAbs)/weightSum - rMean^2));
				iMean = sum(imag(vSq))/weightSum; iSigma = sqrt(max(0, sum((imag(vSq).^2) ./ vSqAbs)/weightSum - iMean^2));
				meanPhase = 0.5*atan2(iMean, rMean);
				sigmaPhase = 0.5*sqrt((iMean*rSigma)^2 + (rMean*iSigma)^2)/(rMean^2 + iMean^2);
				v = real(v*exp(-I*meanPhase));
				if(sum(v)<0); v=-v; meanPhase+=pi; endif #Make sure dominant sign is positive
				printf("Converting complex scalar field '%s' to real scalar field; phase: %f +/- %f\n", varNames{iVar}, meanPhase, sigmaPhase);
			otherwise
				printf("File '%s' contains neither a real nor complex field of dimensions [ %d %d %d ]", fname, Sgrid);
				if norm(Sgrid - Swfns); printf(" or [ %d %d %d ]", Swfns); endif
				if norm(Swfns - Smlwf); printf(" or [ %d %d %d ]", Smlwf); endif
				if norm(Sgrid - Sdouble); printf(" or [ %d %d %d ]", Sdouble); endif
				printf("\n");
				exit(1);
		endswitch
		v = permute(reshape(v, fliplr(Scur)), [3 2 1]); #Note C-to-Fortran order change
		if norm(Sgrid-Sdouble) && !norm(Scur-Sdouble)
			v = circshift(v, mod(-iCenter,Sgrid));
		endif
		#Convert periodic to general grid:
		v = cat(1, v, v(1,:,:));
		v = cat(2, v, v(:,1,:));
		v = cat(3, v, v(:,:,1));
		vars{iVar} = v;
	endfor
	
	#Output XSF file:
	Angstrom = 1/0.5291772;
	if (!isFrames)
		#Open a single file and write header
		fp = fopen("$xsfFile", "w");
		if (nSteps>1)
			fprintf(fp, "ANIMSTEPS %d\n", nSteps);
		endif
		fprintf(fp, "CRYSTAL\n");
	endif
	for iStep = 1:nSteps
		if (isFrames)
			#Open a file for each step and write header:
			fp = fopen(sprintf("$xsfFile",iStep), "w");
			fprintf(fp, "CRYSTAL\n");
		endif
		R = squeeze(lattice(:,:,iStep));
		#Print lattice vectors:
		if (nSteps>1 && isAnimated)
			fprintf(fp, "PRIMVEC %d\n", iStep);
		else
			fprintf(fp, "PRIMVEC\n");
		endif
		fprintf(fp, "%10.6f %10.6f %10.6f\n", R*super/Angstrom); #note printf order takes care of transposing lattice vectors
		#Transform ionic positions:
		pos = squeeze(ionpos(:,:,iStep));
		if posCoords==0
			pos = pos * R'; #convert Lattice to Cartesian
		endif
		pos /= Angstrom;
		formatString = "%2d %10.6f %10.6f %10.6f\n";
		#Print positions
		if (nSteps>1 && isAnimated)
			fprintf(fp, "PRIMCOORD %d\n", iStep);
		else
			fprintf(fp, "PRIMCOORD\n");
		endif
		nOffsets = size(offsets,1);
		fprintf(fp, "%d 1\n", nAtoms*nOffsets);
		for iOffset = 1:nOffsets
			offset = [ offsets(iOffset,:) * R'/Angstrom, zeros(1,size(pos,2)-3) ];
			fprintf(fp, formatString, [ reshape(atnum,nAtoms,1) pos+repmat(offset,nAtoms,1) ]');
		endfor
		if (isFrames)
			fclose(fp);
		endif
	endfor
	#Output scalar fields (if any)
	for iVar = 1:length(varNames)
		fprintf(fp, "BEGIN_BLOCK_DATAGRID_3D\n");
		fprintf(fp, " %s\n", varNames{iVar})
		formatString=" ";
		Scur = size(vars{iVar});
		for k=1:Scur(1)
			formatString = [ formatString " %e" ];
		endfor
		formatString = [ formatString "\n" ];
		Rcur = squeeze(lattice(:,:,end));
		if norm(Sgrid-Sdouble) && !norm(Scur-1-Sdouble)
			Rcur = Rcur * diag(Sdouble ./ Sgrid);
		endif
		if norm(Swfns-Smlwf) && !norm(Scur-1-Smlwf)
			Rcur = Rcur * super;
		endif
		fprintf(fp, " BEGIN_DATAGRID_3D_%s\n", varNames{iVar})
		fprintf(fp, "  %d %d %d\n", Scur);
		fprintf(fp, "  %10.6f %10.6f %10.6f\n", [zeros(1,3); Rcur'/Angstrom]');
		fprintf(fp, formatString, vars{iVar});
		fprintf(fp, " END_DATAGRID_3D\n");
		fprintf(fp, "END_BLOCK_DATAGRID_3D\n");
	endfor
	if (!isFrames)
		fclose(fp);
	endif
EOF
octave -qf "$tmpDir/process.m"
rm -rf "$tmpDir"
