# notes

- I cannot use the dispersion correction, because GROMACS computes it from C6, and I set C6 to 1 for the tabulated interactions
    - actually I could, by adjusting ε and the table with C from the Buckingham fit, but that is complicated (too complicated for the paper, I think)

# setup

## import stuff

In [None]:
# magics
%matplotlib inline

# regulary used
from copy import deepcopy
import collections
import glob
import importlib
import itertools
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import numpy as np
import operator
import os
import pandas as pd
from pathlib import Path
import pickle
import random
import re
from scipy import constants as const
from scipy import integrate
from scipy import ndimage
from scipy import optimize
from scipy import signal
import string
import subprocess
import sys
import tempfile
import xml.etree.ElementTree as ET

# from own toolbox
import gromacstools as gt
run_bash = gt.general.run_bash
WorkingDir = gt.general.WorkingDir

# own constants
# Namespace for physical constants converted from SI (scipy.constants) to the
# unit system used in this notebook (presumably GROMACS units: nm, ps, kJ/mol,
# elementary charge -- TODO confirm).
class oconst: pass
oconst.k_gro = const.k * const.N_A * 1e-3  # Boltzmann constant per mole: J/K -> kJ/(mol K)
oconst.h_gro = const.h * const.N_A * 1e-3 * 1e12  # Planck constant: J s -> kJ/mol ps
oconst.f_gro = 138.935458  # electric conversion factor: V = f q²/r
oconst.e_gro = 1.0  # elementary charge expressed in units of e
# NOTE(review): resulting unit is unclear; the value does not look like the
# GROMACS vacuum permittivity (~5.73e-4 e² mol/(kJ nm)) -- verify before use.
oconst.epsilon_0_gro = const.epsilon_0 * const.N_A * 1e3
oconst.rec_cm_per_THz = 1e12 / const.c / 100  # wavenumber in cm⁻¹ that corresponds to 1 THz
oconst.bar_per_md_pressure = 10**28 * const.u  # presumably bar per kJ/(mol nm³) -- TODO confirm

# notebook wide variables
# list of submitted job ids, shared between cells
jobids = []

In [None]:
class OrderedSet(collections.abc.Set):
    """Read-only set that iterates in first-insertion order.

    Duplicates in ``iterable`` are dropped; the first occurrence fixes the
    position. Comparison and the set operators are inherited from
    ``collections.abc.Set``.
    """

    def __init__(self, iterable=()):
        # the keys of an OrderedDict provide uniqueness plus insertion order
        self.d = collections.OrderedDict.fromkeys(iterable)

    def __len__(self):
        return len(self.d)

    def __contains__(self, element):
        return element in self.d

    def __iter__(self):
        return iter(self.d)

    def __repr__(self):
        # readable display in notebook output instead of the default object address
        return f"{type(self).__name__}({list(self.d)!r})"

## often used helper functions

In [None]:
def check_job_stati(jobids, remote_host):
    """Print the SLURM status of every job and return the jobs that are not finished.

    jobids: job ids to query
    remote_host: host handed to gt.remote.check_slurm_jobs

    A job counts as finished when its status is 'FAILED', 'COMPLETED',
    'OUT_OF_MEMORY', None, or starts with 'CANCELLED'.
    Returns a new list holding the remaining job ids in their original order.
    """
    finished_states = ('FAILED', 'COMPLETED', 'OUT_OF_MEMORY', None)
    finished = []
    for job, state in zip(jobids, gt.remote.check_slurm_jobs(jobids, remote_host)):
        print(job, state)
        # membership test first, so .startswith is never called on a None status
        if state in finished_states or state.startswith('CANCELLED'):
            finished.append(job)

    return [job for job in jobids if job not in finished]

In [None]:
def indent(elem, level=0):
    """Pretty-print an ElementTree element in place.

    Rewrites whitespace-only (or missing) ``text``/``tail`` attributes of
    ``elem`` and all its descendants so that each nesting level is indented
    by two spaces. ``level`` is the nesting depth of ``elem`` itself.
    """
    pad = "\n" + level*"  "

    def is_blank(text):
        return not text or not text.strip()

    if not len(elem):
        # leaf: only the tail matters, and the root keeps whatever tail it has
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + "  "
    if is_blank(elem.tail):
        elem.tail = pad
    last_child = None
    for last_child in elem:
        indent(last_child, level+1)
    # the last child is followed by the parent's closing tag, one level less indented
    if is_blank(last_child.tail):
        last_child.tail = pad

In [None]:
def _lj_sigma_epsilon(sig, eps, r, repulsive_only):
    """Return 4 eps [(sig/r)^12 - (sig/r)^6] on r; the r^-6 term is dropped if repulsive_only."""
    with np.errstate(divide='ignore', invalid='ignore'):
        energy = (sig/r)**12
        if not repulsive_only:
            energy -= (sig/r)**6
        energy *= 4 * eps
    return energy


def gen_potential_and_force(at1, at2, r, combining_rule, add_coulomb, nonbond_params, repulsive_only=False, coulomb_only=False):
    """Tabulate the pair potential and force between two atom types on the grid r.

    at1, at2: atom type dicts; reads 'type' and, depending on the branch,
        'σ'/'ε' or 'C6'/'C12', and 'q' when Coulomb is included
    r: array of distances, treated as equidistant (spacing taken from r[1] - r[0])
    combining_rule: 'geometric' (C6/C12) or 'lorentz-berthelot' (σ/ε)
    add_coulomb: add f q1 q2 / r to the Lennard-Jones potential
    nonbond_params: dict frozenset({type1, type2}) -> ('LJ', sigma, epsilon);
        an entry for the pair takes precedence over the combining rule
    repulsive_only: leave out the attractive r^-6 term
    coulomb_only: skip Lennard-Jones entirely and return pure Coulomb

    Returns (pot, force). Potential values above 1e6 are capped at 1e6 with the
    force set to 0 there; NaN entries become 1e6 (potential) and 0 (force).
    The force is minus the mean of the backward and forward difference, where
    the potential outside the grid is taken as 0.
    Raises Exception('Not implemented') for unknown rules or non-LJ pair entries.
    """
    if coulomb_only:
        pot = np.zeros_like(r)
    else:
        pair = frozenset((at1['type'], at2['type']))
        if pair in nonbond_params:
            entry = nonbond_params[pair]
            if entry[0] != 'LJ':
                raise Exception('Not implemented')
            sig, eps = entry[1:3]
            pot = _lj_sigma_epsilon(sig, eps, r, repulsive_only)
        elif combining_rule == 'geometric':
            C12 = (at1['C12'] * at2['C12'])**(1/2)
            C6 = (at1['C6'] * at2['C6'])**(1/2)
            with np.errstate(divide='ignore', invalid='ignore'):
                pot = C12 / r**12
                if not repulsive_only:
                    pot -= C6 / r**6
        elif combining_rule == 'lorentz-berthelot':
            sig = 1/2 * (at1['σ'] + at2['σ'])
            eps = (at1['ε'] * at2['ε'])**(1/2)
            pot = _lj_sigma_epsilon(sig, eps, r, repulsive_only)
        else:
            raise Exception('Not implemented')

    pot_cap = 1e6
    f_max = 0  # physical for constant potential
    with np.errstate(divide='ignore', invalid='ignore'):
        if add_coulomb or coulomb_only:
            pot += oconst.f_gro * at1['q'] * at2['q'] / r
        backward = np.diff(pot, prepend=0)
        forward = np.diff(pot, append=0)
        force = -1/2 * (backward + forward) / (r[1] - r[0])
        too_high = pot > pot_cap
        force[too_high] = f_max
        pot[too_high] = pot_cap
    pot = np.nan_to_num(pot, nan=pot_cap)
    force = np.nan_to_num(force, nan=f_max)
    return pot, force

In [None]:
def decombinate_LJ(sigma_ij, epsilon_ij, sigma_ii, epsilon_ii, combining_rule):
    """Invert a Lennard-Jones combining rule.

    Given the cross parameters (sigma_ij, epsilon_ij) and the self parameters
    of species i, return (sigma_jj, epsilon_jj) of species j.
    combining_rule is 'geometric' or 'lorentz-berthelot'; anything else raises
    Exception('Not implemented').
    """
    if combining_rule not in ('geometric', 'lorentz-berthelot'):
        raise Exception('Not implemented')

    # both rules: epsilon_ij = (epsilon_ii * epsilon_jj)^(1/2)
    epsilon_jj = epsilon_ij**2 / epsilon_ii
    if combining_rule == 'geometric':
        # sigma_ij = (sigma_ii * sigma_jj)^(1/2)
        sigma_jj = sigma_ij**2 / sigma_ii
    else:
        # sigma_ij = (sigma_ii + sigma_jj) / 2
        sigma_jj = 2*sigma_ij - sigma_ii

    return sigma_jj, epsilon_jj

In [None]:
def save_parametric_force_field_as_top(filename, ff, system_name, moltypes, osm_restraints=None):
    """Write a GROMACS topology file for a force field based on a parametric force field.

    filename: path of the topology file to write
    ff: force field dict; reads 'parametric-ff' (key into the notebook-level
        PARAMETRIC_FORCE_FIELDS) and the optional 'tabulated-potentials'
        (iterable of atom type pairs)
    system_name: text written to the [ system ] section
    moltypes: list of dicts with 'name', 'atoms' and 'nmols'; 'SOL' is taken
        from the water model itp file below the notebook-level template_dir,
        every other molecule type must consist of a single atom
    osm_restraints: optional dict molecule name -> dict with 'ai', 'funct',
        'g', 'r', 'k', written as a [ position_restraints ] entry

    Tabulated pairs are written as LJ with sigma=1 and epsilon=0.25; an entry
    for the same pair in the parametric force field's 'nonbond-params' wins.
    Raises Exception for non-LJ nonbond params and for multi-atom molecule
    types other than 'SOL'.
    """
    if osm_restraints is None:
        osm_restraints = {}
    parametric_ff = PARAMETRIC_FORCE_FIELDS[ff['parametric-ff']]
    # collect the pieces and join once instead of growing a string with +=
    parts = []
    add = parts.append

    add("[ defaults ]\n")
    add(";nbfunc  comb-rule  gen-pairs  fudgeLJ  fudgeQQ\n")
    nbfunc_dict = {'LJ': '1'}
    comb_rule_dict = {'lorentz-berthelot': '2', 'geometric': '3'}
    nbfunc = nbfunc_dict[parametric_ff['nbfunc']]
    comb_rule = comb_rule_dict[parametric_ff['combining-rule']]
    gen_pairs = parametric_ff['gen-pairs']
    fudgeLJ = parametric_ff['fudgeLJ']
    fudgeQQ = parametric_ff['fudgeQQ']
    add(f"{nbfunc}        {comb_rule}          {gen_pairs}         {fudgeLJ}      {fudgeQQ}\n")
    add("\n")

    # atomtypes
    mass_dict = {
        'OW': 15.9994,
        'HW': 1.008,
        'MW': 0.0,
        'CA': 40.08,
        'K': 39.0983,
        'LI': 6.941,
        'NA': 22.98977,
        'CL': 35.45300,
    }
    ptype_dict = {
        'OW': 'A',
        'HW': 'A',
        'MW': 'D',
        'CA': 'A',
        'K':  'A',
        'LI': 'A',
        'NA': 'A',
        'CL': 'A',
    }
    add("[ atomtypes ]\n")
    add(";name  mass      charge   ptype  sigma    epsilon\n")
    for at in parametric_ff['atomtypes']:
        add(f"{at['type']:5s}  {mass_dict[at['type']]:8.5f}  {at['q']:7.4f}  {ptype_dict[at['type']]:4s}  {at['σ']:8.6f}  {at['ε']:8.6f}\n")
    add("\n")

    # nonbond_params
    add("[ nonbond_params ]\n")
    add(";i    j     func  sigma       epsilon\n")
    # sigma=1, epsilon=0.25 gives C6 = 4 ε σ⁶ = 1 for the tabulated pairs
    nb_dict2 = {frozenset(pair): ('LJ', 1, 0.25) for pair in ff.get('tabulated-potentials', [])}
    for nb_set, nb in {**nb_dict2, **parametric_ff['nonbond-params']}.items():
        names = sorted(nb_set)
        # a pair of identical types collapses to a one-element frozenset
        at1, at2 = names if len(names) == 2 else (names[0], names[0])
        if nb[0] == 'LJ':
            add(f"{at1:4s}  {at2:4s}  1     {nb[1]:10.8f}  {nb[2]:10.8f}\n")
        else:
            raise Exception('not implemented')
    add("\n")

    # moleculetypes, each name only once even if it appears several times in moltypes
    mt_names_done = set()
    for mt in moltypes:
        mt_name = mt['name']
        if mt_name in mt_names_done:
            continue
        if mt_name == 'SOL':
            itp_file = os.path.join(template_dir, 'itp', parametric_ff['water-model'] + '.itp')
            with open(itp_file, 'r') as f:
                add(f.read())
        else:
            if len(mt['atoms']) != 1:
                raise Exception('not implemented for non single atom moltypes')
            add("[ moleculetype ]\n")
            add(";molname  nrexcl\n")
            add(f"{mt_name:8s}  1\n")
            add("\n")
            add("[ atoms ]\n")
            add(";id  at_type  res_nr  residu_name  at_name  cg_nr\n")
            add(f"1    {mt_name:7s}  1       {mt_name:7s}      {mt_name:7s}  1\n")
            if mt_name in osm_restraints:
                osm_res = osm_restraints[mt_name]
                add("\n")
                add("[ position_restraints ]\n")
                add(";ai  funct  g  r  k\n")
                add(f"{osm_res['ai']}  {osm_res['funct']}  {osm_res['g']}  {osm_res['r']}  {osm_res['k']}\n")
        add("\n")
        mt_names_done.add(mt_name)

    add("[ system ]\n")
    add(f"{system_name}\n")
    add("\n")

    add("[ molecules ]\n")
    for mt in moltypes:
        add(f"{mt['name']}  {mt['nmols']}\n")

    with open(filename, 'w') as f:
        f.write(''.join(parts))

In [None]:
def show_energy_graphs(properties_list, edr_file="ener.edr"):
    """Extract the named energy terms from edr_file with `gmx energy` and plot them.

    The selection is fed to gmx on stdin, one property per line. The
    intermediate file energy-temp.xvg in the current directory is removed
    after plotting.
    """
    selection = "\n".join(properties_list)
    run_bash(f"gmx energy -f {edr_file} -o energy-temp.xvg <<< '{selection}'")
    gt.xvg.plot("energy-temp.xvg")
    run_bash("rm energy-temp.xvg")
    
def _exp_decay(x, a, b, c):
    """Exponential relaxation a * exp(-b x) + c used as fit model."""
    return a * np.exp(-b*x) + c

def check_equi(properties_list, edr_file="ener.edr", safe_factor=3):
    """Raise if an energy term still looks like it is relaxing.

    properties_list: energy terms to extract with `gmx energy`
    edr_file: energy file to read
    safe_factor: the run has to be at least this many fitted decay times long

    Each property is fitted with an exponential decay over time. A property
    is considered unequilibrated when safe_factor times the fitted lifetime
    exceeds the last time value and the fitted amplitude is more than two
    standard deviations of the data; the fit is then plotted and
    Exception('looks unequilibrated') is raised. Properties whose fit fails
    are reported and skipped.
    """
    run_bash("gmx energy -f {} -o energy-temp.xvg <<< '{}'".format(edr_file, "\n".join(properties_list)))
    data, _ = gt.xvg.load("energy-temp.xvg")
    run_bash("rm energy-temp.xvg")
    for prop in properties_list:
        x = data['Time (ps)']
        y = data[prop]
        y_mean = np.mean(y)
        p0 = (y[0] - y_mean, 0.1, y_mean)
        try:
            popt, _ = optimize.curve_fit(_exp_decay, x, y, p0=p0)
        except Exception:
            # best effort: a failed fit must not abort the check, but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            print('..fit unsuccessful, continuing..')
            continue
        lifetime = 1 / popt[1]
        a = popt[0]
        if (safe_factor * lifetime > max(x)) and (abs(a) / np.std(y) > 2):
            print('abs(a) / std(y):', abs(a) / np.std(y),  '(should be small)')
            plt.plot(x, y)
            plt.plot(x, _exp_decay(x, *popt))
            plt.show()
            raise Exception('looks unequilibrated')

In [None]:
# NOTE: this re-defines show_energy_graphs with the same body as the
# definition in the cell that also contains check_equi.
def show_energy_graphs(properties_list, edr_file="ener.edr"):
    """Extract the given energy terms from edr_file with `gmx energy` and plot them."""
    # the property names are passed to gmx on stdin, one per line
    run_bash("gmx energy -f {} -o energy-temp.xvg <<< '{}'".format(edr_file, "\n".join(properties_list)))
    gt.xvg.plot("energy-temp.xvg")
    # remove the intermediate file again
    run_bash("rm energy-temp.xvg")

In [None]:
def readin_table(filename):
    """Read a two- or three-column text table.

    Lines starting with '#' or '@' are skipped. Returns (x, y, y_flag):
    x and y are float arrays of the first two columns; y_flag is the third
    column as a one-byte string array, or a list of empty strings when the
    file has no third column.
    """
    # ndmin=2 keeps a table with a single data row two-dimensional,
    # otherwise the column slicing below fails
    data = np.loadtxt(filename, dtype=str, comments=['#', '@'], ndmin=2)
    x = data[:, 0].astype(float)
    y = data[:, 1].astype(float)
    try:
        y_flag = data[:, 2].astype('S1')
    except IndexError:
        # no flag column present
        y_flag = [''] * len(x)
    return x, y, y_flag

In [None]:
def test_files_same(file1, file2, exclude_lines=('#.*',), compare_numpy=False):
    """Return True when two files have the same content.

    file1, file2: paths of the files to compare
    exclude_lines: tuple or list of regular expressions; matching lines are
        ignored by the textual comparison (passed to `diff -I`)
    compare_numpy: load both files with np.loadtxt and compare the numbers
        with a relative tolerance of 1% instead of comparing text

    Raises TypeError when exclude_lines is not a tuple or list (a plain
    string would otherwise be split into single characters).
    """
    if not isinstance(exclude_lines, (tuple, list)):
        raise TypeError('exclude_lines has to be a tuple or list of patterns')
    if compare_numpy:
        data1 = np.loadtxt(file1)
        data2 = np.loadtxt(file2)
        # tables of different shape are never the same; np.allclose would
        # either raise or silently broadcast them
        if data1.shape != data2.shape:
            return False
        return np.allclose(data1, data2, rtol=1e-2)  # 1% deviation
    else:
        exclude_args = ' '.join((f"-I '{ex}'" for ex in exclude_lines))
        return run_bash(f"if diff -q {exclude_args} {file1} {file2} &>/dev/null; then echo -n True; else echo -n False; fi") == 'True'


def skip_or_overwrite(source, dest, overwrite, compare_numpy=False):
    """Copy source to dest unless that can or should be skipped.

    Skips when dest and source have the same content, or when the content
    differs but overwrite is False. When dest is overwritten, a copy with a
    random '-backup-' suffix is made first. Lines matching '#.*' or '%.*'
    are ignored in the comparison.
    Returns True when we skip, False when source was copied.
    """
    if not os.path.isfile(dest):
        print(f".. file {dest} not present, copying ..")
        run_bash(f"cp {source} {dest}")
        return False

    if test_files_same(source, dest, exclude_lines=('#.*', '%.*'), compare_numpy=compare_numpy):
        print(".. files have same content ..")
        print(".. skipping ..")
        return True

    print(".. files have different content ..")
    if not overwrite:
        print(".. doing nothing ..")
        return True

    print(".. backing up, overwriting ..")
    backup_suffix = ''.join(random.choices(string.ascii_letters, k=10))
    run_bash(f"cp {dest} {dest}-backup-{backup_suffix}")
    run_bash(f"cp {source} {dest}")
    return False

## load iterative integral equations module

In [None]:
sys.path.append("/home/marvin/research/code/votca/csg/share/scripts/inverse")
import iie

## notebook specific constants

In [None]:
# absolute path (symlinks resolved) of the directory the notebook is run from
working_dir_base = os.path.realpath('.')
# 'template' directory below the current working directory; the topology
# writer reads the water model itp files from its 'itp' subdirectory
template_dir = os.path.join(os.getcwd(), 'template')

## remote computing resource

In [None]:
def gen_remote_stuff(remote_host, remote_partition, ntasks='exclusive', votca=False, **kwargs):
    """Build the remote base directory and the batch script header/footer.

    remote_host: not used inside this function
    remote_partition: SLURM partition; also selects the GROMACS module, so it
        has to be a key of the module table below (KeyError otherwise)
    ntasks: 'exclusive' for --exclusive, otherwise the value of --ntasks
    votca: additionally load the VOTCA dependencies and source VOTCARC.bash
    kwargs: extra sbatch options, e.g. mem_per_cpu='1000M' becomes
        --mem-per-cpu=1000M; options whose value is None are left out

    Returns (remote_dir_base, remote_header, remote_footer); the footer is
    currently an empty string.
    """
    remote_dir_base = "/home/mbernhardt/run/projects/yuki-azade-collab/simulations"

    remote_gromacs_module = {
        #'enzogpu': 'gromacs-2019-gcc-7.3.0-yfcl45y  # gromacs@2019 build_type=RelWithDebInfo +cuda~double~mpi~plumed+rdtscp+shared simd=AVX2_256',
        'enzogpu': 'gromacs-2021.5-gcc-10.2.0-7muu47b  # gromacs@2021.5~blas+cuda~cycle_subcounters~double+hwloc~ipo~lapack~mdrun_only~mpi~nosuffix~opencl+openmp~plumed~relaxed_double_precision+shared~sycl build_type=RelWithDebInfo',
        'mammut-b': 'gromacs-2019.6-gcc-10.2.0-c5rae2n',
        'mammut-c': 'gromacs-2019.3-gcc-8.2.0-2skeqyg  # gromacs@2019.3 build_type=RelWithDebInfo ~cuda~double~mpi~plumed+rdtscp+shared simd=AVX_256',
        'enzo': 'gromacs-2019.5-gcc-8.2.0-qmpnnzb  # gromacs@2019.5 build_type=RelWithDebInfo ~cuda~double~mpi~plumed+rdtscp+shared simd=SSE2',
        'biby': 'gromacs-2019.3-gcc-8.2.0-rf37djz  # gromacs@2019.3 build_type=RelWithDebInfo ~cuda~double~mpi~plumed~rdtscp+shared simd=SSE2',
    }

    tasks_option = '--exclusive' if ntasks == 'exclusive' else f"--ntasks={ntasks}"
    sbatch_arguments = ['--job-name=ions', f'--partition={remote_partition}', tasks_option, '--time=96:00:00']
    # python style keyword names become sbatch long options
    sbatch_arguments += [
        f"--{key.replace('_', '-')}={arg}"
        for key, arg in kwargs.items()
        if arg is not None
    ]
    sbatch_arguments_section = '\n'.join('#SBATCH ' + arg for arg in sbatch_arguments)

    header_sections = [f"""#!/bin/sh
{sbatch_arguments_section}

set -eo pipefail  # -u does not work with VOTCA and/or conda?

MODULEPATH=$MODULEPATH:/shared/spack/share/spack/modules/linux-fedora29-x86_64/
MODULEPATH=$MODULEPATH:/shared/spack/share/spack/modules/linux-fedora33-bulldozer/
MODULEPATH=$MODULEPATH:/shared/spack/share/spack/modules/linux-fedora33-x86_64/
module purge
module load {remote_gromacs_module[remote_partition]}
"""]
    if votca:
        header_sections.append("""
spack load /e4wa6u5  # fftw@3.3.8~mpi~openmp~pfft_patches precision=double,float
spack load /p2rbc3m  # boost
spack load /dthwmyc  # netlib-lapack@3.8.0
source /home/mbernhardt/software/votca/bin/VOTCARC.bash
""")
    header_sections.append("""
source ~/software/miniconda3/etc/profile.d/conda.sh
conda activate
export PYTHONPATH=$PYTHONPATH:~/software/gromacstools
""")
    remote_header = ''.join(header_sections)
    remote_footer = ''
    return remote_dir_base, remote_header, remote_footer

# host name handed to the remote helpers of this notebook
remote_host = 'franklin'
def test():
    """Print the batch script header for a sample configuration as a visual check."""
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo', ntasks=24, mem_per_cpu='1000M')
    #remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo', votca=False)
    print(remote_header)
    
test()
# only the remote base directory is kept as a notebook level variable
remote_dir_base, _, _ = gen_remote_stuff(remote_host, 'enzo', ntasks=24, mem_per_cpu='1000M')

## simulation set up stuff

### parametric force fields

In [None]:
# TIP4P/2005 water OW epsilon: ε/k_B = 93.2 K converted to kJ/mol
# (compare with the OW ε of the 'madrid' parametric force field)
93.2 * const.k * const.N_A / 1000

In [None]:
# fudge values are not set, but not important for the systems in this notebook

# Parametric force fields, keyed by name. Every entry holds
#   'atomtypes': per atom type the charge q and the Lennard-Jones σ and ε
#                (ε in kJ/mol; q presumably in e and σ in nm -- TODO confirm)
#   'scalings': optional per-type factors k_q, k_σ, k_ε applied further below
#   'nonbond-params': explicit pair parameters that override the combining rule
#   'water-model': name of the water itp template
#   'nbfunc', 'gen-pairs', 'fudgeLJ', 'fudgeQQ', 'combining-rule': [ defaults ] of the topology
# For each force field a '-dummyions' copy is created in which all non-water
# atom types have ε = 0 and q = 0.
PARAMETRIC_FORCE_FIELDS = {}

# Åqvist model with SPC water and Cl¯ from Smith and Dang
# Cl¯ from table 3 in chen2007 or
# Amber and OPLS both use it for cations?
# not used here, since effectively the same as OPLS

PARAMETRIC_FORCE_FIELDS['opls-q1.0'] = {
    'atomtypes': [
        {'type': 'OW', 'q': -0.8476, 'σ': 3.16557e-01, 'ε': 6.50194e-01},
        {'type': 'HW', 'q': +0.4238, 'σ': 0.00000e+00, 'ε': 0.00000e+00},
        {'type': 'CA', 'q': +2.0000, 'σ': 2.41203e-01, 'ε': 1.88136e+00},
        {'type': 'K',  'q': +1.0000, 'σ': 4.93463e-01, 'ε': 1.37235e-03},
        {'type': 'LI', 'q': +1.0000, 'σ': 2.12645e-01, 'ε': 7.64793e-02},
        {'type': 'NA', 'q': +1.0000, 'σ': 3.33045e-01, 'ε': 1.15980e-02},
        {'type': 'CL', 'q': -1.0000, 'σ': 4.41724e-01, 'ε': 4.92833e-01},
    ],
    'scalings': {},
    'water-model': 'water-spce',
    'nonbond-params': {},
    'nbfunc': 'LJ', 'gen-pairs': 'no', 'fudgeLJ': 0.0, 'fudgeQQ': 0.0,
    'combining-rule': 'geometric'
}
# copy with non-interacting, uncharged ions
PARAMETRIC_FORCE_FIELDS['opls-q1.0-dummyions'] = deepcopy(PARAMETRIC_FORCE_FIELDS['opls-q1.0'])
for at in PARAMETRIC_FORCE_FIELDS['opls-q1.0-dummyions']['atomtypes']:
    if at['type'] not in ('OW', 'HW'):
        at['ε'] = 0.0
        at['q'] = 0.0

# ECC
# from kohagen 2016
# Ca²⁺ from martinek 2018 (1.2 nm cut-off, ? tail correction)
# K⁺ from bruce 2018 (only q scaled, not structure fitted, not with Cl¯) (1.4 nm cut-off, no tail correction)
# SPC/E water
# 1.2 nm cut-off in kohagen 2016, no tail correction
PARAMETRIC_FORCE_FIELDS['eccr1'] = {
    'atomtypes': [
        {'type': 'OW', 'q': -0.8476, 'σ': 3.16557e-01, 'ε': 6.50194e-01},
        {'type': 'HW', 'q': +0.4238, 'σ': 0.00000e+00, 'ε': 0.00000e+00},
        {'type': 'CA', 'q': +1.5000, 'σ': 2.66560e-01, 'ε': 5.07200e-01},
        {'type': 'LI', 'q': +0.7500, 'σ': 1.80000e-01, 'ε': 7.64700e-02},
        {'type': 'K',  'q': +0.7500, 'σ': 3.34000e-01, 'ε': 1.30000e-01},
        {'type': 'NA', 'q': +0.7500, 'σ': 2.11500e-01, 'ε': 5.44284e-01},
        {'type': 'CL', 'q': -0.7500, 'σ': 4.10000e-01, 'ε': 4.92800e-01},
    ],
    'scalings': {},
    'water-model': 'water-spce',
    'nonbond-params': {},
    'nbfunc': 'LJ', 'gen-pairs': 'no', 'fudgeLJ': 0.0, 'fudgeQQ': 0.0,
    'combining-rule': 'lorentz-berthelot'
}
# copy with non-interacting, uncharged ions
PARAMETRIC_FORCE_FIELDS['eccr1-dummyions'] = deepcopy(PARAMETRIC_FORCE_FIELDS['eccr1'])
for at in PARAMETRIC_FORCE_FIELDS['eccr1-dummyions']['atomtypes']:
    if at['type'] not in ('OW', 'HW'):
        at['ε'] = 0.0
        at['q'] = 0.0

# netz
# K⁺, Li⁺, Na⁺, Cl¯ from horinek 2009 (set 5b, medium deep LJ, ε=0.65) (cut-off = 0.9 nm, E-,p-tail correction)
# Ca²⁺ from mamatkulov 2013 (cut-off = 0.9 nm, E-,p-tail correction)
# SPC/E water
PARAMETRIC_FORCE_FIELDS['netz'] = {
    'atomtypes': [
        {'type': 'OW', 'q': -0.8476, 'σ': 3.16557e-01, 'ε': 6.50194e-01},
        {'type': 'HW', 'q': +0.4238, 'σ': 0.00000e+00, 'ε': 0.00000e+00},
        #{'type': 'CA', 'q': +2.0000, 'σ': 2.79000e-01, 'ε': 7.80000e-01},  # old and WRONG!
        {'type': 'CA', 'q': +2.0000, 'σ': 2.41000e-01, 'ε': 9.40000e-01},
        {'type': 'K',  'q': +1.0000, 'σ': 2.89000e-01, 'ε': 6.50000e-01},
        {'type': 'LI', 'q': +1.0000, 'σ': 1.47000e-01, 'ε': 6.50000e-01},
        {'type': 'NA', 'q': +1.0000, 'σ': 2.23000e-01, 'ε': 6.50000e-01},
        {'type': 'CL', 'q': -1.0000, 'σ': 4.40000e-01, 'ε': 4.20000e-01},
    ],
    'scalings': {},
    'water-model': 'water-spce',
    'nonbond-params': {},
    'nbfunc': 'LJ', 'gen-pairs': 'no', 'fudgeLJ': 0.0, 'fudgeQQ': 0.0,
    'combining-rule': 'lorentz-berthelot'
}
# copy with non-interacting, uncharged ions
PARAMETRIC_FORCE_FIELDS['netz-dummyions'] = deepcopy(PARAMETRIC_FORCE_FIELDS['netz'])
for at in PARAMETRIC_FORCE_FIELDS['netz-dummyions']['atomtypes']:
    if at['type'] not in ('OW', 'HW'):
        at['ε'] = 0.0
        at['q'] = 0.0

# madrid-2019 (cut-off = 1.0 nm, E-,p-tail correction)
# 0.85 charge factor
# TIP4P/2005 water
PARAMETRIC_FORCE_FIELDS['madrid'] = {
    'atomtypes': [
        {'type': 'OW', 'q': +0.0000, 'σ': 3.15890e-01, 'ε': 7.74908e-01},
        {'type': 'HW', 'q': +0.5564, 'σ': 0.00000e+00, 'ε': 0.00000e+00},
        {'type': 'MW', 'q': -1.1128, 'σ': 0.00000e+00, 'ε': 0.00000e+00},
        {'type': 'CA', 'q': +1.7000, 'σ': 2.66560e-01, 'ε': 5.07200e-01},
        {'type': 'K',  'q': +0.8500, 'σ': 2.30140e-01, 'ε': 1.98574e+00},
        {'type': 'LI', 'q': +0.8500, 'σ': 1.43970e-01, 'ε': 4.35090e-01},
        {'type': 'NA', 'q': +0.8500, 'σ': 2.21737e-01, 'ε': 1.472356e+00},
        {'type': 'CL', 'q': -0.8500, 'σ': 4.69906e-01, 'ε': 7.69230e-02},
    ],
    'scalings': {},
    # explicit pair parameters, keyed by the unordered pair of atom types
    'nonbond-params': {
        #frozenset(('A1', 'A2')):  ('sigma', 'epsilon'),
        frozenset(('CL', 'OW')): ('LJ', 0.42386698, 0.06198347),
        frozenset(('NA', 'OW')): ('LJ', 0.26083754, 0.79338830),
        frozenset(('NA', 'CL')): ('LJ', 0.30051231, 1.43889423),
        frozenset(('CA', 'OW')): ('LJ', 0.24000000, 7.25000000),
        frozenset(('CA', 'CL')): ('LJ', 0.31500000, 1.00000000),
        frozenset(('K',  'OW')): ('LJ', 0.28904000, 1.40043000),
        frozenset(('K',  'CL')): ('LJ', 0.33970000, 1.40000000),
        frozenset(('LI', 'OW')): ('LJ', 0.21200000, 0.70065003),
        frozenset(('LI', 'CL')): ('LJ', 0.27000000, 1.28294385),
    },
    'water-model': 'water-tip4p2005',
    'nbfunc': 'LJ', 'gen-pairs': 'no', 'fudgeLJ': 0.0, 'fudgeQQ': 0.0,
    'combining-rule': 'lorentz-berthelot'
}
# copy with non-interacting, uncharged ions; the explicit pair parameters
# are dropped as well, otherwise they would re-introduce ion interactions
PARAMETRIC_FORCE_FIELDS['madrid-dummyions'] = deepcopy(PARAMETRIC_FORCE_FIELDS['madrid'])
for at in PARAMETRIC_FORCE_FIELDS['madrid-dummyions']['atomtypes']:
    if at['type'] not in ('OW', 'HW', 'MW'):
        at['ε'] = 0.0
        at['q'] = 0.0
PARAMETRIC_FORCE_FIELDS['madrid-dummyions']['nonbond-params'] = {}

# apply scalings
def pff_apply_scalings(parametric_force_fields):
    """Multiply q, σ and ε of every atom type by its scaling factors, in place.

    Each force field's 'scalings' dict maps an atom type to a dict with the
    optional factors 'k_q', 'k_σ' and 'k_ε'; missing factors count as 1.
    The 'scalings' entry is removed from the force field afterwards.
    """
    scaled_properties = (('q', 'k_q'), ('σ', 'k_σ'), ('ε', 'k_ε'))
    for ff in parametric_force_fields.values():
        scalings = ff['scalings']
        for at in ff['atomtypes']:
            for prop, factor_name in scaled_properties:
                at[prop] *= scalings.get(at['type'], {}).get(factor_name, 1)
        del ff['scalings']
pff_apply_scalings(PARAMETRIC_FORCE_FIELDS)

# calculate C6, C12
# redundancy, but that's ok
def pff_calc_C6_C12(parametric_force_fields):
    """Add 'C6' = 4 ε σ⁶ and 'C12' = 4 ε σ¹² to every atom type, in place."""
    for ff in parametric_force_fields.values():
        for at in ff['atomtypes']:
            at.update(
                C6=4 * at['ε'] * at['σ']**6,
                C12=4 * at['ε'] * at['σ']**12,
            )
pff_calc_C6_C12(PARAMETRIC_FORCE_FIELDS)

# print parameters
def pff_print_atomtypes(parametric_force_fields):
    """Print each force field name followed by one line listing its atom types."""
    for ff_name, ff in parametric_force_fields.items():
        print(ff_name)
        # every type is padded with two spaces on both sides
        print(''.join(f"  {at['type']}  " for at in ff['atomtypes']))
# store the name of each parametric force field inside its own dict as well
PARAMETRIC_FORCE_FIELDS = {pff_name: {'name': pff_name, **pff} for pff_name, pff in PARAMETRIC_FORCE_FIELDS.items()}
pff_print_atomtypes(PARAMETRIC_FORCE_FIELDS)
# overview table as cell output, one row per force field
pd.DataFrame(PARAMETRIC_FORCE_FIELDS).transpose()

In [None]:
# check dummyion ff
# shows, per base force field, the atom types and the explicit pair
# parameters of the '-dummyions' variant as tables for visual inspection
[(
    pd.DataFrame(PARAMETRIC_FORCE_FIELDS[f'{ff}-dummyions']['atomtypes']),
    pd.DataFrame(PARAMETRIC_FORCE_FIELDS[f'{ff}-dummyions']['nonbond-params']),
) for ff in ['opls-q1.0', 'eccr1', 'netz', 'madrid']]

In [None]:
# check cl- opls-aa from chandrasekhar
def check_opls():
    """Print LJ parameters derived from squared A/C coefficients (scaled by const.calorie).

    First line: the (sigma, epsilon) obtained by removing the O-O part from
    the cl- - O pair with the geometric rule; second line: sigma and epsilon
    of the cl- - O pair itself.
    """
    def lj_from_squares(a_squared, c_squared):
        a = np.sqrt(a_squared)
        c = np.sqrt(c_squared)
        return (a / c)**(1/6), c**2 / 4 / a

    # cl- - O
    sigma, epsilon = lj_from_squares(26000 * const.calorie, 3500 * const.calorie)
    sigma_OO, epsilon_OO = lj_from_squares(6e5 * const.calorie, 610 * const.calorie)
    print(decombinate_LJ(sigma, epsilon, sigma_OO, epsilon_OO, 'geometric'))
    print(sigma, epsilon)
    
#check_opls()

In [None]:
# check opls-aa from aqvist
def check_opls():
    """Print sigma and epsilon computed from the squared A and C values for Li+."""
    # Li+
    repulsion, dispersion = 25**2, 2.6**2
    print(
        (repulsion / dispersion)**(1/6),
        dispersion**2 / 4 / repulsion * const.calorie,
    )
    
#check_opls()

### force fields

In [None]:
# Force fields used for the simulations. Every entry refers to a parametric
# force field ('parametric-ff') and adds a cut-off, the cations it covers,
# tags, and optionally the atom type pairs that use tabulated potentials.
force_fields = {
    'opls-co0.9tc': {'tags': ['conc-range', 'tail-corr'], 'cut-off': 0.9, 'parametric-ff': 'opls-q1.0',
                     'cations': ('CA', 'K', 'LI', 'NA')},
    'eccr1-co1.2': {'tags': ['conc-range', ], 'cut-off': 1.2, 'parametric-ff': 'eccr1',
                    'cations': ('CA', 'K', 'LI', 'NA')},
    'netz-co0.9tc': {'tags': ['conc-range', 'tail-corr'], 'cut-off': 0.9, 'parametric-ff': 'netz',
                     'cations': ('CA', 'K', 'LI', 'NA')},
    'madrid-co1.0tc': {'tags': ['conc-range', 'tail-corr'], 'cut-off': 1.0, 'parametric-ff': 'madrid',
                       'cations': ('CA', 'K', 'LI', 'NA')},
    # as basis for inverse method
    # cannot have tail corrections
    'netz-co0.9': {'tags': ['dummy'], 'cut-off': 0.9, 'parametric-ff': 'netz',
                   'cations': ('CA', 'K', 'LI', 'NA')},
    # cannot have a different cut-off
    'eccr1-co0.9': {'tags': ['dummy'], 'cut-off': 0.9, 'parametric-ff': 'eccr1',
                   'cations': ('CA', 'K', 'LI', 'NA')},
    # inverse results
    'iff-altern5-eccr1-co1.2-nopc': {'tags': ['halftabulated', 'inverse-result', 'conc-range'], 'cut-off': 1.2,
                                     'parametric-ff': 'eccr1', 'cations': ('CA', 'K', 'LI', 'NA'),
                                     'tabulated-potentials': tuple((('OW', ion) for ion in ('CA', 'K', 'LI', 'NA', 'CL')))},
    'iff-altern5-netz-co0.9-nopc': {'tags': ['halftabulated', 'inverse-result', 'conc-range'], 'cut-off': 0.9,  # no tail-corr
                                    'parametric-ff': 'netz', 'cations': ('CA', 'K', 'LI', 'NA'),
                                    'tabulated-potentials': tuple((('OW', ion) for ion in ('CA', 'K', 'LI', 'NA', 'CL')))},
    # inverse results fitted
    # not in final paper
    # 'Buckingham-iff-altern5-eccr1-co1.2-nopc': {'tags': ['halftabulated', 'fit', 'conc-range'], 'cut-off': 1.2,
    #                                             'parametric-ff': 'eccr1', 'cations': ('CA', 'K', 'LI', 'NA'),
    #                                             'tabulated-potentials': tuple((('OW', ion) for ion in ('CA', 'K', 'LI', 'NA', 'CL')))},
    # 'Buckingham-iff-altern5-netz-co0.9-nopc': {'tags': ['halftabulated', 'fit', 'conc-range'], 'cut-off': 0.9,  # no tail-corr
    #                                            'parametric-ff': 'netz', 'cations': ('CA', 'K', 'LI', 'NA'),
    #                                            'tabulated-potentials': tuple((('OW', ion) for ion in ('CA', 'K', 'LI', 'NA', 'CL')))},
    # inverse results fitted for TI test
    'LJ-12-6-iff-altern5-netz-co0.9-nopc': {'tags': ['halftabulated', 'fit', 'dummy'], 'cut-off': 0.9,  # no tail-corr
                                            'parametric-ff': 'netz', 'cations': ('CA', 'K', 'LI', 'NA'),
                                            'tabulated-potentials': tuple((('OW', ion) for ion in ('CA', 'K', 'LI', 'NA', 'CL')))},
    # half tabulated LJ potentials for TI tests
    'netz-co0.9-tab': {'tags': ['halftabulated', 'dummy'], 'cut-off': 0.9,
                       'parametric-ff': 'netz', 'cations': ('CA', 'K', 'LI', 'NA'),
                       'tabulated-potentials': tuple((('OW', ion) for ion in ('CA', 'K', 'LI', 'NA', 'CL')))},
    # as start point for TI, ions are 'ideal gas'
    'opls-co0.9tc-dummyions': {'tags': ['tail-corr', 'dummy'], 'cut-off': 0.9, 'parametric-ff': 'opls-q1.0-dummyions',
                               'cations': ('CA', 'K', 'LI', 'NA')},
    'eccr1-co1.2-dummyions': {'tags': ['dummy'], 'cut-off': 1.2, 'parametric-ff': 'eccr1-dummyions',
                              'cations': ('CA', 'K', 'LI', 'NA')},
    'netz-co0.9tc-dummyions': {'tags': ['tail-corr', 'dummy'], 'cut-off': 0.9, 'parametric-ff': 'netz-dummyions',
                               'cations': ('CA', 'K', 'LI', 'NA')},
    'madrid-co1.0tc-dummyions': {'tags': ['tail-corr', 'dummy'], 'cut-off': 1.0, 'parametric-ff': 'madrid-dummyions',
                                 'cations': ('CA', 'K', 'LI', 'NA')},
}

# store the name of each force field inside its own dict as well
force_fields = {ff_name: {'name': ff_name, **ff} for ff_name, ff in force_fields.items()}
# overview table as cell output, one row per force field
pd.DataFrame(force_fields).transpose()

### atom types

In [None]:
# Atoms by atom name with their mass; the water hydrogens are listed per
# atom name (HW1, HW2), see get_atomtype for the mapping to atom types.
atomtypes = {
    'OW': {'name': 'OW', 'mass': 15.9994},
    'HW1': {'name': 'HW1', 'mass':  1.008},
    'HW2': {'name': 'HW2', 'mass':  1.008},
    'MW': {'name': 'MW', 'mass':  0.0},
    'CA': {'name': 'CA', 'mass': 40.08000},
    'K':  {'name': 'K', 'mass': 39.09830},
    'LI': {'name': 'LI', 'mass': 6.94100},
    'NA': {'name': 'NA', 'mass': 22.98977},
    'CL': {'name': 'CL', 'mass': 35.45300},
    'M': {'name': 'M', 'mass': 0.0},  # virtual site for TI
}

# for later assignment
def get_atomnames(atomtype):
    """Return the tuple of atom names that belong to an atom type.

    'HW' maps to both water hydrogens; every other type maps to itself.
    """
    names_by_type = {'HW': ('HW1', 'HW2')}
    try:
        return names_by_type[atomtype]
    except KeyError:
        return (atomtype,)

def get_atomtype(atomname):
    """Return the atom type of an atom name.

    The water hydrogens 'HW1' and 'HW2' share the type 'HW'; every other
    name is its own type.
    """
    type_by_name = dict.fromkeys(('HW1', 'HW2'), 'HW')
    try:
        return type_by_name[atomname]
    except KeyError:
        return atomname

### system types

In [None]:
def gen_systemtypes():
    """Build the dict of system types (pure water and the chloride salt solutions).

    Every entry holds
        'name': the system type name (same as its key)
        'ions': None for pure water, else (cation atoms, anion atoms), each a
            tuple of entries of the notebook-level ``atomtypes``
        'n_cation_anion': None for pure water, else the number of cations and
            anions per salt unit
    """
    def atoms_of(*atom_names):
        return tuple(atomtypes[atom_name] for atom_name in atom_names)

    cl_atoms = atoms_of('CL')
    # system type name -> (cation atom name, (cations, anions) per salt unit)
    salts = {
        'water-licl': ('LI', (1, 1)),
        'water-nacl': ('NA', (1, 1)),
        'water-kcl': ('K', (1, 1)),
        'water-cacl2_': ('CA', (1, 2)),
    }

    system_types = {
        'water-pure': {'name': 'water-pure', 'ions': None, 'n_cation_anion': None},
    }
    for st_name, (cation, n_cation_anion) in salts.items():
        system_types[st_name] = {
            'name': st_name,
            'ions': (atoms_of(cation), cl_atoms),
            'n_cation_anion': n_cation_anion,
        }
    return system_types

system_types = gen_systemtypes()
pd.DataFrame(system_types).transpose()

### system generator

In [None]:
# likely constant parameters, better than having those as separate globals
# NOTE: the key 'n_salt_all_analyis' is misspelled, but system_generator reads
# it under exactly this name, so it has to stay as it is.
SYSTEM_PARAMS = dict(
    n_water = 5000,  # number of water molecules per system
    n_salts = (25, 50, 100, 150, 200, 250, 500),  # salt amounts for systems with ions
    n_salt_all_ff = (0, 50),  # salt amounts for which all force fields are used, not only the 'conc-range' ones
    n_salt_all_analyis = (0, 50, 500),
    tags_all = ['npt', 'npt-dist'],
    tags_all_analysis = ['resacf', 'therm-exp', 'dos'],
    # unused: nvt dist
)
# show the parameters as cell output
SYSTEM_PARAMS

In [None]:
def calc_volume_from_real(density, molar_mass, n_mols):
    """Return the volume in nm³ that n_mols molecules occupy at the given density.

    density in g/mL
    molar_mass in g/mol
    n_mols: number of molecules
    """
    ml_per_cubic_nm = 1e-21
    # mass in g -> volume in mL -> volume in nm³
    return n_mols * molar_mass / const.N_A / density / ml_per_cubic_nm

# specific for water-pure and water-salt
def system_generator(system_types, force_fields, system_params=SYSTEM_PARAMS, parametric_force_fields=PARAMETRIC_FORCE_FIELDS, verbose=False):  
    """
    Generate the system dicts for all combinations of system type, salt count and force field.

    system_types: dict of system types as returned by gen_systemtypes()
    force_fields: dict of force fields; the keys 'name', 'tags', 'cations',
        'parametric-ff' and 'cut-off' of each force field are read here
    system_params: dict with the keys of SYSTEM_PARAMS
    parametric_force_fields: dict of parametric force fields, looked up by
        force_field['parametric-ff']; its 'water-model' entry is read here
    verbose: print system type, salt count and force field name while iterating

    Pure water is only generated with zero salt, salt systems with all values
    in system_params['n_salts']. Salt counts in 'n_salt_all_ff' are combined
    with every force field, all others only with force fields that have the
    tag 'conc-range'. Force fields whose 'cations' do not contain the cation
    of the system type are skipped.

    Yields one dict per system with the keys 'name', 'type', 'force-field',
    'parametric-ff', 'temperature', 'density-init', 'is-small', 'moltypes',
    'tags', 'n-salt', 'r_max_g_intra', 'volume-init', 'concentration-init',
    'mole-fraction', 'molar-mixing-ratio', 'mass-fraction', 'atomnames',
    'atomnames-no-h', 'atomtypes' and 'atomtypes-no-h'.
    """
    
    # system parameters
    n_water = system_params['n_water']
    n_salts = system_params['n_salts']
    n_salt_all_ff = system_params['n_salt_all_ff']
    n_salt_all_analyis = system_params['n_salt_all_analyis']
    tags_all = system_params['tags_all']
    tags_all_analysis = system_params['tags_all_analysis']
    
    # redundancy, but that's ok
    # atoms of one water molecule for each supported water model (entries of the global atomtypes dict)
    water_atoms_dict = {
        'water-spce': tuple((atomtypes[atom] for atom in ['OW', 'HW1', 'HW2'])),
        'water-tip4p2005': tuple((atomtypes[atom] for atom in ['OW', 'HW1', 'HW2', 'MW'])),
    }
    
    for system_type_name, system_type in system_types.items():
        if verbose: print(system_type_name)
            
        # pure water (no ions) is only generated once, with zero salt
        ions = system_type['ions']
        if system_type['ions'] is None:
            systype_n_salts = [0]
        else:
            systype_n_salts = n_salts

        for n_salt in systype_n_salts:
            if verbose: print(n_salt)
            # number of ions from the number of salt units and the stoichiometry
            if ions is not None:
                n_cation = n_salt * system_type['n_cation_anion'][0]
                n_anion = n_salt * system_type['n_cation_anion'][1]

            # all force fields for n_salt in n_salt_all_ff
            # otherwise only the force fields tagged 'conc-range'
            force_fields_this_n_salt = (
                force_fields if n_salt in n_salt_all_ff
                else {ff_name: ff for ff_name, ff in force_fields.items() if 'conc-range' in ff['tags']}
            )
            for force_field_name, force_field in force_fields_this_n_salt.items():
                # only if force field has info about cation
                if ions is not None and not ions[0][0]['name'] in force_field['cations']:
                    continue
                if verbose: print(force_field['name'])
                    
                # molecule types: water always first, then cation, then anion
                parametric_ff = parametric_force_fields[force_field['parametric-ff']]
                moltypes = [
                    {'name': "SOL", 'atoms': water_atoms_dict[parametric_ff['water-model']], 'nmols': n_water,
                     'sigma': 2, 'abc_indicators': [1, 2, 0, -1], 'rot_treat': 'f',
                     'type': parametric_ff['water-model']},
                ]
                if ions is not None:
                    if n_cation != 0:
                        moltypes = moltypes + [
                            {'name': ions[0][0]['name'], 'atoms': ions[0], 'nmols': n_cation,
                             'sigma': 1, 'abc_indicators': [0, 0, 0, 0], 'rot_treat': 'f'},
                        ]
                    if n_anion != 0:
                        moltypes = moltypes + [
                            {'name': ions[1][0]['name'], 'atoms': ions[1], 'nmols': n_anion,
                             'sigma': 1, 'abc_indicators': [0, 0, 0, 0], 'rot_treat': 'f'}
                    ]
                # tags: common tags + force field tags (+ analysis tags for selected salt counts)
                tags = []
                tags += tags_all
                tags += force_field['tags']
                if n_salt in n_salt_all_analyis:
                    tags += tags_all_analysis

                # system name has the form "<composition>/<force field name>"
                if ions is None:
                    name = f"water{n_water}-pure/" + force_field['name']
                else:
                    # second part of the system type name, e.g. 'nacl' from 'water-nacl'
                    salt_name = system_type['name'].split('-')[1]
                    name = f"water{n_water}-{salt_name}{n_salt}/" + force_field['name']
                system = {
                    'name': name, 'type': system_type,
                    'force-field': force_field,
                    'parametric-ff': parametric_ff,
                    'temperature': 300, 'density-init': 1.0, 'is-small': False,
                    'moltypes': moltypes, 'tags': tags,
                }

                # some additional fields for later use
                # redundancy, but that's ok
                system['n-salt'] = n_salt
                average_molar_mass = gt.moltypes.get_average_molar_mass(moltypes)
                system['r_max_g_intra'] = 0.2
                # initial volume in nm³ from the initial density
                system['volume-init'] = calc_volume_from_real(system['density-init'], average_molar_mass, gt.moltypes.get_nmols(moltypes))
                # half of the (cubic) box edge has to be larger than the cut-off
                assert system['volume-init']**(1/3) / 2 > system['force-field']['cut-off'], "system to small for RDF"
                # concentration measures
                # moltypes[0] is water, moltypes[1] the cation, so all measures count the cations
                # NOTE(review): a salt system with n_salt == 0 would have no moltypes[1] and fail here;
                # the values in SYSTEM_PARAMS['n_salts'] do not contain 0
                if ions is None:
                    system['concentration-init'] = system['mole-fraction'] = system['molar-mixing-ratio'] = system['mass-fraction'] = 0
                else:
                    # 1e-24 converts the volume from nm³ to l
                    system['concentration-init'] = system['moltypes'][1]['nmols'] / const.N_A / (system['volume-init'] * 1e-24)  # mol/l
                    system['mole-fraction'] = system['moltypes'][1]['nmols'] / (system['moltypes'][0]['nmols'] + system['moltypes'][1]['nmols'])
                    system['molar-mixing-ratio'] = system['moltypes'][1]['nmols'] / system['moltypes'][0]['nmols']
                    # mass of all ions (moltypes 1 and 2) divided by the total mass
                    system['mass-fraction'] = (
                        sum((at['mass'] * mt['nmols'] for mt in system['moltypes'][1:3] for at in mt['atoms']))
                        / (average_molar_mass * gt.moltypes.get_nmols(moltypes))
                    )
                # ordered sets of atom names and atom types, with and without the water H and M sites
                system['atomnames'] = OrderedSet((atom['name'] for mt in system['moltypes'] for atom in mt['atoms']))
                system['atomnames-no-h'] = system['atomnames'] - OrderedSet(('HW1', 'HW2', 'MW'))
                system['atomtypes'] = OrderedSet((get_atomtype(atom['name']) for mt in system['moltypes'] for atom in mt['atoms']))
                system['atomtypes-no-h'] = system['atomtypes'] - OrderedSet(('HW', 'MW'))
                
                yield system

In [None]:
# system overview: one table row per system yielded by system_generator
pd.DataFrame(system_generator(system_types, force_fields))

## thermal expansion settings

In [None]:
# temperature offsets for the thermal expansion runs
# presumably in K, relative to the system temperature — TODO confirm
DeltaTs = (-10, 0, 10)

## dos parameters

In [None]:
# names of the density-of-states contributions;
# the three roto_a/b/c entries carry the tag 'comp', the others have no tags
dos_names = (
    [{'name': dos_name, 'tags': []} for dos_name in ('trn', 'roto', 'vib')]
    + [{'name': f'roto_{axis}', 'tags': ['comp']} for axis in 'abc']
)

# sampling parameters for the density of states
param_dos = dict(n_samples=5, n_blocks=4, n_frames_per_block=2000)

## plot settings

In [None]:
# short force field names used as plot labels, keyed by the force field name
ff_short_names = {
    'opls-co0.9tc': 'OPLS',
    'eccr1-co1.2': 'ECC',
    'eccr1-co0.9': 'ECC (0.9 nm cut)',
    'netz-co0.9tc': 'HMN',
    'netz-co0.9': 'HMN (no t.c.)',
    'madrid-co1.0tc': 'Madrid',
    'iff-altern5-eccr1-co1.2-nopc': 'ECC IMC',
    'iff-altern5-netz-co0.9-nopc': 'HMN IMC',
    'Buckingham-iff-altern5-eccr1-co1.2-nopc': 'ECC IMC Buck.',
    'Buckingham-iff-altern5-netz-co0.9-nopc': 'HMN IMC Buck.',
}

In [None]:
# general color list
colors = ['#228833', '#4477AA', '#66CCEE', '#AA3377', '#EE6677', '#CCBB44']
# plot color per force field; the trailing comments keep previously tried colors
ff_colors = {
    'opls-co0.9tc': '#1a3', #'#228833',
    'eccr1-co1.2': '#38c', #'#4477aa',
    'eccr1-co0.9': '#38c', #'#4477aa',  # for TI
    'netz-co0.9tc': '#b17', #'#aa3377',
    'madrid-co1.0tc': '#ec1', #'#CCBB44',
    'netz-co0.9': '#b17', #'#aa3377',
    'iff-altern5-eccr1-co1.2-nopc': '#9bd', #'#66bbee', #'#99bbdd',
    'iff-altern5-netz-co0.9-nopc': '#d9b',  #'#dd99bb',
    #'Buckingham-iff-altern5-eccr1-co1.2-nopc': '#99ccdd',
    #'Buckingham-iff-altern5-netz-co0.9-nopc': '#ffaa99',
}
# line style per force field: dashed for the parametric ones, dash-dotted for the 'iff-...' ones
ff_linestyles = {
    'opls-co0.9tc': '--',
    'eccr1-co1.2': '--',
    'eccr1-co0.9': '--',  # for TI
    'netz-co0.9tc': '--',
    'madrid-co1.0tc': '--',
    'netz-co0.9': '--',
    'iff-altern5-eccr1-co1.2-nopc': '-.',
    'iff-altern5-netz-co0.9-nopc': '-.',
    #'Buckingham-iff-altern5-eccr1-co1.2-nopc': ':',
    #'Buckingham-iff-altern5-netz-co0.9-nopc': ':',
}
# preview: one horizontal line per force field in its color and line style
fig, ax = plt.subplots()
for f, (ff, color) in enumerate(ff_colors.items()):
    ax.plot([0, 1], [-f, -f], color=color, label=ff_short_names[ff], linewidth=5, linestyle=ff_linestyles[ff])
ax.legend()
plt.show()

In [None]:
# chemical formula per system type as LaTeX \ce{} markup
# (the mhchem package is loaded in the LaTeX preamble of mpl_rc_global)
sys_type_short_names = {
    'water-cacl2_': r'\ce{CaCl2}',
    'water-kcl': r'\ce{KCl}',
    'water-licl': r'\ce{LiCl}',
    'water-nacl': r'\ce{NaCl}',
    'water-pure': r'\ce{H2O}',
}

In [None]:
# element symbol per atom name
at_short_names = {
    'CA': 'Ca',
    'NA': 'Na',
    'K': 'K',
    'LI': 'Li',
    'CL': 'Cl',
    'OW': 'O',
}
# ion formula including the charge per atom name, as LaTeX \ce{} markup
ion_short_names = {
    'CA': r'\ce{Ca^2+}',
    'NA': r'\ce{Na+}',
    'K': r'\ce{K+}',
    'LI': r'\ce{Li+}',
    'CL': r'\ce{Cl-}',
}

In [None]:
# plot label per non-bonded interaction, keyed by the interaction name '<type1>-<type2>'
nb_plot_name = {
    'OW-LI': 'O - Li',
    'OW-NA': 'O - Na',
    'OW-K': 'O - K',
    'OW-CA': 'O - Ca',
    'OW-CL': 'O - Cl',
    'CA-CL': 'Ca - Cl',
}

In [None]:
# colormap name per non-bonded interaction
nb_plot_cmap = {
    'OW-LI': 'Purples',
    'OW-NA': 'Greens',
    'OW-K': 'Blues',
    'OW-CA': 'Oranges',
    'OW-CL': 'RdPu',
}
# one fixed color per interaction: entry 160 of the colormap's lookup table.
# plt.get_cmap is used because mpl.cm.get_cmap was deprecated in Matplotlib 3.7
# and removed in 3.9.
nb_plot_color = {ia_name: plt.get_cmap(cmap)(160) for ia_name, cmap in nb_plot_cmap.items()}

# preview: for each interaction the full colormap (left) next to the chosen color (right)
gradient = np.linspace(0, 1, 256)
gradient = np.vstack((gradient, gradient))
fig, ax = plt.subplots(figsize=(3, 3), constrained_layout=True)
for i, (ia_name, cmap) in enumerate(nb_plot_cmap.items()):
    color = nb_plot_color[ia_name]
    ax.plot([0, 1], [-i, -i], color=color, label=nb_plot_name[ia_name], linewidth=5, linestyle='-')
    ax.imshow(gradient, cmap=cmap, extent=(-1, 0, -i-0.2, -i+0.2))
ax.set_xlim(-1, 1)
ax.set_ylim(-len(nb_plot_cmap), 1)
fig.legend()
plt.show()

In [None]:
# font sizes in pt
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12
# matplotlib rc parameters for the notebook's plots
# ('legend.frameon' was listed twice with the same value; the duplicate key is removed)
mpl_rc_global = {
    'figure.dpi': 120,
    'legend.frameon': False,
    'font.size': SMALL_SIZE,          # controls default text sizes
    'axes.titlesize': SMALL_SIZE,     # fontsize of the axes title
    'axes.labelsize': SMALL_SIZE,     # fontsize of the x and y labels
    'xtick.labelsize': SMALL_SIZE,    # fontsize of the tick labels
    'ytick.labelsize': SMALL_SIZE,    # fontsize of the tick labels
    'legend.fontsize': SMALL_SIZE,    # legend fontsize
    'figure.titlesize': MEDIUM_SIZE,  # fontsize of the figure title
    'xtick.top': True,
    'ytick.right': True,
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'text.usetex': True,  # otherwise \epsilon is same as \varepsilon. Also nice font
    'text.latex.preamble': r"\usepackage[version=4]{mhchem}",  # provides \ce{} for chemical formulas
}

## literature data

In [None]:
# Literature data per system type.
# 'crc-liquid-data': arrays of equal length with mass percent, molality, concentration and density
#     of the solution; get_osmotic_coeff_lit uses it to convert molality to molar concentration.
#     NOTE(review): units are not stated here, presumably mol/kg, mol/l and g/mL — confirm against the source
# 'osm-coeff': per literature source the temperature, the molality and the osmotic coefficient
osmp_lit_dict = {
    'water-cacl2_': {
        'crc-liquid-data': {
            'mass_percent': np.array([0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 14.0,
                                      16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0, 32.0, 34.0, 36.0, 38.0, 40.0]),
            'molality': np.array([0.0, 0.045, 0.091, 0.184, 0.279, 0.375, 0.474, 0.575, 0.678, 0.784, 0.891, 1.001, 1.229, 1.467,
                                  1.716, 1.978, 2.253, 2.541, 2.845, 3.166, 3.504, 3.862, 4.240, 4.642, 5.068, 5.522, 6.007]),
            'concentration': np.array([0.0, 0.045, 0.091, 0.183, 0.277, 0.372, 0.469, 0.567, 0.667, 0.768, 0.872, 0.976, 1.191, 1.413,
                                       1.641, 1.878, 2.122, 2.374, 2.634, 2.902, 3.179, 3.464, 3.759, 4.062, 4.375, 4.698, 5.030]),
            'density': np.array([0.9982, 1.0024, 1.0065, 1.0148, 1.0232, 1.0316, 1.0401, 1.0486, 1.0572, 1.0659, 1.0747, 1.0835, 1.1014, 1.1198,
                                 1.1386, 1.1579, 1.1775, 1.1976, 1.2180, 1.2388, 1.2600, 1.2816, 1.3036, 1.3260, 1.3488, 1.3720, 1.3957]),
        },
        'osm-coeff': {
            # alternative?: Robinson, R. A.; Stokes, R. H. Electrolyte Solutions. Butterworth Scientific Publications: London. 1955.
            # M. E. Guendouzi, A. Dinane and A. Mounir, The Journal of Chemical Thermodynamics, 2001, 33, 1059–1072.
            'Guendouzi 2001': {
                'temperature': 298.15,
                'molality': np.array([0.2, 0.3, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0]),  # mol / kg
                'osm-coeff': np.array([0.864, 0.870, 0.914, 1.047, 1.190, 1.374, 1.560, 1.774, 1.977, 2.174, 2.377, 2.565, 2.731, 2.872]),
            }
        }
    },
    'water-kcl': {
        'crc-liquid-data': {
            'mass_percent': np.array([0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0]),
            'molality': np.array([0.0, 0.067, 0.135, 0.274, 0.415, 0.559, 0.706, 0.856, 1.010, 1.166, 1.327, 1.490, 1.829, 2.184, 2.555, 2.944, 3.353, 3.783, 4.236]),
            'concentration': np.array([0.0, 0.067, 0.135, 0.271, 0.409, 0.549, 0.691, 0.835, 0.980, 1.127, 1.276, 1.426, 1.733, 2.048, 2.370, 2.701, 3.039, 3.386, 3.742]),
            'density': np.array([0.9982, 1.0014, 1.0046, 1.0110, 1.0174, 1.0239, 1.0304, 1.0369, 1.0434, 1.0500, 1.0566, 1.0633, 1.0768, 1.0905, 1.1043, 1.1185,
                                 1.1328, 1.1474, 1.1623]),
        },
        'osm-coeff': {
            # alternative?: Robinson, R. A.; Stokes, R. H. Electrolyte Solutions. Butterworth Scientific Publications: London. 1955.
            # M. E. Guendouzi, A. Dinane and A. Mounir, The Journal of Chemical Thermodynamics, 2001, 33, 1059–1072.
            'Guendouzi 2001': {
                'temperature': 298.15,
                'molality': np.array([0.2, 0.3, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5]),  # mol / kg
                'osm-coeff': np.array([0.905, 0.902, 0.901, 0.897, 0.900, 0.913, 0.924, 0.934, 0.951, 0.974, 0.988]),
            }
        }
    },
    'water-licl': {
        'crc-liquid-data': {
            'mass_percent': np.array([0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0]),
            'molality': np.array([0.0, 0.119, 0.238, 0.481, 0.730, 0.983, 1.241, 1.506, 1.775, 2.051, 2.333, 2.621, 3.217, 3.840, 4.493, 5.178, 5.897, 6.653, 7.449,
                                  8.288, 9.173, 10.109]),
            'concentration': np.array([0.0, 0.118, 0.237, 0.476, 0.719, 0.964, 1.211, 1.462, 1.715, 1.971, 2.230, 2.491, 3.022, 3.564, 4.118, 4.683, 5.260, 5.851, 6.453,
                                       7.069, 7.700, 8.344]),
            'density': np.array([0.9982, 1.0012, 1.0041, 1.0099, 1.0157, 1.0215, 1.0272, 1.0330, 1.0387, 1.0444, 1.0502, 1.0560, 1.0675, 1.0792, 1.0910, 1.1029,
                                 1.1150, 1.1274, 1.1399, 1.1527, 1.1658, 1.1791]),
        },
        'osm-coeff': {
            # alternative?: Robinson, R. A.; Stokes, R. H. Electrolyte Solutions. Butterworth Scientific Publications: London. 1955.
            # M. E. Guendouzi, A. Dinane and A. Mounir, The Journal of Chemical Thermodynamics, 2001, 33, 1059–1072.
            'Guendouzi 2001': {
                'temperature': 298.15,
                'molality': np.array([0.2, 0.3, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0]),  # mol / kg
                'osm-coeff': np.array([0.933, 0.948, 0.969, 1.018, 1.066, 1.142, 1.207, 1.278, 1.364, 1.454, 1.532, 1.612, 1.700, 1.784]),
            }
        }
    },
    'water-nacl': {
        'crc-liquid-data': {
            'mass_percent': np.array([0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
                                      10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0]),
            'molality': np.array([0.0, 0.086, 0.173, 0.349, 0.529, 0.713, 0.901, 1.092, 1.288, 1.488,
                                  1.692, 1.901, 2.333, 2.785, 3.259, 3.756, 4.278, 4.826, 5.403, 6.012]),
            'concentration': np.array([0.0, 0.086, 0.172, 0.346, 0.523, 0.703, 0.885, 1.069, 1.256, 1.445,
                                       1.637, 1.832, 2.229, 2.637, 3.056, 3.486, 3.928, 4.382, 4.847, 5.326]),
            'density': np.array([0.9982, 1.0018, 1.0053, 1.0125, 1.0196, 1.0268, 1.0340, 1.0413, 1.0486, 1.0559,
                                 1.0633, 1.0707, 1.0857, 1.1008, 1.1162, 1.1319, 1.1478, 1.1640, 1.1804, 1.1972]),
        },
        'osm-coeff': {
            # alternative?: Robinson, R. A.; Stokes, R. H. Electrolyte Solutions. Butterworth Scientific Publications: London. 1955.
            # M. E. Guendouzi, A. Dinane and A. Mounir, The Journal of Chemical Thermodynamics, 2001, 33, 1059–1072.
            'Guendouzi 2001': {
                'temperature': 298.15,
                'molality': np.array([0.2, 0.3, 0.5, 1.0, 1.5, 2.0, 2.5,
                                      3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0]),  # mol / kg (?)
                'osm-coeff': np.array([0.919, 0.930, 0.923, 0.931, 0.949, 0.977, 1.010,
                                       1.037, 1.086, 1.111, 1.149, 1.183, 1.228, 1.257]),
            }
        }
    },
}

def vant_Hoff_osmotic_pressure(concentration, n_diss, temperature):
    """
    Ideal osmotic pressure n_diss * c * R * T, returned in bar.

    concentration: salt concentration; it is multiplied by 1e3 before use,
        so presumably given in mol/l — TODO confirm
    n_diss: factor multiplied onto the concentration, presumably the number
        of ions one salt unit dissociates into — TODO confirm
    temperature: presumably in K, as the molar gas constant is in J/(mol K)
    """
    pressure_pa = n_diss * concentration * 1e3 * const.R * temperature
    return pressure_pa / 1e5  # bar
    
def get_osmotic_coeff_lit(osmp_lit_dict, sys_type_name, lit_source):
    """
    Literature osmotic coefficients of one source as a function of molar concentration.

    osmp_lit_dict: literature data dict with the layout of the notebook's osmp_lit_dict
    sys_type_name: system type name, key of osmp_lit_dict
    lit_source: literature source, key of the 'osm-coeff' entry of the system type

    The molalities of the source are converted to concentrations by linear
    interpolation in the 'crc-liquid-data' of the system type.

    Returns (concentration, osmotic coefficient); the latter is a copy of the stored array.
    Raises Exception if the source has no 'molality' entry.
    """
    lit_data = osmp_lit_dict[sys_type_name]
    source_data = lit_data['osm-coeff'][lit_source]
    if 'molality' not in source_data:
        raise Exception("not implemented how to handle this literature data")
    crc_data = lit_data['crc-liquid-data']
    lit_molar_concentration = np.interp(source_data['molality'],
                                        crc_data['molality'],
                                        crc_data['concentration'])
    return lit_molar_concentration, deepcopy(source_data['osm-coeff'])

In [None]:
# visual check of the CaCl2 literature data: molality over concentration,
# with the diagonal (dotted) for comparison
plt.plot(osmp_lit_dict['water-cacl2_']['crc-liquid-data']['concentration'],
         osmp_lit_dict['water-cacl2_']['crc-liquid-data']['molality'],
        )
plt.plot([0, 5], [0, 5], color='k', linestyle=':')
plt.show()

# iterative coarse-graining

In [None]:
# base force fields and system types used for the iterative coarse-graining;
# the commented lines are narrower selections kept for manual reruns
force_fields_inv = {ffn: ff for ffn, ff in force_fields.items() if ffn in ['eccr1-co1.2', 'netz-co0.9']}
#force_fields_inv = {ffn: ff for ffn, ff in force_fields.items() if ffn in ['netz-co0.9']}
system_types_inv = {stn: st for stn, st in system_types.items() if st['ions'] is not None}  # not pure water
#system_types_inv = {stn: st for stn, st in system_types.items() if (st['ions'] is not None and stn.startswith('water-cacl2_'))}

In [None]:
# which (segment, iteration) provides the final potential, keyed by the inverse setup name
# eccr: convergence is smooth, so the last IMC iteration is taken
#       (segment 5 is the IMC segment with 10 iterations in inverse_setup_generator)
# netz: convergence is noisy, so the best iteration is taken (filled in later)
final_potential_from_segment_step = {
    # segments are zero-indexed, iterations one-indexed
    'water5000-cacl2_50/altern5-eccr1-co1.2-nopc': (5, 10),
    'water5000-kcl50/altern5-eccr1-co1.2-nopc': (5, 10),
    'water5000-licl50/altern5-eccr1-co1.2-nopc': (5, 10),
    'water5000-nacl50/altern5-eccr1-co1.2-nopc': (5, 10),
}

## inverse setup generator

In [None]:
def inverse_setup_generator(system_types, force_fields, verbose=False):
    """
    Generate the setups for the iterative coarse-graining (alternating IBI and IMC).

    system_types: dict of system types with ions (pure water is not supported,
        since system_type['ions'] is iterated)
    force_fields: dict of base force fields
    verbose: print system type and force field name while iterating

    For every combination the parametric reference system with n_salt_cg salt
    units is taken from system_generator, and its mean volume is read from
    npt-prod/ener.edr with `gmx energy`.

    Yields one dict per combination with the keys 'name', 'ff-name',
    'refsys-parametric', 'r-max-g', 'r-max-u', 'conf-from', '1bar-volume',
    'init', 'target', 'imc-regul', 'segments', 'interactions' and
    'pressure-setting'.

    Raises Exception if system_generator does not yield exactly one reference system.
    """
    # kind of constants, not changing those right now
    pressure_setting_name, pressure_setting = ('nopc',  {'name': 'nopc'})
    all_interactions = [
        {'name': 'OW-CA', 'plot-name': 'O - Ca', 'type1': 'OW', 'type2': 'CA'},
        {'name': 'OW-K',  'plot-name': 'O - K',  'type1': 'OW', 'type2': 'K'},
        {'name': 'OW-LI', 'plot-name': 'O - Li', 'type1': 'OW', 'type2': 'LI'},
        {'name': 'OW-NA', 'plot-name': 'O - Na', 'type1': 'OW', 'type2': 'NA'},
        {'name': 'OW-CL', 'plot-name': 'O - Cl', 'type1': 'OW', 'type2': 'CL'},
    ]
    # upper end of the distribution range per system type
    r_max_g_dict = {
        'water-cacl2_': 0.85,
        'water-kcl': 0.73,
        'water-licl': 0.726,
        'water-nacl': 0.726,
    }
    # number of salt units of the system that is coarse-grained
    n_salt_cg = 50

    for system_type_name, system_type in system_types.items():
        if verbose: print(system_type['name'])
        for ff_base_name, ff_base in force_fields.items():
            if verbose: print(ff_base['name'])

            # choose reference system
            candidates = [
                system
                for system in system_generator({system_type_name: system_type}, {ff_base_name: ff_base})
                if system['n-salt'] == n_salt_cg
            ]
            if len(candidates) != 1:
                raise Exception('should only yield one system')
            refsys_parametric = candidates[0]

            # one oxygen-ion interaction per ion species of the system type
            interactions = [f"OW-{ion[0]['name']}" for ion in system_type['ions']]

            # mean volume of the NpT production run of the reference system;
            # the xvg goes to a private temporary directory instead of a fixed
            # file in /tmp, so parallel runs or other users can not collide
            with WorkingDir(refsys_parametric['name']), tempfile.TemporaryDirectory() as tmp_dir:
                volume_xvg = os.path.join(tmp_dir, 'volume.xvg')
                run_bash(f"gmx energy -f npt-prod/ener.edr -o {volume_xvg} <<< 'volume'")
                data, _ = gt.xvg.load(volume_xvg)
                one_bar_volume = np.mean(data['Volume'])
                del data

            ff_name = f"iff-altern5-{ff_base['name']}-{pressure_setting_name}"
            inverse_setup_name = f"{refsys_parametric['name'].split('/')[0]}/altern5-{ff_base['name']}-{pressure_setting_name}"
            inverse_setup = {
                'name': inverse_setup_name,
                'ff-name': ff_name,
                'refsys-parametric': refsys_parametric,
                'r-max-g': r_max_g_dict[system_type['name']],
                'r-max-u': refsys_parametric['force-field']['cut-off'],
                'conf-from': f"{ff_base['name']}/npt-prod/confout.gro",
                '1bar-volume': one_bar_volume,
                'init': {'type': 'ref-ff'},
                'target': 'tgt1',
                'imc-regul': 0.3,
                # alternating IBI and IMC segments, seven in total
                'segments': [
                    {'method': 'ibi', 'iterations': 5, 'nsteps':  40000},
                    {'method': 'imc', 'iterations': 5, 'nsteps': 400000}
                ]*2 + [
                    {'method': 'ibi', 'iterations': 5, 'nsteps':  40000},
                    {'method': 'imc', 'iterations': 10, 'nsteps': 400000},
                    {'method': 'ibi', 'iterations': 10, 'nsteps':  40000}
                ],
                'interactions': [ia for ia in all_interactions if ia['name'] in interactions],
                'pressure-setting': pressure_setting,
            }
            yield inverse_setup

In [None]:
# overview of the inverse setups, one row per setup
# (the generator calls `gmx energy` once per setup to get the mean volume)
pd.DataFrame(inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False))

## prepare IMC/IBI all interactions simultaneously

- prepare targets in other notebook

In [None]:
# Prepare the directories segment0, segment1, ... in "<setup name>/both" for every inverse setup.
# A template directory is filled first, copied once per segment and removed at the end.
for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
    print(inverse_setting['name'])

    working_dir = os.path.join(inverse_setting['name'], 'both')
    with WorkingDir(working_dir):
        # make dirs and copy template files
        run_bash("mkdir -p segment-template")
        # topol.top
        refsysp = inverse_setting['refsys-parametric']
        pff = PARAMETRIC_FORCE_FIELDS[refsysp['force-field']['parametric-ff']]
        # NOTE(review): 'tabulated-potentials' is a generator expression and can be iterated only once
        ff = {'name': inverse_setting['ff-name'], 'tags': ['halftabulated'], 'cut-off': 1.0, 'parametric-ff': pff['name'],
              'tabulated-potentials': ((interaction['type1'], interaction['type2']) for interaction in inverse_setting['interactions'])}
        save_parametric_force_field_as_top('segment-template/topol.top', ff, "foo",
                                           refsysp['moltypes'], osm_restraints={})
        # copy conf.gro and scale it, such that the box has the mean volume of the reference NpT run
        run_bash(f"cp ../../{inverse_setting['conf-from']} segment-template/conf-unscaled.gro")
        scale = (inverse_setting['1bar-volume'] / np.prod(gt.gro.get_box("segment-template/conf-unscaled.gro")))**(1/3)
        run_bash(f"gmx editconf -f segment-template/conf-unscaled.gro -o segment-template/conf.gro -scale {scale} {scale} {scale}")
        # remove GROMACS backup files (#...#); raw string, since "\#" is an invalid escape
        # sequence in a normal string literal
        run_bash(r"rm -f segment-template/\#*")
        # generate index.ndx
        top = gt.top.Topology()
        top.load_simple_top(refsysp['moltypes'])
        gt.top.generate_index_file(top, 'segment-template/index.ndx')
        del top
        # copy target
        for interaction in inverse_setting['interactions']:
            run_bash(f"cp {template_dir}/tgt/{refsysp['type']['name']}/{interaction['name']}.dist.{inverse_setting['target']} "
                     + f"segment-template/{interaction['name']}.dist.tgt")
        # copy grompp.mdp
        run_bash(f"cp {template_dir}/mdp/prod.mdp segment-template/grompp.mdp")
        # adjust grompp.mdp
        gt.mdp.set_parameter("segment-template/grompp.mdp", 'ref-t', refsysp['temperature'])
        gt.mdp.set_parameter("segment-template/grompp.mdp", 'cutoff-scheme', 'Group')
        gt.mdp.set_parameter("segment-template/grompp.mdp", 'vdwtype', 'User')
        for key in ('rlist', 'rvdw', 'rcoulomb'):
            gt.mdp.set_parameter("segment-template/grompp.mdp", key, inverse_setting['r-max-u'])
        if 'tail-corr' in refsysp['tags']:
            pass  # does not make sense with half-tabulated systems. tabulated C6=C12=1 would go into the calculation.
            #gt.mdp.set_parameter(f"segment-template/grompp.mdp", 'DispCorr', 'EnerPress')
        # energy groups: all atom types that take part in a tabulated interaction, without duplicates
        energygrps = [interaction['type1'] for interaction in inverse_setting['interactions']]
        energygrps += [interaction['type2'] for interaction in inverse_setting['interactions']]
        energygrps = ' '.join(tuple(OrderedSet(energygrps)))
        gt.mdp.set_parameter("segment-template/grompp.mdp", 'energygrps', energygrps)
        # pairs of energy groups that use a table, e.g. "OW NA  OW CL"
        energygrp_table = '  '.join((' '.join(interaction['name'].split('-')) for interaction in inverse_setting['interactions']))
        gt.mdp.set_parameter("segment-template/grompp.mdp", 'energygrp-table', energygrp_table)
        # copy table.xvg
        run_bash(f"cp {template_dir}/table/table6-12.xvg segment-template/table.xvg")

        # per segment settings
        for s, segment in enumerate(inverse_setting['segments']):
            # copy segment template
            run_bash(f"cp -rT segment-template segment{s}")
            # adjust grompp.mdp
            gt.mdp.set_parameter(f"segment{s}/grompp.mdp", 'nsteps', segment['nsteps'])
            # copy settings.xml
            run_bash(f"cp {template_dir}/xml/{segment['method']}.xml segment{s}/settings.xml")
            # adjust settings.xml
            tree = ET.parse(f'segment{s}/settings.xml')
            root = tree.getroot()
            # make copies of non-bonded, such that there is one element per interaction
            nb = root.findall('non-bonded')[0]
            for i in range(1, len(inverse_setting['interactions'])):
                nb_new = deepcopy(nb)
                root.insert(i, nb_new)

            # fill the i-th non-bonded element with the settings of the i-th interaction
            for i, interaction in enumerate(inverse_setting['interactions']):
                nb = root.findall('non-bonded')[i]
                nb.find('name').text = interaction['name']
                nb.find('type1').text = interaction['type1']
                nb.find('type2').text = interaction['type2']
                nb.find('max').text = str(inverse_setting['r-max-g'])
                nb.find('inverse/target').text = interaction['name'] + '.dist.tgt'
                nb.find('inverse/gromacs/table').text = 'table_' + interaction['name'].replace('-', '_') + '.xvg'
                if inverse_setting['pressure-setting']['name'] == 'wjkpc':
                    # votca advanced pressure correction likely not made for multicomponent
                    # taking ρ = sqrt(ρ₁ * ρ₂)
                    N1 = gt.moltypes.count_atomname(refsysp['moltypes'], interaction['type1'])
                    N2 = gt.moltypes.count_atomname(refsysp['moltypes'], interaction['type2'])
                    nb.find('inverse/particle_dens').text = str(np.sqrt(N1 * N2
                                                                        / inverse_setting['1bar-volume']**2))
                    nb.find('inverse/post_update').text += ' pressure'
                    nb.find('inverse/p_target').text = str(inverse_setting['pressure-setting']['p-target'])
                    nb.find('inverse/post_update_options/pressure/do').text = str(inverse_setting['pressure-setting']['do'])
                    nb.find('inverse/post_update_options/pressure/wjk/scale').text = str(inverse_setting['pressure-setting']['wjk-scale'])
                    nb.find('inverse/post_update_options/pressure/wjk/max_A').text = str(inverse_setting['pressure-setting']['max-A'])
                elif inverse_setting['pressure-setting']['name'] == 'nopc':
                    # no pressure correction: remove the pressure related elements
                    nb.find('inverse').remove(nb.find('inverse/particle_dens'))
                    nb.find('inverse').remove(nb.find('inverse/p_target'))
                    nb.find('inverse/post_update_options').remove(nb.find('inverse/post_update_options/pressure'))
                else:
                    raise Exception('not implemented')
            # settings that are common to all interactions
            root.find('inverse/kBT').text = str(refsysp['temperature'] * oconst.k_gro)
            root.find('inverse/iterations_max').text = str(segment['iterations'])
            root.find('inverse/gromacs/grompp/opts').text = '-maxwarn 1'
            if segment['method'] == 'imc':
                root.find('inverse/imc/default_reg').text = str(inverse_setting['imc-regul'])
            tree.write(f'segment{s}/settings.xml')

        # generate pot.in, the initial potential of the first segment
        if inverse_setting['init']['type'] == 'bi':
            pass
        elif inverse_setting['init']['type'] == 'ref-ff':
            # start from the potential of the parametric force field, without Coulomb
            r = np.linspace(0, 3, num=1501)
            for interaction in inverse_setting['interactions']:
                at1 = next((at for at in pff['atomtypes'] if at['type'] == interaction['type1']))
                at2 = next((at for at in pff['atomtypes'] if at['type'] == interaction['type2']))
                pot, force = gen_potential_and_force(at1, at2, r, pff['combining-rule'], add_coulomb=False,
                                                     nonbond_params=pff['nonbond-params'])
                np.savetxt(f"segment0/{interaction['name']}.pot.in", np.stack((r, pot)).T)
        else:
            raise Exception('Not implemented!')

        # remove template
        run_bash("rm -rf segment-template")

## run on cluster

### run inverse simultaneously

In [None]:
def run_inv():
    """
    Push the prepared segments of every inverse setup to the cluster and submit one job per setup.

    The job script runs csg_inverse in all segments one after another. From
    the second segment on, the last potentials (*.pot.new) of the previous
    segment are copied in as initial potentials (*.pot.in).
    The ids of the submitted jobs are appended to the notebook-wide list jobids.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'mammut-c', votca=True)
    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        print(inverse_setting['name'])
        working_dir = os.path.join(inverse_setting['name'], 'both')
        remote_dir = os.path.join(remote_dir_base, working_dir)
        with WorkingDir(working_dir):
            # mkdir
            run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

            # push files
            filelist = ["segment*/conf.gro", "segment*/grompp.mdp", "segment*/index.ndx", "segment*/settings.xml",
                        "segment*/*.dist.tgt", "segment*/table*", "segment*/topol.top"]
            # initial potentials only exist if the setup does not start from Boltzmann inversion
            if inverse_setting['init']['type'] not in ['bi']:
                filelist += ["segment0/*.pot.in"]
            gt.remote.push_files(filelist, remote_host, remote_dir)

            # build the job script
            segments = inverse_setting['segments']
            script = remote_header
            for s in range(len(segments)):
                if s != 0:
                    # continue from the last iteration of the previous segment
                    last_segment = segments[s-1]
                    for interaction in inverse_setting['interactions']:
                        script += (f"\ncp segment{s-1}/step_{last_segment['iterations']:03}/{interaction['name']}.pot.new "
                                   f"segment{s}/{interaction['name']}.pot.in")

                script += f"""
pushd segment{s}
    ln -sf $JOBTMP jobtmp
    csg_inverse --options settings.xml
    unlink jobtmp
popd
"""
            script += remote_footer

            jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
            print(jobid)
            if jobid is not None:
                jobids.append(jobid)
run_inv()

### check job status

In [None]:
# check the status of the submitted jobs and replace the list of job ids by the returned one
jobids = check_job_stati(jobids, remote_host)

### copy results from cluster

In [None]:
def copy_from_cluster():
    """Pull the per-iteration result files of the inverse runs from the cluster.

    Distributions, potentials, potential updates, energy files and the target
    distribution of the first step are fetched into each setting's local
    ``both`` directory. Only settings whose name starts with the prefix below
    are downloaded at the moment; all others are skipped.
    """
    remote_dir_base, _, _ = gen_remote_stuff(remote_host, 'enzo', votca=True)

    # TEMPORARY: only settings with this name prefix are downloaded
    wanted_prefix = 'water5000-cacl2_50/altern5-netz'
    result_patterns = (
        "segment*/step_*/*.dist*.new", "segment*/step_*/*.pot.new", "segment*/step_*/*.dpot.new",
        "segment*/step_*/*.pot.cur",
        "segment*/step_*/ener.edr", "segment0/step_001/*.dist.tgt",
    )

    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        setting_name = inverse_setting['name']
        print(setting_name)

        if not setting_name.startswith(wanted_prefix):
            print('.. continue ..')
            continue

        local_dir = os.path.join(setting_name, 'both')
        remote_dir = os.path.join(remote_dir_base, local_dir)
        with WorkingDir(local_dir):
            # hand over a fresh list on every call
            gt.remote.pull_files(list(result_patterns), remote_host, remote_dir)
copy_from_cluster()

## evaluate coarse-graining

### determine best iteration for Netz-derived model

In [None]:
def determine_best_iteration():
    """Select, per inverse setting, the iteration whose RDFs are closest to the targets.

    For every ``(segment, step)`` the deviation
    ``sqrt(1/max(r) * integral (g - g_tgt)**2 dr)`` is computed for each
    interaction and averaged over the interactions of the setting. The
    ``(segment, step)`` with the smallest average is written to the
    notebook-wide ``final_potential_from_segment_step`` unless the setting
    already has an entry there. Settings without data are skipped.
    """
    # raw strings: '\d' in a normal string literal is an invalid escape sequence
    segment_re = re.compile(r'segment(\d+)')
    step_re = re.compile(r'step_(\d+)')

    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        name = inverse_setting['name']
        print(name)
        working_dir = os.path.join(name, 'both')
        interactions = inverse_setting['interactions']

        # (segment, step) -> deviation averaged over all interactions
        deltas = collections.defaultdict(float)

        with WorkingDir(working_dir):
            if len(glob.glob('segment*/step_*/')) <= 1:
                print('.. no data ..')
                continue

            for interaction in interactions:
                ia_name = interaction['name']
                print('  ' + ia_name)

                dist_files = sorted(glob.glob(f"segment*/step_*/{ia_name}.dist.new"))
                # load target g(r)
                _, dist_tgt_g, _ = readin_table(f"segment0/step_001/{ia_name}.dist.tgt")
                for dist_file in dist_files:
                    segment = int(segment_re.search(dist_file).group(1))
                    step = int(step_re.search(dist_file).group(1))
                    dist_r, dist_g, _ = readin_table(dist_file)
                    delta = np.sqrt(1/max(dist_r) * np.trapz(x=dist_r, y=(dist_g - dist_tgt_g)**2))
                    deltas[(segment, step)] += delta / len(interactions)

        # step directories may exist without any distribution file in them;
        # min() of an empty dict would raise
        if not deltas:
            print('.. no data ..')
            continue

        best_it = min(deltas, key=deltas.get)
        # an entry that is already present is kept
        if name not in final_potential_from_segment_step:
            final_potential_from_segment_step[name] = best_it
determine_best_iteration()

In [None]:
# display the mapping from setting name to the (segment, step) used as final potential
final_potential_from_segment_step

### equilibration check

In [None]:
def equi_check():
    """Plot temperature and pressure of every iteration to check equilibration.

    For each inverse setting, temperature and pressure are extracted with
    ``gmx energy`` from every non-empty ``segment*/step_*/ener.edr``. The time
    series are plotted per iteration. The third panel shows the mean pressure
    for ``Time (ps) > 10`` against the iteration, with the standard deviation
    of block means as error bar. The iteration stored in
    ``final_potential_from_segment_step`` is marked if the setting has one.
    """
    cmap = plt.get_cmap('rainbow')
    # raw strings: '\d' in a normal string literal is an invalid escape sequence
    segment_re = re.compile(r'segment(\d+)')
    step_re = re.compile(r'step_(\d+)')

    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        print(inverse_setting['name'])
        working_dir = os.path.join(inverse_setting['name'], 'both')
        with WorkingDir(working_dir):

            # show stuff
            ener_files = sorted(glob.glob('segment*/step_*/ener.edr'))
            if not ener_files:
                print('.. nothing here ..')
                continue
            segment_step_tuples = []
            pressures = []
            pressures_std = []

            # energy files to actually plot
            ener_files_to_show = ener_files

            fig, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3, figsize=[18, 2])
            # load data and plot
            e_max = len(ener_files)
            for e, ener_file in enumerate(ener_files):
                # skip empty ener.edr
                if os.stat(ener_file).st_size == 0:
                    continue
                # get segment and step
                segment = int(segment_re.search(ener_file).group(1))
                step = int(step_re.search(ener_file).group(1))
                segment_step_tuples.append((segment, step))
                # get energy data
                tmp_fd, tmp_path = tempfile.mkstemp(suffix='.xvg')
                # mkstemp returns an open descriptor; only the path is needed,
                # so close it right away instead of leaking one per energy file
                os.close(tmp_fd)
                try:
                    run_bash(f"gmx energy -f {ener_file} -o {tmp_path} <<< 'Temperature\nPressure'")
                    data, _ = gt.xvg.load(tmp_path)
                finally:
                    os.remove(tmp_path)
                data_noequi = data[data['Time (ps)'] > 10]
                pressures.append(data_noequi['Pressure'].mean())
                # block average: rows are grouped by index // 100 and the
                # standard deviation of the block means is used as error
                pressures_std.append(data_noequi.groupby(data_noequi.index.array // 100)['Pressure'].mean().std())

                if ener_file in ener_files_to_show:
                    label = f"{segment} {step}"
                    ax0.plot(data['Time (ps)'], np.array(data['Temperature']), label=label, color=cmap(e/e_max))
                    ax1.plot(data['Time (ps)'], np.array(data['Pressure']), label=label, color=cmap(e/e_max))

            # plot average pressure
            ax2.errorbar([f'{seg} {step}' for seg, step in segment_step_tuples],
                         pressures, pressures_std, label='pressure cur')
            # final potential marker
            if inverse_setting['name'] in final_potential_from_segment_step:
                ax2.axvline(f"{final_potential_from_segment_step[inverse_setting['name']][0]} "
                            f"{final_potential_from_segment_step[inverse_setting['name']][1]}", color='green', linestyle='--')

            ax0.set_title("Temperature")
            ax1.set_title("Pressure")
            ax2.set_title("Pressure vs. step")
            ax2.set_xlabel('segment step')
            ax2.set_ylabel('pressure in bar')
            ax2.xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))
            #ax0.legend(loc='center right')
            ax2.legend()
            fig.tight_layout()
            #fig.savefig(os.path.join(working_dir_base, "figures", f"p-convergence_{inverse_setting['name']}_{system['name'].replace('/', '-')}.png"), dpi=150)
            plt.show()
equi_check()

### plot dist, dU, U 

In [None]:
def show_dist_dU_U():
    """Plot g(r), ΔU(r) and U(r) of all iterations for every interaction.

    One figure with three panels is shown per interaction of each inverse
    setting: the distributions of all iterations together with the target,
    the potential updates, and the potentials. An inset in the first panel
    shows the deviation from the target per iteration; an inset in the third
    panel zooms into the potential right after the core region.
    Settings with at most one step directory are skipped.
    """
    cmap = plt.get_cmap('rainbow')

    # NOTE(review): these rc settings are defined but never applied in this
    # function -- confirm whether they were meant to go into plt.rc_context
    mpl_rc = {
        'figure.dpi': 100,
        'legend.title_fontsize': 8,
        'legend.fontsize': 8,
        'legend.handlelength': 1,
    }

    plot_delta_g = False

    # raw strings: '\d' in a normal string literal is an invalid escape sequence
    segment_re = re.compile(r'segment(\d+)')
    step_re = re.compile(r'step_(\d+)')

    def segment_and_step(path):
        """Return the (segment, step) numbers encoded in a result file path."""
        return int(segment_re.search(path).group(1)), int(step_re.search(path).group(1))

    def legend_label(segments, segment, step):
        """Return the legend text of an iteration, or None if it gets no entry."""
        if segment == 0 and step == 0:
            return 'init. guess'
        if step == 5:
            return f"{segment} " + segments[segment]['method'].upper()
        return None

    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        print(inverse_setting['name'])
        working_dir = os.path.join(inverse_setting['name'], 'both')
        segments = inverse_setting['segments']

        with WorkingDir(working_dir):
            if len(glob.glob('segment*/step_*/')) <= 1:
                print('.. nothing here ..')
                continue

            for interaction in inverse_setting['interactions']:
                print('  ' + interaction['name'])
                fig, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3, figsize=[18, 5])
                ia_name = interaction['name']

                # plot g(r) or Δg(r)
                dist_files = sorted(glob.glob(f"segment*/step_*/{ia_name}.dist.new"))
                #dist_files = dist_files[:10]
                axins0 = inset_axes(ax0, width="50%", height="50%", loc='upper right')
                # load target g(r)
                _, dist_tgt_g, _ = readin_table(f"segment0/step_001/{ia_name}.dist.tgt")
                g_std = []
                ax0_legend_lines = []
                ax0.set_title(r'$Δg(r)$' if plot_delta_g else r'$g(r)$')
                for i, dist_file in enumerate(dist_files):
                    segment, step = segment_and_step(dist_file)
                    label = legend_label(segments, segment, step)
                    dist_r, dist_g, _ = readin_table(dist_file)
                    if plot_delta_g:
                        line = ax0.plot(dist_r, dist_g - dist_tgt_g, label=label, color=cmap(i/len(dist_files)))
                    else:
                        line = ax0.plot(dist_r, dist_g, label=label, color=cmap(i/len(dist_files)))
                    if label is not None:
                        ax0_legend_lines.append(line[0])

                    # deviation from the target for the inset
                    x = f"{segment} {step}"
                    y = np.sqrt(1/max(dist_r) * np.trapz(x=dist_r, y=(dist_g - dist_tgt_g)**2))
                    method_label = segments[segment]['method'].upper()
                    g_std.append((x, y, method_label))

                # plot target g(r)
                if plot_delta_g:
                    line = ax0.plot(dist_r, np.zeros_like(dist_r), label="tgt", color='k', linestyle='--')
                    ax0.set_ylim(-0.04, 0.02)
                else:
                    line = ax0.plot(dist_r, dist_tgt_g, label="tgt", color='k', linestyle='--')
                ax0_legend_lines.append(line[0])

                # now that we have g, define core_end
                # (first sample with non-zero g in the last distribution read)
                ndx_ce = np.where(dist_g > 1e-10)[0][0]
                core_end = dist_r[ndx_ce]

                axins0.plot(*list(zip(*g_std))[0:2], '.-')
                axins0.set_xticks(range(len(g_std)))
                axins0.set_xticklabels(*list(zip(*g_std))[2:3])
                #axins0.set_xticklabels(axins0.get_xticklabels(), rotation=90, ha='right')
                axins0.tick_params(labelrotation=90)
                # only every fifth tick
                for l, ticklabel in enumerate(axins0.xaxis.get_ticklabels()):
                    if l % 5 != 0:
                        ticklabel.set_visible(False)
                # final potential marker (only if one was determined for this
                # setting, as in equi_check)
                if inverse_setting['name'] in final_potential_from_segment_step:
                    axins0.axvline(f"{final_potential_from_segment_step[inverse_setting['name']][0]} "
                                   f"{final_potential_from_segment_step[inverse_setting['name']][1]}", color='green', linestyle='--')

                axins0.set_ylim(0, 0.3)
                #axins0.set_xlabel('seg. st.')
                axins0.set_ylabel('RMSD')
                #axins0.grid()
                #axins0.remove()

                # plot dU(r)
                ax1_legend_lines = []
                dpot_files = sorted(glob.glob(f'segment*/step_*/{ia_name}.dpot.new'))
                #dpot_files = dpot_files[:10]
                ax1.set_title("ΔU(r)")
                ax1.set_ylim((-10, 10))
                for i, dpot_file in enumerate(dpot_files):
                    segment, step = segment_and_step(dpot_file)
                    label = legend_label(segments, segment, step)
                    dpot_r, dpot_dU, _ = readin_table(dpot_file)
                    line = ax1.plot(dpot_r, dpot_dU, '-', label=label, color=cmap(i/len(dpot_files)))
                    if label is not None:
                        ax1_legend_lines.append(line[0])

                # plot U(r)
                pot_files = sorted(glob.glob(f'segment*/step_*/{ia_name}.pot.new'))
                ax2_legend_lines = []
                #pot_files = pot_files[:10]
                axins2 = inset_axes(ax2, width="50%", height="70%", loc='upper right')
                ax2.set_ylim((-8, 8))
                ax2.set_title("U(r)")
                for i, pot_file in enumerate(pot_files):
                    segment, step = segment_and_step(pot_file)
                    label = legend_label(segments, segment, step)
                    pot_r, pot_U, _ = readin_table(pot_file)
                    line = ax2.plot(pot_r, pot_U, label=label, color=cmap(i/len(pot_files)))
                    axins2.plot(pot_r, pot_U, '-', label=label, color=cmap(i/len(pot_files)))
                    if label is not None:
                        ax2_legend_lines.append(line[0])

                # zoom on the 0.1 nm after the core; the y range is taken from
                # the last potential read
                # NOTE(review): ndx_ce comes from the g(r) grid -- this assumes
                # the potential is tabulated on the same grid; confirm
                axins2.set_xlim(core_end, core_end+0.1)
                axins2.set_ylim(pot_U[ndx_ce+20], pot_U[ndx_ce])
                #axins2.set_ylim(-3, 3)
                #axins2.remove()
                axins2.patch.set_alpha(0.5)

                ax0.legend(ax0_legend_lines, [li.get_label() for li in ax0_legend_lines],
                           loc='upper left', title='seg. met.')
                ax1.legend(ax1_legend_lines, [li.get_label() for li in ax1_legend_lines],
                           loc='upper left', title='seg. met.')
                ax2.legend(ax2_legend_lines, [li.get_label() for li in ax2_legend_lines],
                           loc='upper left', title='seg. met.')
                for ax in (ax0, ax1, ax2):
                    ax.set_xlim((0.10, inverse_setting['r-max-g'] + 0.1))
                    ax.set_xlabel("r in nm")
                    #ax.grid()
                for ax in (ax1, ax2):
                    ax.set_ylabel("U in kJ mol¯¹")

                #fig.tight_layout()
                #fig.savefig(os.path.join(working_dir_base, "../figures", f"{inverse_setting['name'].replace('/', '-')}-{ia_name}.png"), dpi=300)
                plt.show()

show_dist_dU_U()

### convergence plot for paper

In [None]:
def convergence_plot():
    """Create the convergence figure: deviation from the target RDFs per iteration.

    The inverse settings are distributed alternately over two panels. For
    each setting the deviation ``sqrt(1/max(r) * integral (g - g_tgt)**2 dr)``
    is averaged over its interactions and plotted against the iteration; the
    iteration stored in ``final_potential_from_segment_step`` is marked with a
    dashed line. The figure is saved as ``../figures/convergence.pdf``
    relative to ``working_dir_base``.
    """
    mpl_rc = {
        'figure.dpi': 120,
        'legend.title_fontsize': 8,
        'legend.fontsize': 8,
        'legend.handlelength': 2,
    }

    # raw strings: '\d' in a normal string literal is an invalid escape sequence
    segment_re = re.compile(r'segment(\d+)')
    step_re = re.compile(r'step_(\d+)')

    with plt.rc_context(rc={**mpl_rc_global, **mpl_rc}):
        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=[4.6, 2.0], tight_layout=True, dpi=200)  # sharey='row'
        for i, inverse_setting in enumerate(inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False)):
            name = inverse_setting['name']
            interactions = inverse_setting['interactions']
            ax = axes[i%2]
            # the first two settings provide the panel titles
            if i < 2:
                ax.set_title(ff_short_names[inverse_setting['ff-name']])
            print(name)
            working_dir = os.path.join(name, 'both')

            # (segment, step) -> deviation averaged over all interactions
            Delta = collections.defaultdict(float)

            with WorkingDir(working_dir):
                if len(glob.glob('segment*/step_*/')) <= 1:
                    print('.. nothing here ..')
                    continue

                for interaction in interactions:
                    ia_name = interaction['name']

                    dist_files = sorted(glob.glob(f"segment*/step_*/{ia_name}.dist.new"))
                    # load target g(r)
                    _, dist_tgt_g, _ = readin_table(f"segment0/step_001/{ia_name}.dist.tgt")
                    # no index variable here, so the outer ``i`` is not shadowed
                    for dist_file in dist_files:
                        segment = int(segment_re.search(dist_file).group(1))
                        step = int(step_re.search(dist_file).group(1))
                        dist_r, dist_g, _ = readin_table(dist_file)
                        Delta_ = np.sqrt(1/max(dist_r) * np.trapz(x=dist_r, y=(dist_g - dist_tgt_g)**2))
                        Delta[(segment, step)] += Delta_ / len(interactions)

            x = ["{} {}".format(*key) for key in Delta]
            # hand matplotlib a real sequence instead of a dict view
            y = list(Delta.values())
            color = nb_plot_color[interactions[0]['name']]
            line, = ax.plot(x, y, label=sys_type_short_names[inverse_setting['refsys-parametric']['type']['name']], linewidth=1.0, color=color)
            # label only the first step of each segment; even segments are
            # labelled IBI, odd ones IMC
            xticklabels = [("IBI", "IMC")[key[0] % 2] if key[1] == 1 else "" for key in Delta]
            ax.set_xticks(x)
            ax.set_xticklabels(xticklabels)
            # final potential marker (only if one was determined for this
            # setting, as in equi_check)
            if name in final_potential_from_segment_step:
                ax.axvline(f"{final_potential_from_segment_step[name][0]} "
                           f"{final_potential_from_segment_step[name][1]}", color=line.get_color(), linestyle='--')

        for ax in axes:
            ax.tick_params(labelrotation=90)
            ax.set_yticks([0, 0.05, 0.1])
            ax.set_ylim(0, 0.10)
            ax.set_yticklabels(["0", "0.05", "0.10"])
        axes[0].set_ylabel(r'$\Delta^\mathrm{cat.-O} + \Delta^\mathrm{an.-O}$')
        axes[0].legend(frameon=False)

        fig.savefig(os.path.join(working_dir_base, "../figures", "convergence.pdf"))
        plt.show()

convergence_plot()

In [None]:
!cp -a ../figures/convergence.pdf ~/research/output/ion-shortrange-paper/figures/

### copy resulting OW-Cation to template/table

In [None]:
def copy_tables():
    """Export the final potential of each setting's first interaction as a GROMACS table.

    Per setting, the ``.pot.cur`` of the selected (segment, step) is copied to
    ``best-<interaction>.pot.cur``, converted with ``csg_call ...
    convert_potential gromacs`` and the resulting table is copied to
    ``template/table/<ff-name>/`` below ``working_dir_base``. Copies go
    through ``skip_or_overwrite`` with ``overwrite`` set to False.
    """
    overwrite = False

    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        setting_name = inverse_setting['name']
        print(setting_name)
        setting_dir = os.path.join(setting_name, 'both')

        # only the first interaction is handled here
        for interaction in inverse_setting['interactions'][0:1]:
            with WorkingDir(setting_dir):
                ia_name = interaction['name']
                print('  ' + ia_name)

                # copy best pot to inverse method folder
                final = final_potential_from_segment_step[setting_name]
                source_pot = f"segment{final[0]}/step_{final[1]:03d}/{ia_name}.pot.cur"
                local_pot = f"best-{ia_name}.pot.cur"
                skip_or_overwrite(source_pot, local_pot, overwrite)

                # make table
                table_name = "table_" + ia_name.replace('-', '_') + '.xvg'
                local_table = f"best-{table_name}"
                run_bash(f"csg_call --options segment0/settings.xml --ia-type non-bonded --ia-name {ia_name} "
                         f"convert_potential gromacs --clean {local_pot} {local_table}")

                # copy to template/table
                ff_name = inverse_setting['ff-name']
                template_dir = f"{working_dir_base}/template/table/{ff_name}"
                run_bash(f"mkdir -p {template_dir}")
                skip_or_overwrite(local_table, f"{template_dir}/{table_name}", overwrite)
copy_tables()

### average oxygen-chloride potentials and save them

In [None]:
def average_ox_an():
    """Average the final potential of the second interaction over all settings of a force field.

    The settings are grouped by ``'ff-name'``. Within a group, the selected
    ``.pot.cur`` of each setting's second interaction (oxygen-chloride) is
    interpolated onto the grid with the smallest maximum distance and the
    potentials are averaged. The average is written to
    ``template/table/<ff-name>/<interaction>.pot.avg`` and converted to a
    GROMACS table in the same directory. Copies go through
    ``skip_or_overwrite`` with ``overwrite`` set to False.
    """
    overwrite = False

    settings_by_ff = sorted(inverse_setup_generator(system_types_inv, force_fields_inv),
                            key=operator.itemgetter('ff-name'))
    for ff_name, ff_inverse_settings in itertools.groupby(settings_by_ff, key=operator.itemgetter('ff-name')):
        print(f"  {ff_name}")
        U_dict = {}
        r_dict = {}
        ff_inverse_settings = tuple(ff_inverse_settings)  # need to iterate this twice
        # shortest r will be common r later
        r_shortest = [0, np.inf]
        inverse_setting_shortest = None
        for inverse_setting in ff_inverse_settings:
            name = inverse_setting['name']
            print('  ' + name)
            interaction = inverse_setting['interactions'][1]  # only oxygen-chloride
            # the name from the last setting is used for the output files
            # NOTE(review): assumes it is the same for all settings of a group
            ia_name = interaction['name']
            working_dir = os.path.join(name, 'both')
            # load potential
            best_pot = (f"segment{final_potential_from_segment_step[name][0]}"
                        f"/step_{final_potential_from_segment_step[name][1]:03d}/{ia_name}.pot.cur")
            with WorkingDir(working_dir):
                # read as strings so that non-numeric columns do not break the parser
                U_data = np.loadtxt(best_pot, dtype=str, comments=['#', '@'])
            r = U_data[:, 0].astype(float)
            U = U_data[:, 1].astype(float)
            U_dict[name] = U
            r_dict[name] = r
            if max(r) < max(r_shortest):
                r_shortest = r
                inverse_setting_shortest = inverse_setting
        # give values for Us on r_shortest (effectively cutting, but interp is convenient)
        for inverse_setting in ff_inverse_settings:
            name = inverse_setting['name']
            U_dict[name] = np.interp(r_shortest, r_dict[name], U_dict[name])
        # average potential
        U_mean = np.mean(tuple(U_dict.values()), axis=0)

        # make sure the target directory exists before anything is written to it
        run_bash(f"mkdir -p template/table/{ff_name}")

        # save potential
        pot1 = f"/tmp/{ff_name}-{ia_name}.xvg"
        np.savetxt(pot1, np.stack((r_shortest, U_mean)).T, header="averaged OW-CL interaction")
        pot2 = f"template/table/{ff_name}/{ia_name}.pot.avg"
        skip_or_overwrite(pot1, pot2, overwrite)
        # make table, using the settings of the setting with the shortest grid
        table_name = "table_" + ia_name.replace('-', '_') + '.xvg'
        table1 = f"/tmp/{table_name}"
        run_bash(f"csg_call --options {inverse_setting_shortest['name']}/both/segment0/settings.xml --ia-type non-bonded --ia-name {ia_name} "
                 f"convert_potential gromacs --clean {pot2} {table1}")
        # copy to template/table
        table2 = f"template/table/{ff_name}/{table_name}"
        skip_or_overwrite(table1, table2, overwrite)

average_ox_an()

### sanity check: compare potentials

In [None]:
def compare_potentials():
    """Sanity check: overlay the potential of the selected iteration with the tables in use.

    For the first two interactions of every setting one figure shows the
    ``.pot.cur`` of the selected (segment, step), the table in
    ``template/table/<ff-name>/`` and the tables found in the ``topol``
    folders of the ``water5000-*`` directories. A vertical line marks the
    end of the grid of each curve.
    """
    # NOTE(review): these rc settings are defined but never applied in this
    # function -- confirm whether they were meant to go into plt.rc_context
    mpl_rc = {
        'figure.dpi': 100,
        'legend.title_fontsize': 8,
        'legend.fontsize': 8,
        'legend.handlelength': 1,
    }
    for interaction_index in range(2):
        for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
            print(inverse_setting['name'])
            interaction = inverse_setting['interactions'][interaction_index]
            ia_name = interaction['name']

            # load potentials from iteration
            working_dir = os.path.join(inverse_setting['name'], 'both')
            with WorkingDir(working_dir):
                inv_file = (f"segment{final_potential_from_segment_step[inverse_setting['name']][0]}"
                            f"/step_{final_potential_from_segment_step[inverse_setting['name']][1]:03d}/{ia_name}.pot.cur")
                inv_r, inv_U, _ = readin_table(inv_file)
            inv_r_max = max(inv_r)

            # load potentials from template/table
            template_table_file = f"template/table/{inverse_setting['ff-name']}/table_{ia_name.replace('-', '_')}.xvg"
            template_r, _, _, _, _, template_U, _ = np.loadtxt(template_table_file).transpose()
            # np.nonzero yields an index; axvline needs the r value at that
            # index because the x axis of the plot is r
            # NOTE(review): confirm that the last non-zero r is the intended marker
            template_r_max = template_r[np.nonzero(template_r)[0][-1]]

            # load potentials from md folders
            md_table_files = glob.glob(f"water5000-*/{inverse_setting['ff-name']}/topol/"
                                       f"table_{ia_name.replace('-', '_')}.xvg")
            md_tables = {}
            for md_table_file in md_table_files:
                r, _, _, _, _, U, _ = np.loadtxt(md_table_file).transpose()
                md_r_max = r[np.nonzero(r)[0][-1]]
                md_tables[md_table_file] = {'r': r, 'U': U, 'r-max': md_r_max}

            # plot
            fig, ax = plt.subplots(figsize=(17, 1.5), constrained_layout=True)
            # inv
            line, = ax.plot(inv_r, inv_U, label=inv_file)
            ax.axvline(inv_r_max, color=line.get_color())
            # template
            line, = ax.plot(template_r, template_U, linestyle='--', label=template_table_file)
            ax.axvline(template_r_max, color=line.get_color())
            # md: only the first table gets a legend entry, prefixed with the
            # number of tables found
            for i, (filename, md_table) in enumerate(md_tables.items()):
                label = f"{len(md_tables)} x" + filename if i == 0 else None
                line, = ax.plot(md_table['r'], md_table['U'], linestyle=':', color='darkgreen', label=label)
                ax.axvline(md_table['r-max'], color=line.get_color())

            ax.legend(frameon=False, loc='upper right')
            ax.set_title(ia_name)
            #ax.set_xlim(0, inverse_setting['r-max-u'])
            ax.set_xlim(0, 3.1)
            ax.set_ylim(-6, 6)
            plt.show()

compare_potentials()

## fitting

### fit functions

In [None]:
def gen_lj_12_6_potential(r, a, b):
    """Return the 12-6 Lennard-Jones potential ``a / r**12 - b / r**6`` on the grid *r*.

    *r* is a numpy array of distances, *a* and *b* are the repulsive and
    attractive coefficients. A first sample on the singularity (``r[0] == 0``)
    or with a non-finite value is replaced by the second sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a / r**12 - b / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot
    
def gen_buckingham_potential(r, a, b, c):
    """Return the Buckingham potential ``a * exp(-b * r) - c / r**6`` on the grid *r*.

    *r* is a numpy array of distances. A first sample on the singularity
    (``r[0] == 0``) or with a non-finite value is replaced by the second
    sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a * np.exp(-b * r) - c / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_buckingham_mod_potential(r, a, b, c):
    """Return a Buckingham potential evaluated at the shifted distance ``r - 0.1``.

    All samples in front of the first one with a positive shifted distance
    are set to the value of that sample.
    """
    shift = -0.1
    r_shifted = r + shift
    with np.errstate(divide='ignore', invalid='ignore'):
        repulsion = a * np.exp(-b * r_shifted)
        attraction = c / r_shifted**6
        pot = repulsion - attraction
    # index of the first sample right of the shifted origin
    first_valid = np.nonzero(r_shifted > 0.0)[0][0]
    pot[:first_valid] = pot[first_valid]
    return pot

def gen_lj_9_6_potential(r, a, b):
    """Return the 9-6 potential ``a / r**9 - b / r**6`` on the grid *r*.

    *r* is a numpy array of distances. A first sample on the singularity
    (``r[0] == 0``) or with a non-finite value is replaced by the second
    sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a / r**9 - b / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_lj_8_6_potential(r, a, b):
    """Return the 8-6 potential ``a / r**8 - b / r**6`` on the grid *r*.

    *r* is a numpy array of distances. A first sample on the singularity
    (``r[0] == 0``) or with a non-finite value is replaced by the second
    sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a / r**8 - b / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_lj_7_6_potential(r, a, b):
    """Return the 7-6 potential ``a / r**7 - b / r**6`` on the grid *r*.

    *r* is a numpy array of distances. A first sample on the singularity
    (``r[0] == 0``) or with a non-finite value is replaced by the second
    sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a / r**7 - b / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_lj_12_9_6_potential(r, a, b, c):
    """Return the potential ``a / r**12 + b / r**9 - c / r**6`` on the grid *r*.

    *r* is a numpy array of distances. A first sample on the singularity
    (``r[0] == 0``) or with a non-finite value is replaced by the second
    sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a / r**12 + b / r**9 - c / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_mie_potential(r, sigma, epsilon, n, m):
    """Return the Mie potential with exponents *n* and *m* on the grid *r*.

    The value is ``(n/(n-m)) * (n/m)**(m/(n-m)) * epsilon *
    ((sigma/r)**n - (sigma/r)**m)``. *r* is a numpy array of distances. A
    first sample on the singularity (``r[0] == 0``) or with a non-finite
    value is replaced by the second sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = (n / (n - m)) * (n / m)**(m/(n-m)) * epsilon * ((sigma/r)**n - (sigma/r)**m)
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_att_6_potential(r, b):
    """Return the purely attractive potential ``-b / r**6`` on the grid *r*.

    *r* is a numpy array of distances. A first sample on the singularity
    (``r[0] == 0``) or with a non-finite value is replaced by the second
    sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = - b / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_rep_power_potential(r, a, e):
    """Return the power-law potential ``a * r**e`` on the grid *r*.

    *r* is a numpy array of distances. A first sample at ``r[0] == 0`` or
    with a non-finite value is replaced by the second sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a * r**e
    # patch only a first sample at the origin: when the function is evaluated
    # on a grid that starts at r > 0 (e.g. a slice used for fitting), the
    # first value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

def gen_lj_n_6_potential(r, a, n, b):
    """Return the n-6 potential ``a / r**n - b / r**6`` on the grid *r*.

    *r* is a numpy array of distances and *n* the repulsive exponent. A first
    sample on the singularity (``r[0] == 0``) or with a non-finite value is
    replaced by the second sample.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        pot = a / r**n - b / r**6
    # patch only a singular first sample: when the function is evaluated on a
    # grid that starts at r > 0 (e.g. a slice used for fitting), the first
    # value is regular and must not be overwritten
    if len(pot) > 1 and (r[0] == 0 or not np.isfinite(pot[0])):
        pot[0] = pot[1]
    return pot

# lower bounds shared by the two-parameter entries below; used as the first
# element of their (lower, upper) 'bounds' pair
bounds_lj = (0, 1e-5)  # A, B
# Registry of the model functions that are fitted to the potentials.
# Each entry holds a display name ('name'), the model function ('func'), the
# initial parameter guess ('p0') and the (lower, upper) parameter bounds
# ('bounds'); 'func', 'p0' and 'bounds' are handed to optimize.curve_fit.
# The fits are run in the order of this dict.
fit_functions = {
    'Buckingham': {'name': 'Buckingham', 'func': gen_buckingham_potential,
                   'p0': (1e8, 5e1, 1e-3), 'bounds': ((0, 0, 0), (np.inf, np.inf, np.inf))},
    'LJ-12-6': {'name': '12-6 LJ', 'func': gen_lj_12_6_potential,
                'p0': (2e-7, 2e-4), 'bounds': (bounds_lj, (np.inf, np.inf))},
    'LJ-9-6': {'name': '9-6 Mie', 'func': gen_lj_9_6_potential,
               'p0': (2e-4, 2e-2), 'bounds': (bounds_lj, (np.inf, np.inf))},
    'LJ-8-6': {'name': '8-6 Mie', 'func': gen_lj_8_6_potential,
               'p0': (2e-4, 2e-2), 'bounds': (bounds_lj, (np.inf, np.inf))},
    #'Buckingham-offset': {'name': 'Buckingham of.', 'func': gen_buckingham_mod_potential,
                   #'p0': (1e8, 5e1, 1e-3), 'bounds': ((0, 0, 0), (np.inf, np.inf, np.inf))},
    'LJ-7-6': {'name': '7-6 Mie', 'func': gen_lj_7_6_potential,
               'p0': (2e-4, 2e-2), 'bounds': (bounds_lj, (np.inf, np.inf))},
    #'LJ-12-9-6': {'name': 'LJ-12-9-6', 'func': gen_lj_12_9_6_potential,
                  #'p0': (2e-6, 2e-5, 2e-4), 'bounds': ((0, 0, 0), (np.inf, np.inf, np.inf))},
    #'Mie': {'name': 'Mie', 'func': gen_mie_potential,
            #'p0': (0.3, 3, 12, 6), 'bounds': ((0, 0, 7, 2), (np.inf, np.inf, 30, 6))},
    # the repulsive exponent n is a fit parameter here (second parameter)
    'LJ-n-6': {'name': 'n-6 Mie', 'func': gen_lj_n_6_potential,
               'p0': (2e-4, 8, 2e-2), 'bounds': ((0, 1, 1e-5), (np.inf, np.inf, np.inf))},
}

### load data

In [None]:
# notebook-wide store for the loaded data and the fit results, keyed by
# (setting name, interaction name, 'data' or fit function name)
iff_pot_fit_dict = {}

In [None]:
def load_data():
    """Load the selected potentials and RDFs of all settings into ``iff_pot_fit_dict``.

    The settings are grouped by ``'ff-name'``. For every interaction the
    ``.pot.cur`` and ``.dist.new`` of the selected (segment, step) are read;
    g is interpolated onto the grid of U if the grids differ. Within a group,
    the data of the second interaction (OW-CL) is interpolated onto the grid
    with the smallest maximum distance and averaged over the settings.

    Each stored entry has the key ``(setting name, interaction name, 'data')``
    and the value ``(r, g, U, None, None, None, None, g_mean, U_mean)``; the
    means are ``None`` for all but the second interaction.
    """
    settings_by_ff = sorted(inverse_setup_generator(system_types_inv, force_fields_inv),
                            key=operator.itemgetter('ff-name'))
    for ff_name, ff_inverse_settings in itertools.groupby(settings_by_ff, key=operator.itemgetter('ff-name')):
        print(f"  {ff_name}")
        U_dict = {}
        g_dict = {}
        r_dict = {}
        ff_inverse_settings = tuple(ff_inverse_settings)  # need to iterate this twice
        r_shortest = (0, np.inf)
        for inverse_setting in ff_inverse_settings:
            name = inverse_setting['name']
            print('  ' + name)
            for i, interaction in enumerate(inverse_setting['interactions']):
                ia_name = interaction['name']
                print('    ' + ia_name)

                working_dir = os.path.join(name, 'both')
                # load potential and rdf
                step_dir = (f"{working_dir}/segment{final_potential_from_segment_step[name][0]}"
                            f"/step_{final_potential_from_segment_step[name][1]:03d}")
                # read as strings so that non-numeric columns do not break the parser
                U_data = np.loadtxt(f"{step_dir}/{ia_name}.pot.cur", dtype=str, comments=['#', '@'])
                r = U_data[:, 0].astype(float)
                U = U_data[:, 1].astype(float)
                g_data = np.loadtxt(f"{step_dir}/{ia_name}.dist.new", dtype=str, comments=['#', '@'])
                r_g = g_data[:, 0].astype(float)
                g = g_data[:, 1].astype(float)
                # compare the shapes first: np.allclose raises if the two
                # grids have different lengths
                if r.shape != r_g.shape or not np.allclose(r, r_g):
                    print("grid of g and U differ, interpolating g..")
                    g = np.interp(r, r_g, g)
                U_dict[name, ia_name] = U
                g_dict[name, ia_name] = g
                r_dict[name, ia_name] = r
                if i == 1 and max(r) < max(r_shortest):
                    r_shortest = r
        # cut OW-CL interactions
        # needs to be done for all before averaging
        for inverse_setting in ff_inverse_settings:
            interaction = inverse_setting['interactions'][1]
            int_key = (inverse_setting['name'], interaction['name'])
            U_dict[int_key] = np.interp(r_shortest, r_dict[int_key], U_dict[int_key])
            g_dict[int_key] = np.interp(r_shortest, r_dict[int_key], g_dict[int_key])
        for inverse_setting in ff_inverse_settings:
            for i, interaction in enumerate(inverse_setting['interactions']):
                ia_name = interaction['name']
                int_key = (inverse_setting['name'], ia_name)
                if i == 1:
                    # average OW-CL interactions over all settings of this force field
                    U_mean = np.mean([U for (_, ia), U in U_dict.items() if ia == ia_name], axis=0)
                    g_mean = np.mean([g for (_, ia), g in g_dict.items() if ia == ia_name], axis=0)
                    r = r_shortest
                else:
                    U_mean = None
                    g_mean = None
                    r = r_dict[int_key]
                U = U_dict[int_key]
                g = g_dict[int_key]
                iff_pot_fit_dict[(inverse_setting['name'], ia_name, 'data')] = (r, g, U, None, None, None, None, g_mean, U_mean)

load_data()

In [None]:
def test():
    """Quick look at the loaded g(r) and U(r) of one hard-coded setting and interaction."""
    key = ('water5000-cacl2_50/altern5-netz-co0.9-nopc', 'OW-CA', 'data')
    r, g, u, _, _, _, _, _g_mean, _u_mean = iff_pot_fit_dict[key]

    # one figure per quantity, each with its own y range
    for values, (y_low, y_high) in ((g, (0, 0.2)), (u, (-10, 100))):
        plt.plot(r, values)
        plt.ylim(y_low, y_high)
        plt.show()

test()

### fit all potentials

In [None]:
def fit_all():
    """Fit every entry of ``fit_functions`` to every stored potential.

    For each inverse setting and each of its interactions the 'data' entry of
    ``iff_pot_fit_dict`` is read and fitted with ``optimize.curve_fit``. The
    interaction at index 1 is additionally fitted using its mean curves; those
    results are stored under the interaction name with a '-mean' suffix.

    Each result is written back to ``iff_pot_fit_dict`` under
    ``(setting name, interaction name, fit function name)`` as the tuple
    ``(r, g_used, U_used, popt, p0, sigma, sl, None, None)``.
    """
    for inv_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        setting_name = inv_setting['name']
        print(setting_name)
        for idx, interaction in enumerate(inv_setting['interactions']):
            ia_name = interaction['name']
            print('  ' + ia_name)
            (r, g, U, _, _, _, _, g_mean, U_mean) = iff_pot_fit_dict[(setting_name, ia_name, 'data')]

            # the fit window opens at the first grid point with non-zero g
            # (always taken from the individual g, also for the mean fit)
            first_sampled = np.min(np.where(g > 1e-10))
            sl = slice(first_sampled, None)

            variants = (False, True) if idx == 1 else (False,)
            for mean in variants:
                if mean:
                    print('mean')
                    U_used, g_used = U_mean, g_mean
                    ia_name_used = ia_name + '-mean'
                else:
                    print('not mean')
                    U_used, g_used = U, g
                    ia_name_used = ia_name

                # weight points by 1/sqrt(g); g == 0 yields inf, hence the errstate guard
                with np.errstate(divide='ignore'):
                    sigma = 1 / np.sqrt(g_used)

                for fit_func_name, fit_func in fit_functions.items():
                    p0 = fit_func['p0']
                    popt, _pcov = optimize.curve_fit(
                        fit_func['func'], r[sl], U_used[sl],
                        p0=p0, sigma=sigma[sl], bounds=fit_func['bounds'], maxfev=int(2e5),
                    )
                    result_key = (setting_name, ia_name_used, fit_func_name)
                    iff_pot_fit_dict[result_key] = (r, g_used, U_used, popt, p0, sigma, sl, None, None)
                    if fit_func_name == 'LJ-n-6':
                        print(f"n: {popt[1]:.1f}")


fit_all()

### show some fits

In [None]:
def show_some_fits():
    """Plot the stored 'LJ-n-6' fits together with the potentials they fit.

    For every inverse setting and interaction (plus the '-mean' variant of the
    interaction at index 1) the stored fit entry is read from
    ``iff_pot_fit_dict`` and shown in a two-panel figure: a linear panel
    limited to -10..10 and a logarithmic panel limited to 0.5..1000. The
    fitted parameters are printed (``popt[1]`` labelled as ``n``, then
    ``popt[0]`` and ``popt[2]``).

    Everything that is plotted comes from the stored fit entry, so nothing is
    recomputed from the raw 'data' entry here.
    """
    # names of the fit functions to show; entries missing from fit_functions are skipped
    shown_fit_names = ('LJ-n-6',)

    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        print(inverse_setting['name'])
        for i, interaction in enumerate(inverse_setting['interactions']):
            ia_name = interaction['name']
            print('  ' + ia_name)
            # only the interaction at index 1 has an additional mean fit
            for mean in ((False, True) if i == 1 else (False,)):
                print('mean' if mean else 'not mean')
                ia_name_used = ia_name + '-mean' if mean else ia_name
                for fit_func_name in shown_fit_names:
                    if fit_func_name not in fit_functions:
                        continue
                    fit_func = fit_functions[fit_func_name]
                    fit_key = (inverse_setting['name'], ia_name_used, fit_func_name)
                    (r, _, U_used, popt, _, _, _, _, _) = iff_pot_fit_dict[fit_key]
                    print(f"n: {popt[1]:.4f}")
                    print(popt[0], popt[2])

                    fig, (ax0, ax1) = plt.subplots(ncols=2)
                    for ax in (ax0, ax1):
                        ax.plot(r, U_used, label='IMC')
                        ax.plot(r, fit_func['func'](r, *popt), label='fit')
                    # left: zoom on the well, right: log scale for the steep part
                    ax0.set_ylim(-10, 10)
                    ax1.set_yscale('log')
                    ax1.set_ylim(0.5, 1000)
                    ax1.legend()
                    plt.show()


show_some_fits()

### fit repulsive and attractive separately and paper table

In [None]:
def fit_sep_all(show_plots=False):
    """Fit the attractive and the repulsive part of every potential separately.

    The attractive term (``gen_att_6_potential``) is fitted first, on the grid
    points from the minimum of the potential outwards. It is then subtracted
    and ``gen_rep_power_potential`` is fitted to the remainder between the
    first grid point with non-zero g and the maximum of g. The interaction at
    index 1 is additionally fitted using its mean curves ('-mean' suffix).

    Results are stored in ``iff_pot_fit_dict`` under the fit name 'rep-att' as
    ``(r, g_used, U_used, (popt_att, popt_rep), None, sigma,
    (sl_att, sl_rep), None, None)``.

    Parameters
    ----------
    show_plots : bool
        If true, show a two-panel diagnostic figure for every fit.
    """
    for inv_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        setting_name = inv_setting['name']
        print(setting_name)
        for idx, interaction in enumerate(inv_setting['interactions']):
            ia_name = interaction['name']
            print('  ' + ia_name)
            (r, g, U, _, _, _, _, g_mean, U_mean) = iff_pot_fit_dict[(setting_name, ia_name, 'data')]

            for mean in ((False, True) if idx == 1 else (False,)):
                print('mean' if mean else 'not mean')
                if mean:
                    U_used, g_used, ia_name_used = U_mean, g_mean, ia_name + '-mean'
                else:
                    U_used, g_used, ia_name_used = U, g, ia_name

                # attractive window: from the potential minimum to the end of the grid
                sl_att = slice(np.argmin(U_used), None)
                # repulsive window: first non-zero g up to (excluding) the maximum of g
                sl_rep = slice(np.min(np.where(g_used > 1e-10)), np.argmax(g_used))

                # weight points by 1/sqrt(g); g == 0 yields inf, hence the errstate guard
                with np.errstate(divide='ignore'):
                    sigma = 1 / np.sqrt(g_used)

                # step 1: attractive part, single non-negative parameter
                popt_att, _pcov_att = optimize.curve_fit(
                    gen_att_6_potential, r[sl_att], U_used[sl_att],
                    p0=1e-4, sigma=sigma[sl_att], bounds=((0,), (np.inf,)), maxfev=int(2e5),
                )

                # step 2: repulsive part, fitted to what is left after removing the attraction
                U_rep = U_used - gen_att_6_potential(r, popt_att)
                popt_rep, pcov_rep = optimize.curve_fit(
                    gen_rep_power_potential, r[sl_rep], U_rep[sl_rep],
                    p0=(1, -8), sigma=sigma[sl_rep], maxfev=int(2e5),
                )
                # standard errors of the repulsive parameters (currently not reported)
                perr_rep = np.sqrt(np.diag(pcov_rep))

                if show_plots:
                    att_curve = gen_att_6_potential(r, popt_att)
                    rep_curve = gen_rep_power_potential(r, *popt_rep)
                    exponent_label = f"{popt_rep[1]:.1f}"
                    fig, (ax0, ax1) = plt.subplots(ncols=2)
                    # left: full potential, attractive fit and sum of both fits
                    ax0.plot(r, U_used)
                    ax0.plot(r, att_curve)
                    ax0.plot(r, att_curve + rep_curve, label=exponent_label)
                    ax0.set_ylim(-10, 10)
                    ax0.legend()
                    # right: repulsive remainder and its fit on a log scale
                    ax1.plot(r, U_used - att_curve)
                    ax1.plot(r, rep_curve, label=exponent_label)
                    ax1.set_yscale('log')
                    ax1.set_ylim(0.5, 1000)
                    ax1.legend()
                    plt.show()

                iff_pot_fit_dict[(setting_name, ia_name_used, 'rep-att')] = (
                    r, g_used, U_used, (popt_att, popt_rep), None, sigma, (sl_att, sl_rep), None, None,
                )


fit_sep_all(show_plots=True)

In [None]:
def make_sep_fit_table():
    """Print a LaTeX table of the stored 'rep-att' fit parameters.

    Rows are indexed by (force-field short name, interaction plot name);
    the columns hold the repulsive prefactor, the negated repulsive exponent
    and the attractive prefactor, all formatted as strings. For the
    interaction at index 1 the '-mean' fit is used.
    """
    row_index = pd.MultiIndex.from_product((['HMN IMC', 'ECC IMC'], nb_plot_name.values()))
    df_rep_fit = pd.DataFrame(index=row_index, columns=('C_rep', 'exp', 'C_6'))

    for inv_setting in inverse_setup_generator(system_types_inv, force_fields_inv, verbose=False):
        print(inv_setting['name'])
        for idx, interaction in enumerate(inv_setting['interactions']):
            ia_name = interaction['name']
            print('  ' + ia_name)
            if idx == 1:
                print('mean')
                ia_name_used = ia_name + '-mean'
            else:
                print('not mean')
                ia_name_used = ia_name

            fit_key = (inv_setting['name'], ia_name_used, 'rep-att')
            (_, _, _, (popt_att, popt_rep), _, _, (_, _), _, _) = iff_pot_fit_dict[fit_key]

            row = (ff_short_names[inv_setting['ff-name']], nb_plot_name[ia_name])
            cells = (
                ('C_rep', f"{popt_rep[0]:.2e}"),
                ('exp', f"{-popt_rep[1]:.1f}"),
                ('C_6', f"{popt_att[0]:.2e}"),
            )
            for column, text in cells:
                df_rep_fit.at[row, column] = text

    # switch to the LaTeX column headers only after all cells are filled
    df_rep_fit.columns = (r'{$C_\text{rep}$}', '{$n$}', r'{$C_6$}')
    latex_table = df_rep_fit.to_latex(
        column_format='l l S[table-format=1.2e3] S[table-format=2.1] S[table-format=1.2e1]',
        escape=False,
    )
    print(latex_table)


make_sep_fit_table()

### plot fit for repulsive potential

In [None]:
def plot_sep(use_gradient=True, show_fits=True, show_grid=True, show_labels=True):
    """Plot the repulsive remainder of the stored potentials, one panel per force field.

    For every interaction the stored 'rep-att' entry of ``iff_pot_fit_dict``
    is read and ``U_used - gen_att_6_potential(r, popt_att)`` is drawn on
    log-log axes. 'OW-CL' is taken from its '-mean' entry and drawn only once
    per force field. The figure is saved as ``../figures/fit-repulsive.pdf``
    and shown.

    Parameters
    ----------
    use_gradient : bool
        If true, draw each curve as a ``LineCollection`` coloured by
        ``g_used``; otherwise draw a plain line in ``nb_plot_color``.
    show_fits : bool
        If true, draw the fitted ``gen_rep_power_potential`` as a dashed line
        around the maximum of ``g_used`` and add the two dashed guide lines
        labelled r^-12 and r^-7.
    show_grid : bool
        If true (and ``show_fits`` is true), add dotted families of curves
        with exponents -12 and -8.
    show_labels : bool
        If true (and ``show_fits`` is true), annotate each fit line with the
        negated fitted exponent.

    Notes
    -----
    The figure has exactly two panels and the legend reorders five entries,
    so this presumably expects two force fields and five distinct
    interactions -- TODO confirm against ``inverse_setup_generator``.
    """
    
    # rc overrides applied on top of mpl_rc_global for this figure only
    mpl_rc_local = {
        'legend.handlelength': 1.2,
        'legend.labelspacing': 0.2,
    }
    
    # (panel index, interaction name) -> (horizontal alignment, vertical alignment, x offset, y offset)
    # used for the exponent label next to each fit line; missing keys fall back to a default below
    label_positioning = {
        (0, 'OW-CL'): ('left', 'bottom', 0.0, 0.0),
        (0, 'OW-K'): ('left', 'bottom', 0.0, 0.0),
        (0, 'OW-NA'): ('right', 'top', -0.002, 0.0),
        (0, 'OW-LI'): ('right', 'top', -0.002, 0.0),
        (0, 'OW-CA'): ('left', 'bottom', 0.0, 0.0),
        (1, 'OW-CL'): ('left', 'bottom', 0.0, 0.0),
        (1, 'OW-K'): ('right', 'top', 0.0, -1.5),
        (1, 'OW-NA'): ('right', 'top', -0.002, 0.0),
        (1, 'OW-CA'): ('left', 'bottom', 0.0, 0.0),
    }
    
    with plt.rc_context({**mpl_rc_global, **mpl_rc_local}):
        fig, axes = plt.subplots(nrows=1, ncols=2, constrained_layout=True, figsize=(3.9, 2.1), dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.01, h_pad=0.01)
        # legend entries are collected from the first panel only (f == 0)
        legend_handles = []
        legend_labels = []
        # group the inverse settings by force field; sorting first is required for groupby
        for f, (ff_name, ff_inverse_settings) in enumerate(itertools.groupby(sorted(inverse_setup_generator(system_types_inv, force_fields_inv),
                                                                                    key=operator.itemgetter('ff-name'), reverse=False),
                                                                             key=operator.itemgetter('ff-name'))):
            ax = axes[f]
            ax.set_title(ff_short_names[ff_name])
            #print(f"  {ff_name}")
            for v, inverse_setting in enumerate(ff_inverse_settings):
                #print(inverse_setting['name'])
                for i, interaction in enumerate(inverse_setting['interactions']):
                    #print('  ' + interaction['name'])
                    ia_name = interaction['name']
                    # OW-CL is always taken from the '-mean' fit entry
                    mean = ia_name == 'OW-CL'
                    # skip other mean
                    # (the mean entry is drawn only for the first setting of each force field)
                    if ia_name == 'OW-CL' and v > 0:
                        continue
                    #print('mean' if mean else 'not mean')

                    # load data
                    ia_name_used = ia_name + '-mean' if mean else ia_name
                    (r, g_used, U_used, (popt_att, popt_rep), _, sigma, (sl_att, sl_rep), _, _) = iff_pot_fit_dict[(inverse_setting['name'], ia_name_used, 'rep-att')]
                        
                    # plot potential
                    # (stored potential with the fitted attractive term subtracted)
                    U_rep = U_used - gen_att_6_potential(r, popt_att)
                    label = nb_plot_name[ia_name]
                    if use_gradient:
                        # multicolored line
                        # build one two-point segment per pair of neighbouring grid points
                        points = np.array([r, U_rep]).T.reshape(-1, 1, 2)
                        segments = np.concatenate([points[:-1], points[1:]], axis=1)
                        # Create a continuous norm to map from data points to colors
                        #norm = plt.Normalize(-1, max(g_used))
                        norm = plt.Normalize(-0.2 * max(g_used), max(g_used))
                        cmap = nb_plot_cmap[ia_name]
                        lc = mpl.collections.LineCollection(segments, cmap=cmap, norm=norm)
                        # Set the values used for colormapping
                        lc.set_array(g_used)
                        #lc.set_linewidth(2)
                        line = ax.add_collection(lc)
                        if f == 0:
                            # a plain single-colour proxy line stands in for the collection in the legend
                            legend_handles.append(mpl.lines.Line2D([0], [0], color=nb_plot_color[ia_name], lw=1))
                            legend_labels.append(label)
                    else:
                        color = nb_plot_color[ia_name]
                        line, = ax.plot(r, U_rep, label=label, color=color, linestyle='-')
                        if f == 0:
                            legend_handles.append(line)
                            legend_labels.append(label)
                    
                    # plot fit
                    if show_fits:
                        # draw the fit only on fit_width grid points either side of the maximum of g_used
                        fit_width = 5
                        color = nb_plot_color[ia_name]
                        start = np.argmax(g_used) - fit_width
                        end = np.argmax(g_used) + fit_width
                        #print(start, end, popt_rep)
                        sl = slice(start, end)
                        x = r[sl]
                        y = gen_rep_power_potential(r, *popt_rep)[sl]
                        ax.plot(x, y, label=label, color=color, linestyle='--')
                        
                        if show_labels:
                            # default placement, overridden per (panel, interaction) if configured
                            ha, va, x_offset, y_offset = ('right', 'top', 0.0, 0.0)
                            if (f, ia_name) in label_positioning:
                                print(f, ia_name)
                                ha, va, x_offset, y_offset = label_positioning[(f, ia_name)]
                            # label with the negated fitted exponent, anchored at index fit_width of the drawn slice
                            ax.text(x[fit_width] + x_offset, y[fit_width] + y_offset, f'{-popt_rep[1]:.1f}',
                                    color=color, horizontalalignment=ha, verticalalignment=va)
                            
                    
        # plot r^-7 and r^-12 guide lines
        # NOTE: `r` below is the grid of the last interaction processed in the loops above
        if show_fits:
            for ax in axes:
                sl = slice(63, 73)
                x = r[sl]
                y = gen_rep_power_potential(r, 3e-4, -12)[sl]
                ax.plot(x, y, color='grey', linestyle='--')
                ax.text(np.mean(x), np.mean(y), r'$r^{-12}$', horizontalalignment='left', verticalalignment='center',)
                sl = slice(63, 73)
                x = r[sl]
                y = gen_rep_power_potential(r, 4e-2, -7)[sl]
                ax.plot(x, y, color='k', linestyle='--')
                ax.text(np.mean(x), np.mean(y)-7, r'$r^{-7}$', horizontalalignment='right', verticalalignment='top',)
                
        # show skewed grid
        # (dotted curve families with exponents -12 and -8 for a range of prefactors)
        if show_fits and show_grid:
            for ax in axes:
                x = r
                for a in np.logspace(-2, -8, num=10):
                    y = gen_rep_power_potential(r, a, -12)
                    ax.plot(x, y, color='grey', linestyle=':', linewidth=0.5)
                for a in np.logspace(-1, -5, num=10):
                    y = gen_rep_power_potential(r, a, -8)
                    ax.plot(x, y, color='darkgrey', linestyle=':', linewidth=0.5)
                    
        # common axis setup: log-log axes with plain (non-exponent) tick labels on x
        for ax in axes:
            ax.set_xscale('log')
            ax.set_yscale('log')
            ax.set_xticks(np.arange(0.15, 0.4, 0.05))
            ax.set_xlim(0.17, 0.39)
            ax.set_ylim(2, 300)
            ax.get_xaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
            ax.get_xaxis().set_minor_formatter(mpl.ticker.ScalarFormatter())
        # reorder the legend entries collected above; requires at least five entries
        legend_order = [0, 2, 3, 4, 1]
        axes[1].legend((legend_handles[ndx] for ndx in legend_order), (legend_labels[ndx] for ndx in legend_order), loc='lower left')
        # the right panel gets no y tick labels
        axes[1].yaxis.set_major_formatter(mpl.ticker.NullFormatter())
        axes[1].yaxis.set_minor_formatter(mpl.ticker.NullFormatter())
        axes[0].set_xlabel(r'$r$ in nm')
        axes[1].set_xlabel(r'$r$ in nm')
        #fig.supxlabel(r'$r$ in nm')
        axes[0].set_ylabel(r'$u_\mathrm{rep}(r)$ in kJ/mol')
        fig.savefig(os.path.join('..', 'figures', f"fit-repulsive.pdf"))
        plt.show()

#plot_sep(False, False, False)
#plot_sep(True, False, False)
plot_sep(True, True, False, True)

In [None]:
!cp -a ../figures/fit-repulsive.pdf ~/research/output/ion-shortrange-paper/figures/

In [None]:
# for trr146 b1 proposal
def plot_sep_nico(use_gradient=True, show_fits=True, show_grid=True, show_labels=True, show_steepness=True):
    """Plot the repulsive remainder of one IMC potential next to its LJ reference.

    Single-panel variant of the repulsive-potential figure. It uses the first
    interaction of the first inverse setting generated for the 'netz-co0.9'
    force field and the 'water-cacl2_' system type. The stored 'rep-att'
    entry is read and ``U_used - gen_att_6_potential(r, popt_att)`` is drawn
    on log-log axes. For comparison, the repulsive-only potential of the
    parametric 'netz' force field is evaluated on the grid of the reference
    simulation and drawn as well. The figure is saved as
    ``../figures/repulsive-trr-proposal.pdf`` and shown.

    Parameters
    ----------
    use_gradient : bool
        If true, draw both curves as ``LineCollection`` objects coloured by
        their respective g values; otherwise draw plain lines.
    show_fits : bool
        If true, draw the fitted ``gen_rep_power_potential`` as a dashed line
        around the maximum of ``g_used``.
    show_grid : bool
        If true (and ``show_fits`` is true), add dotted families of curves
        with exponents -12 and -8.
    show_labels : bool
        If true (and ``show_fits`` is true), annotate the fit line with the
        negated fitted exponent.
    show_steepness : bool
        If true, add the two dashed guide lines labelled r^-12 and r^-7.
    """

    mpl_rc_local = {
        'legend.handlelength': 1.2,
        'legend.labelspacing': 0.2,
    }

    # (panel index, interaction name) -> (horizontal alignment, vertical alignment, x offset, y offset)
    label_positioning = {
        (0, 'OW-CL'): ('left', 'bottom', 0.0, 0.0),
        (0, 'OW-K'): ('left', 'bottom', -0.005, 0.0),
        (0, 'OW-CA'): ('left', 'bottom', 0.0, 0.0),
        (1, 'OW-CL'): ('left', 'bottom', 0.0, 0.0),
        (1, 'OW-K'): ('right', 'top', 0.005, -1.5),
        (1, 'OW-CA'): ('left', 'bottom', 0.0, 0.0),
    }

    # This figure has a single panel, so there is no per-force-field panel
    # index to look up label positions with (the code previously referred to
    # an undefined name `f`). Index 0 is used for the lookup.
    # NOTE(review): confirm that the (0, ...) entries are the intended ones.
    panel = 0

    def add_gradient_line(ax, x, y, values, cmap):
        """Draw y(x) on `ax` as a line whose colour follows `values`."""
        # one two-point segment per pair of neighbouring grid points
        points = np.array([x, y]).T.reshape(-1, 1, 2)
        segments = np.concatenate([points[:-1], points[1:]], axis=1)
        # start the norm below zero so that small values do not map to the lightest colour
        norm = plt.Normalize(-0.2 * max(values), max(values))
        lc = mpl.collections.LineCollection(segments, cmap=cmap, norm=norm)
        lc.set_array(values)
        ax.add_collection(lc)

    with plt.rc_context({**mpl_rc_global, **mpl_rc_local}):
        fig, ax = plt.subplots(nrows=1, ncols=1, constrained_layout=True, figsize=(2.4, 1.8), dpi=200)
        legend_handles = []
        legend_labels = []

        # local selection of one force field and one system type
        ff_selection = {ffn: ff for ffn, ff in force_fields.items() if ffn in ('netz-co0.9',)}
        st_selection = {stn: st for stn, st in system_types.items() if st['name'] == 'water-cacl2_'}  # not pure water
        inverse_setting = next(inverse_setup_generator(st_selection, ff_selection))
        ff_name = inverse_setting['ff-name']
        print(f"  {ff_name}")
        print(inverse_setting['name'])
        interaction = inverse_setting['interactions'][0]
        print('  ' + interaction['name'])
        ia_name = interaction['name']
        mean = False
        # load data
        ia_name_used = ia_name + '-mean' if mean else ia_name
        (r, g_used, U_used, (popt_att, popt_rep), _, _, (_, _), _, _) = iff_pot_fit_dict[(inverse_setting['name'], ia_name_used, 'rep-att')]

        # plot potential (stored potential with the fitted attractive term subtracted)
        U_rep = U_used - gen_att_6_potential(r, popt_att)
        label = nb_plot_name[ia_name]
        if use_gradient:
            add_gradient_line(ax, r, U_rep, g_used, nb_plot_cmap[ia_name])
            # a plain single-colour proxy line stands in for the collection in the legend
            legend_handles.append(mpl.lines.Line2D([0], [0], color=nb_plot_color[ia_name], lw=1))
            legend_labels.append('IMC')
        else:
            color = nb_plot_color[ia_name]
            line, = ax.plot(r, U_rep, label=label, color=color, linestyle='-')
            legend_handles.append(line)
            legend_labels.append(label)

        # plot LJ potential of reference force field
        r_g_ref = npt_system_interaction_dict[('water5000-cacl2_50/netz-co0.9tc', 'OW-CA', 'r')]
        g_ref = npt_system_interaction_dict[('water5000-cacl2_50/netz-co0.9tc', 'OW-CA', 'g')]
        pff = PARAMETRIC_FORCE_FIELDS['netz']
        at1 = next((at for at in pff['atomtypes'] if at['type'] == interaction['type1']))
        at2 = next((at for at in pff['atomtypes'] if at['type'] == interaction['type2']))
        U_ref_rep, _ = gen_potential_and_force(at1, at2, r_g_ref, pff['combining-rule'], False, pff['nonbond-params'], repulsive_only=True)
        if use_gradient:
            add_gradient_line(ax, r_g_ref, U_ref_rep, g_ref, nb_plot_cmap['OW-LI'])
            legend_handles.append(mpl.lines.Line2D([0], [0], color=nb_plot_color['OW-LI'], lw=1))
            legend_labels.append('LJ')
        else:
            # U_ref_rep was evaluated on r_g_ref, so it has to be plotted against that grid
            ax.plot(r_g_ref, U_ref_rep, label=label, color='red', linestyle='-')

        # plot fit
        if show_fits:
            # draw the fit only on fit_width grid points either side of the maximum of g_used
            fit_width = 5
            color = nb_plot_color[ia_name]
            start = np.argmax(g_used) - fit_width
            end = np.argmax(g_used) + fit_width
            sl = slice(start, end)
            x = r[sl]
            y = gen_rep_power_potential(r, *popt_rep)[sl]
            ax.plot(x, y, label=label, color=color, linestyle='--')

            if show_labels:
                # default placement, overridden per (panel, interaction) if configured
                ha, va, x_offset, y_offset = label_positioning.get(
                    (panel, ia_name), ('right', 'top', 0.0, 0.0))
                ax.text(x[fit_width] + x_offset, y[fit_width] + y_offset, f'{-popt_rep[1]:.1f}',
                        color=color, horizontalalignment=ha, verticalalignment=va)

        # plot r^-7 and r^-12 guide lines
        if show_steepness:
            sl = slice(55, 65)
            x = r[sl]
            y = gen_rep_power_potential(r, 5e-5, -12)[sl]
            ax.plot(x, y, color='grey', linestyle='--')
            ax.text(np.mean(x), np.mean(y), r'$r^{-12}$', horizontalalignment='left', verticalalignment='center',)
            y = gen_rep_power_potential(r, 1.2e-2, -7)[sl]
            ax.plot(x, y, color='k', linestyle='--')
            ax.text(np.mean(x), np.mean(y)-5, r'$r^{-7}$', horizontalalignment='right', verticalalignment='top',)

        # show skewed grid (dotted curve families with exponents -12 and -8)
        if show_fits and show_grid:
            x = r
            for a in np.logspace(-2, -8, num=10):
                y = gen_rep_power_potential(r, a, -12)
                ax.plot(x, y, color='grey', linestyle=':', linewidth=0.5)
            for a in np.logspace(-1, -5, num=10):
                y = gen_rep_power_potential(r, a, -8)
                ax.plot(x, y, color='darkgrey', linestyle=':', linewidth=0.5)

        # log-log axes with plain (non-exponent) tick labels on x
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xticks(np.arange(0.15, 0.4, 0.05))
        ax.set_xlim(0.20, 0.34)
        ax.set_ylim(8, 300)
        ax.get_xaxis().set_major_formatter(mpl.ticker.ScalarFormatter())
        ax.get_xaxis().set_minor_formatter(mpl.ticker.ScalarFormatter())
        ax.legend(legend_handles, legend_labels, loc='lower left')

        ax.set_xlabel(r'$r$ in nm')
        ax.set_ylabel(r'$u_\mathrm{rep}(r)$ in kJ/mol')
        fig.savefig(os.path.join('..', 'figures', "repulsive-trr-proposal.pdf"))
        plt.show()

plot_sep_nico(True, False, False, False, True)

### potential plot for SI

In [None]:
def plot_potential_SI(use_gradient, show_attractive_fit_only=True):
    """Plot the stored potentials with their fits, one panel per force field.

    For every interaction the stored 'rep-att' entry of ``iff_pot_fit_dict``
    is read and ``U_used`` is drawn together with a dashed fit curve.
    'OW-CL' is taken from its '-mean' entry and drawn only once per force
    field. The figure is saved as ``../figures/fit-attractive.pdf`` and shown.

    Parameters
    ----------
    use_gradient : bool
        If true, draw each potential as a ``LineCollection`` coloured by
        ``g_used``; otherwise draw a plain line in ``nb_plot_color``.
    show_attractive_fit_only : bool
        If true, the dashed curve is ``gen_att_6_potential`` alone; otherwise
        it is the sum of ``gen_att_6_potential`` and
        ``gen_rep_power_potential``.

    Notes
    -----
    The figure has exactly two panels and the legend reorders five entries,
    so this presumably expects two force fields and five distinct
    interactions -- TODO confirm against ``inverse_setup_generator``.
    """

    mpl_rc_local = {
        'legend.handlelength': 1.6,
        'legend.labelspacing': 0.2,
    }

    with plt.rc_context({**mpl_rc_global, **mpl_rc_local}):
        fig, axes = plt.subplots(nrows=1, ncols=2, constrained_layout=True, figsize=(4.4, 2.1), dpi=200)
        # legend entries are collected from the first panel only (f == 0)
        legend_handles = []
        legend_labels = []
        # group the inverse settings by force field; sorting first is required for groupby
        for f, (ff_name, ff_inverse_settings) in enumerate(itertools.groupby(sorted(inverse_setup_generator(system_types_inv, force_fields_inv),
                                                                                    key=operator.itemgetter('ff-name'), reverse=False),
                                                                             key=operator.itemgetter('ff-name'))):
            ax = axes[f]
            # the panel title is the force-field short name only
            ax.set_title(ff_short_names[ff_name])
            for v, inverse_setting in enumerate(ff_inverse_settings):
                for interaction in inverse_setting['interactions']:
                    ia_name = interaction['name']
                    # OW-CL is always taken from the '-mean' fit entry ...
                    mean = ia_name == 'OW-CL'
                    # ... and drawn only for the first setting of each force field
                    if mean and v > 0:
                        continue
                    # load data
                    ia_name_used = ia_name + '-mean' if mean else ia_name
                    (r, g_used, U_used, (popt_att, popt_rep), _, _, (_, _), _, _) = iff_pot_fit_dict[(inverse_setting['name'], ia_name_used, 'rep-att')]
                    # plot potential
                    label = nb_plot_name[ia_name]
                    color = nb_plot_color[ia_name]
                    if use_gradient:
                        # multicolored line: one two-point segment per pair of neighbouring grid points
                        points = np.array([r, U_used]).T.reshape(-1, 1, 2)
                        segments = np.concatenate([points[:-1], points[1:]], axis=1)
                        # continuous norm to map g values to colors
                        norm = plt.Normalize(-0.2 * max(g_used), 0.8*max(g_used))
                        cmap = nb_plot_cmap[ia_name]
                        lc = mpl.collections.LineCollection(segments, cmap=cmap, norm=norm)
                        # values used for colormapping
                        lc.set_array(g_used)
                        ax.add_collection(lc)
                        # a plain single-colour proxy line stands in for the collection in the legend
                        handle = mpl.lines.Line2D([0], [0], color=color, lw=1)
                    else:
                        handle, = ax.plot(r, U_used, label=label, color=color, linestyle='-')
                    if f == 0:
                        legend_handles.append(handle)
                        legend_labels.append(label)

                    # dashed fit curve, drawn on top of the potential
                    fit_curve = gen_att_6_potential(r, popt_att)
                    if not show_attractive_fit_only:
                        fit_curve = fit_curve + gen_rep_power_potential(r, *popt_rep)
                    ax.plot(r, fit_curve, linestyle='--', color=color, zorder=10, linewidth=0.8)

            # limits are set after plotting so that they are not changed by autoscaling
            ax.set_xlim(0.16, 0.75)
            ax.set_ylim(-7, 17)
            ax.set_xlabel(r'$r$ in nm')
        axes[0].set_ylabel(r'$u_\mathrm{att}(r)$ in kJ/mol')
        # reorder the legend entries collected above; requires at least five entries
        legend_order = [0, 2, 3, 4, 1]
        axes[1].legend((legend_handles[ndx] for ndx in legend_order), (legend_labels[ndx] for ndx in legend_order), loc='upper right')
        fig.savefig(os.path.join('..', 'figures', "fit-attractive.pdf"))
        plt.show()

# both calls write the same file, so the saved PDF is the one from the last call
plot_potential_SI(use_gradient=True, show_attractive_fit_only=False)
plot_potential_SI(use_gradient=True)

In [None]:
!cp -a ../figures/fit-attractive.pdf ~/research/output/ion-shortrange-paper/figures/

### single potential plot for presentation

In [None]:
def plot_single(show_attr=True):
    """Show one stored potential in a small single-panel figure.

    Uses the first interaction of the first inverse setting and reads its
    'rep-att' entry from ``iff_pot_fit_dict`` (the '-mean' entry if that
    interaction is 'OW-CL').

    Parameters
    ----------
    show_attr : bool
        If true, additionally draw ``gen_att_6_potential`` with the fitted
        attractive parameter as a dashed line.
    """
    rc_overrides = {
        'legend.handlelength': 1.6,
        'legend.labelspacing': 0.3,
    }

    with plt.rc_context({**mpl_rc_global, **rc_overrides}):
        fig, ax = plt.subplots(constrained_layout=True, figsize=(2.0, 1.5), dpi=200)

        first_setting = next(inverse_setup_generator(system_types_inv, force_fields_inv))
        ia_name = first_setting['interactions'][0]['name']
        if ia_name == 'OW-CL':
            fit_name = ia_name + '-mean'
        else:
            fit_name = ia_name
        fit_entry = iff_pot_fit_dict[(first_setting['name'], fit_name, 'rep-att')]
        (r, _g_used, U_used, (popt_att, _popt_rep), _, _sigma, (_sl_att, _sl_rep), _, _) = fit_entry

        # stored potential as a solid line in the colour of the interaction
        ax.plot(r, U_used, label=nb_plot_name[ia_name], color=nb_plot_color[ia_name], linestyle='-')
        if show_attr:
            # fitted attractive term only
            ax.plot(r, gen_att_6_potential(r, popt_att), linestyle='--')

        title = ff_short_names[first_setting['ff-name']] + '    ' + nb_plot_name[ia_name]
        ax.set_title(title)
        ax.set_xlim(0, 0.75)
        ax.set_ylim(-15, 80)
        ax.set_xlabel(r'$r$ in nm')
        ax.set_ylabel(r'$U_\mathrm{}(r)$ in kJ/mol')
        plt.show()


plot_single(False)
plot_single(True)

### make parametric fit force fields

In [None]:
# Tabulate iff_pot_fit_dict: one row per dictionary key, one column per slot
# of the stored 9-tuples.
df_fit = pd.DataFrame(iff_pot_fit_dict).T
df_fit.columns = ['r', 'g_used', 'U_used', 'popt', 'p0', 'sigma', 'sl', 'g_mean', 'U_mean']
# show the fitted parameters of all 'LJ-12-6' fits
df_fit.loc[(slice(None), slice(None), 'LJ-12-6'), 'popt']

In [None]:
# filled by make_fit_pfff(): name of the fitted force field -> force field dict
parametric_fit_force_fields = {}


def make_fit_pfff():
    """Build parametric force fields from the stored 'LJ-12-6' fits.

    For each (source force field, new name, inverse force-field name) triple
    the source entry of ``PARAMETRIC_FORCE_FIELDS`` is deep-copied into
    ``parametric_fit_force_fields``. For the atom types LI, NA, K, CA and CL
    the two fitted 'LJ-12-6' parameters of the matching OW-ion interaction
    are converted to a sigma/epsilon pair. That pair is stored as an explicit
    'LJ' nonbond parameter for the OW-ion pair, and the per-atom sigma and
    epsilon are replaced by the values returned by ``decombinate_LJ``.
    CL uses the '-mean' fit of the NaCl system.
    """
    fitted_types = {'LI', 'NA', 'K', 'CA', 'CL'}
    sources = (
        ('netz', 'fit-iff-netz', 'altern5-netz-co0.9-nopc'),
        ('eccr1', 'fit-iff-eccr1', 'altern5-eccr1-co1.2-nopc'),
    )
    for pff, pfff, iff in sources:
        new_ff = deepcopy(PARAMETRIC_FORCE_FIELDS[pff])
        parametric_fit_force_fields[pfff] = new_ff
        combining_rule_owner = new_ff  # the combining rule is read from the copy

        # sigma/epsilon of the water oxygen, needed to undo the combining rule
        at_ow = next((at for at in new_ff['atomtypes'] if at['type'] == 'OW'))
        sig_eps_ow = at_ow['σ'], at_ow['ε']

        atomtypes_new = []
        # maps frozenset({'OW', ion type}) -> ('LJ', sigma, epsilon)
        nonbond_params = {}
        for at in new_ff['atomtypes']:
            at_type = at['type']
            print(at_type)
            at_new = deepcopy(at)
            if at_type in fitted_types:
                print(at['σ'], at['ε'], *sig_eps_ow, combining_rule_owner['combining-rule'])
                # pick the system and interaction the fit was stored under
                if at_type == 'CA':
                    system_name = f"water5000-{at['type'].lower()}cl2_50/{iff}"
                    ia_name = f"OW-{at['type']}"
                elif at_type == 'CL':
                    system_name = f"water5000-nacl50/{iff}"
                    ia_name = f"OW-{at['type']}-mean"
                else:
                    system_name = f"water5000-{at['type'].lower()}cl50/{iff}"
                    ia_name = f"OW-{at['type']}"

                A, B = df_fit.at[(system_name, ia_name, 'LJ-12-6'), 'popt']
                # convert the two fitted parameters to a sigma/epsilon pair
                sig_fit = (A / B)**(1/6)
                eps_fit = B**2 / (4 * A)
                at_new['σ'], at_new['ε'] = decombinate_LJ(
                    sig_fit, eps_fit, *sig_eps_ow, combining_rule_owner['combining-rule'])
                nonbond_params[frozenset({'OW', at_type})] = ('LJ', sig_fit, eps_fit)
            atomtypes_new.append(at_new)

        parametric_fit_force_fields[pfff]['atomtypes'] = atomtypes_new
        parametric_fit_force_fields[pfff]['nonbond-params'] = nonbond_params


make_fit_pfff()
pff_calc_C6_C12(parametric_fit_force_fields)

parametric_fit_force_fields['fit-iff-netz'], PARAMETRIC_FORCE_FIELDS['netz']

In [None]:
# Persist the fitted parametric force fields to a pickle file in the current
# working directory (an existing file of that name is overwritten).
with open('parametric-fit-force-fields.pkl', 'wb') as f:
    pickle.dump(parametric_fit_force_fields, f)

In [None]:
# Reload the fitted parametric force fields from the pickle file in the
# current working directory, replacing the in-memory dictionary.
# NOTE: unpickling can execute arbitrary code; only load a file you wrote yourself.
with open('parametric-fit-force-fields.pkl', 'rb') as f:
    parametric_fit_force_fields = pickle.load(f)

### save potential fits

In [None]:
# save fits to table
# oxygen-chloride is average over cations
def safe_fits():
    
    overwrite = False
    
    r_table = np.linspace(0, 3, num=1501)
    for fit_func_name, fit_func in fit_functions.items():
        print(f"{fit_func_name}")
        for j, inverse_setting in enumerate(inverse_setup_generator(system_types_inv, force_fields_inv)):
            print(f"  {inverse_setting['name']}")
            for i, interaction in enumerate(inverse_setting['interactions']):
                print(f"    {interaction['name']}")
                ia_name = interaction['name']
                ia_name_used = ia_name + '-mean' if i == 1 else ia_name
                _, _, _, popt, _, _, _, _, _ = iff_pot_fit_dict[(inverse_setting['name'], ia_name_used, fit_func_name)]
                U_fit = fit_func['func'](r_table, *popt)
                U_fit = np.nan_to_num(U_fit)
                force = -1/2 * (np.diff(U_fit, prepend=0) + np.diff(U_fit, append=0)) / (r_table[1] - r_table[0])
                first_U_ndx = max((np.argmax(U_fit)+1, np.argmax(np.nonzero(np.abs(U_fit) > 1e6))+1))
                
                #f_fill = 0  # physical for constant potential
                # not physical but prevents problems with steepest decent
                # Could cause problems with TI
                f_fill = force[first_U_ndx]
                
                force[0:first_U_ndx] = f_fill
                U_fit[0:first_U_ndx] = U_fit[first_U_ndx]
                U_fit = np.nan_to_num(U_fit, nan=U_fit[first_U_ndx])
                force = np.nan_to_num(force, nan=f_fill)
                table_name = "table_" + interaction['name'].replace('-', '_') + '.xvg'
                zeros = np.zeros_like(r_table)
                # save table temp
                table1 = f"/tmp/{inverse_setting['name'].replace('/', '_')}-{fit_func_name}-{table_name}"
                np.savetxt(table1,
                           np.stack((r_table, zeros, zeros, zeros, zeros, U_fit, force)).T,
                           header=f"{fit_func_name} {str(popt)}")
                # check and save in template
                table2 = f"template/table/{fit_func_name}-{inverse_setting['ff-name']}/{table_name}"
                run_bash(f"mkdir -p template/table/{fit_func_name}-{inverse_setting['ff-name']}")
                # comparison is inaccurate, due to unstable fitting
                skip_or_overwrite(table1, table2, overwrite, compare_numpy=True)
                """
                if not test_files_same(table1, table2):
                    fig, ax = plt.subplots(figsize=(17, 1.5))
                    ax.plot(r_table, U_fit, label=table1)
                    _, _, _, _, _, U, f = np.loadtxt(table2).transpose()
                    ax.plot(r_table, U, label=table2)
                    ax.set_ylim(-8, 8)
                    ax.legend()
                    plt.show()
                """
                
safe_fits()

### plot potential fits

In [None]:
def plot_fits():
    """Plot all fitted potentials against their target for every interaction.

    One figure is shown per inverse setting and interaction.  Each fit
    function from ``fit_functions`` is drawn together with its residual
    ``integral((g * (U_fit - U))**2 dr)``, which is also printed.  The target
    potential, the target mean (if present) and ``1/sigma**2`` on a twin axis
    are added afterwards.

    Reads the notebook globals ``fit_functions``, ``iff_pot_fit_dict``,
    ``system_types_inv`` and ``force_fields_inv``.
    """
    for inverse_setting in inverse_setup_generator(system_types_inv, force_fields_inv):
        print(f"{inverse_setting['name']}")
        for interaction in inverse_setting['interactions']:
            print('  ' + interaction['name'])

            fig, ax = plt.subplots(figsize=(4, 3))
            axins = inset_axes(ax, width="50%", height="50%", loc='upper right')

            for fit_func_name, fit_func in fit_functions.items():
                r, g, U, popt, p0, sigma, sl, g_mean, U_mean = iff_pot_fit_dict[(inverse_setting['name'], interaction['name'], fit_func_name)]
                U_fit = fit_func['func'](r, *popt)
                res = np.trapz(((g * (U_fit - U))**2), x=r)
                print(fit_func_name, res)
                for ax_ in (ax, axins):
                    ax_.plot(r, U_fit, label=fit_func_name+f'  (R={res:.2f})')
            # from here on r, U, U_mean, sigma and U_fit are those of the last fit function
            for ax_ in (ax, axins):
                ax_.plot(r, U, color='k', label=r'$U_\mathrm{impr.}$', zorder=-2)
                if U_mean is not None:
                    ax_.plot(r, U_mean, '--', color='grey', label='target mean', zorder=-1)
            ax2 = ax.twinx()
            ax2.plot(r, 1/sigma**2, linestyle=':', color='grey', label=r'$g$')
            # for legend: same style drawn on ax, shifted by 1000 so it lies outside the y-limits
            ax.plot(r, 1000 - 1/sigma**2, linestyle=':', color='grey', label=r'$g$')
            ax.set_xlim(0.15, 0.71)
            ax.set_ylim(-8, 50)
            #ax.set_ylim(-1, 1)
            ax.set_xlabel(r"$r$ in nm")
            ax.set_ylabel(r"$U$ in kJ/mol")
            ax2.set_ylabel(r"$g$")
            ax2.set_ylim(0)
            # zoom the inset onto the minimum of the last fit ...
            axins.set_xlim(r[np.argmin(U_fit)]-0.1, r[np.argmin(U_fit)]+0.1)
            axins.set_ylim(min(U_fit)-0.5, min(U_fit)+2.0)
            # ... but the inset is currently removed again before drawing
            axins.remove()
            ax.legend(frameon=False, loc='upper right')
            fig.tight_layout()
            #fig.savefig(os.path.join('..', 'figures', f"fit_{inverse_setting['name'].replace('/', '_')}_{interaction['name']}.png"), dpi=300)
            plt.show()

plot_fits()

### plot quality of fit

In [None]:
# plot fits
def plot_fit_quality():
    """Plot a fit-quality measure for each fit function, system and force field.

    For every (fit function, interaction) pair a list of ``(x, res, label)``
    entries is collected, where ``res`` is
    ``sqrt(1/max(r) * integral((g * (U_fit - U))**2 dr))``.  Two panels
    ('O-Cat.' and 'O-Cl') are drawn side by side and the figure is written to
    ``../figures/fit-quality.pdf``.

    Reads the notebook globals ``fit_functions``, ``force_fields``,
    ``system_types_inv``, ``iff_pot_fit_dict``, ``sys_type_short_names`` and
    ``mpl_rc_global``.
    """
    ias = ('O-Cat.', 'O-Cl')
    # one result list per (fit function, interaction); the plotting part relies on the append order
    xyl = {(fit_func_name, ia): [] for fit_func_name in fit_functions.keys() for ia in ias}

    # not used below
    ff_short_names_fitplot = {
        'iff-altern5-eccr1-co1.2-nopc': 'ECC',
        'iff-altern5-netz-co0.9-nopc': 'Netz',
    }

    mpl_rc = {
        'legend.labelspacing': 0.2,
        'legend.columnspacing': 1.5,
        'legend.handlelength': 3.2,
    }
    # additional horizontal gap between the two force-field groups
    gap_extra = 0.25
    # one marker per fit function
    markers = ('D', (6, 1, 0), '+', '2', '.')

    with plt.rc_context({**mpl_rc_global, **mpl_rc}):
        # local selection; the assignment makes force_fields_inv a local name inside this function
        force_fields_inv = {ffn: ff for ffn, ff in force_fields.items() if ffn in ('eccr1-co1.2', 'netz-co0.9')}

        # two different force fields
        for f, (ff_inv_name, ff_inv) in enumerate(force_fields_inv.items()):

            # four different system types per force field
            for s, inverse_setting in enumerate(inverse_setup_generator(system_types_inv, {ff_inv_name: ff_inv})):
                #print(f"{inverse_setting['name']}")

                # two interactions
                for i, interaction in enumerate(inverse_setting['interactions']):
                    #print('  ' + interaction['name'])

                    # mean for OW-CL once at end
                    means = (False, True) if s in (3,) and i == 1 else (False,)

                    for fit_func_name, fit_func in fit_functions.items():
                        #print('    ' + fit_func_name)
                        for mean in means:
                            ia_name_used = interaction['name'] + '-mean' if mean else interaction['name']
                            r, g, U, popt, p0, sigma, sl, g_mean, U_mean = iff_pot_fit_dict[(inverse_setting['name'], ia_name_used, fit_func_name)]
                            U_fit = fit_func['func'](r, *popt)
                            # root of the g-weighted squared deviation, normalised by max(r)
                            res = np.sqrt(1/max(r) * np.trapz(((g * (U_fit - U))**2), x=r))
                            #print(fit_func_name, popt, res)
                            ia = ias[i]
                            # x position: block offset per force field plus gap, system index,
                            # and one extra slot for the 'avg' point
                            # NOTE(review): the (f if i == 1 else 0) term presumably accounts for the
                            # 'avg' point of the preceding force-field block — confirm
                            x = len(system_types_inv) * f + gap_extra * f + s + (f if i == 1 else 0) + int(mean)
                            label = 'avg' if mean else sys_type_short_names[inverse_setting['refsys-parametric']['type']['name']]
                            #print("s, f, i, x, label, res:", s, f, i, x, label, res)
                            xyl[(fit_func_name, ia)].append((x, res, label))
        # plot
        fig, axes = plt.subplots(ncols=2, figsize=(4.77, 2.0), constrained_layout=True, sharey='row')
        fig.set_constrained_layout_pads(w_pad=0.02, h_pad=0.05, hspace=0., wspace=0.)
        for i, ia in enumerate(ias):
            axes[i].set_title(ia)
            # hard-coded split of the result list into the first and second force field:
            # 4 + 4 entries in the first panel, 5 + 5 in the second
            slicer1 = slice(0, 4) if i == 0 else slice(0, 5)
            slicer2 = slice(4, 8) if i == 0 else slice(5, 10)
            # not used below
            xticks = []
            for t, (fit_func_name, fit_func) in enumerate(fit_functions.items()):
                # plot ECC
                #x = np.array(range(len(y[(fit_func_name, ia)])))
                x_ecc, y_ecc, label_ecc = zip(*xyl[(fit_func_name, ia)][slicer1])
                line, = axes[i].plot(x_ecc, y_ecc, marker=markers[t], markersize=(3 if t == 0 else 6), linestyle=':', label=fit_func['name'])
                # plot Netz
                x_netz, y_netz, label_netz = zip(*xyl[(fit_func_name, ia)][slicer2])
                axes[i].plot(x_netz, y_netz, marker=markers[t], markersize=(3 if t == 0 else 6), linestyle=':', color=line.get_color())

            # ticks and labels are taken from the last fit function of the loop above
            axes[i].set_xticks((*x_ecc, *x_netz))
            axes[i].set_xticklabels((*label_ecc, *label_netz), fontsize=6)

            axes[i].text(0.19, -0.2, "ECC", transform=axes[i].transAxes)
            axes[i].text(0.7, -0.2, "Netz", transform=axes[i].transAxes)
        # print the value of the eighth 'LJ-12-6' / 'O-Cat.' entry with an arrow
        # NOTE(review): presumably this point lies above the y-limit set below — confirm
        axes[0].text(0.85, 0.82, f"{xyl[('LJ-12-6', 'O-Cat.')][7][1]:.1f}↑", transform=axes[0].transAxes)
        #ax.set_xlim(0.15, 0.71)
        axes[0].set_ylim(0, 2.6)
        axes[0].set_ylabel(r"$\Delta$ in kJ/mol")
        axes[1].legend(frameon=False, loc='upper center')
        fig.savefig('../figures/fit-quality.pdf', dpi=300)
        plt.show()

plot_fit_quality()

In [None]:
#!cp -a ../figures/fit-quality.pdf ~/research/output/ion-shortrange-paper/figures/

### compare different OW-CL potentials

In [None]:
def comp_ocl():
    """Compare the second-interaction target potentials of all system types.

    One figure per force field in ``force_fields_inv``: the potential stored
    under the ``'data'`` key of ``iff_pot_fit_dict`` is drawn for the second
    interaction of every inverse setting, followed by the mean potential that
    comes with the last setting (skipped when it is missing).
    """
    # two different force fields
    for ff_inv_name, ff_inv in force_fields_inv.items():
        print(ff_inv_name)

        fig, ax = plt.subplots(figsize=(6, 3))

        # reset so that nothing leaks over from the previous force field
        r = U_mean = None
        # four different system types per force field
        for inverse_setting in inverse_setup_generator(system_types_inv, {ff_inv_name: ff_inv}):
            print(f"{inverse_setting['name']}")

            interaction = inverse_setting['interactions'][1]

            r, _, U, _, _, _, _, _, U_mean = iff_pot_fit_dict[(inverse_setting['name'], interaction['name'], 'data')]
            label = sys_type_short_names[inverse_setting['refsys-parametric']['type']['name']]
            ax.plot(r, U, '-', label=label)
        # mean as stored with the last setting; same guard as in plot_fits
        if U_mean is not None:
            ax.plot(r, U_mean, '--', color='k', label="avg")

        ax.set_xlim(0.3, 0.78)
        ax.set_ylim(-2, 4)
        ax.set_title(ff_inv_name)
        ax.legend(loc='upper right', frameon=False)
        plt.show()

comp_ocl()

# MD

## systems to run and analyze

In [None]:
# force fields used for MD: every force field that is not tagged 'dummy'
force_fields_md = {ffn: ff for ffn, ff in force_fields.items() if 'dummy' not in ff['tags']}
# alternative selections, kept for quick switching
#force_fields_md = {ffn: ff for ffn, ff in force_fields.items() if (ffn.startswith('netz') and 'dummy' not in ff['tags'])}
#force_fields_md = {ffn: ff for ffn, ff in force_fields.items() if 'netz' in ffn}
# system types used for MD: all of them
system_types_md = system_types
#system_types_md = {stn: st for stn, st in system_types.items() if stn == 'water-cacl2_'}

# argument pair that the cells below unpack into system_generator(*systems_md)
systems_md = (system_types_md, force_fields_md)
# notebook output: table of all generated systems
pd.DataFrame(system_generator(*systems_md, verbose=False))

## preparation

### prepare MD

In [None]:
def prepare_md():
    """Create the directory layout and all input files for every MD system.

    For each system from ``system_generator(*systems_md)`` the following is
    done inside the system's working directory:

    * ``topol`` is recreated and the run folders ``equi1``, ``equi2``,
      ``prod`` (plus ``npt-equi3`` and ``npt-prod`` for 'npt' systems) are made,
    * the topology is written; for 'halftabulated' systems ``equi1`` gets its
      own topology without tabulated potentials, otherwise a symlink,
    * single-molecule .gro files and mapping files are copied; for
      'halftabulated' systems also the interaction tables and an index file,
    * the .mdp templates are copied and run length, temperature, pressure,
      cut-off scheme, cut-offs, vdW type, dispersion correction and energy
      groups are set,
    * ``topol/settings.xml`` (used for calculating distributions) is written
      with one entry per atom-type pair.

    Raises:
        Exception: if a system carries the 'tabulated' tag (not implemented).
    """
    for system in system_generator(*systems_md):
        print(f"system {system['name']}")
        with WorkingDir(system['name']):
            tags = system['tags']
            force_field = system['force-field']
            halftabulated = 'halftabulated' in tags
            npt = 'npt' in tags

            # make dirs
            run_bash("rm -rf topol")
            run_bash("mkdir -p equi1 equi2 prod topol")
            if npt:
                run_bash("mkdir -p npt-equi3 npt-prod")
            all_folders = ["equi1", "equi2", "prod"] + (["npt-equi3", "npt-prod"] if npt else [])

            # topol.top
            save_parametric_force_field_as_top('topol/topol.top', force_field, system['name'], system['moltypes'],
                                               osm_restraints={})
            if halftabulated:
                # equi1 is run with LJ parameters, so its topology gets no tabulated potentials
                ff_no_tabulated = deepcopy(force_field)
                ff_no_tabulated['tabulated-potentials'] = []
                save_parametric_force_field_as_top('equi1/topol.top', ff_no_tabulated, system['name'], system['moltypes'],
                                                   osm_restraints={})
            elif 'tabulated' in tags:
                raise Exception('not implemented')
            else:
                run_bash("ln -sf ../topol/topol.top equi1/topol.top")

            # single-*.gro and map*.xml
            for mt in system['moltypes']:
                # templates are looked up by 'type' if present, otherwise by 'name'
                name = mt.get('type', mt['name'])
                run_bash(f"cp {template_dir}/gro/single-{name}.gro equi1/single-{mt['name']}.gro")
                run_bash(f"cp {template_dir}/map/map-{name}.xml topol/map-{mt['name']}.xml")

            # table_{}_{}.xvg
            if halftabulated:
                tables = (f"table_{pair[0]}_{pair[1]}.xvg"
                          for pair in itertools.combinations_with_replacement(system['atomtypes-no-h'], 2)
                          if (pair[0], pair[1]) in force_field['tabulated-potentials'])
                for table in tables:
                    run_bash(f"cp {template_dir}/table/{force_field['name']}/{table} topol/")
                    for folder in all_folders:
                        run_bash(f"rm -f {folder}/{table}")
                    for folder in all_folders[1:]:  # equi1 is run with LJ parameters
                        run_bash(f"ln -sf ../topol/{table} {folder}/{table}")
            if 'tabulated' in tags:
                raise Exception('not implemented')

            # table.xvg
            if halftabulated:
                run_bash(f"cp {template_dir}/table/table6-12.xvg topol/table.xvg")
                for folder in all_folders:
                    run_bash(f"rm -f {folder}/table.xvg")
                    run_bash(f"ln -sf ../topol/table.xvg {folder}/table.xvg")

            # index.ndx
            if halftabulated:
                top = gt.top.Topology()
                top.load_simple_top(system['moltypes'])
                gt.top.generate_index_file(top, 'topol/index.ndx')
                del top

            # grompp.mdp files
            for folder in all_folders:
                run_bash(f"cp {template_dir}/mdp/{folder}.mdp {folder}/grompp.mdp")
            # run length
            gt.mdp.set_parameter("equi1/grompp.mdp", 'nsteps', int(1e4))
            gt.mdp.set_parameter("equi2/grompp.mdp", 'nsteps', int(1e5))
            gt.mdp.set_parameter("prod/grompp.mdp", 'nsteps',  int(2e5))
            if npt:
                npt_equi_nsteps = (int(5e5) if system['name'].startswith('water5000-cacl2_500')
                                   else int(1e5))
                gt.mdp.set_parameter("npt-equi3/grompp.mdp", 'nsteps',  npt_equi_nsteps)
                gt.mdp.set_parameter("npt-prod/grompp.mdp", 'nsteps',  int(2e5))
            # set temperature
            gt.mdp.set_parameter("equi2/grompp.mdp", 'gen-temp', system['temperature'])
            for folder in all_folders[1:]:
                mdp_file = folder + '/grompp.mdp'
                gt.mdp.set_parameter(mdp_file, 'ref-t', system['temperature'])
            # set pressure
            if npt:
                gt.mdp.set_parameter("npt-equi3/grompp.mdp", 'ref-p', 1.0)
                gt.mdp.set_parameter("npt-prod/grompp.mdp", 'ref-p', 1.0)
            # set cutoff scheme
            cutoff_scheme = 'group' if halftabulated else 'Verlet'
            for folder in all_folders:
                mdp_file = folder + '/grompp.mdp'
                gt.mdp.set_parameter(mdp_file, 'cutoff-scheme', cutoff_scheme)
            gt.mdp.set_parameter('equi1/grompp.mdp', 'cutoff-scheme', 'Verlet')  # LJ for equi1
            # set cutoffs
            co = force_field['cut-off']
            for folder in all_folders:
                mdp_file = folder + '/grompp.mdp'
                for key in ('rlist', 'rcoulomb', 'rvdw'):
                    gt.mdp.set_parameter(mdp_file, key, co)
            # set vdwtype
            vdwtype = 'User' if halftabulated else 'Cut-off'
            for folder in all_folders:
                mdp_file = folder + '/grompp.mdp'
                gt.mdp.set_parameter(mdp_file, 'vdwtype', vdwtype)
            gt.mdp.set_parameter("equi1/grompp.mdp", 'vdwtype', 'Cut-off')  # LJ for equi1
            # set tail correction (dispersion correction)
            if 'tail-corr' in tags:
                for folder in all_folders:
                    mdp_file = folder + '/grompp.mdp'
                    gt.mdp.set_parameter(mdp_file, 'DispCorr', 'EnerPres')
            # set energygrps(-table)
            if halftabulated:
                # only tabulated pairs whose two atom types occur in this system
                pairs = tuple((pair for pair in force_field.get('tabulated-potentials', [])
                               if pair[0] in system['atomtypes']
                               and pair[1] in system['atomtypes']))
                # every involved atom type once, in order of first appearance
                energygrps = ' '.join(OrderedSet([pair[0] for pair in pairs]
                                                 + [pair[1] for pair in pairs]))
                energygrp_table = '  '.join((f"{pair[0]} {pair[1]}" for pair in pairs))
                for folder in all_folders[1:]:  # LJ for equi1
                    mdp_file = folder + '/grompp.mdp'
                    gt.mdp.set_parameter(mdp_file, 'energygrps', energygrps)
                    gt.mdp.set_parameter(mdp_file, 'energygrp-table', energygrp_table)
                gt.mdp.set_parameter('equi1/grompp.mdp', 'energygrps', '')  # LJ for equi1
                gt.mdp.set_parameter('equi1/grompp.mdp', 'energygrp-table', '')
            elif 'tabulated' in tags:
                raise Exception('not implemented')
            else:
                for folder in all_folders:
                    mdp_file = folder + '/grompp.mdp'
                    gt.mdp.set_parameter(mdp_file, 'energygrps', '')
                    gt.mdp.set_parameter(mdp_file, 'energygrp-table', '')

            # settings.xml for calculating distributions
            cg = ET.Element('cg')
            for pair in itertools.combinations_with_replacement(system['atomtypes'], 2):
                non_bonded = ET.SubElement(cg, 'non-bonded')
                # children are created in exactly this order (no local names that shadow min/max)
                for child_tag, child_text in (
                    ('name', '-'.join(pair)),
                    ('type1', pair[0]),
                    ('type2', pair[1]),
                    ('min', '0'),
                    ('max', str(force_field['cut-off'])),
                    ('max_intra', '0.2'),
                    ('step', '0.004'),
                ):
                    ET.SubElement(non_bonded, child_tag).text = child_text
            indent(cg)
            tree = ET.ElementTree(cg)
            tree.write('topol/settings.xml')
prepare_md()

### prepare md thermal expansion

In [None]:
# notebook output: table of all MD systems tagged 'therm-exp'
pd.DataFrame(system for system in system_generator(*systems_md) if 'therm-exp' in system['tags'])

In [None]:
def prepare_thermal_expansion():
    """Prepare one run folder per temperature for all systems tagged 'therm-exp'.

    The temperatures are the system temperature shifted by every entry of
    ``DeltaTs``.  For each of them a folder ``therm-exp-<T>`` is created, the
    tables from ``topol`` are linked into it ('halftabulated' systems only)
    and ``grompp.mdp`` is copied from the ``therm-exp.mdp`` template and
    adjusted.

    Raises:
        Exception: if a system is tagged 'tabulated' but not 'halftabulated'.
    """
    tagged_systems = (candidate for candidate in system_generator(*systems_md)
                      if 'therm-exp' in candidate['tags'])
    for system in tagged_systems:
        print(f"system {system['name']}")
        with WorkingDir(system['name']):

            # one folder per temperature
            temperatures = [system['temperature'] + DeltaT for DeltaT in DeltaTs]
            print(f"temperatures: {temperatures}")
            run_folders = [f"therm-exp-{temperature:.0f}" for temperature in temperatures]
            for run_folder in run_folders:
                run_bash(f"mkdir -p {run_folder}")

            # link tables from topol/
            if 'halftabulated' in system['tags']:
                for run_folder in run_folders:
                    # table.xvg
                    run_bash(f"rm -f {run_folder}/table.xvg")
                    run_bash(f"ln -sf ../topol/table.xvg {run_folder}/table.xvg")
                    # table_{}_{}.xvg of every tabulated pair
                    for type_a, type_b in itertools.combinations_with_replacement(system['atomtypes-no-h'], 2):
                        if (type_a, type_b) not in system['force-field']['tabulated-potentials']:
                            continue
                        table = f"table_{type_a}_{type_b}.xvg"
                        run_bash(f"rm -f {run_folder}/{table}")
                        run_bash(f"ln -sf ../topol/{table} {run_folder}/{table}")

            # grompp.mdp files
            for temperature, run_folder in zip(temperatures, run_folders):
                mdp_path = f"{run_folder}/grompp.mdp"
                halftabulated = 'halftabulated' in system['tags']

                # start from the template
                run_bash(f"cp {template_dir}/mdp/therm-exp.mdp {mdp_path}")

                # run length, temperature, pressure, cut-off scheme, cut-offs and vdW type
                cut_off = system['force-field']['cut-off']
                settings = [
                    ('nsteps', int(1e6)),
                    ('ref-t', temperature),
                    ('ref-p', 1.0),
                    ('cutoff-scheme', 'group' if halftabulated else 'Verlet'),
                    ('rlist', cut_off),
                    ('rcoulomb', cut_off),
                    ('rvdw', cut_off),
                    ('vdwtype', 'User' if halftabulated else 'Cut-off'),
                ]
                # tail correction (dispersion correction)
                if 'tail-corr' in system['tags']:
                    settings.append(('DispCorr', 'EnerPres'))
                for key, value in settings:
                    gt.mdp.set_parameter(mdp_path, key, value)

                # energygrps(-table)
                if halftabulated:
                    atomtypes = system['atomtypes']
                    pairs = tuple(pair for pair in system['force-field'].get('tabulated-potentials', [])
                                  if pair[0] in atomtypes and pair[1] in atomtypes)
                    first_types = [pair[0] for pair in pairs]
                    second_types = [pair[1] for pair in pairs]
                    energygrps = ' '.join(OrderedSet(first_types + second_types))
                    energygrp_table = '  '.join(f"{pair[0]} {pair[1]}" for pair in pairs)
                elif 'tabulated' in system['tags']:
                    raise Exception('not implemented')
                else:
                    energygrps = ''
                    energygrp_table = ''
                gt.mdp.set_parameter(mdp_path, 'energygrps', energygrps)
                gt.mdp.set_parameter(mdp_path, 'energygrp-table', energygrp_table)

prepare_thermal_expansion()

### prepare dos

In [None]:
# notebook output: table of all MD systems tagged 'dos'
pd.DataFrame(system for system in system_generator(*systems_md) if 'dos' in system['tags'])

In [None]:
def prepare_dos():
    """Prepare the ``npt-prod-vel`` run folder for all systems tagged 'dos'.

    The tables from ``topol`` are linked into the folder ('halftabulated'
    systems only), ``grompp.mdp`` is copied from the ``npt-dos.mdp`` template
    and adjusted, and ``dos/params.json`` is written from ``param_dos`` and
    the system's molecule types.  Topology, tables and index files are
    expected to exist already.

    Raises:
        Exception: if a system is tagged 'tabulated' but not 'halftabulated'.
    """
    dos_systems = (candidate for candidate in system_generator(*systems_md)
                   if 'dos' in candidate['tags'])
    for system in dos_systems:
        print(f"system {system['name']}")
        with WorkingDir(system['name']):

            # make dirs
            folder = "npt-prod-vel"
            mdp_path = f"{folder}/grompp.mdp"
            run_bash(f"mkdir -p {folder}")
            halftabulated = 'halftabulated' in system['tags']

            # link tables from topol/
            if halftabulated:
                # table.xvg
                run_bash(f"rm -f {folder}/table.xvg")
                run_bash(f"ln -sf ../topol/table.xvg {folder}/table.xvg")
                # table_{}_{}.xvg of every tabulated pair
                for type_a, type_b in itertools.combinations_with_replacement(system['atomtypes-no-h'], 2):
                    if (type_a, type_b) not in system['force-field']['tabulated-potentials']:
                        continue
                    table = f"table_{type_a}_{type_b}.xvg"
                    run_bash(f"rm -f {folder}/{table}")
                    run_bash(f"ln -sf ../topol/{table} {folder}/{table}")

            # grompp.mdp file: start from the template
            run_bash(f"cp {template_dir}/mdp/npt-dos.mdp {mdp_path}")

            # run length, temperature, pressure, cut-off scheme, cut-offs and vdW type
            cut_off = system['force-field']['cut-off']
            settings = [
                ('nsteps', int(8e4)),
                ('ref-t', system['temperature']),
                ('ref-p', 1.0),
                ('cutoff-scheme', 'group' if halftabulated else 'Verlet'),
                ('rlist', cut_off),
                ('rcoulomb', cut_off),
                ('rvdw', cut_off),
                ('vdwtype', 'User' if halftabulated else 'Cut-off'),
            ]
            # tail correction (dispersion correction)
            if 'tail-corr' in system['tags']:
                settings.append(('DispCorr', 'EnerPres'))
            for key, value in settings:
                gt.mdp.set_parameter(mdp_path, key, value)

            # energygrps(-table)
            if halftabulated:
                atomtypes = system['atomtypes']
                pairs = tuple(pair for pair in system['force-field'].get('tabulated-potentials', [])
                              if pair[0] in atomtypes and pair[1] in atomtypes)
                first_types = [pair[0] for pair in pairs]
                second_types = [pair[1] for pair in pairs]
                energygrps = ' '.join(OrderedSet(first_types + second_types))
                energygrp_table = '  '.join(f"{pair[0]} {pair[1]}" for pair in pairs)
            elif 'tabulated' in system['tags']:
                raise Exception('not implemented')
            else:
                energygrps = ''
                energygrp_table = ''
            gt.mdp.set_parameter(mdp_path, 'energygrps', energygrps)
            gt.mdp.set_parameter(mdp_path, 'energygrp-table', energygrp_table)

            # parameter file for the DoS calculation
            run_bash(f"mkdir -p {folder}/dos")
            params = {
                'nsamples': param_dos['n_samples'],
                'nblocks': param_dos['n_blocks'],
                'nblocksteps': param_dos['n_frames_per_block'],
                'moltypes': [
                    {
                        'nmols': moltype['nmols'],
                        'atom_masses': [atom['mass'] for atom in moltype['atoms']],
                        'rot_treat': moltype['rot_treat'],
                        'abc_indicators': moltype['abc_indicators'],
                    }
                    for moltype in system['moltypes']
                ],
                'cross_spectra': [],
            }

            with open(f'{folder}/dos/params.json', 'w') as params_file:
                json.dump(params, params_file, indent=4)
prepare_dos()

### fill box

In [None]:
def fill_boxes():
    """Create the start configuration ``equi1/conf.gro`` for every MD system.

    A cubic box with the volume ``system['volume-init']`` is filled with water
    (``gmx solvate``) and then with all other molecule types
    (``gmx insert-molecules``).  Systems whose existing ``equi1/conf.gro``
    already has the wanted number of atoms and the wanted box are skipped.

    Raises:
        Exception: if the final configuration does not contain the wanted
            number of atoms.
    """
    for system in system_generator(*systems_md):
        print(f"system {system['name']}")
        with WorkingDir(system['name']):
            box_edge = system['volume-init']**(1/3)

            # check existing conf.gro
            try:
                n_atoms_inserted = gt.gro.get_natoms("equi1/conf.gro")
                box = gt.gro.get_box("equi1/conf.gro")
            except Exception:
                # missing or unreadable file: build the box from scratch
                # (KeyboardInterrupt/SystemExit are no longer swallowed)
                n_atoms_inserted = 0
                box = [0, 0, 0]
            n_atoms_wanted = gt.moltypes.get_natoms(system['moltypes'])
            if n_atoms_inserted == n_atoms_wanted and np.allclose(box, [box_edge]*3):
                print('..conf.gro with correct number of atoms and box existing..')
                continue

            # empty box with volume
            empty_gro = f"system\n0\n{box_edge} {box_edge} {box_edge}"
            with open("equi1/conf.gro", 'w') as f:
                f.write(empty_gro)

            # insert water
            water_moltypes = [moltype for moltype in system['moltypes'] if moltype['name'] == 'SOL']
            n_water = sum(moltype['nmols'] for moltype in water_moltypes)
            if n_water > 0:
                # the water model is only looked up when there is water to insert,
                # so systems without a 'SOL' molecule type no longer fail here
                water_type = water_moltypes[0]['type']
                gro_file = {'water-spce': 'spc216.gro', 'water-tip4p2005': 'tip4p.gro'}[water_type]
                run_bash(f"gmx solvate -cs {gro_file} -box {box_edge} {box_edge} {box_edge} -maxsol {n_water} -scale 0.5 -o equi1/conf.gro")
                # remove the backup files left behind by gmx (raw string keeps the backslash)
                run_bash(r"rm -f equi1/\#conf.gro.*")

            # insert other molecules
            for moltype in (moltype for moltype in system['moltypes'] if moltype['name'] != 'SOL'):
                n_mols = moltype['nmols']
                mt_name = moltype['name']
                run_bash(f"gmx insert-molecules -f equi1/conf.gro -o equi1/conf.gro -ci equi1/single-{mt_name}.gro -nmol {n_mols} -try 100 -scale 0.65")
                run_bash(r"rm -f equi1/\#conf.gro.*")

            # check
            n_atoms_inserted = gt.gro.get_natoms("equi1/conf.gro")
            if n_atoms_inserted != n_atoms_wanted:
                print(n_atoms_inserted, n_atoms_wanted)
                raise Exception("not enough molecules inserted")
fill_boxes()

## run on cluster

### check what has not run yet

In [None]:
def check_has_run():
    """Report which wanted results are still missing for every MD system.

    A result counts as done when its tag is not set for the system or when
    its output file exists in the system's working directory.  For every
    missing result one line ``"<tag> not done but wanted"`` is printed; the
    MSD output is reported under the 'npt' tag that enables it.  The thermal
    expansion check is computed but currently not reported.
    """
    for system in system_generator(*systems_md):
        print(f"system {system['name']}")

        with WorkingDir(system['name']):
            tags = system['tags']
            # check if already done
            dist_done = 'dist' not in tags or os.path.isfile('prod/OW-HW.dist.new')
            npt_done = 'npt' not in tags or os.path.isfile('npt-prod/confout.gro')
            npt_dist_done = 'npt-dist' not in tags or os.path.isfile('npt-prod/OW-HW.dist.new')
            npt_msd_done = 'npt' not in tags or os.path.isfile('npt-prod/msd-SOL.txt')
            # folder names must match those made by prepare_thermal_expansion ("therm-exp-{T:.0f}")
            therm_exp_done = 'therm-exp' not in tags or all(
                os.path.isfile(f"therm-exp-{system['temperature'] + DeltaT:.0f}/confout.gro")
                for DeltaT in DeltaTs
            )
            dos_done = 'dos' not in tags or os.path.isfile('npt-prod-vel/dos/dos.json')
            for tag, done in (
                ('dist', dist_done),
                ('npt', npt_done),
                ('npt-dist', npt_dist_done),
                ('npt', npt_msd_done),
                #('therm-exp', therm_exp_done),
                ('dos', dos_done),
            ):
                if not done:
                    print(f"{tag} not done but wanted")

check_has_run()

### run MD and analyze (RDF, MSD, therm. exp., DOS)

In [None]:
def run_md():
    """Submit one SLURM job per MD system whose results are not all present locally.

    For each system the function
      1. skips it when every requested result file already exists locally,
      2. creates the remote directory, removes old topology/table files there
         and pushes the simulation input files,
      3. builds a bash script (equilibration, NVT/NPT production, RDF, MSD and
         DOS steps, each guarded by an "output already exists" test) and
         submits it via ``gt.remote.run_slurm_script``.

    Job ids returned by the submission are appended to the notebook-wide
    ``jobids`` list.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'mammut-b', votca=True, mem_per_cpu='3800M')
    for system in system_generator(*systems_md):
        print(f"system {system['name']}")
        working_dir = os.path.join(system['name'])
        remote_dir = os.path.join(remote_dir_base, system['name'])

        with WorkingDir(working_dir):
            # check if already done
            dist_done = 'dist' not in system['tags'] or os.path.isfile('prod/OW-HW.dist.new')
            npt_done = 'npt' not in system['tags'] or os.path.isfile('npt-prod/confout.gro')
            npt_dist_done = 'npt-dist' not in system['tags'] or os.path.isfile('npt-prod/OW-HW.dist.new')
            npt_msd_done = 'npt' not in system['tags'] or os.path.isfile('npt-prod/msd-SOL.txt')
            # kept so the check can be re-enabled together with the script section below
            therm_exp_done = 'therm-exp' not in system['tags'] or all(
                (os.path.isfile(f"therm-exp-{system['temperature']+DeltaT}/confout.gro") for DeltaT in DeltaTs)
            )
            dos_done = 'dos' not in system['tags'] or os.path.isfile('npt-prod-vel/dos/dos.json')
            if all((
                dist_done,
                npt_done,
                npt_dist_done,
                npt_msd_done,
                #therm_exp_done,
                dos_done,
            )):
                print('..all results present locally..')
                continue

            # mkdir
            run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

            # delete old topology and table files
            run_bash(f"ssh {remote_host} rm -rf {remote_dir}/topol {remote_dir}/*/table*")

            # copy simulation files to remote
            filelist = ["equi1/conf.gro", "equi1/topol.top", "*/grompp.mdp", "topol"]
            if 'halftabulated' in system['tags']:
                filelist.append("*/table*")
            if 'dos' in system['tags']:
                filelist.append("*/dos/params.json")
            gt.remote.push_files(filelist, remote_host, remote_dir, exclude="traj*")

            # strings for the bash script
            mapping_files = ';'.join([f"../topol/map-{moltype['name']}.xml" for moltype in system['moltypes']])
            tabulated_string = ""
            if 'halftabulated' in system['tags']:
                tabulated_string = "-n ../topol/index.ndx -maxwarn 1"
            # Python booleans are rendered as 'True'/'False' and compared as strings in bash
            calc_nvt = str('nvt' in system['tags'])
            calc_npt = str('npt' in system['tags'])
            calc_dist = str('dist' in system['tags'])
            calc_npt_dist = str('npt-dist' in system['tags'])
            therm_exp = str('therm-exp' in system['tags'])
            calc_dos = str('dos' in system['tags'])

            # commands to be run on compute nodes
            script = remote_header + rf"""
# gromacs decides for small systems to have less
#NT_ARG="-nt $SLURM_JOB_CPUS_PER_NODE"
NT_ARG=""
# bug in gromacs with sd and gpu https://gitlab.com/gromacs/gromacs/-/issues/3473
NB_ARG="-notunepme"

pushd equi1
    if [[ ! -f confout.gro ]]; then
        gmx grompp -p topol.top {tabulated_string}
        gmx mdrun $NT_ARG $NB_ARG
    fi
    rm -f \#*
popd

pushd equi2
    if [[ ! -f confout.gro ]]; then
        gmx grompp -p ../topol/topol.top {tabulated_string} -c ../equi1/confout.gro
        gmx mdrun $NT_ARG $NB_ARG
    fi
    rm -f \#*
popd

if [[ {calc_nvt} == True ]]; then
    pushd prod
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../topol/topol.top {tabulated_string} -c ../equi2/confout.gro
            gmx mdrun -x traj_comp.xtc $NT_ARG $NB_ARG
        fi
    popd
fi

if [[ {calc_dist} == True ]]; then
    pushd prod
        if [[ ! -f OW-HW.dist.new ]]; then
            csg_stat --top topol.tpr --options ../topol/settings.xml --trj traj_comp.xtc \
                --cg "{mapping_files}" --nt=$SLURM_JOB_CPUS_PER_NODE --ext dist.new
            #csg_stat --top topol.tpr --options ../topol/settings.xml --trj traj_comp.xtc \
                #--cg "{mapping_files}" --nt=$SLURM_JOB_CPUS_PER_NODE --include-intra --ext dist-incl.new
        fi
        rm -f \#*
    popd
fi

if [[ {calc_npt} == True ]]; then
    pushd npt-equi3
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../topol/topol.top {tabulated_string} -c ../equi2/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        fi
        rm -f \#*
    popd

    pushd npt-prod
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../topol/topol.top {tabulated_string} -c ../npt-equi3/confout.gro
            gmx mdrun -x traj_comp.xtc $NT_ARG $NB_ARG
        fi
    popd
fi

if [[ {calc_npt_dist} == True ]]; then
    pushd npt-prod
        if [[ ! -f OW-HW.dist.new ]]; then
            csg_stat --top topol.tpr --options ../topol/settings.xml --trj traj_comp.xtc \
                --cg "{mapping_files}" --nt=$SLURM_JOB_CPUS_PER_NODE --ext dist.new
            #csg_stat --top topol.tpr --options ../topol/settings.xml --trj traj_comp.xtc \
                #--cg "{mapping_files}" --nt=$SLURM_JOB_CPUS_PER_NODE --include-intra --ext dist-incl.new
        fi
        rm -f \#*
    popd
fi

if [[ {calc_npt} == True ]]; then
    pushd npt-prod
        if [[ ! -f msd-SOL.xvg ]]; then
            gmx msd -s topol.tpr -f traj_comp.xtc -o msd-SOL.xvg <<< 'SOL' > msd-SOL.txt
        fi
        rm -f \#*
    popd
fi

#if [[ {therm_exp} == True ]]; then
    #for dir in therm-exp-*; do
        #pushd $dir
            #if [[ ! -f confout.gro ]]; then
                #gmx grompp -p ../topol/topol.top {tabulated_string} -c ../npt-equi3/confout.gro
                #gmx mdrun -x traj_comp.xtc $NT_ARG $NB_ARG
            #fi
        #popd
    #done
#fi

if [[ {calc_dos} == True ]]; then
    if [[ ! -f npt-prod-vel/dos/dos.json ]]; then
        pushd npt-prod-vel
            gmx grompp -p ../topol/topol.top {tabulated_string} -c ../npt-equi3/confout.gro
            gmx mdrun $NT_ARG $NB_ARG -o $JOBTMP/traj-dos.trr
        popd

        pushd npt-prod-vel/dos
            ~/bin/dos-calc params.json $JOBTMP/traj-dos.trr -v
            rm -f $JOBTMP/traj-dos.trr
        popd
    fi
fi

""" + remote_footer

            jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
            print(jobid)
            # only remember real submissions; identity test is the correct check for None
            if jobid is not None:
                jobids.append(jobid)
run_md()

### check job status

In [None]:
# Poll the cluster for the submitted jobs and keep the list the helper returns
# (presumably the ids that have not completed yet; confirm in check_job_stati).
jobids = check_job_stati(jobids, remote_host)

### copy results from cluster

In [None]:
def copy_from_cluster():
    """Pull result files of every MD system from the remote host into its local directory.

    A failing transfer (``subprocess.CalledProcessError``) is reported and the
    loop continues with the next system.
    """
    # only the remote base directory is used here; header and footer are ignored
    remote_dir_base, _header, _footer = gen_remote_stuff(remote_host, 'enzo')
    result_patterns = (
        "*/*.edr",
        "*prod/*.dist*.new*",
        "*/confout.gro",
        "*/topol.tpr",
        "*/msd-SOL.*",
        "*/dos/dos.json",
    )
    for system in system_generator(*systems_md):
        sys_name = system['name']
        print(f"system {sys_name}")
        remote_dir = os.path.join(remote_dir_base, sys_name)
        with WorkingDir(os.path.join(sys_name)):
            try:
                # hand over a fresh list for every system
                gt.remote.pull_files(list(result_patterns), remote_host, remote_dir)
            except subprocess.CalledProcessError:
                print('..rsync failed..')
copy_from_cluster()

## equilibration check

In [None]:
def run_cell():
    """Run the equilibration check on the 'Volume' term of npt-equi3 for every MD system.

    The check is best effort: a system for which it fails (e.g. no energy file
    yet) is reported and skipped instead of aborting the loop.
    """
    for system in system_generator(*systems_md):
        print(f"system {system['name']}")
        with WorkingDir(system['name']):
            try:
                check_equi(["Volume"], edr_file="npt-equi3/ener.edr", safe_factor=2.0)
            except Exception as err:
                # `except Exception` (not a bare except) keeps Ctrl-C working,
                # and the message makes a skipped system visible
                print(f"..equilibration check skipped: {err!r}..")
run_cell()

# Analysis

## RMSD table for paper

In [None]:
def gen_df_rmsd():
    """Create the empty RMSD table and the label translations used when printing it.

    Returns a tuple ``(df_rmsd, index_translation_dict, columns_translation_dict)``:
    an empty DataFrame with one row per force field tagged 'conc-range' and one
    column per property, a mapping force-field name -> short name, and a
    mapping property key -> LaTeX column header.
    """
    ff_names = [ff['name'] for ff in force_fields.values() if 'conc-range' in ff['tags']] # and 'fit' not in ff['tags']]
    index_translation_dict = {ff_name: ff_short_names[ff_name] for ff_name in ff_names}
    columns_translation_dict = {
        'rdf': r'{$\Delta_\text{RDF}$}',
        'density': r'{$\Delta_\rho$ (\si{\gram\per\ml})}',
        'diffusion': r'{$\Delta_{D/D_0}$}',
        'osmotic': r'{$\Delta_\phi$}',
    }
    df_rmsd = pd.DataFrame(index=ff_names, columns=tuple(columns_translation_dict))
    return df_rmsd, index_translation_dict, columns_translation_dict

df_rmsd, index_rmsd_transl, columns_rmsd_transl = gen_df_rmsd()
# alternative that only refreshes the translations and keeps an existing df_rmsd:
#_, index_rmsd_transl, columns_rmsd_transl = gen_df_rmsd()
df_rmsd

In [None]:
df_rmsd

In [None]:
# persist the RMSD table in the current directory so it survives a kernel restart
df_rmsd.to_pickle('df_rmsd.pkl')

In [None]:
# restore a previously pickled RMSD table
# NOTE: unpickling can execute arbitrary code; only load files you created yourself
df_rmsd = pd.read_pickle('df_rmsd.pkl')

In [None]:
# now collect data from below, come back, and execute this cell
def show_df_rmsd(df_rmsd):
    """Print the RMSD table as LaTeX and return the relabelled copy.

    Index and column labels are translated with the notebook-wide
    ``index_rmsd_transl`` / ``columns_rmsd_transl`` mappings; the passed frame
    itself is left untouched.
    """
    table = df_rmsd.rename(index=index_rmsd_transl, columns=columns_rmsd_transl)

    two_decimals = '{:4.2f}'.format
    three_decimals = '{:5.3f}'.format
    # one formatter per column; only the second column gets three decimals
    formatters = [two_decimals, three_decimals, two_decimals, two_decimals]

    latex = table.astype(float).to_latex(
        escape=False,
        formatters=formatters,
        column_format=r"l S[table-format=2.2] S[table-format=2.3] S[table-format=2.3] S[table-format=2.3]",
    )
    print(latex)
    return table

show_df_rmsd(df_rmsd);

## distributions and potentials

In [None]:
def nb_interactions_from_settings(settings_file):
    """Read the non-bonded interactions from an XML settings file.

    Every ``non-bonded`` element directly below the root yields one dict with
    the string entries 'name', 'type1', 'type2' and the float entries 'min',
    'max', 'max_intra', 'step', taken from the child elements of the same name.
    Returns the dicts as a list in document order.
    """
    text_fields = ('name', 'type1', 'type2')
    float_fields = ('min', 'max', 'max_intra', 'step')

    interactions = []
    for nb_node in ET.parse(settings_file).getroot().findall('non-bonded'):
        entry = {field: nb_node.find(field).text for field in text_fields}
        for field in float_fields:
            entry[field] = float(nb_node.find(field).text)
        interactions.append(entry)
    return interactions

### load distributions NPT

In [None]:
# cache of NPT results, keyed by (system name, interaction name, quantity)
npt_system_interaction_dict = {}

def load_npt_dist_pot():
    """Load the NPT distributions of all systems tagged 'npt-dist' into the cache.

    For every non-bonded interaction listed in the system's topol/settings.xml
    the file npt-prod/<interaction>.dist.new is read; its first column is
    stored under the quantity 'r' and its second under 'g'.
    """
    npt_dist_systems = (s for s in system_generator(*systems_md) if 'npt-dist' in s['tags'])
    for system in npt_dist_systems:
        sys_name = system['name']
        print(f"system {sys_name}")
        working_dir = os.path.join(sys_name)

        with WorkingDir(working_dir):
            nb_interactions = nb_interactions_from_settings('topol/settings.xml')

        for nb in nb_interactions:
            nb_name = nb['name']
            print(nb_name, end='\t')
            #g_incl_file = f"npt-prod/{nb_name}.dist-incl.new"
            with WorkingDir(working_dir):
                # read as strings first, convert the needed columns afterwards
                table = np.loadtxt(f"npt-prod/{nb_name}.dist.new", dtype=str, comments=['#', '@'])
            r, g = (table[:, column].astype(float) for column in (0, 1))

            npt_system_interaction_dict[(sys_name, nb_name, 'r')] = r
            npt_system_interaction_dict[(sys_name, nb_name, 'g')] = g
        print('')

load_npt_dist_pot()

### calculate NPT coordination numbers

In [None]:
def get_first_peak_coordination(r, g, rho):
    """Locate the first peak of a distribution and integrate it to a coordination number.

    Parameters
    ----------
    r, g : array-like
        Sample positions and distribution values (same length).
    rho : float
        Number density that scales the integral.

    Returns
    -------
    (ndx_peak, ndx_min, n_coord)
        Index of the first peak, index where the integration stops (position
        of the smallest value between the first peak and the first minimum
        after it, the minimum itself excluded), and
        ``4 * pi * rho * integral(g * r**2)`` over ``r[:ndx_min]``.
        ``(None, None, None)`` if no peak or no subsequent minimum with a
        prominence of at least 0.02 is found, or if the input is malformed.
    """
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; work with either
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    try:
        r = np.asarray(r, dtype=float)
        g = np.asarray(g, dtype=float)
        peaks, _ = signal.find_peaks(g, prominence=0.02)
        mins, _ = signal.find_peaks(-g, prominence=0.02)
        ndx_peak = peaks[0]
        # a minimum in front of the first peak must not be used as its end
        first_min_after_peak = mins[mins > ndx_peak][0]
        ndx_min = ndx_peak + np.argmin(g[ndx_peak:first_min_after_peak])
        integrand = g[:ndx_min] * r[:ndx_min]**2
        n_coord = 4 * np.pi * rho * trapezoid(integrand, x=r[:ndx_min])
    except (IndexError, ValueError, TypeError):
        # no peak / no minimum / unusable input: signal "not available" to the caller
        return None, None, None
    return ndx_peak, ndx_min, n_coord

In [None]:
def calc_coord_numbers():
    """Compute first-peak coordination numbers for all systems tagged 'npt-dist'.

    The mean box volume is extracted from npt-prod/ener.edr with ``gmx energy``.
    For every non-bonded interaction the cached 'r' and 'g' arrays are
    integrated once with the number density of type1 and once with that of
    type2. The result ``(ndx_peak, ndx_min, n_coord1, n_coord2)`` is stored in
    ``npt_system_interaction_dict`` under the quantity 'n_coord'.
    """
    for system in (s for s in system_generator(*systems_md) if 'npt-dist' in s['tags']):
        print(f"system {system['name']}")

        # A private temporary directory instead of mkstemp(): no file descriptor
        # is left open, the file is removed even when a step fails, and gmx
        # creates the output itself instead of meeting a pre-existing empty file.
        with tempfile.TemporaryDirectory() as tmpdir:
            volume_file = os.path.join(tmpdir, 'volume.xvg')
            with WorkingDir(system['name']):
                nb_interactions = nb_interactions_from_settings('topol/settings.xml')
                run_bash(f"gmx energy -f npt-prod/ener.edr -o {volume_file} <<< 'Volume'")
            data, _ = gt.xvg.load(volume_file)
        volume = data['Volume'].mean()

        for nb in nb_interactions:
            nb_name = nb['name']

            # load data
            r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
            g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]

            # coordination number, once per partner density
            num1 = gt.moltypes.count_atomname(system['moltypes'], nb['type1'])
            num2 = gt.moltypes.count_atomname(system['moltypes'], nb['type2'])
            rho1 = num1 / volume
            rho2 = num2 / volume
            _, _, n_coord1 = get_first_peak_coordination(r, g, rho1)
            ndx_peak, ndx_min, n_coord2 = get_first_peak_coordination(r, g, rho2)
            npt_system_interaction_dict[(system['name'], nb_name, 'n_coord')] = (ndx_peak, ndx_min, n_coord1, n_coord2)
calc_coord_numbers()

### plot all NPT distributions

In [None]:
def plot_npt_dist():
    """Plot the cached NPT g(r) of selected interactions, force fields and mixing ratios.

    One figure is shown per (system, interaction). If coordination data is
    available for the pair, the first peak is annotated with both
    coordination numbers and the integration limit is marked by a dotted
    vertical line.
    """

    nb_interaction_names_to_show = [
        #'OW-LI',
        #'OW-NA',
        #'OW-K',
        #'OW-CA',
        #'OW-CL',
        #'OW-OW',
        #'OW-HW',
        'CA-CL',
    ]

    ff_to_show = [
        'opls-co0.9tc',
        'netz-co0.9tc',
        'eccr1-co1.2',
        'madrid-co1.0tc',
        #'iff-altern5-eccr1-co1.2-nopc',
        #'iff-altern5-netz-co0.9-nopc'
    ]

    molar_mixing_ratios_to_show = [
        0.005,
        #0.01,
        #0.02,
        #0.03,
        #0.04,
        #0.05,
        #0.1,
    ]

    with mpl.rc_context(rc={'figure.dpi': 150}):
        for system in (s for s in system_generator(*systems_md) if 'npt-dist' in s['tags']):
            if system['force-field']['name'] not in ff_to_show:
                continue
            if system['molar-mixing-ratio'] not in molar_mixing_ratios_to_show:
                continue
            print(f"system {system['name']}")
            with WorkingDir(system['name']):
                nb_interactions = nb_interactions_from_settings('topol/settings.xml')

            for nb in [nb for nb in nb_interactions if nb['name'] in nb_interaction_names_to_show]:
                nb_name = nb['name']

                # load data
                r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
                g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]

                # plot
                fig, ax = plt.subplots(figsize=(3, 2))
                ax.plot(r, g, label='g(r)')

                # coordination number; the entry is missing when it was never
                # calculated and all-None when no first peak could be found
                ndx_peak, ndx_min, n_coord1, n_coord2 = npt_system_interaction_dict.get(
                    (system['name'], nb_name, 'n_coord'), (None, None, None, None))
                if None in (ndx_peak, ndx_min, n_coord1, n_coord2):
                    print('..no coordination number available..')
                else:
                    ax.text(r[ndx_peak], g[ndx_peak], f"{n_coord1:.4f}, {n_coord2:.4f}", ha='center', va='bottom')
                    ax.axvline(r[ndx_min], linestyle=':', color='k')

                # print numbers (taken from the global maximum of g)
                print('first peak height:', max(g))
                print('first peak postition:', r[np.argmax(g)])

                ax.set_xlim(0)
                ax.set_ylim(0, max(g)+0.2)
                ax.set_xlabel("r / nm")
                ax.set_ylabel("g(r)")
                # NOTE(review): both markers are drawn at the same cut-off, so the
                # second overlays the first; kept as is to leave the figure unchanged
                ax.vlines(system['force-field']['cut-off'], 0.1, 0.9, linestyles='--', color='r')
                ax.vlines(system['force-field']['cut-off'], 0.1, 0.9, linestyles=':', color='orange')
                ax.set_title(nb_name)
                #ax.legend()
                fig.tight_layout()
                plt.show()
plot_npt_dist()

### plot NPT coordination numbers

In [None]:
def plot_npt_coordnr():
    """Plot first-peak coordination numbers against the molar mixing ratio.

    The coordination numbers (``n_coord1`` of the cached 'n_coord' entries)
    of all systems tagged 'npt-dist' are collected in a DataFrame indexed by
    (system type, force field, mixing ratio). One figure per interaction is
    shown with one line per force field.
    """
    systems_to_show = [s for s in system_generator(*systems_md) if 'npt-dist' in s['tags']]

    nb_interaction_names_to_show = {
        #'OW-CL': {'sys-types': ('water-cacl2_', 'water-kcl', 'water-licl', 'water-nacl')},
        'OW-CL': {'sys-types': ('water-nacl',)},
        'OW-CA': {'sys-types': ('water-cacl2_',)},
        'OW-K': {'sys-types': ('water-kcl',)},
        'OW-LI': {'sys-types': ('water-licl',)},
        'OW-NA': {'sys-types': ('water-nacl',)},
    }

    force_fields_to_show = [
        'opls-co0.9tc',
        'eccr1-co1.2',
        'netz-co0.9tc',
        'madrid-co1.0tc',
        'iff-altern5-eccr1-co1.2-nopc',
        'iff-altern5-netz-co0.9-nopc',
    ]
    r_to_show = list({s['molar-mixing-ratio'] for s in systems_to_show})
    system_types_to_show = list({s['type']['name'] for s in systems_to_show})

    df_ncoord = pd.DataFrame(index=pd.MultiIndex.from_product((system_types_to_show, force_fields_to_show, r_to_show)),
                             columns=list(nb_interaction_names_to_show))

    for system in systems_to_show:
        for nb_name in nb_interaction_names_to_show:
            # load data; a system without this interaction simply has no entry
            try:
                ndx_peak, ndx_min, n_coord1, n_coord2 = npt_system_interaction_dict[(system['name'], nb_name, 'n_coord')]
                df_ncoord.at[(system['type']['name'], system['force-field']['name'], system['molar-mixing-ratio']), nb_name] = n_coord1
            except KeyError:
                # only the "no data" case is skipped; other errors surface
                #print('.. error ..')
                pass

    print(df_ncoord)
    with mpl.rc_context(rc=mpl_rc_global):
        for nb_name, _nb in nb_interaction_names_to_show.items():
            fig, ax = plt.subplots(figsize=(4, 2.5), dpi=300)
            for ff_name in force_fields_to_show:
                for _sys_type in _nb['sys-types']:
                    # plot
                    data = df_ncoord.loc[(_sys_type, ff_name, slice(None)), nb_name]
                    data = data.sort_index(level=2)
                    x = data.index.get_level_values(2)
                    y = np.array(data)
                    #marker = 'so<P'[f%4]
                    marker = '.'
                    linestyle = ff_linestyles[ff_name]
                    label = ff_short_names[ff_name]
                    color = ff_colors[ff_name]
                    ax.plot(x, y, marker=marker, linestyle=linestyle, label=label, color=color)

            ax.set_xlim(0)
            #ax.set_ylim(0)
            ax.set_xlabel(r"$r$")
            ax.set_ylabel("coordination number")
            # 'OW-CL' becomes 'CL coordinated by OW'
            ax.set_title(' coordinated by '.join(nb_name.split('-')[::-1]))
            ax.legend(frameon=False)
            fig.tight_layout()
            #fig.savefig(os.path.join('..', 'figures', f"ncoord_{nb_name}.png"), dpi=300)
            plt.show()
plot_npt_coordnr()

### compare distributions of different force fields or systems

In [None]:
# literature data
# First RDF peak per interaction, one entry per literature source.
# 'peak' is an (r, g(r)) pair: plot_comparison_dist() scatters peak[0] on its
# "r / nm" axis and peak[1] on its g(r) axis; 'source-short' is the legend label.
# 'source-zotero' is presumably the citation key of the source (not used in the plots).
first_peak_rdf_lit_data = {
    'OW-NA': [
        {'source-zotero': "bankuraHydrationStructureNa2014", 'source-short': 'AIMD$^1$', 'peak': (0.241, 5.4)},
        {'source-zotero': "dingInitioMolecularDynamics2019", 'source-short': 'AIMD$^2$', 'peak': (0.25, 4.2)},
    ],
    'OW-K': [
        {'source-zotero': "tongraarBornOppenheimerInitio1998", 'source-short': 'AIMD$^3$', 'peak': (0.2855, 3.43)},
        {'source-zotero': "liuHydrationCoordinationSolvation2010", 'source-short': 'AIMD$^4$', 'peak': (0.278, 3.65)},
        {'source-zotero': "rowleySolvationStructureNa2012", 'source-short': 'AIMD$^5$', 'peak': (0.275, 3.83)},
        {'source-zotero': "bankuraHydrationStructureNa2014", 'source-short': 'AIMD$^1$', 'peak': (0.277, 3.42)},
    ],
}

In [None]:
def safe_xlogx(x):
    """Return ``x * log(x)`` element-wise, with the value at ``x == 0`` defined as 0.

    Floating-point warnings from evaluating the logarithm at zero (or at
    negative values, which still yield nan) are suppressed.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        product = x * np.log(x)
    # overwrite the nan produced by 0 * (-inf) with the limit value
    return np.where(x == 0.0, 0.0, product)

In [None]:
def plot_comparison_dist(show_lit_data=True, show_twobody_entropy=False, show_rdf_coulomb_energy=False):
    """Compare the g(r) of several force fields per salt and interaction.

    For every selected system combination and every interaction listed for
    it, one figure is drawn with the cached NVT and/or NPT distributions of
    each system, the matching reference distributions from files, and
    optionally literature first-peak positions. Figures are saved as PDF in
    ../figures and as PNG in /tmp.

    Parameters
    ----------
    show_lit_data : bool
        Add the entries of ``first_peak_rdf_lit_data`` as scatter points.
    show_twobody_entropy : bool
        Append an integral over ``(g ln g - g + 1) r^2`` (first 200 samples)
        to each legend label.
    show_rdf_coulomb_energy : bool
        Append an integral over ``r g`` (first 80 samples) to each legend label.
    """
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; work with either
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz

    system_combinations_to_compare = [
        {'name': 'CaCl2 for Nico', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    #'water5000-cacl2_50/madrid-co1.0tc',
                                    'water5000-cacl2_50/netz-co0.9tc',
                                ]],
         'nb-interactions-to-show': [
             'OW-CA',
         ],
         'special-label': {'netz-co0.9tc': 'LJ'},
        },
        {'name': 'all CaCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-cacl2_50/opls-co0.9tc',
                                    #'water5000-cacl2_50/opls-co1.0-halftabulated',
                                    'water5000-cacl2_50/eccr1-co1.2',
                                    'water5000-cacl2_50/netz-co0.9tc',
                                    #'water5000-cacl2_50/netz-co0.9',
                                    'water5000-cacl2_50/madrid-co1.0tc',
                                    'water5000-cacl2_50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-cacl2_50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-cacl2_50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-cacl2_50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-CA',
             'OW-CL',
             #'CA-CL',
         ]},
        {'name': 'all KCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-kcl50/opls-co0.9tc',
                                    #'water5000-kcl50/opls-co1.0-halftabulated',
                                    'water5000-kcl50/eccr1-co1.2',
                                    'water5000-kcl50/netz-co0.9tc',
                                    'water5000-kcl50/madrid-co1.0tc',
                                    'water5000-kcl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-kcl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-kcl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-kcl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-K',
             'OW-CL',
             #'K-CL',
         ]},
        {'name': 'all LiCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-licl50/opls-co0.9tc',
                                    #'water5000-licl50/opls-co1.0-halftabulated',
                                    'water5000-licl50/eccr1-co1.2',
                                    'water5000-licl50/netz-co0.9tc',
                                    'water5000-licl50/madrid-co1.0tc',
                                    'water5000-licl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-licl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-LI',
             'OW-CL',
             #'LI-CL'
         ]},
        {'name': 'all NaCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl50/opls-co0.9tc',
                                    #'water5000-nacl50/opls-co1.0-halftabulated',
                                    'water5000-nacl50/eccr1-co1.2',
                                    'water5000-nacl50/netz-co0.9tc',
                                    'water5000-nacl50/madrid-co1.0tc',
                                    'water5000-nacl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-nacl50/iff-altern5-netz-co0.9-nopc',
                                    #'water5000-nacl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    #'water5000-nacl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-NA',
             'OW-CL',
             #'NA-CL',
         ]},
        {'name': 'Ca-Cl 25', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-cacl2_25/opls-co0.9tc',
                                    'water5000-cacl2_25/eccr1-co1.2',
                                    'water5000-cacl2_25/netz-co0.9tc',
                                    'water5000-cacl2_25/madrid-co1.0tc',
                                    'water5000-cacl2_25/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-cacl2_25/iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'CA-CL',
         ]},
        {'name': 'Ca-Cl 50', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-cacl2_50/opls-co0.9tc',
                                    'water5000-cacl2_50/eccr1-co1.2',
                                    'water5000-cacl2_50/netz-co0.9tc',
                                    'water5000-cacl2_50/madrid-co1.0tc',
                                    'water5000-cacl2_50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-cacl2_50/iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'CA-CL',
         ]},
    ][5:7]  # CHANGE ME

    # reference distributions from files; 'show-for' lists the interactions they belong to
    additional_dists = [
        {'name': 'AIMD', 'path': '../received/azade-aimd/ca-cl2/ca-o-gofr-0.04.dat', 'show-for': ['OW-CA']},
        {'name': 'AIMD Azade', 'path': '../received/azade-aimd/k-cl/k-o-gofr-0.04.dat', 'show-for': ['OW-K']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/li-cl/li-o-gofr-0.04.dat', 'show-for': ['OW-LI']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/na-o-gofr-0.04.dat', 'show-for': ['OW-NA']},
        #{'name': 'Scaled OPLS (data from Azade)', 'path': '../received/NaCl-RDFs/scaled/na-o-gofr.dat', 'show-for': ['OW-NA']},
        {'name': 'AIMD', 'path': '../received/NaCl-RDFs/aimd/cl-o-gofr.dat', 'show-for': ['OW-CL']},
        #{'name': 'Scaled OPLS (data from Azade)', 'path': '../received/NaCl-RDFs/scaled/cl-o-gofr.dat', 'show-for': ['OW-CL']},
        {'name': 'AIMD', 'path': '../received/NaCl-RDFs/aimd/na-cl-gofr.dat', 'show-for': ['NA-CL']},
        #{'name': 'Scaled OPLS (data from Azade)', 'path': '../received/NaCl-RDFs/scaled/na-cl-gofr.dat', 'show-for': ['NA-CL']},
        {'name': 'AIMD', 'path': '../received/NaCl-RDFs/aimd/na-h-gofr.dat', 'show-for': ['HW-NA']},
        #{'name': 'Scaled OPLS (data from Azade)', 'path': '../received/NaCl-RDFs/scaled/na-h-gofr.dat', 'show-for': ['HW-NA']},
        {'name': 'AIMD', 'path': '../received/NaCl-RDFs/aimd/cl-h-gofr.dat', 'show-for': ['HW-CL']},
        #{'name': 'Scaled OPLS (data from Azade)', 'path': '../received/NaCl-RDFs/scaled/cl-h-gofr.dat', 'show-for': ['HW-CL']},
    ]

    # per-interaction x-limits; only used by the disabled set_xlim call below
    plot_params_nb = {
        'OW-CA': {'xlim': (0.19, 0.55)},
        'OW-K': {'xlim': (0.22, 0.55)},
        'OW-LI': {'xlim': (0.15, 0.35)},
        'OW-NA': {'xlim': (0.19, 0.55)},
        'OW-CL': {'xlim': (0.25, 0.55)},
        'OW-OW': {'xlim': (0.20, 0.55)},
        'OW-HW': {'xlim': (0.05, 0.55)},
        'NA-CL': {'xlim': (0.20, 0.55)},
        'HW-NA': {'xlim': (0.15, 0.55)},
        'HW-CL': {'xlim': (0.15, 0.55)},
        'CA-CL': {'xlim': (0.20, 0.55)},
        'K-CL': {'xlim': (0.20, 0.55)},
        'LI-CL': {'xlim': (0.15, 0.55)},
    }

    lit_colors = ['darkorange', 'crimson', 'royalblue', 'teal']
    lit_markers = "<>s."

    with mpl.rc_context(rc={
        'figure.dpi': 200,
        #'legend.fontsize': 6,
        'legend.labelspacing': 0.2,
        'legend.handlelength': 2.5,
    }):
        for system_combination in system_combinations_to_compare:
            print(f"system-combination {system_combination['name']}")

            for nb_name in system_combination['nb-interactions-to-show']:
                print(f"  interaction {nb_name}")
                fig, ax = plt.subplots(figsize=(3.5, 2), constrained_layout=True)
                #fig.set_constrained_layout_pads(w_pad=0, h_pad=0, hspace=0., wspace=0.)

                for s, system in enumerate(system_combination['system-combination']):
                    print(f"    system {system['name']}")
                    ff_name = system['force-field']['name']

                    label = ff_short_names.get(ff_name, ff_name)
                    if 'special-label' in system_combination:
                        # force fields without a special label keep their default one
                        label = system_combination['special-label'].get(ff_name, label)
                    linestyle = ff_linestyles[ff_name]
                    #linestyle = '-'
                    #linestyle = ['-', '--', ':', '-.'][s%4]
                    color = ff_colors[ff_name]
                    if system_combination['show-nvt']:
                        try:
                            r = system_interaction_dict[(system['name'], nb_name, 'r')]
                            g = system_interaction_dict[(system['name'], nb_name, 'g')]
                            ax.plot(r, g, linestyle=linestyle, label=label, color=color)
                        except KeyError:
                            print('..no data..')

                    if system_combination['show-npt']:
                        try:
                            r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
                            g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]
                            if show_twobody_entropy or show_rdf_coulomb_energy:
                                # NOTE(review): 5150 is hard-coded; presumably the particle
                                # count of one specific system, so confirm for the others
                                dens = 5150 / df_dens.at[(system['type']['name'], ff_name, system['molar-mixing-ratio'], 0), 'volume']
                                print('density:', dens)
                                #x_o_x_ion = system['mole-fraction'] / 2 * (1 - system['mole-fraction'])
                                # hmmm
                                x_o_x_ion = 1
                                print('x_o_x_ion:', x_o_x_ion)
                            if show_twobody_entropy:
                                cut = 200
                                S_ex = -4 * np.pi * oconst.k_gro*1000 * dens * x_o_x_ion * trapezoid(x=r[:cut], y=((safe_xlogx(g) - g + 1) * r**2)[:cut])
                                label += f" {S_ex:.3f}"
                            if show_rdf_coulomb_energy:
                                cut = 80
                                print('r[cut]:', r[cut])
                                U_coulomb = 4 * np.pi * oconst.f_gro * dens * x_o_x_ion * trapezoid(x=r[:cut], y=r[:cut] * g[:cut])  # y = 1/r * r² * g
                                label += f" {U_coulomb:.0f}"
                            ax.plot(r, g, linestyle=linestyle, label=label, color=color)
                        except KeyError:
                            print('..no data..')


                # plot additional
                # NOTE(review): dens and x_o_x_ion used below are the values left over
                # from the last system of the loop above; confirm this is intended
                for a, add_dist in enumerate((ad for ad in additional_dists if nb_name in ad['show-for'])):
                    data = np.loadtxt(add_dist['path'])
                    x = data.T[0]/10  # votca and vmd both give g(r) value for bin symmetrically around r
                    y = data.T[1]
                    linestyle = ['--', '-.'][(a+1)%2]
                    label=add_dist['name']
                    if show_twobody_entropy:
                        cut = 200
                        S_ex = -4 * np.pi * oconst.k_gro*1000 * dens * x_o_x_ion * trapezoid(x=x[:cut], y=((safe_xlogx(y) - y + 1) * x**2)[:cut])
                        label += f" {S_ex:.3f}"
                    if show_rdf_coulomb_energy:
                        cut = 80
                        # report the cut position of this distribution, not of the last system
                        print('r[cut]:', x[cut])
                        U_coulomb = 4 * np.pi * oconst.f_gro * dens * x_o_x_ion * trapezoid(x=x[:cut], y=x[:cut] * y[:cut])  # y = 1/r * r² * g
                        label += f" {U_coulomb:.0f}"
                    ax.plot(x, y, linestyle=linestyle, color='k', label=label)

                # plot literature
                if show_lit_data:
                    if nb_name in first_peak_rdf_lit_data:
                        for d, data in enumerate(first_peak_rdf_lit_data[nb_name]):
                            peak = data['peak']
                            label = data['source-short']
                            # cycle so that more than four sources cannot raise IndexError
                            color = lit_colors[d % len(lit_colors)]
                            marker = lit_markers[d % len(lit_markers)]
                            ax.scatter([peak[0]], [peak[1]], label=label, marker=marker, color=color)

                ax.set_title(nb_plot_name[nb_name])
                ax.set_ylim(0, 6)

                #ax.set_xlim(plot_params_nb.get(nb_name, {'xlim': (0, 2)})['xlim'])
                ax.set_xlabel(r"$r$ / nm")
                ax.set_ylabel(r"$g(r)$")
                ax.legend(frameon=False, labelspacing=0.1, fontsize=8, loc='upper right')
                fig.savefig(os.path.join('../figures', f"dist_{nb_name}_{system_combination['name'].replace(' ', '-')}.pdf"))
                fig.savefig(os.path.join('/tmp', f"dist_{nb_name}_{system_combination['name'].replace(' ', '-')}.png"), transparent=True, dpi=300)
                plt.show()
#plot_comparison_dist(show_lit_data=False, show_twobody_entropy=True, show_rdf_coulomb_energy=False)
plot_comparison_dist(show_lit_data=False, show_twobody_entropy=False, show_rdf_coulomb_energy=False)

### literature values for the first RDF peak positions

In [None]:
# Literature position of the first RDF peak, keyed by ion name.
# plot_comparison_dist_paper looks an entry up with the second half of an
# interaction name (e.g. 'OW-LI' -> 'LI') and draws it as a vertical line on
# its r / nm axis, so the values are presumably in nm.
# NOTE(review): the source of these numbers is not recorded here -- TODO cite.
lit_rdf_peak1 = {
    'LI': 0.208,
    'NA': 0.2356,
    'K': 0.2798,
    'CA': 0.2422,
    'CL': 0.3187,
}

### appendix plot distributions

In [None]:
def plot_comparison_dist_paper(show_lit_data=False, show_exp_data=True):
    """Plot the ion-oxygen RDFs of all force fields per salt and save the figure.

    Builds a 4x2 grid of axes: one row per entry of
    ``system_combinations_to_compare`` and one column per interaction listed in
    its ``'nb-interactions-to-show'``. Every system of a combination is drawn
    into each axes of its row, followed by the additional g(r) files that are
    registered for the interaction. A shared legend is placed above the figure,
    which is written to ``../figures/dist_all.pdf`` and shown.

    Reads the notebook-level objects ``system_generator``, ``systems_md``,
    ``system_interaction_dict``, ``npt_system_interaction_dict``,
    ``ff_short_names``, ``ff_colors``, ``first_peak_rdf_lit_data``,
    ``lit_rdf_peak1``, ``nb_plot_name``, ``sys_type_short_names`` and
    ``mpl_rc_global``.

    Args:
        show_lit_data: If true, mark the peaks from ``first_peak_rdf_lit_data``
            with crosses, using one color per ``'source-zotero'`` entry.
        show_exp_data: If true, draw a vertical line at ``lit_rdf_peak1[ion]``
            for every ion that has an entry there.
    """
    system_combinations_to_compare = [
        {'name': 'all LiCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-licl50/opls-co0.9tc',
                                    'water5000-licl50/eccr1-co1.2',
                                    'water5000-licl50/netz-co0.9tc',
                                    'water5000-licl50/madrid-co1.0tc',
                                    'water5000-licl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-licl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-LI',
             'OW-CL',
         ]},
        {'name': 'all NaCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl50/opls-co0.9tc',
                                    'water5000-nacl50/eccr1-co1.2',
                                    'water5000-nacl50/netz-co0.9tc',
                                    'water5000-nacl50/madrid-co1.0tc',
                                    'water5000-nacl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-nacl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-nacl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-nacl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-NA',
             'OW-CL',
         ]},
        {'name': 'all KCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-kcl50/opls-co0.9tc',
                                    'water5000-kcl50/eccr1-co1.2',
                                    'water5000-kcl50/netz-co0.9tc',
                                    'water5000-kcl50/madrid-co1.0tc',
                                    'water5000-kcl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-kcl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-kcl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-kcl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-K',
             'OW-CL',
         ]},
        {'name': 'all CaCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-cacl2_50/opls-co0.9tc',
                                    'water5000-cacl2_50/eccr1-co1.2',
                                    'water5000-cacl2_50/netz-co0.9tc',
                                    'water5000-cacl2_50/madrid-co1.0tc',
                                    'water5000-cacl2_50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-cacl2_50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-cacl2_50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-cacl2_50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-CA',
             'OW-CL',
         ]},
    ]

    # extra g(r) curves read from file and drawn in black on top of the force-field curves
    additional_dists = [
        {'name': 'AIMD', 'path': '../received/azade-aimd/li-cl/li-o-gofr-0.04.dat', 'show-for': ['OW-LI']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/na-o-gofr-0.04.dat', 'show-for': ['OW-NA']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/k-cl/k-o-gofr-0.04.dat', 'show-for': ['OW-K']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/ca-cl2/ca-o-gofr-0.04.dat', 'show-for': ['OW-CA']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/cl-o-gofr-0.04.dat', 'show-for': ['OW-CL']},
    ]

    mpl_rc = {
        'legend.labelspacing': 0.2,
        'legend.handlelength': 1.8,
        'legend.columnspacing': 1.0,
        'legend.handletextpad': 0.5,
    }
    marker_colors = list(mpl.colors.TABLEAU_COLORS.values())
    marker_color_ndx = 0  # next unused index into marker_colors
    marker_color_dict = {}  # literature source -> index into marker_colors
    with plt.rc_context({**mpl_rc_global, **mpl_rc}):

        fig, axes = plt.subplots(figsize=(4.6, 5.6), nrows=4, ncols=2, sharex='col', constrained_layout=True, dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.05, h_pad=0.00, hspace=0.0, wspace=0.0)
        # the legend is shared by all axes, so every label is collected only once
        legend_handles, legend_labels = [], []

        for c, system_combination in enumerate(system_combinations_to_compare):
            print(f"system-combination {system_combination['name']}")
            axrow = axes[c]

            for n, nb_name in enumerate(system_combination['nb-interactions-to-show']):
                #print(f"  interaction {nb_name}")
                ax = axrow[n]

                for s, system in enumerate(system_combination['system-combination']):
                    #print(f"    system {system['name']}")

                    label = ff_short_names.get(system['force-field']['name'], system['force-field']['name'])
                    # Reset for every system: without this a system without data would
                    # register its label with the line of the previous system (or raise
                    # NameError if it is the very first one).
                    line = None
                    if system_combination['show-nvt']:
                        try:
                            r = system_interaction_dict[(system['name'], nb_name, 'r')]
                            g = system_interaction_dict[(system['name'], nb_name, 'g')]
                            linestyle = ['-', '--', ':', '-.'][s%4]
                            line, = ax.plot(r, g, linestyle=linestyle, label=label)
                        except KeyError:
                            print('..no data..')

                    if system_combination['show-npt']:
                        try:
                            r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
                            g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]
                            #linestyle = ff_linestyles[system['force-field']['name']]
                            linestyle = '-'
                            color = ff_colors[system['force-field']['name']]
                            line, = ax.plot(r, g, linestyle=linestyle, label=label, color=color, linewidth=1.0)
                        except KeyError:
                            print('..no data..')
                    if line is not None and label not in legend_labels:
                        legend_handles.append(line)
                        legend_labels.append(label)

                # plot additional
                for a, add_dist in enumerate((ad for ad in additional_dists if nb_name in ad['show-for'])):
                    data = np.loadtxt(add_dist['path'])
                    x = data.T[0]/10  # votca and vmd both give g(r) value for bin symmetrically around r
                    y = data.T[1]
                    linestyle = ['--', '-.'][a%2]
                    label = add_dist['name']
                    line, = ax.plot(x, y, linestyle=linestyle, color='k', label=label)
                    if label not in legend_labels:
                        legend_handles.append(line)
                        legend_labels.append(label)

                # plot literature
                if show_lit_data:
                    if nb_name in first_peak_rdf_lit_data:
                        for data in first_peak_rdf_lit_data[nb_name]:
                            peak = data['peak']
                            label = data['source-short']
                            zot = data['source-zotero']
                            # One color per source: the counter only advances when a new
                            # source is seen, so the palette is not used up by repeated points.
                            if zot not in marker_color_dict:
                                marker_color_dict[zot] = marker_color_ndx
                                marker_color_ndx += 1
                            color = marker_colors[marker_color_dict[zot]]
                            line, = ax.plot([peak[0]], [peak[1]], label=label, marker='x', linestyle='', zorder=10, color=color)
                            if label not in legend_labels:
                                legend_handles.append(line)
                                legend_labels.append(label)
                if show_exp_data:
                    # the ion is the second half of the interaction name, e.g. 'OW-LI' -> 'LI'
                    if (ion := nb_name.split('-')[1]) in lit_rdf_peak1:
                        ax.axvline(lit_rdf_peak1[ion])

                ax.set_ylim(0)
                #ax.set_xlim(0.15, 0.7)
                #ax.set_xlim({'OW-NA': (0.2, 0.49), 'OW-CL': (0.25, 0.44)}[nb_name])
                ax.set_xlim((0.25, 0.6) if nb_name == 'OW-CL' else (0.15, 0.55))
                #ax.set_title(nb_name)
                ax.text(.89, .85, nb_plot_name[nb_name],
                        horizontalalignment='right', transform=ax.transAxes)

                # `system` is the last system of this combination (left over from the loop above)
                axrow[0].set_ylabel(r"$g(r)$ in " + sys_type_short_names[system['type']['name']])
            axes[-1,0].set_xlabel(r"$r$ / nm")
            axes[-1,1].set_xlabel(r"$r$ / nm")
        #handles, labels = axes[0,0].get_legend_handles_labels()
        #legend_labels, legend_handles = zip(*sorted(zip(legend_labels, legend_handles), key=lambda t: t[0], reverse=False))
        # NOTE(review): hand-tuned legend order; it needs at least seven collected labels
        # and drops any label beyond the seventh -- confirm this is intended.
        order = np.array([0, 3, 1, 2, 4, 5, 6])
        legend_handles, legend_labels = [legend_handles[idx] for idx in order], [legend_labels[idx] for idx in order]
        ncol = 5 if show_lit_data else 4
        fig.legend(legend_handles, legend_labels, ncol=ncol, loc='lower center', bbox_to_anchor=(0.50, 1.00),)
        fig.savefig(os.path.join('../figures', "dist_all.pdf"), bbox_inches='tight')
        plt.show()
plot_comparison_dist_paper(show_lit_data=False, show_exp_data=False)

In [None]:
!cp -a ../figures/dist_all.pdf ~/research/output/ion-shortrange-paper/figures/

### toc plot distributions (not used)

In [None]:
def plot_toc_distributions():
    """Draw a bare OW-CA g(r) curve for the table-of-contents graphic.

    Plots the NPT RDF of the system 'water5000-cacl2_50/netz-co0.9tc' as a thin
    black line on axes that are switched off, writes it to /tmp/toc-dist.svg
    and shows it. Reads ``npt_system_interaction_dict`` and ``mpl_rc_global``
    from the notebook.
    """
    mpl_rc = {}
    with plt.rc_context({**mpl_rc_global, **mpl_rc}):

        fig, ax = plt.subplots(figsize=(2*2.54, 2*2.54), constrained_layout=True)
        fig.set_constrained_layout_pads(w_pad=0.00, h_pad=0.00, hspace=0.0, wspace=0.0)

        r = npt_system_interaction_dict[('water5000-cacl2_50/netz-co0.9tc', 'OW-CA', 'r')]
        g = npt_system_interaction_dict[('water5000-cacl2_50/netz-co0.9tc', 'OW-CA', 'g')]
        linestyle = '-'
        color = 'k'
        ax.plot(r, g, linestyle=linestyle, color=color, linewidth=1.0)
        ax.set_ylim(0)
        ax.set_xlim(0.0, 0.3)
        ax.set_xlabel(r"$r$ / nm")
        # only the curve itself is wanted: this hides frame, ticks and the label set above
        ax.set_axis_off()
        fig.savefig(os.path.join('/tmp', "toc-dist.svg"), bbox_inches='tight', format='svg')
        plt.show()
plot_toc_distributions()

### RMSD RDF

In [None]:
def rmsd_rdf():
    """Fill df_rmsd['rdf'] with each force field's RDF deviation from the reference.

    For every system tagged 'npt-dist' (generated with n_salts = (25,)) and each
    of its oxygen-ion interactions, the squared difference between the NPT g(r)
    and the reference g(r) is integrated up to 0.7 nm and divided by that
    length. The contributions are summed per force field, with OW-CL weighted
    by 1/4, and the square root of the sum is stored in ``df_rmsd``.
    """
    # reference g(r) file for each oxygen-ion interaction
    reference_dists = {
        'OW-LI': {'name': 'AIMD', 'path': '../received/azade-aimd/li-cl/li-o-gofr-0.04.dat',},
        'OW-NA': {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/na-o-gofr-0.04.dat',},
        'OW-K': {'name': 'AIMD', 'path': '../received/azade-aimd/k-cl/k-o-gofr-0.04.dat',},
        'OW-CA': {'name': 'AIMD', 'path': '../received/azade-aimd/ca-cl2/ca-o-gofr-0.04.dat',},
        'OW-CL': {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/cl-o-gofr-0.04.dat',},
    }
    ion_oxygen_pairs = ('OW-LI', 'OW-NA', 'OW-K', 'OW-CA', 'OW-CL',)
    max_r = 0.7  # upper integration limit, same unit as the stored r grid

    # accumulated weighted mean square deviation, keyed by (force-field name, 'rmsd')
    msd_sums = collections.defaultdict(lambda: 0)

    system_params = deepcopy(SYSTEM_PARAMS)
    system_params['n_salts'] = (25, )
    for system in system_generator(*systems_md, system_params=system_params):
        if 'npt-dist' not in system['tags']:
            continue

        with WorkingDir(os.path.join(system['name'])):
            nb_interactions = nb_interactions_from_settings('topol/settings.xml')

        ff_key = (system['force-field']['name'], 'rmsd')
        for nb in nb_interactions:
            nb_name = nb['name']
            if nb_name not in ion_oxygen_pairs:
                continue

            # simulated RDF, cut off at max_r
            r_full = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
            g_full = npt_system_interaction_dict[(system['name'], nb_name, 'g')]
            within_range = r_full <= max_r
            r = r_full[within_range]
            g = g_full[within_range]

            # reference RDF, interpolated onto the simulation grid
            ref_table = np.loadtxt(reference_dists[nb_name]['path'])
            r_ref = ref_table.T[0]/10  # votca and vmd both give g(r) value for bin symmetrically around r
            g_ref = ref_table.T[1]
            g_ref_on_grid = np.interp(r, r_ref, g_ref)

            # mean square deviation, made unitless by dividing by the integration length
            squared_diff = (g - g_ref_on_grid)**2
            msd = 1/max_r * np.trapz(x=r, y=squared_diff)

            if nb_name == 'OW-CL':
                weight = 1/4
            else:
                weight = 1.0
            msd_sums[ff_key] += weight * msd

    # force fields without any contribution end up with 0 because of the defaultdict
    for ff_name, ff in force_fields.items():
        if 'dummy' not in ff['tags']:
            df_rmsd.at[ff_name, 'rdf'] = np.sqrt(msd_sums[(ff_name, 'rmsd')])

rmsd_rdf()
df_rmsd

### calculate neutron scattering difference NaCl - LiCl

In [None]:
def fourier(r, f):
    """Compute the radial 3D FT of a radially symmetric function.

    The frequency grid is also returned. This function is isometric, meaning it
    can be used to calculate the FT and the inverse FT. That means inputs can
    also be k and f_hat, which results in r and f.

    The transform is evaluated as
    ``f_hat(k) = 2 / k * Δr * sum(r * f * sin(2 π k r))``. At ``k = 0`` this
    expression is 0/0, so its analytic limit ``4 π Δr sum(r² f)`` is used there.
    Without that, a grid starting at zero would return NaN in the first element
    and feeding the result back in would yield NaN everywhere.

    Args:
        r: Input grid (array-like). Must be evenly spaced. Can start at zero or
            at Δr, but nowhere else.
        f: Input function (array-like). Must have same length as r and
            correspond to its values.

    Returns:
        (k, f_hat): The reciprocal grid and the FT of f. Both have the same
        length as the input and start at zero or Δk, matching the input grid.

    Raises:
        ValueError: If r starts neither at zero nor at Δr.

    """
    r = np.asarray(r)
    f = np.asarray(f)
    Delta_r = r[1] - r[0]
    r0_added = False
    if np.isclose(r[0], Delta_r):
        # prepend the r = 0 point; r * f vanishes there, so f(0) = 0 is arbitrary
        r = np.concatenate(([0], r))
        f = np.concatenate(([0], f))
        r0_added = True
    elif not np.isclose(r[0], 0.0):
        raise ValueError('this function can not handle this input')
    # An odd transform length avoids the Nyquist frequency, where the imaginary
    # part of the FT is always zero. rfft zero-pads r * f up to n, so the output
    # has exactly len(r) frequencies for even and odd input lengths alike.
    n = 2 * len(r) - 1
    k = np.fft.rfftfreq(n=n, d=Delta_r)
    rf_hat_imag = np.imag(np.fft.rfft(r * f, n=n))
    f_hat = np.empty_like(rf_hat_imag)
    f_hat[1:] = -2 / k[1:] / 1 * Delta_r * rf_hat_imag[1:]
    # k -> 0 limit of the expression above: sin(2 π k r) / k -> 2 π r
    f_hat[0] = 4 * np.pi * Delta_r * np.sum(r**2 * f)
    if r0_added:
        # the input had no zero point, so the output gets none either
        f_hat = f_hat[1:]
        k = k[1:]
    return k, f_hat

In [None]:
def show_neutron_diff(convolute_sigma=None, zero_start_Delta_G=False, show_fourier=False):
    """Plot weighted RDF difference functions between paired NaCl and LiCl systems.

    For every selected system combination, each non-hydrogen partial g(r) is
    turned into ``sign * K_XY[name] * (g - 1)``, where the sign is -1 for the
    first and +1 for the second system of the combination. One figure per
    combination shows per-interaction sums of these terms; a shared figure shows
    the sum over all terms of each combination, optionally smoothed and/or
    Fourier transformed.

    Reads ``system_generator``, ``systems_md``, ``npt_system_interaction_dict``,
    ``nb_interactions_from_settings`` and ``fourier`` from the notebook.

    Args:
        convolute_sigma: If not None, the total difference function is smoothed
            with ``ndimage.gaussian_filter1d`` using this sigma (in grid points).
        zero_start_Delta_G: If true, each per-interaction curve is shifted so
            that its first value is zero.
        show_fourier: If true, the total difference function is passed through
            ``fourier`` before plotting and the x-range is widened to (0, 15).
    """

    # weight of each partial RDF in the difference function
    # NOTE(review): presumably neutron-scattering prefactors from the literature -- TODO cite source
    K_XY = {
        "OW-OW": 0.034091,
        "OW-CL": 0.008101,
        "CL-CL": 0.000481,
        "NA-NA": 0.0000691,
        "NA-CL": 0.000365,
        "OW-NA": 0.00307,
        "LI-LI": 0.0000189,
        "LI-CL": -0.00019,
        "OW-LI": -0.0016,
    }

    system_combinations_to_compare = [
        {'name': 'ECC 250',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl250/eccr1-co1.2',
                                    'water5000-licl250/eccr1-co1.2',
                                ]],
        },
        {'name': 'ECC IMC 250',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl250/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl250/iff-altern5-eccr1-co1.2-nopc',
                                ]],
        },
        {'name': 'HMN 250',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl250/netz-co0.9tc',
                                    'water5000-licl250/netz-co0.9tc',
                                ]],
        },
        {'name': 'HMN IMC 250',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl250/iff-altern5-netz-co0.9-nopc',
                                    'water5000-licl250/iff-altern5-netz-co0.9-nopc',
                                ]],
        },
        {'name': 'ECC 500',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl500/eccr1-co1.2',
                                    'water5000-licl500/eccr1-co1.2',
                                ]],
        },
        {'name': 'ECC IMC 500',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl500/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl500/iff-altern5-eccr1-co1.2-nopc',
                                ]],
        },
        {'name': 'HMN 500',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl500/netz-co0.9tc',
                                    'water5000-licl500/netz-co0.9tc',
                                ]],
        },
        {'name': 'HMN IMC 500',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl500/iff-altern5-netz-co0.9-nopc',
                                    'water5000-licl500/iff-altern5-netz-co0.9-nopc',
                                ]],
        },
    # the slice picks which half of the list is plotted: [0:4] are the "250" entries, [4:8] the "500" entries
    #][0:4]  # change me
    ][4:8]  # change me

    # interactions that replace each other between the NaCl and the LiCl system
    nbs_to_compare = [
        {"OW-NA", "OW-LI"},
        {"NA-CL", "LI-CL"},
    ]

    with mpl.rc_context(rc={
        'figure.dpi': 300,
        #'legend.fontsize': 6,
        'legend.labelspacing': 0.2,
        'legend.handlelength': 2.5,
    }):
        # shared figure: one total difference curve per system combination
        fig_out, ax_out = plt.subplots(figsize=(5, 3), constrained_layout=True)
        for system_combination in system_combinations_to_compare:
            print(f"system-combination {system_combination['name']}")

            #fig, ax = plt.subplots(figsize=(8, 3), constrained_layout=True)

            # (system name, interaction name) -> signed, weighted g(r) - 1
            Delta_G_dict = {}
            for s, system in enumerate(system_combination['system-combination']):
                print(f"  system {system['name']}")
                with WorkingDir(system['name']):
                    nb_interactions = nb_interactions_from_settings('topol/settings.xml')
                for nb in (nb for nb in nb_interactions if 'H' not in nb['name']):
                    nb_name = nb['name']
                    r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
                    g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]
                    # first system of the combination is subtracted, second is added;
                    # which one comes first follows the order of system_generator, not of the name list above
                    sign = [-1, 1][s]
                    #ax.plot(r, K_XY[nb_name] * (g - 1), label=f"{system['name']} {nb_name}", linestyle='--')
                    Delta_G_dict[system['name'], nb_name] = sign * K_XY[nb_name] * (g - 1)
            #plt.show()

            # print difference function ΔG(r) for some interactions
            # NOTE: from here on `nb_interactions` and `r` are those of the last system of the loop above
            fig, ax = plt.subplots(figsize=(5, 2), constrained_layout=True)
            ax.set_title(system_combination['name'])
            for nb in (nb for nb in nb_interactions if 'H' not in nb['name']):
                nb_name = nb['name']
                Delta_G_nb = np.zeros_like(r)
                for s, system in enumerate(system_combination['system-combination']):
                    if (system['name'], nb_name) in Delta_G_dict:
                        Delta_G_nb += Delta_G_dict[system['name'], nb_name]
                    for nb_to_compare in nbs_to_compare:
                        # nb_name_temp is the partner of nb_name within the pair, e.g. 'OW-LI' for 'OW-NA';
                        # its term is added if this system has it
                        if nb_name in nb_to_compare and (system['name'], nb_name_temp := nb_to_compare.difference({nb_name}).pop()) in Delta_G_dict:
                            Delta_G_nb += Delta_G_dict[system['name'], nb_name_temp]
                # optional, in paper they all start with 0
                if zero_start_Delta_G:
                    Delta_G_nb -= Delta_G_nb[0]
                ax.plot(r, Delta_G_nb, label=nb_name, linestyle='--')
            # this legend is replaced by the second ax.legend call below
            ax.legend(frameon=False, labelspacing=0.1, loc='upper right')

            #ax.set_ylim(-0.001, 0.001)
            ax.set_xlabel(r"$r$ / nm")
            ax.set_ylabel(r"$g(r)$")
            ax.legend(frameon=False, labelspacing=0.1, fontsize=8, loc='upper right')
            #fig.savefig(os.path.join('../figures', f"neutron-diff.pdf"))

            # total difference function: sum over all systems and interactions of this combination
            Delta_G = sum(Delta_G_dict.values())
            if convolute_sigma is not None:
                Delta_G = ndimage.gaussian_filter1d(Delta_G, convolute_sigma)
            x = r
            y = Delta_G
            if show_fourier:
                x, y = fourier(x, y)
            ax_out.plot(x, y, label=system_combination['name'])
        #ax_out.grid()
        ax_out.legend(frameon=False)
        ax_out.set_xlim(0, 1)
        ax_out.tick_params(direction='in', top=True, right=True)
        if show_fourier:
            ax_out.set_xlim(0, 15)
        plt.show()

show_neutron_diff(convolute_sigma=None, zero_start_Delta_G=False, show_fourier=False)
#show_neutron_diff(convolute_sigma=None, zero_start_Delta_G=False, show_fourier=True)

### PMF frequency

In [None]:
# Polynomial.fit is weird!
# It fits in a shifted and scaled variable (the x-range is mapped onto the
# window [-1, 1]), so fit.coef holds the coefficients for that scaled variable.
# fit.convert() gives the coefficients in terms of x itself, which is what
# polynomial.polyfit returns directly. The last line displays all four to compare.
x = [1, 2, 3]
y = [0, -1, 0]
fit = np.polynomial.Polynomial.fit(x, y, 2)
fit2 = np.polynomial.polynomial.polyfit(x, y, 2)
fit, fit.coef, fit.convert(), fit2

In [None]:
def pmf_frequency():
    """Estimate the ion-oxygen vibration frequency from a harmonic fit of the PMF.

    For every system the potential of mean force ``w(r) = -k_B T ln g(r)`` at
    300 K is computed from the NPT RDF. A parabola is fitted through the three
    grid points around the PMF minimum; its curvature is used as force constant
    and combined with the reduced mass of the pair to a harmonic frequency.
    PMFs and fits are plotted in one axes per system combination and saved to
    ``../figures/pmf_fit.pdf``.

    Reads ``system_generator``, ``systems_md``, ``npt_system_interaction_dict``,
    ``ff_short_names``, ``ff_colors``, ``nb_plot_name`` and ``mpl_rc_global``
    from the notebook.

    Returns:
        dict: Maps each system name to its frequency in 1/ps.
    """
    system_combinations_to_compare = [
        {'name': 'all LiCl',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-licl50/opls-co0.9tc',
                                    'water5000-licl50/eccr1-co1.2',
                                    'water5000-licl50/netz-co0.9tc',
                                    'water5000-licl50/madrid-co1.0tc',
                                    'water5000-licl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-licl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-licl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-LI',
             #'OW-CL',
         ]},
        {'name': 'all NaCl',
         'system-combination': [system for system in system_generator(*systems_md)
                                if system['name'] in [
                                    'water5000-nacl50/opls-co0.9tc',
                                    'water5000-nacl50/eccr1-co1.2',
                                    'water5000-nacl50/netz-co0.9tc',
                                    'water5000-nacl50/madrid-co1.0tc',
                                    'water5000-nacl50/iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-nacl50/iff-altern5-netz-co0.9-nopc',
                                    'water5000-nacl50/Buckingham-iff-altern5-eccr1-co1.2-nopc',
                                    'water5000-nacl50/Buckingham-iff-altern5-netz-co0.9-nopc',
                                ]],
         'nb-interactions-to-show': [
             'OW-NA',
             #'OW-CL',
         ]},
    ]

    mpl_rc = {
        'legend.labelspacing': 0.2,
        'legend.handlelength': 1.8,
        'legend.columnspacing': 1.0,
        'legend.handletextpad': 0.5,
    }

    pmf_frequency_dict = {}

    with plt.rc_context({**mpl_rc_global, **mpl_rc}):

        fig, axes = plt.subplots(figsize=(4.6, 1.6), nrows=1, ncols=2, sharex='col', constrained_layout=True, dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.05, h_pad=0.00, hspace=0.0, wspace=0.0)
        # the legend is shared by both axes, so every label is collected only once
        legend_handles, legend_labels = [], []

        for c, system_combination in enumerate(system_combinations_to_compare):
            print(f"system-combination {system_combination['name']}")
            ax = axes.flatten()[c]

            # only the first listed interaction is analysed
            nb_name = system_combination['nb-interactions-to-show'][0]
            #print(f"  interaction {nb_name}")

            for system in system_combination['system-combination']:
                #print(f"    system {system['name']}")

                # PMF
                label = ff_short_names.get(system['force-field']['name'], system['force-field']['name'])
                r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
                g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]
                with np.errstate(divide='ignore'):
                    # g = 0 gives pmf = +inf, which is fine for argmin and for plotting
                    # oconst.k_gro = const.k * const.N_A * 1e-3
                    pmf = -oconst.k_gro * 300 * np.log(g)  # [pmf] = kJ/mol

                # PMF fit: parabola through the minimum and its two neighbours
                ndx_min = np.argmin(pmf)
                pm = 1
                fit_range = slice(ndx_min - pm, ndx_min + pm + 1)
                pm_plot = 10
                # clamp the start at 0: a negative start would count from the end of
                # the array and the plotted fit would silently be empty
                plot_range = slice(max(ndx_min - pm_plot, 0), ndx_min + pm_plot + 1)
                fit = np.polynomial.Polynomial.fit(r[fit_range], pmf[fit_range], 2)

                # force constant; convert() is needed because Polynomial.fit stores
                # the coefficients for a scaled variable
                k = 2 * fit.convert().coef[2]  # [k] = kJ/mol/nm²
                m1 = 18  # mass used for the water molecule
                # NOTE(review): presumably the ion is the first atom of the second moltype -- confirm
                m2 = system['moltypes'][1]['atoms'][0]['mass']
                mu = m1 * m2 / (m1 + m2)  # [mu] = u
                # kJ/mol/nm²/u = 1e24 / s², so the square root is in 1/ps
                f = 1 / (2 * np.pi) * np.sqrt(k / mu)  # [f] = 1/ps
                pmf_frequency_dict[system['name']] = f

                linestyle = '-'
                color = ff_colors[system['force-field']['name']]
                line, = ax.plot(r, pmf, linestyle=linestyle, label=label, color=color, linewidth=1.0)
                if label not in legend_labels:
                    legend_handles.append(line)
                    legend_labels.append(label)
                # plot PMF fit
                ax.plot(r[plot_range], fit(r[plot_range]), linestyle=':', color=line.get_color(), linewidth=1.0)
                #ax.axvline(x=r[ndx_min], color=line.get_color(), linestyle=':')

            #ax.set_xlim((0.15, None))
            ax.text(.89, .2, nb_plot_name[nb_name],
                    horizontalalignment='right', transform=ax.transAxes)

        axes[0].set_ylabel(r"$w(r)$ in kJ/mol")
        axes[0].set_xlim((0.15, 0.25))
        axes[1].set_xlim((0.20, 0.30))
        axes[0].set_ylim(-8, 1)
        axes[1].set_ylim(-6, 1)
        for ax in axes:
            ax.set_xlabel(r"$r$ / nm")
        ncol = 4
        fig.legend(legend_handles, legend_labels, ncol=ncol, loc='lower center', bbox_to_anchor=(0.50, 1.00),)
        fig.savefig('../figures/pmf_fit.pdf', bbox_inches='tight')
        plt.show()
        return pmf_frequency_dict
pmf_frequency_dict = pmf_frequency()

In [None]:
!cp -a ../figures/pmf_fit.pdf ~/research/output/ion-shortrange-paper/figures/

## compare density, compressibility with literature data

### load

In [None]:
# Block averaging set-up: each trajectory is split into n_blocks blocks of block_len.
n_blocks = 5
block_len = 80  # in ps

# Distinct mixing ratios and system types, in the order system_generator yields them.
molar_mixing_ratios = list(OrderedSet(
    sys['molar-mixing-ratio'] for sys in system_generator(*systems_md)
))
system_types_to_show = list(OrderedSet(
    sys['type']['name'] for sys in system_generator(*systems_md)
))

# One row per (system type, force field tagged 'conc-range', mixing ratio, block);
# all cells start out as NaN.
columns = ['density', 'concentration', 'compressibility', 'mass-fraction']
index = pd.MultiIndex.from_product([
    system_types_to_show,
    [ff_name for ff_name, ff_def in force_fields.items() if 'conc-range' in ff_def['tags']],
    molar_mixing_ratios,
    range(n_blocks),
])
df_dens = pd.DataFrame(index=index, columns=columns, dtype=float)
df_dens.head()

In [None]:
def load_dens():
    """Fill the global ``df_dens`` with block-averaged NPT results.

    For every system from ``system_generator(*systems_md)`` tagged
    'conc-range', ``gmx energy`` is run on ``npt-prod/ener.edr`` inside the
    system's directory. For each of the ``n_blocks`` windows of ``block_len``
    ps the mean volume, the mean density (multiplied by 1e-3), the salt
    concentration and the isothermal compressibility printed by
    ``gmx energy -fluct_props`` are stored.

    Systems or blocks for which the gmx call fails are skipped with a
    '..no data..' message. If the compressibility line is missing from the
    gmx output, NaN is stored for that block.
    """
    for system in (sys for sys in system_generator(*systems_md) if 'conc-range' in sys['tags']):
        print(f"system {system['name']}")
        working_dir = os.path.join(system['name'])
        # first three levels of the df_dens row index; the block number is the fourth
        row = (system['type']['name'], system['force-field']['name'], system['molar-mixing-ratio'])

        mass_fraction = system['mass-fraction']
        df_dens.loc[(*row, slice(None)), 'mass-fraction'] = mass_fraction

        # number of salt units; systems without a second molecule type count as salt free
        try:
            n_nacl = system['moltypes'][1]['nmols']
        except IndexError:
            n_nacl = 0.0

        with WorkingDir(working_dir):
            try:
                run_bash("gmx energy -f npt-prod/ener.edr -o energy-temp.xvg <<< 'volume\ndensity'")
                data, header = gt.xvg.load('energy-temp.xvg')
            except Exception:
                # best effort: systems without (readable) results are skipped
                print('..no data..')
                continue
            run_bash("rm -f energy-temp.xvg")
            # volume, concentration
            for block in range(n_blocks):
                block_start = block * block_len
                block_end = block_start + block_len
                block_data = data[data['Time (ps)'].between(block_start, block_end)]
                volume = block_data['Volume'].mean()
                df_dens.at[(*row, block), 'volume'] = volume
                density = block_data['Density'].mean() * 1e-3
                # volume * 1e-24 converts nm^3 to litres, giving mol/l
                concentration = n_nacl / const.N_A / (volume * 1e-24)
                df_dens.at[(*row, block), 'concentration'] = concentration
                df_dens.at[(*row, block), 'density'] = density

            # isothermal compressibility
            for block in range(n_blocks):
                block_start = block * block_len
                block_end = block_start + block_len
                try:
                    run_bash(f"gmx energy -f npt-prod/ener.edr -o temp.xvg -fluct_props -driftcorr -b {block_start} -e {block_end} <<< 'Temperature\nVolume' > temp.txt")
                except Exception:
                    print('..no data..')
                    continue
                # reset per block so a missing line can never reuse the value
                # of a previous block or system
                isothermal_compressibility = np.nan
                with open('temp.txt', 'r') as f:
                    for line in f:
                        if line.startswith('Isothermal Compressibility Kappa'):
                            isothermal_compressibility = float(line.split('=')[1].split('(')[0])
                run_bash("rm -f temp.xvg temp.txt")
                df_dens.at[(*row, block), 'compressibility'] = isothermal_compressibility
load_dens()

In [None]:
# quick look at the last rows of the filled frame
df_dens.tail()

In [None]:
# store the filled frame in the current directory
with open('df_dens.pkl', 'wb') as f:
    pickle.dump(df_dens, f)

In [None]:
# restore the frame written by the cell above
# NOTE: pickle.load executes arbitrary code from the file; only load files written by this notebook
with open('df_dens.pkl', 'rb') as f:
    df_dens = pickle.load(f)

### literature data

In [None]:
# molar mixing ratio - density

# CaCl2, KCl, NaCl literature data https://handymath.com/cgi-bin/nacltble.cgi?submit=Entry
# "Perry's Chemical Engineers' Handbook" by Robert H. Perry, Don Green, 7th Edition page 140 (2-99)
# densities in g/ml
# % is mass fraction (95 percent sure)

# LiCl data from tanaka 1991

# Per system type:
#   'mass-fraction' or 'molality': composition axis of the table
#   'density': {temperature: densities along that axis}, two temperatures each
#              (temperatures presumably in K -- TODO confirm)
#   'molar-mass': sum of the masses of the salt's atom types as listed in `atomtypes`
density_w_lit_dict = {
    'water-cacl2_': {
        'mass-fraction': np.array((2, 4, 8, 12, 16, 20, 25, 30, 35, 40)) / 100,
        'density': {
            293.15: np.array((1.0148, 1.0316, 1.0659, 1.1015, 1.1386, 1.1775, 1.2284, 1.2816, 1.3373, 1.3957)),
            303.15: np.array((1.0120, 1.0286, 1.0626, 1.0978, 1.1345, 1.1730, 1.2236, 1.2764, 1.3316, 1.3895))
        },
        'molar-mass': sum((at['mass'] for at in (atomtypes[atomname] for atomname in ('CA', 'CL', 'CL')))),
    },
    'water-kcl': {
        'mass-fraction': np.array((1, 2, 4, 8, 12, 16, 20, 24)) / 100,
        'density': {
            298.15: np.array((1.00342, 1.00977, 1.02255, 1.04847, 1.07506, 1.10245, 1.13072, 1.15995)),
            313.15: np.array((0.99847, 1.00471, 1.01727, 1.04278, 1.06897, 1.09600, 1.12399, 1.15299))
        },
        'molar-mass': sum((at['mass'] for at in (atomtypes[atomname] for atomname in ('K', 'CL')))),
    },
    'water-licl': {
        # the only entry given as molality; divided by 1e3 to get mol per g
        'molality': np.array((0.05, 0.10, 0.50, 1.00, 2.00, 3.00, 4.00, 6.00, 8.00, 10.00, 12.00, 14.00, 16.00, 18.00, 20.00))/1e3,  # mol / g
        'density': {
            # source values divided by 1000 to match the g/ml scale of the other tables
            298.15: np.array((998.28, 999.47, 1009.02, 1020.08, 1041.63, 1061.53, 1080.18, 1114.82, 1146.26, 1175.89, 1206.03, 1231.39, 1255.85, 1278.67, 1298.72))/1000,
            303.15: np.array((996.90, 998.08, 1007.60, 1018.71, 1040.14, 1060.04, 1078.69, 1113.33, 1144.71, 1174.31, 1204.38, 1229.68, 1254.04, 1276.78, 1296.77))/1000,
        },
        'molar-mass': sum((at['mass'] for at in (atomtypes[atomname] for atomname in ('LI', 'CL')))),
    },
    'water-nacl': {
        'mass-fraction': np.array((1, 2, 4, 8, 12, 16, 20, 24, 26)) / 100,
        'density': {
            298.15: np.array((1.00409, 1.01112, 1.02530, 1.05412, 1.08365, 1.11401, 1.14533, 1.17776, 1.19443)),
            313.15: np.array((0.99908, 1.00593, 1.01977, 1.04798, 1.07699, 1.10688, 1.13774, 1.16971, 1.18614)),
        },
        'molar-mass': sum((at['mass'] for at in (atomtypes[atomname] for atomname in ('NA', 'CL')))),
    },
}

def mass_fraction_from_molality(b, M):
    """Convert a molality into the mass fraction of the solute.

    b: molality, amount of solute per unit mass of solvent (scalar or array)
    M: molar mass of the solute, in the same mass unit per amount as ``b`` uses

    Returns w = b*M / (1 + b*M). This form is algebraically identical to
    1 / (1 + 1/(b*M)) but also works for b == 0 (pure solvent, w = 0)
    without dividing by zero.
    """
    solute_per_solvent = b * M  # mass of solute per unit mass of solvent
    return solute_per_solvent / (1 + solute_per_solvent)

def concentration_from_mass_fraction(w, M, rho):
    """Convert a mass fraction into a molar concentration.

    w: mass fraction of the solute
    M: molar mass of the solute
    rho: density of the solution; it is multiplied by 1000 before use, so
         with rho in g/ml and M in g/mol the result is in mol/l
    """
    amount_per_mass = w / M
    rho_scaled = rho * 1000
    return amount_per_mass * rho_scaled

def interpolate_density(density_dict, T):
    """Linearly interpolate a density table between its two temperatures.

    density_dict: mapping with exactly two entries, temperature -> sequence
                  of densities (both sequences along the same composition axis)
    T: temperature at which the densities are wanted

    Returns an array shaped like the first density sequence. As with
    np.interp, T outside the two tabulated temperatures is clamped to the
    nearer one.
    """
    columns = tuple(density_dict.values())
    result = np.zeros_like(columns[0])
    T_first, T_second = tuple(density_dict)
    # one np.interp call per composition point, pairing the two temperature columns
    interpolated = [np.interp(T, (T_first, T_second), pair) for pair in zip(*columns)]
    result[:len(interpolated)] = interpolated
    return result

In [None]:
# plot literature densities
def plot_dens_lit(xaxis='c'):
    """Plot the literature densities of all salts in ``density_w_lit_dict``.

    The tabulated densities are interpolated to 300 between the two table
    temperatures and drawn as one curve per system type.

    xaxis: 'c' plots against the molar concentration (default),
           'w' against the mass fraction multiplied by 100.

    Raises ValueError for any other ``xaxis``.
    """
    if xaxis not in ('c', 'w'):
        raise ValueError(f"xaxis must be 'c' or 'w', got {xaxis!r}")

    with mpl.rc_context(rc={
        'legend.labelspacing': 0.1
    }):
        fig, ax = plt.subplots(figsize=(5, 3))

        for sys_type, sys_type_data in density_w_lit_dict.items():
            # tables are given either as mass fraction or as molality
            if 'mass-fraction' in sys_type_data:
                w = sys_type_data['mass-fraction']
            else:
                w = mass_fraction_from_molality(sys_type_data['molality'], sys_type_data['molar-mass'])
            densities = interpolate_density(sys_type_data['density'], 300)
            c = concentration_from_mass_fraction(w, sys_type_data['molar-mass'], densities)
            if xaxis == 'c':
                ax.plot(c, densities, '.-', label=f"{sys_type}")
            else:
                ax.plot(w * 100, densities, '.-', label=f"{sys_type}")

        if xaxis == 'c':
            ax.set_xlabel(r"$c$(salt) in mol/l")
        else:
            ax.set_xlabel(r"$w$(salt)")
        ax.set_xlim(0)
        # densities are tabulated in g/ml (the label used to read g/mol)
        ax.set_ylabel(r"$\rho$ in g/ml")
        ax.legend(frameon=False)
        fig.tight_layout()
        plt.show()
plot_dens_lit()

### plot

In [None]:
# plot densities vs concentration
def plot_dens_vs_w():
    """Plot simulated and literature densities against the salt concentration.

    Draws a 2x2 grid with one panel per system type other than 'water-pure'.
    Each panel shows, per force field tagged 'conc-range', the block-averaged
    density from ``df_dens`` (error bar = standard deviation over the blocks)
    and the literature curve from ``density_w_lit_dict`` interpolated to 300.
    The figure is saved as ../figures/densities.pdf.
    """
    # y-axis limits per system type
    ylim_dict = {
        'water-cacl2_': (0.99, 1.40),
        'water-kcl': (0.99, 1.25),
        'water-licl': (0.99, 1.12),
        'water-nacl': (0.99, 1.2),
    }

    # legend tweaks applied on top of the notebook-wide mpl_rc_global
    mpl_rc = {
        'legend.labelspacing': 0.5,
        'legend.columnspacing': 1.5,
        'legend.handlelength': 1.8,
    }

    with plt.rc_context({**mpl_rc_global, **mpl_rc}):
        fig, axes = plt.subplots(figsize=(4.62, 2.5), nrows=2, ncols=2, constrained_layout=True, sharex='all', dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.03, h_pad=0.00, hspace=0.0, wspace=0.0)
        for s, sys_type in enumerate((st for st in system_types if st != 'water-pure')):
            print(s, sys_type)
            ax = axes.flat[s]
        
            for f, ff in enumerate((ff for ff in force_fields.values() if 'conc-range' in ff['tags'])):
                #print(ff['name'])
                index = (sys_type, ff['name'], slice(None), slice(None))
                # average / spread over the blocks (index level 3) per mixing ratio (level 2)
                x = df_dens.loc[index, 'concentration'].groupby(level=2).mean().to_numpy()
                y = df_dens.loc[index, 'density'].groupby(level=2).mean().to_numpy()
                yerr = df_dens.loc[index, 'density'].groupby(level=2).std().to_numpy()
                # the first mixing ratio is left out (presumably the salt-free point -- TODO confirm)
                ax.errorbar(x[1:], y[1:], yerr=yerr[1:], marker='.', linestyle=':', label=ff_short_names[ff['name']], color=ff_colors[ff['name']])
                print(x[1:3])

            # literature
            sys_type_data = density_w_lit_dict[sys_type]
            # tables are given either as mass fraction or as molality
            if 'mass-fraction' in sys_type_data:
                w = sys_type_data['mass-fraction']
            else:
                w = mass_fraction_from_molality(sys_type_data['molality'], sys_type_data['molar-mass'])
            densities = interpolate_density(sys_type_data['density'], 300)
            c = concentration_from_mass_fraction(w, sys_type_data['molar-mass'], densities)
            ax.plot(c, densities, '.-', color='k', label="exp.")

            #ax.set_xlim(0, max(x)+0.2)
            ax.set_xlim(0, 5.1)
            ax.set_ylim(ylim_dict[sys_type])
            ax.text(.05, .83, sys_type_short_names[sys_type],
                    horizontalalignment='left', transform=ax.transAxes)
        for ax in axes[1]:
            ax.set_xlabel(r"$c$(Salt) in mol/l")
        for ax in axes[:, 0]:
            ax.set_ylabel(r"$\rho$ in g/ml")
        
        # one shared legend, built from the entries of the last panel
        handles, labels = axes[-1][-1].get_legend_handles_labels()
        #labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0], reverse=False))
        # hard-coded permutation; it requires exactly seven legend entries in the last panel
        order = [1, 4, 2, 3, 5, 6, 0]
        handles, labels = [handles[idx] for idx in order], [labels[idx] for idx in order]
        fig.legend(handles, labels, ncol=4, loc='lower center', bbox_to_anchor=(0.52, 0.97),)
        fig.savefig(os.path.join('../figures', f"densities.pdf"), bbox_inches='tight')
        plt.show()
plot_dens_vs_w()

In [None]:
!cp -a ../figures/densities.pdf ~/research/output/ion-shortrange-paper/figures/

### RMSD density

In [None]:
def rmsd_density():
    """Store the density deviation from literature per force field in ``df_rmsd``.

    For every system type except 'water-pure' and every force field tagged
    'conc-range', the block-averaged simulated density is compared with the
    literature density (``density_w_lit_dict`` interpolated to 300, then
    interpolated to the simulated concentrations). The first and the last
    concentration point are left out. The squared deviations are summed over
    all points and system types; the square root of that sum (no division by
    the number of points) is written to ``df_rmsd[..., 'density']``.

    Every force field not tagged 'dummy' gets a value; force fields that were
    never accumulated (not tagged 'conc-range') therefore get 0.0.
    """
    dens_rmsd_dict = collections.defaultdict(lambda: 0)

    for s, sys_type in enumerate((st for st in system_types if st != 'water-pure')):
        print(s, sys_type)

        # experimental from literature
        sys_type_data = density_w_lit_dict[sys_type]
        if 'mass-fraction' in sys_type_data:
            w = sys_type_data['mass-fraction']
        else:
            w = mass_fraction_from_molality(sys_type_data['molality'], sys_type_data['molar-mass'])
        rho_ref = interpolate_density(sys_type_data['density'], 300)
        c_ref = concentration_from_mass_fraction(w, sys_type_data['molar-mass'], rho_ref)

        for ff in (ff for ff in force_fields.values() if 'conc-range' in ff['tags']):
            #print(ff['name'])
            index = (sys_type, ff['name'], slice(None), slice(None))
            # mean over the blocks per molar mixing ratio (index level 2);
            # `axis=0` is the default and deprecated in recent pandas, so it is omitted
            c = np.array(df_dens.loc[index, 'concentration'].groupby(level=2).mean())
            rho = np.array(df_dens.loc[index, 'density'].groupby(level=2).mean())
            # ignore first and last point, excluding 0 and up to 3 mol/l
            c = c[1:-1]
            rho = rho[1:-1]
            # msd
            msd = np.sum((rho - np.interp(c, c_ref, rho_ref))**2)
            dens_rmsd_dict[(ff['name'], 'rmsd')] += msd
            #dens_rmsd_dict[(ff['name'], 'counter')] += 1.0

    for ff_name, ff in force_fields.items():
        if 'dummy' in ff['tags']:
            continue
        #print(ff_name)
        #print(dens_rmsd_dict[(ff_name, 'counter')])
        df_rmsd.at[ff_name, 'density'] = np.sqrt(dens_rmsd_dict[(ff_name, 'rmsd')])  # / dens_rmsd_dict[(ff_name, 'counter')]

rmsd_density()
df_rmsd

In [None]:
# not working currently
# NOTE(review): df_conc and df_compress are not defined in the visible part of the
# notebook, and `mean(..., level=0)` / `std(..., level=0)` were removed in pandas 2.0,
# so the call below is expected to fail -- confirm before running this cell.
def plot_compress():
    """Plot the isothermal compressibility against concentration (currently broken)."""
    with mpl.rc_context(rc={'figure.dpi': 150}):
        fig, ax = plt.subplots(figsize=(5, 3))
        for c, column in enumerate(df_dens.columns):
            # mean and spread over the first index level of the concentration / compressibility frames
            x = df_conc.mean(axis=0, level=0)[column]
            xerr = df_conc.std(axis=0, level=0)[column]
            y = df_compress.mean(axis=0, level=0)[column]
            yerr = df_compress.std(axis=0, level=0)[column]
            #ax.errorbar(y.index, y, yerr=yerr, fmt='s:', label=column)
            # the marker cycles through square, circle, triangle
            ax.errorbar(x, y, xerr=xerr, yerr=yerr, marker='so>'[c%3], linestyle=':', label=label_dict[column])
        ax.set_xlim(0)
        ax.set_xlabel(r"$r$(NaCL)")
        ax.set_ylabel(r"$\beta_T$ in 1/Pa")
        ax.legend()
        fig.tight_layout()
        fig.savefig(os.path.join('../figures', f"water-nacl-compress.png"), dpi=300)
        plt.show()
plot_compress()

## water diffusion

### load

In [None]:
# distinct molar mixing ratios of all MD systems, in order of first appearance
molar_mixing_ratios = list(OrderedSet([sys['molar-mixing-ratio'] for sys in system_generator(*systems_md)]))

# distinct system type names, in order of first appearance
system_types_to_show = list(OrderedSet(([sys['type']['name'] for sys in system_generator(*systems_md)])))
columns = ['D', 'Derr']
# one row per (system type, force field tagged 'conc-range', molar mixing ratio)
index = pd.MultiIndex.from_product((
    system_types_to_show,
    [ffn for ffn, ff in force_fields.items() if 'conc-range' in ff['tags']],
    molar_mixing_ratios
))
# all-NaN frame; load_diff() fills it and additionally writes a 'Dcorr' column
df_diff = pd.DataFrame(columns=columns, index=index, dtype=float)
df_diff.head()

In [None]:
def correct_diffusivity_fse(D, T, boxlength, water_model):
    """Return the diffusion coefficient with the finite-size correction added.

    The term xi * k_B * T / (6 * pi * eta * L) is added to D, see
    https://www.tandfonline.com/doi/full/10.1080/08927022.2020.1810685

    D: diffusion coefficient in 10^{-5} cm^2/s
    T: temperature in K
    boxlength: box edge length in nm
    water_model: 'water-spce' or 'water-tip4p2005', selects the viscosity;
                 any other key raises KeyError

    The result is in 10^{-5} cm^2/s again.
    """
    # viscosity per water model in kg/(s*m)
    viscosities = {
        'water-spce': 7.29e-4,
        'water-tip4p2005': 8.55e-4,
    }
    eta = viscosities[water_model]
    xi = 2.83729  # dimensionless for cubic periodic boundary conditions
    # the factors 1e9 convert D and the box length to SI units and back
    denominator = 6 * np.pi * eta * boxlength/1e9
    shift = xi * const.k * T / denominator
    return (D / 1e9 + shift) * 1e9


def load_diff():
    """Fill the global ``df_diff`` with water diffusion coefficients.

    For every system from ``system_generator(*systems_md)`` tagged
    'conc-range', the line starting with 'D[' is read from
    ``npt-prod/msd-SOL.txt`` (value and '+/-' error) and the box length from
    ``npt-prod/confout.gro``. The raw value, its error and the finite-size
    corrected value are stored as 'D', 'Derr' and 'Dcorr'.

    If the file holds no 'D[' line, NaN is stored for that system instead of
    the values of the previously processed system. A missing file still
    raises.

    Side effect: the system's mass fraction is also written into ``df_dens``.
    """
    for system in (sys for sys in system_generator(*systems_md) if 'conc-range' in sys['tags']):
        #print(f"system {system['name']}")
        working_dir = os.path.join(system['name'])
        row = (system['type']['name'], system['force-field']['name'], system['molar-mixing-ratio'])

        mass_fraction = system['mass-fraction']
        # NOTE(review): this writes into df_dens, not df_diff -- kept as is, confirm it is intended
        df_dens.loc[(*row, slice(None)), 'mass-fraction'] = mass_fraction

        # reset per system so a file without a 'D[' line cannot reuse old values
        D = Derr = np.nan
        with WorkingDir(working_dir):
            with open('npt-prod/msd-SOL.txt', 'r') as f:
                for line in f:
                    if line.startswith('D['):
                        # value and error sit between ']' and ')', separated by ' (+/- '
                        D, Derr = map(float, line.split(']')[1].split(')')[0].split(' (+/- '))
            boxlength, _, _ = gt.gro.get_box('npt-prod/confout.gro')
        if np.isnan(D):
            print(f"..no diffusion coefficient found for system {system['name']}..")
        water_model = PARAMETRIC_FORCE_FIELDS[system['force-field']['parametric-ff']]['water-model']
        D_corr = correct_diffusivity_fse(D, system['temperature'], boxlength, water_model)
        df_diff.at[row, 'D'] = D
        df_diff.at[row, 'Derr'] = Derr
        df_diff.at[row, 'Dcorr'] = D_corr
load_diff()
df_diff

In [None]:
# from separate manual simulations
# pure-water reference values per water model; the loop below adds the
# finite-size corrected value under 'D_corr' (temperature fixed to 300)
D_pure = {
    # D, D_err, boxlength
    'water-spce': {'D': 2.4385, 'D_err': 0.0378, 'boxlength': 5.31181},
    'water-tip4p2005': {'D': 2.1116, 'D_err': 0.0587, 'boxlength': 5.31485},
}
for water_ff, data_dict in D_pure.items():
    D_corr = correct_diffusivity_fse(data_dict['D'], 300, data_dict['boxlength'], water_ff)
    data_dict['D_corr'] = D_corr
    print(D_corr)

### literature data

In [None]:
# from müller hertz 1996
# by nmr, 25 °C
# Per system type: water diffusion coefficient as a function of salt molality.
# The first entry of every table is the salt-free value (molality 0.0, D = 2.30).
# Units presumably mol/kg and 1e-9 m^2/s -- TODO confirm against the source.
water_diff_lit_data = {
    'water-cacl2_': {
        'molality': np.array([0.0, 1.00, 2.02, 3.02, 4.08, 5.29, 6.02, 7.10, 8.18, 9.01, 10.01, 10.11, 11.02]),
        'diffusion-coefficient': np.array([2.30, 1.88, 1.43, 1.07, 0.749, 0.494, 0.384, 0.255, 0.180, 0.139, 0.101, 0.0993, 0.0773]),
    },
    'water-kcl': {
        'molality': np.array([0.0, 1.02, 2.00, 3.05, 4.00, 4.60]),
        'diffusion-coefficient': np.array([2.30, 2.38, 2.38, 2.34, 2.31, 2.30]),
    },
    'water-licl': {
        'molality': np.array([0.0, 1.00, 2.03, 3.10, 4.06, 5.02, 6.06, 7.02, 7.98, 9.19, 10.04, 11.04, 12.01, 12.96, 14.07, 15.21,
                              16.31, 16.93, 18.00, 18.92, 20.03]),
        'diffusion-coefficient': np.array([2.30, 2.09, 1.84, 1.63, 1.44, 1.28, 1.12, 0.991, 0.876, 0.730, 0.656, 0.565, 0.489, 0.424,
                                          0.369, 0.318, 0.276, 0.259, 0.219, 0.200, 0.175]),
    },
    'water-nacl': {
        'molality': np.array([0.0, 1.00, 2.00, 3.00, 4.00, 5.00, 5.90]),
        'diffusion-coefficient': np.array([2.30, 2.17, 2.02, 1.87, 1.71, 1.57, 1.43]),
    },
}

### plot

In [None]:
def plot_diff_water(use_finite_size_correction=True):
    """Plot the relative water diffusion coefficient against salt concentration.

    Draws a 2x2 grid with one panel per system type other than 'water-pure'.
    Per force field tagged 'conc-range' the diffusion coefficient from
    ``df_diff`` is divided by the pure-water value of the same force field
    and plotted against the block-averaged concentration from ``df_dens``.
    The literature curve (``water_diff_lit_data``, molality converted to
    concentration with the CRC data in ``osmp_lit_dict``) is normalized to its
    first, salt-free entry. The figure is saved as
    ../figures/water-diffusivity.pdf.

    use_finite_size_correction: plot the 'Dcorr' column if True, the
        uncorrected 'D' column otherwise.
    """
    # y-axis limits per system type
    ylim_dict = {
        'water-cacl2_': (0.08, 1.25),
        'water-kcl': (0.08, 1.25),
        'water-licl': (0.3, 1.15),
        'water-nacl': (0.3, 1.15),
    }

    # legend tweaks applied on top of the notebook-wide mpl_rc_global
    mpl_rc = {
        'legend.labelspacing': 0.5,
        'legend.columnspacing': 1.5,
        'legend.handlelength': 1.8,
    }

    # df_diff column holding the diffusion coefficient to show
    d_column = 'Dcorr' if use_finite_size_correction else 'D'

    with plt.rc_context({**mpl_rc_global, **mpl_rc}):
        fig, axes = plt.subplots(figsize=(4.62, 2.5), nrows=2, ncols=2, constrained_layout=True, sharex='col', sharey='row', dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.03, h_pad=0.00, hspace=0.0, wspace=0.0)
        for s, sys_type in enumerate((st for st in system_types if st != 'water-pure')):
            #print(s, sys_type['name'])
            ax = axes.flat[s]

            for ff in (ff for ff in force_fields.values() if 'conc-range' in ff['tags']):
                #print(ff['name'])
                index = (sys_type, ff['name'], slice(None))
                index_pure = ('water-pure', ff['name'], 0.0)
                # mean over the blocks per molar mixing ratio (index level 2);
                # `axis=0` is the default and deprecated in recent pandas, so it is omitted
                concentration = df_dens.loc[index, 'concentration'].groupby(level=2).mean()
                x = concentration.to_numpy()
                # value and error are both scaled by the pure-water value
                d_pure = df_diff.at[index_pure, d_column]
                y = (df_diff.loc[index, d_column] / d_pure).to_numpy()
                yerr = (df_diff.loc[index, 'Derr'] / d_pure).to_numpy()
                ax.errorbar(x, y, yerr=yerr, marker='.', linestyle=':', label=ff_short_names[ff['name']], color=ff_colors[ff['name']])

            # literature
            molality = water_diff_lit_data[sys_type]['molality']
            # from molality to concentration we can use crc data
            osmp_lit_dict_st = osmp_lit_dict[sys_type]
            concentration = np.interp(molality,
                                      osmp_lit_dict_st['crc-liquid-data']['molality'],
                                      osmp_lit_dict_st['crc-liquid-data']['concentration'], left=np.nan, right=np.nan)
            x_lit = concentration
            y_lit = water_diff_lit_data[sys_type]['diffusion-coefficient']
            # normalize into a new array; an in-place division would overwrite
            # the shared literature table
            y_lit = y_lit / y_lit[0]
            ax.plot(x_lit, y_lit, '-', marker='.', color='k', label="exp.")

            #ax.set_xlim(0, max(x)+0.2)
            ax.set_xlim(0, 5.1)
            ax.set_ylim(ylim_dict[sys_type])
            ax.text(.05, .10, sys_type_short_names[sys_type],
                    horizontalalignment='left', transform=ax.transAxes)

        for ax in axes[1]:
            ax.set_xlabel(r"$c$(Salt) in mol/l")
        for ax in axes[:, 0]:
            ax.set_ylabel(r"$D / D_0$")

        # one shared legend, built from the entries of the last panel
        handles, labels = axes[-1][-1].get_legend_handles_labels()
        #labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0], reverse=False))
        # hard-coded permutation; it requires exactly seven legend entries in the last panel
        order = [1, 4, 2, 3, 5, 6, 0]
        handles, labels = [handles[idx] for idx in order], [labels[idx] for idx in order]
        fig.legend(handles, labels, ncol=4, loc='lower center', bbox_to_anchor=(0.52, 0.97),)
        fig.savefig(os.path.join('../figures', f"water-diffusivity.pdf"), bbox_inches='tight')
        plt.show()

plot_diff_water(use_finite_size_correction=True)
#plot_diff_water(use_finite_size_correction=False)

In [None]:
!cp -a ../figures/water-diffusivity.pdf ~/research/output/ion-shortrange-paper/figures/

### RMSD diffusion

In [None]:
def rmsd_diffusion():
    """Store the diffusion deviation from literature per force field in ``df_rmsd``.

    For every system type except 'water-pure' and every force field tagged
    'conc-range', the finite-size corrected diffusion coefficient relative to
    pure water is compared with the literature curve (normalized to its first
    entry and interpolated to the simulated concentrations). The first and
    the last concentration point are left out. The squared deviations are
    summed over all points and system types; the square root of that sum (no
    division by the number of points) is written to
    ``df_rmsd[..., 'diffusion']``.

    Every force field not tagged 'dummy' gets a value; force fields that were
    never accumulated (not tagged 'conc-range') therefore get 0.0.
    """
    diff_rmsd_dict = collections.defaultdict(lambda: 0)

    for s, sys_type in enumerate((st for st in system_types if st != 'water-pure')):
        print(s, sys_type)

        # experimental from literature
        molality = water_diff_lit_data[sys_type]['molality']
        # from molality to concentration we can use crc data
        osmp_lit_dict_st = osmp_lit_dict[sys_type]
        concentration = np.interp(molality,
                                  osmp_lit_dict_st['crc-liquid-data']['molality'],
                                  osmp_lit_dict_st['crc-liquid-data']['concentration'], left=np.nan, right=np.nan)
        # NOTE(review): molalities outside the CRC table become NaN here and end up
        # in the x-coordinates of the interpolation below -- confirm this is harmless
        x_lit = concentration
        y_lit = water_diff_lit_data[sys_type]['diffusion-coefficient']
        # normalize into a new array; an in-place division would overwrite
        # the shared literature table
        y_lit = y_lit / y_lit[0]

        for ff in (ff for ff in force_fields.values() if 'conc-range' in ff['tags']):
            #print(ff['name'])
            index = (sys_type, ff['name'], slice(None))
            index_pure = ('water-pure', ff['name'], 0.0)
            # mean over the blocks per molar mixing ratio (index level 2);
            # `axis=0` is the default and deprecated in recent pandas, so it is omitted
            concentration = df_dens.loc[index, 'concentration'].groupby(level=2).mean()
            x = np.array(concentration)
            y = np.array(df_diff.loc[index, 'Dcorr'] / df_diff.at[index_pure, 'Dcorr'])
            # ignore first and last point, excluding 0 and up to 3 mol/l
            x = x[1:-1]
            y = y[1:-1]
            # msd
            msd = np.sum((y - np.interp(x, x_lit, y_lit))**2)
            diff_rmsd_dict[(ff['name'], 'msd')] += msd
            #diff_rmsd_dict[(ff['name'], 'counter')] += 1.0

    for ff_name, ff in force_fields.items():
        if 'dummy' in ff['tags']:
            continue
        df_rmsd.at[ff_name, 'diffusion'] = np.sqrt(diff_rmsd_dict[(ff_name, 'msd')])  # / diff_rmsd_dict[(ff_name, 'counter')]

rmsd_diffusion()
df_rmsd

## residence times

### residence settings

In [None]:
# names of the settings that are actually evaluated; the rest stays commented out
residence_settings_names = [
    "allNA_0.33-400-2.0-T",
    "allCL_0.38-400-2.0-T",
    #"allNA_0.33-200-2.0-T",  # checking time dependence
    #"allNA_0.33-60-2.0-T",
    #"allNA_0.33-400-2.0-F",  # checking outer_spans dependence
    #"allNA_0.33-400-2.0-F-d",  # checking delay dependence
    #"1NA_0.33-400-2.0-T",  # checking single ion shell dependence
]

# candidate values per setting; every combination gets a name of the form
# <selection>-<time>-<max_f>-<T|F>[-d] and is kept only if that name is listed above
_selections = (
    {'name': 'allNA_0.33', 'selection': 'atomname OW and within 0.33 of resname NA'},
    {'name': 'allCL_0.38', 'selection': 'atomname OW and within 0.38 of resname CL'},
    {'name': '1NA_0.33', 'selection': 'atomname OW and within 0.33 of atomnr 15001'},
)
_times = (60, 200, 400)
_max_fs = (2.0,)  # common value from literature
_switch = (True, False)

residence_settings = []
for sel, time, max_f, outer_spans, delay in itertools.product(_selections, _times, _max_fs, _switch, _switch):
    spans_tag = 'T' if outer_spans else 'F'
    delay_tag = '-d' if delay else ''
    name = f"{sel['name']}-{time}-{max_f:.1f}-{spans_tag}{delay_tag}"
    if name not in residence_settings_names:
        continue
    residence_setting = {
        'name': name,
        'sel': sel,
        'time': time,
        'max_f': max_f,
        'outer_spans': outer_spans,
        'delay': delay,
    }
    residence_settings.append(residence_setting)
pd.DataFrame(residence_settings)

### calculate residence acf on cluster

In [None]:
def run_acf():
    """Submit the residence-ACF calculations to the cluster.

    For every system tagged 'resacf' and every entry of
    ``residence_settings`` a slurm script is submitted that runs
    ``gmx select`` followed by ``~/bin/resacf.py`` in the matching remote
    directory. Combinations whose ``residence-acf.xvg`` is already present
    locally are skipped. Returned job ids are appended to the notebook-wide
    ``jobids`` list.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo', ntasks=1)
    for system in (sys for sys in systems if 'resacf' in sys['tags']):
        print(f"system {system['name']}")
        for residence_setting in residence_settings:
            print(residence_setting['name'])
            working_dir = os.path.join(system['name'], 'resacf', residence_setting['name'])
            remote_dir = os.path.join(remote_dir_base, system['name'], 'resacf', residence_setting['name'])
            with WorkingDir(working_dir):
                # check if already done
                if os.path.isfile('residence-acf.xvg'):
                    print('..results present locally..')
                    continue

                # mkdir
                run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

                # optional command line flags of resacf.py
                outer_flag = "--outer-spans" if residence_setting['outer_spans'] else ""
                delay_flag = "--delay" if residence_setting['delay'] else ""

                # commands to be run on compute nodes
                script = remote_header + rf"""
if [[ ! -f residence-acf.xvg ]]; then
    gmx select -f ../../prod/traj_comp.xtc -s ../../prod/topol.tpr -oi select.dat -e {residence_setting['time']} -select '{residence_setting['sel']['selection']}'
    ~/bin/resacf.py select.dat residence-acf.xvg --end {residence_setting['time']} --max-false {residence_setting['max_f']} {outer_flag} {delay_flag}
fi
""" + remote_footer

                jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
                print(jobid)
                # only ids that were actually returned are tracked
                if jobid is not None:
                    jobids.append(jobid)
run_acf()

### check job status

In [None]:
# query the job status on the cluster and replace the tracked list with the
# returned one (presumably the jobs that are not completed yet -- TODO confirm)
jobids = check_job_stati(jobids, remote_host)

### copy results from cluster

In [None]:
# Pull residence-acf.xvg for every (system, residence setting) combination
# from the cluster into the matching local directory.
# NOTE(review): remote_dir_base is only assigned inside run_acf() in the visible
# part of the notebook; it must exist as a notebook global for this cell to run.
for system in (sys for sys in systems if 'resacf' in sys['tags']):
    print(f"system {system['name']}")
    for residence_setting in residence_settings:
        print(residence_setting['name'])
        working_dir = os.path.join(system['name'], 'resacf', residence_setting['name'])
        remote_dir = os.path.join(remote_dir_base, system['name'], 'resacf', residence_setting['name'])

        with WorkingDir(working_dir):
            filelist = ["residence-acf.xvg"]
            try:
                gt.remote.pull_files(filelist, remote_host, remote_dir)
            except Exception:
                # best effort: results that are not there yet are skipped,
                # but Ctrl-C is no longer swallowed
                print('..no data..')

### load acf

In [None]:
# keys are (system, residence_setting_name)
# values are (t, acf, acf_std), the three columns of residence-acf.xvg
acf_dict = {}

for system in (sys for sys in systems if 'resacf' in sys['tags']):
    print(f"system {system['name']}")
    for residence_setting in residence_settings:
        print(residence_setting['name'])
        working_dir = os.path.join(system['name'], 'resacf', residence_setting['name'])
        with WorkingDir(working_dir):
            try:
                # load data; lines starting with '#' or '@' are header lines
                t, acf, acf_std = np.loadtxt('residence-acf.xvg', dtype=float, comments=['#', '@']).T
                # store data
                acf_dict[(system['name'], residence_setting['name'])] = (t, acf, acf_std)
            except Exception:
                # best effort: missing or unreadable results are skipped,
                # but Ctrl-C is no longer swallowed
                print('..no data..')

### integrate acf

In [None]:
# keys are (system, residence_setting_name), values are the time integral of the ACF
tau_dict = {}

# np.trapz was renamed to np.trapezoid in NumPy 2.0; use whichever exists
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

for system in (sys for sys in systems if 'resacf' in sys['tags']):
    print(f"system {system['name']}")
    for residence_setting in residence_settings:
        print(residence_setting['name'])
        key = (system['name'], residence_setting['name'])
        try:
            # load data
            t, acf, acf_std = acf_dict[key]
        except KeyError:
            # no ACF was loaded for this combination
            print('..no data..')
            continue
        # fit data
        #(tau,), _ = optimize.curve_fit(exp_decay, t, acf, p0=1)
        # integrate
        tau = _trapezoid(x=t, y=acf)
        # store result
        tau_dict[key] = tau

### compare residence settings

In [None]:
# One figure per system: residence ACFs for different evaluation settings.
residence_settings_to_show = [rs for rs in residence_settings if rs['name'] in [
    'allNA_0.33-60-2.0-T',
    'allNA_0.33-400-2.0-T',
    '1NA_0.33-400-2.0-T',
    #'allCL_0.38-400-2.0-T',
]]
# legend labels; the second field of a settings name is its end time
# (the 60 and 400 labels used to be swapped)
label_dict = {
    'allNA_0.33-60-2.0-T': r'60 ps',
    'allNA_0.33-400-2.0-T': r'400 ps',
    '1NA_0.33-400-2.0-T': r'400 ps, 1 Na',
}
linestyles = ['-', '--', ':', '-.']


for system in (sys for sys in systems if 'resacf' in sys['tags']):
    print(f"system {system['name']}")

    fig, ax = plt.subplots(figsize=(4, 3))

    for r, residence_setting in enumerate(residence_settings_to_show):
        print(residence_setting['name'])
        key = (system['name'], residence_setting['name'])
        try:
            t, acf, acf_std = acf_dict[key]
            tau = tau_dict[key]
        except KeyError:
            # nothing loaded / integrated for this combination
            print('..no data..')
            continue
        label = label_dict.get(residence_setting['name'], 'foo')
        label += fr", $\tau = {tau:.1f}$ ps"
        line, = ax.plot(t, acf, label=label, linestyle=linestyles[r%4])
        # error band of one standard deviation in the colour of its line
        ax.fill_between(t, acf-acf_std, acf+acf_std, alpha=0.3, color=line.get_color())

    ax.set_xlim(0, 25)
    ax.set_ylim(0)
    ax.set_xlabel(r'$t$')
    ax.set_ylabel(r'$S_R(t)$')
    ax.legend(frameon=False)
    fig.tight_layout()
    fig.savefig(f"../figures/resacf-{system['name'].replace('/', '-')}.png", dpi=300)
    plt.show()

### compare systems (force fields)

In [None]:
# One figure per residence setting: residence ACFs of all systems (force fields).
residence_settings_to_show = [rs for rs in residence_settings if rs['name'] in [
    #'allNA_0.33-60-2.0-T',
    'allNA_0.33-400-2.0-T',
    'allCL_0.38-400-2.0-T',
]]

# legend label per system name
label_dict = {
    'water5000-nacl50/azade-co0.7': 'Azade',
    'water5000-nacl50/opls-co0.7-q0.75': r'ECC OPLS',
    'water5000-nacl50/opls-co0.7-q1.0': r'OPLS',
    'water5000-nacl50/eccr1-co0.7': 'ECCR',
    'water5000-nacl50/altern1-co0.7-q0.75': r'ECC OPLS IMC',
    'water5000-nacl50/altern2-co0.7-q0.75': r'ECCR IMC impr.',
    'water5000-nacl50/fuentes1-co0.7': 'NaCl/ε',
}
linestyles = ['-', '--', ':', '-.']
mpl_rc = {
    'figure.dpi': 150,
    #'legend.title_fontsize': 8,
    #'legend.fontsize': 8,
    #'legend.labelspacing': 0.1,
}

with mpl.rc_context(mpl_rc):
    for residence_setting in residence_settings_to_show:
        print(residence_setting['name'])

        fig, ax = plt.subplots(figsize=(4,2.5))

        for s, system in enumerate((sys for sys in systems if 'resacf' in sys['tags'])):
            print(f"system {system['name']}")
            key = (system['name'], residence_setting['name'])
            try:
                t, acf, acf_std = acf_dict[key]
                tau = tau_dict[key]
            except KeyError:
                # nothing loaded / integrated for this combination
                print('..no data..')
                continue
            label = label_dict.get(system['name'], 'foo')
            label += fr", $\tau = {tau:.1f}$ ps"
            line, = ax.plot(t, acf, label=label, linestyle=linestyles[s%4])
            # tie the error band to the colour of its line instead of relying
            # on the fill colour cycle staying in step with the line cycle
            ax.fill_between(t, acf-acf_std, acf+acf_std, alpha=0.3, color=line.get_color())
            #ax.plot(t, exp_decay(t, tau), linestyle=':', color=line.get_color())
            print(tau)

        ax.set_xlim(0, 25)
        ax.set_ylim(0, 1.0)
        ax.set_xlabel(r'$t$ in ps')
        ax.set_ylabel(r'$S_R(t)$')
        # reorder legend
        handles, labels = plt.gca().get_legend_handles_labels()
        #order = [3,0,2,1]
        #order = [0,1,2,3]
        #ax.legend([handles[idx] for idx in order],[labels[idx] for idx in order], frameon=False)
        ax.legend(frameon=False)
        fig.tight_layout()
        fig.savefig(f"../figures/resacf-{residence_setting['name']}.png", dpi=300)
        plt.show()

## thermal expansion

In [None]:
# block averaging for the thermal expansion runs; note that this overwrites the
# notebook-wide n_blocks / block_len used by the density evaluation
n_blocks = 5
block_len = 360  # in ps
# distinct system type names, in order of first appearance
system_types_to_show = list(OrderedSet(([sys['type']['name'] for sys in system_generator(*systems_md)])))
columns = ['volume']
# one row per (system type, force field, temperature offset from DeltaTs, block)
index = pd.MultiIndex.from_product((
    system_types_to_show,
    [ff for ff in force_fields],
    DeltaTs,
    range(n_blocks)
))
# all-NaN frame; load_therm_exp() fills it
df_te = pd.DataFrame(columns=columns, index=index, dtype=float)
df_te

In [None]:
df_therm_exp = pd.DataFrame()

def load_therm_exp():
    """Fill the global ``df_te`` with block-averaged volumes.

    For every system from ``system_generator(*systems_md)`` tagged
    'therm-exp' and every temperature offset in ``DeltaTs``, ``gmx energy``
    is run on ``therm-exp-<T>/ener.edr`` (starting at 200 via ``-b 200``).
    The mean volume of each of the ``n_blocks`` windows of ``block_len`` ps
    is stored in the 'volume' column. The system's mass fraction is written
    to a 'mass-fraction' column. Temperatures for which the gmx call or the
    file loading fails are skipped with a '..no data..' message.
    """
    for system in (sys for sys in system_generator(*systems_md) if 'therm-exp' in sys['tags']):
        print(f"system {system['name']}")
        working_dir = os.path.join(system['name'])

        mass_fraction = system['mass-fraction']
        df_te.loc[(system['type']['name'], system['force-field']['name'], slice(None), slice(None)), 'mass-fraction'] = mass_fraction

        with WorkingDir(working_dir):
            for DeltaT in DeltaTs:
                T = system['temperature'] + DeltaT
                folder = f"therm-exp-{T:.0f}"
                try:
                    run_bash(f"gmx energy -f {folder}/ener.edr -b 200 -o /tmp/energy-temp.xvg <<< 'volume'")
                    data, header = gt.xvg.load('/tmp/energy-temp.xvg')
                    run_bash("rm -f /tmp/energy-temp.xvg")
                except Exception:
                    # best effort: temperatures without (readable) results are
                    # skipped, but Ctrl-C is no longer swallowed
                    print('..no data..')
                    continue
                # mean volume per block
                for block in range(n_blocks):
                    block_start = block * block_len
                    block_end = block_start + block_len
                    block_data = data[data['Time (ps)'].between(block_start, block_end)]
                    volume = block_data['Volume'].mean()
                    df_te.at[(system['type']['name'], system['force-field']['name'], DeltaT, block), 'volume'] = volume
load_therm_exp()

In [None]:
# inspect all DeltaT/block rows (index levels 2 and 3) and all columns of one
# system type / force field combination
df_te.loc[('water-nacl', 'netz-co0.9tc', slice(None), slice(None)), slice(None)]

In [None]:
def calc_therm_exp():
    """Compute finite-difference thermal expansion coefficients into ``df_te``.

    For each 'therm-exp' system, each pair of neighbouring entries of
    ``DeltaTs`` and each block, alpha = (V2 - V1) / (dT2 - dT1) / mean(V1, V2)
    is stored in the 'alpha' column of the row belonging to the lower DeltaT.
    """
    tagged_systems = (sys for sys in system_generator(*systems_md) if 'therm-exp' in sys['tags'])
    for system in tagged_systems:
        print(f"system {system['name']}")
        systype = system['type']['name']
        ff_name = system['force-field']['name']

        # consecutive (lower, upper) temperature offsets
        for dT_low, dT_high in zip(DeltaTs, DeltaTs[1:]):
            for block in range(n_blocks):
                row_low = (systype, ff_name, dT_low, block)
                row_high = (systype, ff_name, dT_high, block)
                V1 = df_te.at[row_low, 'volume']
                V2 = df_te.at[row_high, 'volume']
                slope = (V2 - V1) / (dT_high - dT_low)
                df_te.at[row_low, 'alpha'] = slope / np.mean((V1, V2))
calc_therm_exp()

In [None]:
# mean alpha over the blocks (index level 3) for every
# (system type, force field, DeltaT) combination; groupby replaces the
# `level=` argument of Series.mean, which was removed in pandas 2.0
# (sort=False keeps the original row order)
df_te['alpha'].groupby(level=[0, 1, 2], sort=False).mean()

In [None]:
# Bar chart of the thermal expansion coefficient per force field (mean and
# standard deviation over all DeltaT pairs and blocks), one figure per system
# type that has 'therm-exp' runs.
mpl_rc = {'figure.dpi': 120}
with plt.rc_context({**mpl_rc_global, **mpl_rc}):
    for systype in OrderedSet(sys['type']['name'] for sys in system_generator(*systems_md) if 'therm-exp' in sys['tags']):
        print(f"system type {systype}")

        fig, ax = plt.subplots(constrained_layout=True, figsize=(3, 2.2))
        # use the short force field name as tick label where one is defined
        ffs = [ff_short_names[ff] if ff in ff_short_names else ff for ff in force_fields]
        x = range(len(ffs))
        ax.set_xticks(x)
        ax.set_xticklabels(ffs, rotation=90)
        alphas = []
        alphas_std = []
        for ff in force_fields:
            print(f" force-field {ff}")
            # all DeltaT/block entries of this system type and force field;
            # plain mean()/std() replace the `level=` reductions, which were
            # removed in pandas 2.0 (a single group is selected here anyway)
            alpha_values = df_te.loc[(systype, ff, slice(None), slice(None)), 'alpha']
            alpha = float(alpha_values.mean())
            alpha_std = float(alpha_values.std())
            #print(f"      alpha {alpha:.5f} ± {alpha_std:.5f}")
            alphas.append(alpha)
            alphas_std.append(alpha_std)
        # one error bar per force field (previously only the last scalar
        # alpha_std was passed and applied to every bar)
        ax.bar(x, alphas, yerr=alphas_std)
        plt.show()

# osmotic pressure

## settings

In [None]:
# system types for the osmotic pressure runs: everything except pure water
# (to restrict further, add e.g. `and name.startswith('water-cacl2_')`)
system_types_osmp = {
    name: system_type
    for name, system_type in system_types.items()
    if name != 'water-pure'
}
# force fields tagged neither 'fit' nor 'dummy'
# (to restrict further, add e.g. `and name.startswith('netz')` or
# `and name.startswith('iff-altern5-netz')`)
force_fields_osmp = {
    name: force_field
    for name, force_field in force_fields.items()
    if not ('fit' in force_field['tags'] or 'dummy' in force_field['tags'])
}

systems_osmp = (system_types_osmp, force_fields_osmp)
pd.DataFrame(system_generator(*systems_osmp, verbose=False))

In [None]:
# osmotic pressure methods, keyed by name; 'k' is used below as the force
# constant of the restraints/walls
osmp_methods = {'osmp-xy-k4000': dict(k=4000, scale='xy')}
pd.DataFrame(osmp_methods)

## create DataFrame

In [None]:
# build empty DataFrame
iterables = [[sys['name'] for sys in system_generator(*systems_osmp)],
             [osmp_name for osmp_name in osmp_methods],
             ['pre', 'prod']]
index = pd.MultiIndex.from_product(iterables, names=['system', 'osmp_method', 'run'])
df_osmp = pd.DataFrame(index=index, columns=["Pi_low", "Pi_high", "Pi_low_err", "Pi_high_err", "Pi", "Pi_err",
                                             "x", "x_err", "x_inner", "x_inner_err", "p_tot", "p_ex", "p_in",
                                             "c_inner", "c_inner_err"], dtype=np.float64)
df_osmp.head()

## preparations

### prepare files

In [None]:
def prepare_osmotic_pressure():
    """Create the run folders and input files of the osmotic pressure simulations.

    For every system and every entry of ``osmp_methods`` the folder
    ``<system name>/<osmp name>`` gets the sub-folders topol, equi1, equi2,
    equi3, pre, equi4 and prod.  Single-molecule .gro templates, the final NPT
    configuration, interaction tables (only for 'halftabulated' systems) and
    .mdp templates are copied in, and the .mdp files are then adjusted: run
    length, temperature, semiisotropic pressure coupling, cut-off scheme,
    cut-offs, dispersion correction, vdwtype and energy groups.

    equi1 is always set up with plain LJ (Verlet, Cut-off, no energy groups),
    which is why several settings are first written to all folders and then
    overridden for equi1.

    Raises:
        Exception: for systems tagged 'tabulated' (not implemented).
    """
    for system in system_generator(*systems_osmp):
        print("system", system['name'])

        for osmp_nr, (osmp_name, osmp_method) in enumerate(osmp_methods.items()):
            print('  osmp_name', osmp_name)

            working_dir = os.path.join(system['name'], osmp_name)
            with WorkingDir(working_dir):
                # make dirs
                run_bash("mkdir -p topol equi1 equi2 equi3 pre equi4 prod")
                all_folders = ("equi1", "equi2", "equi3", "pre", "equi4", "prod")
                
                # single-*.gro
                for mt in system['moltypes']:
                    # the template is looked up by 'type' if present, else by 'name'
                    name = mt.get('type', mt['name'])
                    run_bash(f"cp {template_dir}/gro/single-{name}.gro equi1/single-{mt['name']}.gro")
                
                # copy confout from NPT prod
                run_bash(f"cp ../npt-prod/confout.gro equi1/npt-confout.gro")
                    
                # table_?_?.xvg
                if 'halftabulated' in system['tags']:
                    # one table per atom type pair that has a tabulated potential
                    for table in (f"table_{pair[0]}_{pair[1]}.xvg" for pair in itertools.combinations_with_replacement(system['atomtypes-no-h'], 2)
                        if (pair[0], pair[1]) in system['force-field']['tabulated-potentials']):
                            run_bash(f"cp {template_dir}/table/{system['force-field']['name']}/{table} topol/")
                            # remove stale copies/links before re-linking
                            for folder in all_folders:
                                run_bash(f"rm -f {folder}/{table}")
                            for folder in all_folders[1:]:  # equi1 is run with LJ parameters
                                run_bash(f"ln -sf ../topol/{table} {folder}/{table}")
                if 'tabulated' in system['tags']:
                    raise Exception('not implemented')
                    
                # table.xvg
                if 'halftabulated' in system['tags']:
                    run_bash(f"cp {template_dir}/table/table6-12.xvg topol/table.xvg")
                    for folder in all_folders[1:]:
                        run_bash(f"rm -f {folder}/table.xvg")
                        run_bash(f"ln -s ../topol/table.xvg {folder}/table.xvg")
                        
                # .mdp files
                run_bash(f"cp {template_dir}/mdp/equi1.mdp equi1/grompp.mdp")
                run_bash(f"cp {template_dir}/mdp/equi2.mdp equi2/grompp.mdp")
                run_bash(f"cp {template_dir}/mdp/npt-equi3.mdp equi3/grompp.mdp")
                run_bash(f"cp {template_dir}/mdp/npt-prod.mdp pre/grompp.mdp")
                run_bash(f"cp {template_dir}/mdp/npt-equi3.mdp equi4/grompp.mdp")
                run_bash(f"cp {template_dir}/mdp/npt-prod.mdp prod/grompp.mdp")
                # run length
                gt.mdp.set_parameter("equi1/grompp.mdp", 'nsteps', int(1e4))
                gt.mdp.set_parameter("equi2/grompp.mdp", 'nsteps', int(1e5))
                gt.mdp.set_parameter("equi3/grompp.mdp", 'nsteps', int(2e5))
                gt.mdp.set_parameter("pre/grompp.mdp", 'nsteps',   int(5e5))
                gt.mdp.set_parameter("equi4/grompp.mdp", 'nsteps', int(1e6))
                gt.mdp.set_parameter("prod/grompp.mdp", 'nsteps',  int(5e6))
                # set temperature
                gt.mdp.set_parameter("equi2/grompp.mdp", 'gen-temp', system['temperature'])
                for folder in all_folders[1:]:
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'ref-t', system['temperature'])
                # set pressure (equi3, pre, equi4, prod): semiisotropic coupling
                # with zero compressibility for the second component
                for folder in all_folders[2:]:
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'ref-p', '200.0  0.0')
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'pcoupltype', 'semiisotropic')
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'tau-p', '5')
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'compressibility', '4.5e-5  0.0')
                # set cutoff scheme
                cutoff_scheme = 'group' if 'halftabulated' in system['tags'] else 'Verlet'
                for folder in all_folders:
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'cutoff-scheme', cutoff_scheme)
                gt.mdp.set_parameter('equi1/grompp.mdp', 'cutoff-scheme', 'Verlet')  # LJ for equi1
                # set cutoffs
                co = system['force-field']['cut-off']
                for folder in all_folders:
                    for key in ('rlist', 'rcoulomb', 'rvdw'):
                        gt.mdp.set_parameter(f"{folder}/grompp.mdp", key, co)
                if 'tail-corr' in system['tags']:
                    for folder in all_folders:
                        mdp_file = folder + '/grompp.mdp'
                        gt.mdp.set_parameter(mdp_file, 'DispCorr', 'EnerPres')
                # set vdwtype
                vdwtype = 'User' if 'halftabulated' in system['tags'] else 'Cut-off'
                for folder in all_folders:
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'vdwtype', vdwtype)
                gt.mdp.set_parameter("equi1/grompp.mdp", 'vdwtype', 'Cut-off')  # LJ for equi1
                # set energygrps(-table)
                # only pairs whose both atom types occur in this system
                pairs = tuple((pair for pair in system['force-field'].get('tabulated-potentials', [])
                                if pair[0] in system['atomtypes']
                                and pair[1] in system['atomtypes']))
                energygrps =  ' '.join(list(OrderedSet([pair[0] for pair in pairs]
                                                       +[pair[1] for pair in pairs])))
                energygrp_table =  '  '.join((f"{pair[0]} {pair[1]}" for pair in pairs))
                for folder in all_folders:
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'energygrps', energygrps)
                    gt.mdp.set_parameter(f"{folder}/grompp.mdp", 'energygrp-table', energygrp_table)
                gt.mdp.set_parameter('equi1/grompp.mdp', 'energygrps', '')  # LJ for equi1
                gt.mdp.set_parameter('equi1/grompp.mdp', 'energygrp-table', '')
prepare_osmotic_pressure()

### elongate and fill box

In [None]:
def fill_osmp_boxes():
    """Build the elongated, filled start configuration ``equi1/conf.gro``.

    For every system and osmotic pressure method the final NPT box (edge a) is
    elongated to (a, a, 2a) and the added volume a^3 is filled with molecules
    of the first moltype at a density of about 1 g/mL.  An existing conf.gro
    with the expected atom count and box is kept; for every method but the
    first, the conf.gro of the first method is copied instead of rebuilt.

    Raises:
        Exception: if ``gmx insert-molecules`` did not place all molecules.
    """
    # insert scale
    scale = 0.57

    def N_from_rho_M_V(density, molar_mass, volume):
        """
        density in g/mL
        molar_mass in g/mol
        volume in nm^3
        """
        # m = V * ρ
        mass = (volume / 1e21) * density # in g
        # n = m / M
        amount_of_substance = mass / molar_mass # in mol
        return amount_of_substance * const.N_A  # in entities

    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_nr, osmp_name in enumerate(osmp_methods):
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name, 'equi1')

            with WorkingDir(working_dir):
                box_edge, _, _ = gt.gro.get_box('npt-confout.gro')
                new_moltype = system['moltypes'][0]
                new_name = new_moltype['name']
                new_mass = gt.moltypes.get_mol_mass(system['moltypes'], 0, single_mol=True)
                new_volume = box_edge**3  # total minus center slab
                # density of water ~ 1 g/ml
                new_nmols = int(N_from_rho_M_V(1.0, new_mass, new_volume))
                print(new_volume, new_mass, new_nmols)
                n_atoms_wanted = gt.moltypes.get_natoms(system['moltypes']) + new_nmols * len(new_moltype['atoms'])
                new_box = (box_edge, box_edge, box_edge*2)

                # check for existing conf.gro
                try:
                    n_atoms_existing = gt.gro.get_natoms("conf.gro")
                    box_existing = gt.gro.get_box("conf.gro")
                except Exception:
                    # no (readable) conf.gro yet; KeyboardInterrupt / SystemExit
                    # are no longer swallowed here
                    n_atoms_existing = 0
                    box_existing = [0, 0, 0]
                if n_atoms_existing == n_atoms_wanted and np.allclose(box_existing, new_box):
                    print('..conf.gro with correct number of atoms and box existing..')
                    continue

                # if not the first, copy
                if osmp_nr != 0:
                    run_bash(f"cp ../../{list(osmp_methods.keys())[0]}/equi1/conf.gro conf.gro")
                    continue

                # partially enlarge box (0.4 nm short of the final z length;
                # presumably to keep a gap at the periodic boundary while
                # inserting -- confirm)
                run_bash(f"gmx editconf -f npt-confout.gro -box {new_box[0]} {new_box[1]} {new_box[2]-0.4} -o elongated.gro")
                # insert water molecules
                run_bash(f"gmx insert-molecules -f elongated.gro "
                         f"-ci single-{new_name}.gro -nmol {new_nmols} -o inserted.gro "
                         f"-try 300 -scale {scale}")
                # check if enough inserted
                n_atoms_inserted = gt.gro.get_natoms("inserted.gro")
                if n_atoms_inserted != n_atoms_wanted:
                    print(n_atoms_inserted, n_atoms_wanted)
                    raise Exception("not enough molecules inserted")
                # fully enlarge box
                run_bash(f"gmx editconf -f inserted.gro -box {new_box[0]} {new_box[1]} {new_box[2]} -o conf.gro")
                run_bash("rm -f elongated.gro inserted.gro")
                # raw string: "\#" is not a valid Python escape sequence; the
                # command text sent to bash is unchanged
                run_bash(r"rm -f \#*")
fill_osmp_boxes()

### create osmp_system

In [None]:
def create_osmp_systems():
    """Pickle a system description that includes the inserted molecules.

    The number of added molecules is derived from the atom count difference
    between equi1/conf.gro and equi1/npt-confout.gro.  A copy of the system
    with '/<osmp name>' appended to its name and a copy of the first moltype
    (carrying that molecule count) appended to its moltypes is written to
    ``<system name>/<osmp name>/osmp_system.pkl``.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)

            with WorkingDir(os.path.join(system['name'], osmp_name, 'equi1')):
                first_moltype = system['moltypes'][0]
                natoms_added = gt.gro.get_natoms("conf.gro") - gt.gro.get_natoms("npt-confout.gro")

                # moltype entry for the additionally inserted molecules
                extra_moltype = deepcopy(first_moltype)
                extra_moltype['nmols'] = natoms_added // len(first_moltype['atoms'])

                # osmotic pressure system = original system + extra moltype
                osmp_system = deepcopy(system)
                osmp_system['name'] = osmp_system['name'] + '/' + osmp_name
                osmp_system['moltypes'].append(extra_moltype)

                with open("../osmp_system.pkl", 'wb') as pkl_file:
                    pickle.dump(osmp_system, pkl_file)
create_osmp_systems()

### prepare topol.top

In [None]:
def prepare_osmp_topol():
    """Write the topologies of the osmotic pressure runs.

    ``topol/topol.top`` is written for every system and osmotic pressure
    method; 'halftabulated' systems additionally get ``equi1/topol.top``
    without tabulated potentials (equi1 runs with plain LJ).  Moltypes 1 and 2
    of the pickled osmp system get an ``osm_restraints`` entry with r = half
    the NPT box edge and k taken from the method.
    """
    def make_restraints(osmp_system, restr_r, restr_k):
        # fresh dict per call, so the two topologies never share one object
        # NOTE(review): funct 2 / g 5 presumably select a flat-bottomed
        # position restraint with a layer geometry along z -- confirm against
        # save_parametric_force_field_as_top
        return {
            osmp_system['moltypes'][i]['name']: {'ai': 1, 'funct': 2, 'g': 5, 'r': restr_r, 'k': restr_k}
            for i in (1, 2)
        }

    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name, osmp_method in osmp_methods.items():
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)

            with WorkingDir(working_dir):
                # loaded once (the original read the same file twice)
                with open("osmp_system.pkl", 'rb') as f:
                    osmp_system = pickle.load(f)

                box_edge, _, _ = gt.gro.get_box('equi1/npt-confout.gro')
                restr_r = box_edge / 2
                # topol.top
                save_parametric_force_field_as_top('topol/topol.top', system['force-field'], system['name'], osmp_system['moltypes'],
                                                  osm_restraints=make_restraints(osmp_system, restr_r, osmp_method['k']))
                if 'halftabulated' in system['tags']:
                    # same topology without tabulated potentials for equi1
                    ff_no_tabulated = deepcopy(system['force-field'])
                    ff_no_tabulated['tabulated-potentials'] = []
                    save_parametric_force_field_as_top('equi1/topol.top', ff_no_tabulated, system['name'], osmp_system['moltypes'],
                                                      osm_restraints=make_restraints(osmp_system, restr_r, osmp_method['k']))
prepare_osmp_topol()

### prepare restraints.gro

In [None]:
def prepare_osmp_restraint_gro():
    """Write ``topol/restraint.gro``, the reference coordinates for the restraints.

    The file is a copy of equi1/conf.gro in which the atoms of the first and
    last moltype are moved to the origin and the atoms of moltypes 1 and 2 are
    moved to (0, 0, restr_center), where restr_center is the NPT box edge
    rounded to three decimals.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)

            with WorkingDir(working_dir):
                # loaded once (the original read the same file twice)
                with open("osmp_system.pkl", 'rb') as f:
                    osmp_system = pickle.load(f)

                box_edge, _, _ = gt.gro.get_box('equi1/npt-confout.gro')
                restr_center = np.round(box_edge, 3)  # precision of restraint.gro file
                # generate restraint.gro
                run_bash("cp equi1/conf.gro topol/restraint.gro")
                top = gt.top.Topology()
                top.load_simple_top(osmp_system['moltypes'])
                # load gro file
                top.load_gro_file_pos_vel("topol/restraint.gro")
                box = gt.gro.get_box("topol/restraint.gro")
                # modify coordinates
                for atom in top.moltypes()[0].atoms() + top.moltypes()[-1].atoms():
                    atom.pos = np.zeros(3)
                for atom in top.moltypes()[1].atoms() + top.moltypes()[2].atoms():
                    atom.pos = np.array([0.0, 0.0, restr_center])
                # save gro file
                top.save_gro_file("topol/restraint.gro", box)
prepare_osmp_restraint_gro()

### prepare index.ndx

In [None]:
def prepare_osmp_index():
    """Write ``index.ndx`` for every 'halftabulated' osmotic pressure system.

    The index file is generated from a simple topology built from the moltypes
    stored in ``osmp_system.pkl``; other systems are left untouched.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                # only halftabulated systems get an index file
                if 'halftabulated' not in system['tags']:
                    continue

                with open("osmp_system.pkl", 'rb') as pkl_file:
                    osmp_system = pickle.load(pkl_file)

                # generate simple index file
                topology = gt.top.Topology()
                topology.load_simple_top(osmp_system['moltypes'])
                gt.top.generate_index_file(topology, 'index.ndx')
prepare_osmp_index()

## osmotic pressure pre-run

### md, parallelized

In [None]:
def osmp_pre_run():
    """Submit the equi1 -> equi2 -> equi3 -> pre chain of every system as a SLURM job.

    Systems that already have a local ``pre/done`` marker are skipped.  For the
    others the input files are pushed to the remote host and a job script is
    submitted that runs each stage only if its confout.gro is missing and
    touches ``pre/done`` at the end.  Returned job ids are appended to the
    notebook-wide ``jobids`` list.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo')
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)

            working_dir = os.path.join(system['name'], osmp_name)
            remote_dir = os.path.join(remote_dir_base, system['name'], osmp_name)

            with WorkingDir(working_dir):
                # test local files
                if os.path.isfile('pre/done'):
                    print('..results present locally..')
                    continue

                # mkdir
                run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

                # copy simulation files to remote
                if 'halftabulated' in system['tags']:
                    filelist = ["equi1/conf.gro */grompp.mdp topol equi1/topol.top */table* index.ndx"]
                else:
                    filelist = ["equi1/conf.gro */grompp.mdp topol"]
                gt.remote.push_files(filelist, remote_host, remote_dir, exclude="traj*")

                # halftabulated systems need the index file and run equi1 with
                # their own topology (without tabulated potentials)
                index_string = ""
                equi1_topol = "../topol/topol.top"
                if 'halftabulated' in system['tags']:
                    index_string = "-n ../index.ndx"
                    equi1_topol = "topol.top"

                # commands to be run on compute nodes
                script = remote_header + rf"""
pushd equi1
    if [[ ! -f confout.gro ]]; then
        gmx grompp -p {equi1_topol} -r ../topol/restraint.gro
        gmx mdrun
    fi
    rm -f \#*
popd

pushd equi2
    if [[ ! -f confout.gro ]]; then
        gmx grompp {index_string} -maxwarn 1 -p ../topol/topol.top -r ../topol/restraint.gro -c ../equi1/confout.gro
        gmx mdrun
    fi
    rm -f \#*
popd

pushd equi3
    if [[ ! -f confout.gro ]]; then
        gmx grompp {index_string} -p ../topol/topol.top -r ../topol/restraint.gro -maxwarn 2 -c ../equi2/confout.gro
        gmx mdrun
    fi
    rm -f \#*
popd


pushd pre
    if [[ ! -f confout.gro ]]; then
        gmx grompp {index_string} -p ../topol/topol.top -r ../topol/restraint.gro -maxwarn 2 -c ../equi3/confout.gro
        gmx mdrun
    fi
    rm -f \#*
popd

touch pre/done
""" + remote_footer

            # submission happens after leaving the working directory
            jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
            print(jobid)
            if jobid is not None:
                jobids.append(jobid)
osmp_pre_run()

### check job status

In [None]:
# query the scheduler and replace the tracked job list with whatever
# check_job_stati returns (presumably the jobs still pending -- confirm)
jobids = check_job_stati(jobids, remote_host)

### calc Pi, one core

In [None]:
# Write the helper script scripts/calcPi.py, make it executable and push it to
# remote_dir_base on the remote host.
# The generated script (run inside a simulation folder) takes the mean box
# area <Box-X * Box-Y> from ener.edr, block-averages the absolute wall forces
# in wallf-low.xvg / wallf-high.xvg over n_blocks blocks (last sample dropped),
# converts force per area to a pressure with bar_per_md_pressure and stores
# mean and block standard deviation in Pi.npz.
# The outer string is raw, so '\n' reaches the script as an escape sequence
# and becomes a newline only there.
!mkdir -p scripts
with open('scripts/calcPi.py', 'w') as f:
    f.write(r"""#!/usr/bin/env python3
    
import gromacstools as gt
import numpy as np
import subprocess
from scipy import constants as const

# own constants
class oconst: pass
oconst.bar_per_md_pressure = 10**28 * const.u

n_blocks = 5

gt.general.run_bash("gmx energy -f ener.edr -o box-xy.xvg <<< 'Box-x\nBox-y'")
data, _ = gt.xvg.load('box-xy.xvg')
gt.general.run_bash("rm box-xy.xvg")
data['A'] = data['Box-X'] * data['Box-Y']
A = data['A'].mean()

data_low, header_low = gt.xvg.load("wallf-low.xvg")
data_high, header_high = gt.xvg.load("wallf-high.xvg")
f_low_raw = np.abs(data_low['y'].iloc[:-1])
f_high_raw = np.abs(data_high['y'].iloc[:-1])
len_block_low = len(f_low_raw) // n_blocks
len_block_high = len(f_high_raw) // n_blocks
f_low_blocks = []
f_high_blocks = []
for i in range(n_blocks):
    f_low_blocks.append(np.mean(f_low_raw[len_block_low*i:len_block_low*(i+1)]))
    f_high_blocks.append(np.mean(f_high_raw[len_block_high*i:len_block_high*(i+1)]))
f_low = np.mean(f_low_blocks)
f_high = np.mean(f_high_blocks)
f_low_err = np.std(f_low_blocks)
f_high_err = np.std(f_high_blocks)
Pi_low = f_low / A * oconst.bar_per_md_pressure
Pi_high = f_high / A * oconst.bar_per_md_pressure
Pi_low_err = f_low_err / A * oconst.bar_per_md_pressure
Pi_high_err = f_high_err / A * oconst.bar_per_md_pressure
np.savez_compressed('Pi.npz', Pi_low=Pi_low, Pi_low_err=Pi_low_err, Pi_high=Pi_high, Pi_high_err=Pi_high_err)
""")
!chmod a+x scripts/calcPi.py
gt.remote.push_files(['scripts/calcPi.py'], remote_host, remote_dir_base)

In [None]:
# Write the helper script scripts/calcPressure.py, make it executable and push
# it to remote_dir_base on the remote host.
# The generated script extracts Pres-XX and Pres-YY from ener.edr and stores
# the average of their two means as p_tot in p_tot.npz.
!mkdir -p scripts
with open('scripts/calcPressure.py', 'w') as f:
    f.write(r"""#!/usr/bin/env python3
    
import gromacstools as gt
import numpy as np
import subprocess
from scipy import constants as const


gt.general.run_bash("gmx energy -f ener.edr -o pressure.xvg <<< 'Pres-XX\nPres-YY'")
data, _ = gt.xvg.load('pressure.xvg')
gt.general.run_bash("rm pressure.xvg")
p_tot = 1/2 * (data['Pres-XX'].mean() + data['Pres-YY'].mean())
np.savez_compressed('p_tot.npz', p_tot=p_tot)
""")
!chmod a+x scripts/calcPressure.py
gt.remote.push_files(['scripts/calcPressure.py'], remote_host, remote_dir_base)

In [None]:
# Write the helper script scripts/calcX.py, make it executable and push it to
# remote_dir_base on the remote host.
# The generated script reads the per-frame particle counts from size.xvg
# ('full' region between the walls) and size-inner.xvg ('inner' region, half
# as wide), derives x and c per frame from the ion and SOL counts (c is
# converted to mol/L) and stores their means and standard deviations in x.npz.
# The wall positions are reconstructed from the z box length of confout.gro
# (center = z/2, half-width = z/4); the branch for CA-containing systems
# weights CL with 1/2.
!mkdir -p scripts
with open('scripts/calcX.py', 'w') as f:
    f.write(r"""#!/usr/bin/env python3
    
import gromacstools as gt
import numpy as np
import subprocess
from scipy import constants as const

# own constants
class oconst: pass
oconst.bar_per_md_pressure = 10**28 * const.u

gt.general.run_bash("gmx energy -f ener.edr -o box-xy.xvg <<< 'Box-x\nBox-y'")
data, _ = gt.xvg.load('box-xy.xvg')
gt.general.run_bash("rm box-xy.xvg")
data['A'] = data['Box-X'] * data['Box-Y']
A = data['A'].mean()
del data

_, _, box_edge = gt.gro.get_box('confout.gro')
restr_r = box_edge / 4
restr_center = np.round(box_edge / 2, 3)
wall_low = restr_center - restr_r
wall_high = restr_center + restr_r
inner_wall_low = restr_center - 0.5*restr_r
inner_wall_high = restr_center + 0.5*restr_r
volume_full = A * (wall_high - wall_low)
volume_inner = A * (inner_wall_high - inner_wall_low)

x = {}
x_err = {}
c = {}
c_err = {}
for region in ('full', 'inner'):
    filename = {'full': "size.xvg", 'inner': "size-inner.xvg"}[region]
    volume = {'full': volume_full, 'inner': volume_inner}[region]
    data, header = gt.xvg.load(filename)
    if data.columns[1].startswith('atomname CA'):
        data.columns = ['t', 'CA', 'CL', 'SOL']
        data['x'] = 1/2 * (data['CA'] + 1/2*data['CL']) / (1/2 * (data['CA'] + 1/2*data['CL']) + data['SOL'])
        data['c'] = 1/2 * (data['CA'] + 1/2*data['CL']) / volume
    elif data.columns[1].startswith(tuple((f'atomname {ion}' for ion in ['NA', 'K', 'LI']))):
        data.columns = ['t', 'CATION', 'CL', 'SOL']
        data['x'] = 1/2 * (data['CATION'] + data['CL']) / (1/2 * (data['CATION'] + data['CL']) + data['SOL'])
        data['c'] = 1/2 * (data['CATION'] + data['CL']) / volume
    else:
        raise Exception('Unknown system!')
    x[region] = data['x'].mean()
    x_err[region] = data['x'].std()
    c[region] = data['c'].mean()
    c[region] *= 1 / const.N_A / 1e-24  # now mol/L
    c_err[region] = data['c'].std()
    c_err[region] *= 1 / const.N_A / 1e-24  # now mol/L
    
np.savez_compressed('x.npz',
x=x['full'],
x_err=x_err['full'],
x_inner=x['inner'],
x_inner_err=x_err['inner'],
c=c['full'],
c_err=c_err['full'],
c_inner=c['inner'],
c_inner_err=c_err['inner'],
)
""")
!chmod a+x scripts/calcX.py
gt.remote.push_files(['scripts/calcX.py'], remote_host, remote_dir_base)

In [None]:
def osmp_pre_Pi():
    """Submit the analysis of the 'pre' runs as SLURM jobs.

    Systems whose pre/Pi.npz, pre/p_tot.npz and pre/x.npz are all present
    locally are skipped.  For the others a job script is submitted that
    computes the wall forces on the ions at both walls, counts the particles
    in the full and inner region with ``gmx select``, coarsens the equi3
    energy file and runs calcPi.py, calcPressure.py and calcX.py.  The script
    also deletes pre/traj_comp.xtc on the remote side once the .xvg files have
    been written.  Returned job ids are appended to the notebook-wide
    ``jobids`` list.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo')
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name, osmp_method in osmp_methods.items():
            print('  osmp_name', osmp_name)

            working_dir = os.path.join(system['name'], osmp_name)
            remote_dir = os.path.join(remote_dir_base, system['name'], osmp_name)

            with WorkingDir(working_dir):
                # test local files
                Pi_done = os.path.isfile('pre/Pi.npz')
                p_tot_done = os.path.isfile('pre/p_tot.npz')
                x_done = os.path.isfile('pre/x.npz')
                if all((Pi_done, p_tot_done, x_done)):
                    print('..results present locally..')
                    continue

                # wallforce parameters: walls at center -/+ half the NPT box
                # edge, inner region half as wide
                box_edge, _, _ = gt.gro.get_box('equi1/npt-confout.gro')
                restr_r = box_edge / 2
                restr_center = np.round(box_edge, 3)  # precision of restraint.gro file
                wall_low = restr_center - restr_r
                wall_high = restr_center + restr_r
                restr_k = osmp_method['k']
                inner_wall_low = restr_center - 0.5*restr_r
                inner_wall_high = restr_center + 0.5*restr_r
                cation = system['moltypes'][1]['name']
                anion = system['moltypes'][2]['name']

                # commands to be run on compute nodes
                script = remote_header + rf"""
pushd pre
    if [[ ! -f wallf-low.xvg ]]; then
        $HOME/bin/wallforce -quiet -f traj_comp.xtc -s topol.tpr -axis zn -wallr {wall_low} -wallk {restr_k} -o wallf-low.xvg <<< "name {cation} or name {anion}"
    fi
    if [[ ! -f wallf-high.xvg ]]; then
        $HOME/bin/wallforce -quiet -f traj_comp.xtc -s topol.tpr -axis z -wallr {wall_high} -wallk {restr_k} -o wallf-high.xvg <<< "name {cation} or name {anion}"
    fi
    rm -f \#*
popd

pushd pre
    if [[ ! -f size.xvg ]]; then
        echo -e 'atomname {cation} and z > {wall_low} and z < {wall_high}\n'\
                'atomname {anion} and z > {wall_low} and z < {wall_high}\n'\
                'res_com of resname SOL and z > {wall_low} and z < {wall_high}'\
         | gmx select -f traj_comp.xtc -s topol.tpr -os size.xvg
    fi
    rm -f \#*
popd

pushd pre
    if [[ ! -f size-inner.xvg ]]; then
        echo -e 'atomname {cation} and z > {inner_wall_low} and z < {inner_wall_high}\n'\
                'atomname {anion} and z > {inner_wall_low} and z < {inner_wall_high}\n'\
                'res_com of resname SOL and z > {inner_wall_low} and z < {inner_wall_high}'\
          | gmx select -f traj_comp.xtc -s topol.tpr -os size-inner.xvg
    fi
    rm -f \#*
popd

pushd pre
    rm -f traj_comp.xtc
popd

pushd equi3
    if [[ ! -f ener-coarse.edr ]]; then
        gmx eneconv -f ener.edr -o ener-coarse.edr -dt 20
    fi
popd

pushd pre
    if [[ ! -f Pi.npz ]]; then
        ../../../../scripts/calcPi.py
    fi
popd

pushd pre
    if [[ ! -f p_tot.npz ]]; then
        ../../../../scripts/calcPressure.py
    fi
popd

pushd pre
    if [[ ! -f x.npz ]]; then
        ../../../../scripts/calcX.py
    fi
popd
""" + remote_footer

            # submission happens after leaving the working directory
            jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
            print(jobid)
            if jobid is not None:
                jobids.append(jobid)
osmp_pre_Pi()

### check job status

In [None]:
# query the scheduler and replace the tracked job list with whatever
# check_job_stati returns (presumably the jobs still pending -- confirm)
jobids = check_job_stati(jobids, remote_host)

### copy results from cluster

In [None]:
def copy_from_cluster():
    """Pull the 'pre' analysis results of every system from the remote host.

    Fetches the coarse energy files, Pi.npz, p_tot.npz, x.npz and the done
    marker into ``<system name>/<osmp name>``; trajectories are excluded.  A
    failed transfer is reported and does not stop the loop.
    """
    # only the remote base directory is needed here
    remote_dir_base, _, _ = gen_remote_stuff(remote_host, 'enzo')
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)
            remote_dir = os.path.join(remote_dir_base, system['name'], osmp_name)

            with WorkingDir(working_dir):
                exclude = "traj*"
                filelist = ["*/ener-coarse.edr", "pre/Pi.npz", "pre/p_tot.npz", "pre/x.npz", "pre/done"]
                try:
                    gt.remote.pull_files(filelist, remote_host, remote_dir, exclude=exclude)
                except Exception:
                    # best effort per system; KeyboardInterrupt / SystemExit
                    # are no longer swallowed, so a long pull can be aborted
                    print('.. pulling failed ..')

copy_from_cluster()

### equilibration check

In [None]:
def equi_check():
    """Run check_equi on the volume of the coarse equi3 energy file of every system / osmp method.

    Combinations for which the check cannot be performed are reported with
    the reason and skipped.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)

            with WorkingDir(working_dir):
                try:
                    check_equi(["Volume"], edr_file="equi3/ener-coarse.edr", safe_factor=3.0)
                except Exception as err:
                    # Exception (not a bare except) so that Ctrl-C still
                    # interrupts the loop; show what went wrong.
                    print('..no data..', err)

equi_check()

### Pi in data frame

In [None]:
# naming used in this section
#   f:          force
#   A:          area
#   Pi:         osmotic pressure
#   high, low:  respective walls

def load_Pi_pre():
    """Copy the osmotic pressures stored in pre/Pi.npz of every pre run into df_osmp.

    Besides the per-wall values and errors, the mean of both walls ('Pi') and
    its propagated error ('Pi_err') are stored.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            row = (system['name'], osmp_name, 'pre')

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                data = np.load('pre/Pi.npz')
                # values as stored by the analysis script
                for column in ('Pi_low', 'Pi_high', 'Pi_low_err', 'Pi_high_err'):
                    df_osmp.at[row, column] = data[column]
                # mean over both walls and its error
                df_osmp.at[row, 'Pi'] = 1/2 * (data['Pi_low'] + data['Pi_high'])
                df_osmp.at[row, 'Pi_err'] = 1/2 * np.sqrt(data['Pi_low_err']**2 + data['Pi_high_err']**2)

load_Pi_pre()

In [None]:
# Show all columns of the rows whose third index level is 'pre'.
df_osmp.loc[(slice(None), slice(None), 'pre'), :]

### get p_tot (Pres-XX, Pres-YY)

In [None]:
def get_p_tot():
    """Store the 'p_tot' value of pre/p_tot.npz of every pre run in df_osmp."""
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            row = (system['name'], osmp_name, 'pre')

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                archive = np.load('pre/p_tot.npz')
                df_osmp.at[row, 'p_tot'] = archive['p_tot']
get_p_tot()

In [None]:
# Quick look at the last rows of the results table.
df_osmp.tail()

### calc p_in and p_ex

In [None]:
def calc_p_in_ex():
    """Derive inner and outer pressure of every pre run from p_tot and Pi.

    Relations used:
        p_tot = (p_in + p_ex) / 2
        Pi    = p_in - p_ex
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            row = (system['name'], osmp_name, 'pre')

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                p_tot = df_osmp.at[row, 'p_tot']
                Pi_low = df_osmp.at[row, 'Pi_low']
                Pi_high = df_osmp.at[row, 'Pi_high']
                # osmotic pressure as mean over both walls
                Pi = (Pi_low + Pi_high) / 2
                p_ex = p_tot - Pi / 2
                p_in = p_ex + Pi
                df_osmp.at[row, 'p_ex'] = p_ex
                df_osmp.at[row, 'p_in'] = p_in

calc_p_in_ex()

### actual mole fraction

In [None]:
def calc_x():
    """Copy the mole fractions (and errors) stored in pre/x.npz of every pre run into df_osmp."""
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            row = (system['name'], osmp_name, 'pre')

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                data = np.load('pre/x.npz')
                for column in ('x', 'x_err', 'x_inner', 'x_inner_err'):
                    df_osmp.at[row, column] = data[column]

calc_x()

### plot

In [None]:
def plot_Pi_pre():
    """Plot the pre-run osmotic pressure over the inner mole fraction.

    One figure per system type; every force field / osmp method combination
    is drawn as one dotted error-bar curve.
    """
    cmap = plt.get_cmap('rainbow')

    # Literature data: CRC handbook of chem and phys (25°C)
    # (evaluated here, but not drawn into the figures below)
    M0 = 18.0154  # g / mol
    V_M = 18.0685  # cm^3 (of water 25°)
    lit_molality = np.arange(0.1, 1.1, 0.1)  # mol / kg (?)
    lit_activity_coeff = np.array([0.778, 0.735, 0.710, 0.693, 0.681, 0.673, 0.667, 0.662, 0.659, 0.657])
    lit_x = 1 / (1 + 1/(M0 / 1000*lit_molality))
    lit_y = - const.R * 300 / V_M * np.log(lit_activity_coeff * lit_molality)

    for stn in system_types_osmp:
        print(stn)
        fig, ax = plt.subplots(figsize=(6, 3), constrained_layout=True)
        for color_nr, ffn in enumerate(force_fields_osmp):
            print(ffn)
            # all systems of this type simulated with this force field
            system_names = tuple(
                candidate['name']
                for candidate in system_generator(*systems_osmp)
                if ffn == candidate['force-field']['name'] and stn == candidate['type']['name']
            )
            for osmp_name in osmp_methods:
                rows = (system_names, osmp_name, 'pre')
                mole_fraction = df_osmp.loc[rows, 'x_inner']
                mole_fraction_err = df_osmp.loc[rows, 'x_inner_err']
                pressure = df_osmp.loc[rows, 'Pi']
                pressure_err = df_osmp.loc[rows, 'Pi_err']
                ax.errorbar(x=mole_fraction, y=pressure, xerr=mole_fraction_err, yerr=pressure_err,
                            color=cmap(color_nr/6), label=ff_short_names[ffn], linestyle=':')

        ax.legend()
        ax.set_xlim(0)
        ax.set_ylim(0)
        ax.set_title(sys_type_short_names[stn])
        plt.show()

plot_Pi_pre()

## production run

### set production pressure

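Variant 1: total pressure taken as the *sum* of inner and outer pressure (`set_p` below only prints this value for comparison):

\begin{align}
p_{tot} &= p_{in} + p_{ex}\\
\Pi &= p_{in} - p_{ex}\\
p_{tot} &= 2 \cdot p_{ex} + \Pi\\
p_{ex} &= 1\,\mathrm{bar}\\
%p_{ex} &= 1/2 * (p_{tot} - \Pi)\\
\end{align}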
            

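Variant 2: total pressure taken as the *average* of inner and outer pressure (this is the value `set_p` below writes to `ref-p`):

\begin{align}
p_{tot} &= (p_{in} + p_{ex}) / 2 \\
\Pi &= p_{in} - p_{ex}\\
p_{tot} &= p_{ex} + \Pi / 2\\
p_{ex} &= 1\,\mathrm{bar}\\
%p_{ex} &= 1/2 * (p_{tot} - \Pi)\\
\end{align}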
            

In [None]:
def set_p():
    """Write the reference pressure for equi4 and prod into the respective grompp.mdp files.

    The outside pressure is fixed to p_ex = 1.0 and the osmotic pressure Pi
    is the mean of 'Pi_low' and 'Pi_high' of the pre run.  The reference
    pressure written is p_tot = p_ex + Pi / 2 (total pressure as average of
    inner and outer pressure), followed by " 0.0" as second 'ref-p' entry.

    Raises:
        NotImplementedError: if an osmp method has a 'scale' other than 'xy'.
    """
    for system in system_generator(*systems_osmp):
        print("system", system['name'])

        for osmp_name, osmp_method in osmp_methods.items():
            print('  osmp_name', osmp_name)

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                row = (system['name'], osmp_name, 'pre')
                p_ex = 1.0
                Pi = 1/2 * (df_osmp.at[row, 'Pi_low'] + df_osmp.at[row, 'Pi_high'])
                p_tot = p_ex + Pi/2
                print(p_tot)
                # value of the alternative convention p_tot = p_in + p_ex, shown for comparison only
                print(2 * p_ex + Pi)
                if osmp_method['scale'] == 'xy':
                    ref_p = str(p_tot) + " 0.0"
                    gt.mdp.set_parameter("equi4/grompp.mdp", 'ref-p', ref_p)
                    gt.mdp.set_parameter("prod/grompp.mdp",  'ref-p', ref_p)
                else:
                    raise NotImplementedError(f"scale {osmp_method['scale']!r} not implemented")

set_p()

### md, parallelized

In [None]:
def run_osmp_md_prod():
    """Submit the equi4 + production MD of every system / osmp method as a SLURM job.

    Combinations that already have a local ``prod/done`` marker are skipped.
    For the others the input files are pushed to the cluster, a job script is
    generated that runs equi4 (if no confout.gro exists yet) and then starts
    or continues the production run, and the script is submitted.  Job ids
    that are not None are appended to the notebook-wide ``jobids`` list.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo')
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)

            working_dir = os.path.join(system['name'], osmp_name)
            remote_dir = os.path.join(remote_dir_base, system['name'], osmp_name)

            with WorkingDir(working_dir):
                # test local files
                if os.path.isfile('prod/done'):
                    print('..results present locally..')
                    continue

                # mkdir
                run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

                # copy simulation files to remote
                if 'tabulated' in system['tags']:
                    filelist = ["{equi4,prod}/grompp.mdp topol {equi4,prod}/table* index.ndx"]
                else:
                    filelist = ["{equi4,prod}/grompp.mdp topol"]
                gt.remote.push_files(filelist, remote_host, remote_dir, exclude="traj*")

                # half-tabulated systems need the index file for grompp
                index_string = ""
                if 'halftabulated' in system['tags']:
                    index_string = "-n ../index.ndx"

                # commands to be run on compute nodes
                script = remote_header + rf"""
    pushd equi4
        if [[ ! -f confout.gro ]]; then
            gmx grompp {index_string} -p ../topol/topol.top -r ../topol/restraint.gro -maxwarn 2 -c ../pre/confout.gro
            gmx mdrun
        fi
        rm -f \#*
    popd


    pushd prod
        if [[ ( ! -f confout.gro ) && -f state.cpt ]]; then
            gmx mdrun -cpi state
        elif [[ ( ! -f confout.gro ) && ( ! -f state.cpt) ]]; then
            gmx grompp {index_string} -p ../topol/topol.top -r ../topol/restraint.gro -maxwarn 2 -c ../equi4/confout.gro
            gmx mdrun
        elif [[ -f confout.gro ]]; then
            echo "md done"
        else
            echo "Weird state. This should never happen."
            exit 1
        fi
        rm -f \#*
    popd
    
    touch prod/done
    """ + remote_footer

            jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
            print(jobid)
            if jobid is not None:
                jobids.append(jobid)

run_osmp_md_prod()

### check job status

In [None]:
# Query the cluster for the state of the submitted jobs and rebind the
# notebook-wide job list to whatever check_job_stati returns.
jobids = check_job_stati(jobids, remote_host)

### calc Pi, one core

In [None]:
def osmp_prod_run_Pi():
    """Build and hand over the analysis job script for the production run.

    The generated script runs, inside ``prod`` on the cluster, the wallforce
    tool for the lower and upper wall, two ``gmx select`` size analyses
    (whole slab and inner half), removes ``traj_comp.xtc``, coarsens the
    equi4 energy file and finally calls calcPi.py, calcPressure.py and
    calcX.py.  Each step is guarded by a check for its output file.

    NOTE(review): as written this only handles the system
    'water5000-cacl2_200/madrid-co1.0tc', does not skip combinations whose
    results are already present locally, and passes ``dry_run=True`` to
    run_slurm_script -- these look like debugging leftovers; confirm before
    relying on this cell for all systems.
    """
    remote_dir_base, remote_header, remote_footer = gen_remote_stuff(remote_host, 'enzo')
    for system in system_generator(*systems_osmp):
        # NOTE(review): hard-coded filter, every other system is skipped
        if system['name'] != 'water5000-cacl2_200/madrid-co1.0tc':
            continue
        print(f"system {system['name']}")

        for osmp_nr, (osmp_name, osmp_method) in enumerate(osmp_methods.items()):
            print('  osmp_name', osmp_name)

            working_dir = os.path.join(system['name'], osmp_name)
            remote_dir = os.path.join(remote_dir_base, system['name'], osmp_name)

            with WorkingDir(working_dir):
                # test local files
                Pi_done = os.path.isfile('prod/Pi.npz')
                p_tot_done = os.path.isfile('prod/p_tot.npz')
                x_done = os.path.isfile('prod/x.npz')
                if all((Pi_done, p_tot_done, x_done)):
                    print('..results present locally..')
                    # the skip is disabled, so the script is generated regardless
                    #continue

                # wallforce parameters
                # the slab is centred at z = box_edge and extends half a box edge to each side
                box_edge, _, _ = gt.gro.get_box('equi1/npt-confout.gro')
                restr_r = box_edge / 2
                restr_center = np.round(box_edge, 3)  # precision of restraints.gro file
                wall_low = restr_center - restr_r
                wall_high = restr_center + restr_r
                restr_k = osmp_method['k']
                # inner region: half the wall distance around the same centre
                inner_wall_low = restr_center - 0.5*restr_r
                inner_wall_high = restr_center + 0.5*restr_r
                # ion names are taken from the second and third moltype of the system
                cation = system['moltypes'][1]['name']
                anion = system['moltypes'][2]['name']

                # commands to be run on compute nodes
                script = remote_header + rf"""
source /home/mbernhardt/software/miniconda3/etc/profile.d/conda.sh
conda activate base
export PYTHONPATH=$PYTHONPATH:/home/mbernhardt/software/gromacstools

pushd prod
    if [[ ! -f wallf-low.xvg ]]; then
        $HOME/bin/wallforce -quiet -f traj_comp.xtc -s topol.tpr -axis zn -wallr {wall_low} -wallk {restr_k} -o wallf-low.xvg <<< "name {cation} or name {anion}"
    fi
    if [[ ! -f wallf-high.xvg ]]; then
        $HOME/bin/wallforce -quiet -f traj_comp.xtc -s topol.tpr -axis z -wallr {wall_high} -wallk {restr_k} -o wallf-high.xvg <<< "name {cation} or name {anion}"
    fi
    rm -f \#*
popd

pushd prod
    if [[ ! -f size.xvg ]]; then
        echo -e 'atomname {cation} and z > {wall_low} and z < {wall_high}\n'\
                'atomname {anion} and z > {wall_low} and z < {wall_high}\n'\
                'res_com of resname SOL and z > {wall_low} and z < {wall_high}'\
         | gmx select -f traj_comp.xtc -s topol.tpr -os size.xvg
    fi
    rm -f \#*
popd

pushd prod
    if [[ ! -f size-inner.xvg ]]; then
        echo -e 'atomname {cation} and z > {inner_wall_low} and z < {inner_wall_high}\n'\
                'atomname {anion} and z > {inner_wall_low} and z < {inner_wall_high}\n'\
                'res_com of resname SOL and z > {inner_wall_low} and z < {inner_wall_high}'\
          | gmx select -f traj_comp.xtc -s topol.tpr -os size-inner.xvg
    fi
    rm -f \#*
popd

pushd prod
    rm -f traj_comp.xtc
popd

pushd equi4
    if [[ ! -f ener-coarse.edr ]]; then
        gmx eneconv -f ener.edr -o ener-coarse.edr -dt 20
    fi
popd

pushd prod
    if [[ ! -f Pi.npz ]]; then
        ../../../../scripts/calcPi.py
    fi
popd

pushd prod
    if [[ ! -f p_tot.npz ]]; then
        ../../../../scripts/calcPressure.py
    fi
popd

pushd prod
    if [[ ! -f x.npz ]]; then
        ../../../../scripts/calcX.py
    fi
popd
""" + remote_footer

            # NOTE(review): dry_run=True here, whereas the MD submission cell uses dry_run=False
            jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=True)
            print(jobid)
            if jobid != None:
                jobids.append(jobid)
osmp_prod_run_Pi()

### check job status

In [None]:
# Query the cluster for the state of the submitted jobs and rebind the
# notebook-wide job list to whatever check_job_stati returns.
jobids = check_job_stati(jobids, remote_host)

### copy results from cluster

In [None]:
def copy_from_cluster_prod():
    """Pull the equi4 / production result files of every system / osmp method from the cluster."""
    remote_dir_base, _, _ = gen_remote_stuff(remote_host, 'enzo')
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            local_dir = os.path.join(system['name'], osmp_name)
            remote_dir = os.path.join(remote_dir_base, system['name'], osmp_name)

            with WorkingDir(local_dir):
                gt.remote.pull_files(
                    ["equi4/ener-coarse.edr", "prod/Pi.npz", "prod/p_tot.npz", "prod/x.npz", "prod/done"],
                    remote_host,
                    remote_dir,
                    exclude="traj*",
                )

copy_from_cluster_prod()

### equilibration check

In [None]:
def equi_check():
    """Run check_equi on the volume of the coarse equi4 energy file of every system / osmp method.

    Combinations for which the check cannot be performed are reported with
    the reason and skipped instead of being silently ignored.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)

            with WorkingDir(working_dir):
                try:
                    check_equi(["Volume"], edr_file="equi4/ener-coarse.edr", safe_factor=3.0)
                except Exception as err:
                    # still best effort, but Ctrl-C is no longer swallowed
                    # and the reason for a skipped check is visible
                    print('..no data..', err)

equi_check()

In [None]:
def show_en():
    """Show temperature and volume graphs of the coarse equi4 energy file for every system / osmp method."""
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                print("equilibration 4")
                show_energy_graphs(["Temperature", "Volume"], edr_file="equi4/ener-coarse.edr")
                # a second call for the production energies is currently disabled

show_en()

### Pi in data frame

In [None]:
# naming used in this section
#   f:          force
#   A:          area
#   Pi:         osmotic pressure
#   high, low:  respective walls

def load_Pi():
    """Copy the osmotic pressures stored in prod/Pi.npz of every production run into df_osmp.

    'Pi' is the NaN-ignoring mean of both walls, 'Pi_err' the propagated
    error.  If prod/Pi.npz does not exist, all six columns are set to NaN.
    """
    stored_columns = ('Pi_low', 'Pi_high', 'Pi_low_err', 'Pi_high_err')

    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            row = (system['name'], osmp_name, 'prod')

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                try:
                    data = np.load('prod/Pi.npz')
                    for column in stored_columns:
                        df_osmp.at[row, column] = data[column]
                    df_osmp.at[row, 'Pi'] = np.nanmean((data['Pi_low'], data['Pi_high']))
                    df_osmp.at[row, 'Pi_err'] = 1/2 * np.sqrt(np.nansum((data['Pi_low_err']**2, data['Pi_high_err']**2)))
                except FileNotFoundError:
                    print(".. no data ..")
                    df_osmp.loc[row, ('Pi_low', 'Pi_high', 'Pi_low_err', 'Pi_high_err', 'Pi', 'Pi_err')] = np.nan
load_Pi()

### get p_tot (Pres-XX, Pres-YY)

In [None]:
def get_p_tot():
    """Store the 'p_tot' value of prod/p_tot.npz of every production run in df_osmp (NaN if the file is missing)."""
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            row = (system['name'], osmp_name, 'prod')

            with WorkingDir(os.path.join(system['name'], osmp_name)):
                try:
                    total_pressure = np.load('prod/p_tot.npz')['p_tot']
                except FileNotFoundError:
                    print(".. no data ..")
                    total_pressure = np.nan
                df_osmp.at[row, 'p_tot'] = total_pressure

get_p_tot()

### calc p_in and p_ex

In [None]:
def calc_p_in_ex():
    """Derive inner and outer pressure of every production run from p_tot and Pi.

    Relations used:
        p_tot = (p_in + p_ex) / 2
        Pi    = p_in - p_ex
    which give p_ex = p_tot - Pi / 2 and p_in = p_ex + Pi.  This is the same
    convention as in the pre-run evaluation and in set_p, where ref-p is set
    to p_ex + Pi / 2.  Pi is the NaN-ignoring mean of 'Pi_low' and 'Pi_high'.
    """
    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)
            row = (system['name'], osmp_name, 'prod')

            with WorkingDir(working_dir):
                p_tot = df_osmp.at[row, 'p_tot']
                Pi_low = df_osmp.at[row, 'Pi_low']
                Pi_high = df_osmp.at[row, 'Pi_high']
                Pi = np.nanmean([Pi_low, Pi_high])
                # previously 1/2 * (p_tot - Pi), which belongs to the other
                # convention p_tot = p_in + p_ex and contradicts the relations above
                p_ex = p_tot - Pi / 2
                p_in = p_ex + Pi
                df_osmp.at[row, 'p_ex'] = p_ex
                df_osmp.at[row, 'p_in'] = p_in

calc_p_in_ex()

In [None]:
# Quick look at the last rows of the results table.
df_osmp.tail()

In [None]:
# Look up the production entry of one specific system / osmp method.
df_osmp.loc['water5000-cacl2_200/madrid-co1.0tc', 'osmp-xy-k4000', 'prod']

### actual mole fraction and concentration

In [None]:
def calc_x():
    """Copy mole fractions and concentrations of every production run from prod/x.npz into df_osmp.

    The columns 'x', 'x_inner', 'c', 'c_inner' and their '_err' counterparts
    are filled.  If prod/x.npz does not exist, all of them are set to NaN for
    that run.
    """
    columns = ('x', 'x_err', 'x_inner', 'x_inner_err',
               'c', 'c_err', 'c_inner', 'c_inner_err')

    for system in system_generator(*systems_osmp):
        print(f"system {system['name']}")

        for osmp_name in osmp_methods:
            print('  osmp_name', osmp_name)
            working_dir = os.path.join(system['name'], osmp_name)
            row = (system['name'], osmp_name, 'prod')

            with WorkingDir(working_dir):
                try:
                    data = np.load('prod/x.npz')
                except FileNotFoundError:
                    print('.. no data ..')
                    # the old handler read `data` here, which is undefined
                    # (first run) or left over from the previous run
                    for column in columns:
                        df_osmp.at[row, column] = np.nan
                    continue
                for column in columns:
                    df_osmp.at[row, column] = data[column]

calc_x()

## save and load dataframe

In [None]:
# Persist the results table in the current directory.
df_osmp.to_pickle('df_osmp.pkl')

In [None]:
# Restore the results table written by the cell above (replaces the current df_osmp).
df_osmp = pd.read_pickle('df_osmp.pkl')

In [None]:
# Show all columns of the rows whose third index level is 'prod'.
df_osmp.loc[(slice(None), slice(None), 'prod'), slice(None)]

In [None]:
# Production rows only ...
df_osmp_prod = df_osmp.loc[(slice(None), slice(None), 'prod'), slice(None)]
# ... and of those, the ones with at least one NaN column (incomplete results).
df_osmp_prod[df_osmp_prod.isna().any(axis=1)]

## plot

### plot simple

In [None]:
def plot_Pi_prod(show_pre=False):
    """Plot the production osmotic pressure over the inner mole fraction.

    One figure per system type; every force field / osmp method combination
    is drawn as a dashed error-bar curve.  With ``show_pre=True`` the pre-run
    values are added as dotted curves in the same colour (without legend
    entry).
    """
    cmap = plt.get_cmap('rainbow')

    # Literature data: CRC handbook of chem and phys (25°C)
    # (evaluated here, but not drawn into the figures below)
    M0 = 18.0154  # g / mol
    V_M = 18.0685  # cm^3 (of water 25°)
    lit_molality = np.arange(0.1, 1.1, 0.1)  # mol / kg (?)
    lit_activity_coeff = np.array([0.778, 0.735, 0.710, 0.693, 0.681, 0.673, 0.667, 0.662, 0.659, 0.657])
    lit_x = 1 / (1 + 1/(M0 / 1000*lit_molality))
    lit_y = - const.R * 300 / V_M * np.log(lit_activity_coeff * lit_molality)

    # runs to draw, in drawing order, with their line style
    runs = [('prod', '--')]
    if show_pre:
        runs.append(('pre', ':'))

    for stn in system_types_osmp:
        print(stn)
        fig, ax = plt.subplots(figsize=(6, 3), constrained_layout=True, dpi=300)
        for color_nr, ffn in enumerate(force_fields_osmp):
            print(ffn)
            # all systems of this type simulated with this force field
            system_names = tuple(
                candidate['name']
                for candidate in system_generator(*systems_osmp)
                if ffn == candidate['force-field']['name'] and stn == candidate['type']['name']
            )
            for osmp_name in osmp_methods:
                for run, linestyle in runs:
                    rows = (system_names, osmp_name, run)
                    mole_fraction = df_osmp.loc[rows, 'x_inner']
                    mole_fraction_err = df_osmp.loc[rows, 'x_inner_err']
                    pressure = df_osmp.loc[rows, 'Pi']
                    pressure_err = df_osmp.loc[rows, 'Pi_err']
                    # only the production curve gets a legend entry
                    label = ff_short_names[ffn] if run == 'prod' else None
                    ax.errorbar(x=mole_fraction, y=pressure, xerr=mole_fraction_err, yerr=pressure_err,
                                color=cmap(color_nr/6), label=label, linestyle=linestyle)

        ax.legend()
        ax.set_xlim(0)
        ax.set_ylim(0)
        ax.set_title(sys_type_short_names[stn])
        ax.grid()
        plt.show()

plot_Pi_prod(show_pre=True)

### plot

In [None]:
# Quick look at the first rows of the results table.
df_osmp.head()

In [None]:
def plot_coeff():
    """Plot osmotic coefficients (Pi divided by the van't Hoff pressure) over the inner concentration.

    A 2x2 grid with one panel per system type is drawn.  Only the force
    fields listed in ``ff_to_show`` are plotted, together with the
    'Guendouzi 2001' literature curve.  The figure is written to
    ../figures/osmotic-coefficients.pdf and shown.
    """
    ff_to_show = {
        'opls-co0.9tc',
        'eccr1-co1.2',
        'netz-co0.9tc',
        'madrid-co1.0tc',
        'iff-altern5-eccr1-co1.2-nopc',
        'iff-altern5-netz-co0.9-nopc',
        'Buckingham-iff-altern5-eccr1-co1.2-nopc',
    }
    ylim_dict = {
        'water-cacl2_': (0.0, 4.2),
        'water-kcl': (0.0, 1.5),
        'water-licl': (0.0, 1.95),
        'water-nacl': (0.0, 1.65),
    }
    mpl_rc = {
        'legend.labelspacing': 0.5,
        'legend.columnspacing': 1.5,
        'legend.handlelength': 3.0,
    }

    with plt.rc_context({**mpl_rc_global, **mpl_rc}):
        fig, axes = plt.subplots(figsize=(4.67, 2.5), nrows=2, ncols=2, constrained_layout=True, sharex='all', dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.05, h_pad=0.00, hspace=0.0, wspace=0.0)

        for panel_nr, (stn, st) in enumerate(system_types_osmp.items()):
            print(panel_nr, stn)
            ax = axes.flat[panel_nr]
            n_ions = sum(st['n_cation_anion'])

            # simulation data
            for ffn in force_fields_osmp:
                system_names = tuple(
                    candidate['name']
                    for candidate in system_generator(*systems_osmp)
                    if ffn == candidate['force-field']['name'] and candidate['type']['name'] == stn
                )
                for osmp_name in osmp_methods:
                    rows = (system_names, osmp_name, 'prod')
                    conc = df_osmp.loc[rows, 'c_inner']
                    conc_err = df_osmp.loc[rows, 'c_inner_err']
                    print(n_ions)
                    ideal_Pi = vant_Hoff_osmotic_pressure(conc, n_ions, 300)
                    phi = df_osmp.loc[rows, 'Pi'] / ideal_Pi
                    phi_err = df_osmp.loc[rows, 'Pi_err'] / ideal_Pi
                    if len(conc) == 0 or ffn not in ff_to_show:
                        continue
                    ax.errorbar(x=conc, y=phi, xerr=conc_err, yerr=phi_err, color=ff_colors[ffn],
                                label=ff_short_names[ffn], linestyle=':', marker='.')

            # lit data
            lit_x, lit_y = get_osmotic_coeff_lit(osmp_lit_dict, stn, 'Guendouzi 2001')
            ax.plot(lit_x, lit_y, marker='.', linestyle='-', color='k', label='exp.', zorder=0, markersize=4)

            # van't Hoff reference: coefficient of one, drawn without legend entry
            ax.axhline(1, marker='', linestyle='-', color='grey', linewidth=0.5)
            ax.set_xlim(0, 5.5)
            ax.set_ylim(ylim_dict[stn])
            ax.text(.05, .83, sys_type_short_names[stn],
                    horizontalalignment='left', transform=ax.transAxes)

        # axis labels on the left column and the bottom row only
        for left_ax in axes[:, 0]:
            left_ax.set_ylabel(r"$\phi$")
        for bottom_ax in axes[1, :]:
            bottom_ax.set_xlabel(r"$c$ in mol/l")

        # common legend from the entries of the last panel, in a fixed order
        handles, labels = ax.get_legend_handles_labels()
        order = [1, 4, 2, 3, 5, 6, 0]
        handles = [handles[idx] for idx in order]
        labels = [labels[idx] for idx in order]
        fig.legend(handles, labels, ncol=4, loc='lower center', bbox_to_anchor=(0.54, 0.97),)
        fig.savefig('../figures/osmotic-coefficients.pdf', bbox_inches='tight')
        plt.show()

plot_coeff()

In [None]:
# Copy the figure saved by plot_coeff into the paper's figure directory (shell command).
!cp -a ../figures/osmotic-coefficients.pdf ~/research/output/ion-shortrange-paper/figures/

### osmp RMSD

In [None]:
def rmsd_osmp():
    """Fill df_rmsd['osmotic'] with the deviation of the simulated osmotic coefficients from experiment.

    For every force field the squared differences between the simulated
    coefficients (method 'osmp-xy-k4000', production run, first and last
    concentration dropped) and the linearly interpolated 'Guendouzi 2001'
    literature values are summed over all system types.  The square root of
    that sum is stored; it is not divided by the number of points.
    """
    squared_dev = collections.defaultdict(int)
    osmp_name = 'osmp-xy-k4000'

    for type_nr, (stn, st) in enumerate(system_types_osmp.items()):
        print(type_nr, stn)
        n_ions = sum(st['n_cation_anion'])

        # experimental from literature
        x_lit, y_lit = get_osmotic_coeff_lit(osmp_lit_dict, stn, 'Guendouzi 2001')

        for ffn, ff in force_fields_osmp.items():
            system_names = tuple(
                candidate['name']
                for candidate in system_generator(*systems_osmp)
                if ffn == candidate['force-field']['name'] and candidate['type']['name'] == stn
            )
            osmp_method = osmp_methods[osmp_name]
            rows = (system_names, osmp_name, 'prod')
            conc = df_osmp.loc[rows, 'c_inner']
            conc_err = df_osmp.loc[rows, 'c_inner_err']
            ideal_Pi = vant_Hoff_osmotic_pressure(conc, n_ions, 300)
            phi = df_osmp.loc[rows, 'Pi'] / ideal_Pi
            # ignore first and last point, excluding 0 and up to 3 mol/l
            conc = conc[1:-1]
            phi = phi[1:-1]
            # sum of squared deviations from the interpolated experiment
            deviation = phi - np.interp(conc, x_lit, y_lit)
            squared_dev[(ff['name'], 'msd')] += np.sum(deviation**2)

    for ff_name, ff in force_fields.items():
        if 'dummy' not in ff['tags']:
            df_rmsd.at[ff_name, 'osmotic'] = np.sqrt(squared_dev[(ff_name, 'msd')])

rmsd_osmp()
df_rmsd

In [None]:
# water pressure outside slab vs. concentration
# Draws p_ex over x_inner for the pre and the production runs of every force
# field tagged 'conc-range', one curve per osmp method.
# NOTE(review): this cell iterates `force_fields` directly and indexes each
# element with ['tags'], while other cells use force_fields.items(); it also
# relies on a name `systems`.  Confirm that it still runs with the current
# definitions.
cmap = plt.get_cmap('rainbow')

fig, ax = plt.subplots(figsize=(8, 4))
for f, force_field in enumerate((ff for ff in force_fields if 'conc-range' in ff['tags'])):
    print(force_field['name'])
    # names of all systems that use this force field
    system_names = [sys['name'] for sys in systems if force_field['name'] == sys['force-field']['name']]
    
    for osmp_nr, (osmp_name, osmp_method) in enumerate(osmp_methods.items()):
        #print('  osmp_name', osmp_name)
        
        # pre run: crosses
        rows = (system_names, osmp_name, 'pre')
        x = df_osmp.loc[(rows, 'x_inner')]
        y = df_osmp.loc[(rows, 'p_ex')]
        if len(x) > 0:
            ax.plot(x, y, color=cmap((f+2)/4), label=force_field['name'] + ' pre', marker='x', linestyle=':')
        # production run: diamonds
        rows = (system_names, osmp_name, 'prod')
        x = df_osmp.loc[(rows, 'x_inner')]
        y = df_osmp.loc[(rows, 'p_ex')]
        if len(x) > 0:
            ax.plot(x, y, color=cmap(f/4), label=force_field['name'] + ' prod', marker='D', linestyle=':')
ax.legend()
ax.set_title('outside pressure')
ax.set_xlabel('x(MET)')
ax.set_ylabel('p_ex in bar')
#ax.set_xlim(0)
#ax.set_ylim(0)
#fig.savefig("figures/p_ex.png", dpi=150)
plt.show()

# DoS and 2PT

## systems_2pt

In [None]:
# force fields for the DoS / 2PT analysis: everything not tagged 'fit' or 'dummy'
force_fields_2pt = {
    name: entry
    for name, entry in force_fields.items()
    if all(tag not in entry['tags'] for tag in ('fit', 'dummy'))
}
system_types_2pt = system_types

systems_2pt = (system_types_2pt, force_fields_2pt)
def sys_gen_2pt():
    """yields every system of systems_2pt that carries both the 'dos' and the 'conc-range' tag"""
    required_tags = {'dos', 'conc-range'}
    for candidate in system_generator(*systems_2pt, verbose=False):
        if required_tags.issubset(candidate['tags']):
            yield candidate

pd.DataFrame(sys_gen_2pt())

## dos processing functions

In [None]:
def create_nocomp_spectra(dos_json):
    """adds the component-free spectra 'trn', 'rot', 'vib' and 'roto' to every moltype of dos_json

    Each of them is the element-wise sum of its three component spectra and
    is stored as numpy array in moltype['spectra'].  dos_json is modified in
    place, nothing is returned.
    """
    # summed spectrum -> the component spectra it is built from
    components_of = {
        'trn': ('trn_x', 'trn_y', 'trn_z'),
        'rot': ('rot_x', 'rot_y', 'rot_z'),
        'vib': ('vib_x', 'vib_y', 'vib_z'),
        'roto': ('roto_a', 'roto_b', 'roto_c'),
    }

    for moltype in dos_json['moltypes']:
        spectra = moltype['spectra']
        for summed_name, (first, *remaining) in components_of.items():
            # np.array makes a copy, so the first component stays untouched
            total = np.array(spectra[first])
            for component in remaining:
                total += np.array(spectra[component])
            spectra[summed_name] = total

In [None]:
def get_volume_from_npt(edr_filename):
    """Return the mean of the 'Volume' series stored in a gromacs .edr file.

    ``gmx energy`` is run with 'volume' fed to its standard input and writes
    into a temporary .xvg file, which is then read with ``gt.xvg.load``.
    """
    with tempfile.NamedTemporaryFile(suffix='.xvg') as xvg_file:
        xvg_path = xvg_file.name
        run_bash(f"gmx energy -f {edr_filename} -o {xvg_path} <<< 'volume'")
        # read while the temporary file still exists
        energy_terms, _ = gt.xvg.load(xvg_path)
    volume_series = energy_terms['Volume']
    return np.mean(volume_series)

def get_volume_from_nvt(gro_filename):
    """Return the box volume of a single-frame gromacs .gro file.

    The volume is the product of all box entries returned by
    ``gt.gro.get_box``.  Not working for .gro trajectories.
    """
    box_entries = gt.gro.get_box(gro_filename)
    volume = np.prod(box_entries)
    return volume

def load_moments_of_inertia_into_moltypes(moltypes, dos_file, n_samples):
    """Attach the moments of inertia from a dos-calc json file to each moltype.

    Every dict in ``moltypes`` gets two data frames, 'moments_of_inertia' and
    'moments_of_inertia_std', with the rows 0..2 ('axis') and the columns
    0..n_samples-1 ('sample').  They are filled with the transposed arrays
    stored under the same keys in the json entry of the moltype with the
    same position.  ``moltypes`` is modified in place.
    """
    with open(dos_file, 'r') as f:
        dos_json = json.load(f)

    for moltype_nr, moltype in enumerate(moltypes):
        # mean values first, their standard deviations second
        for key in ('moments_of_inertia', 'moments_of_inertia_std'):
            table = pd.DataFrame(index=range(3), columns=range(n_samples))
            table.index.name = 'axis'
            table.columns.name = 'sample'
            moltype[key] = table
            # json holds one row per sample, the frame one column per sample
            table.iloc[:] = np.array(dos_json['moltypes'][moltype_nr][key]).T

# seems overcomplicated
def load_doses_into_moltypes(moltypes, dos_file, n_samples, dos_names, components=False, cross=False):
    """Attach the DoS spectra from a dos-calc json file to each moltype.

    Every dict in ``moltypes`` gets a 'doses' data frame whose columns are
    indexed by (dos, sample): the column ('frequencies', 0) plus one column
    per selected DoS and sample.  A DoS from ``dos_names`` is selected if it
    has no tags, or is tagged 'comp' while ``components`` is set, or is
    tagged 'cross' while ``cross`` is set.  The component-summed spectra are
    created with create_nocomp_spectra() before loading.  ``moltypes`` is
    modified in place.
    """
    with open(dos_file, 'r') as f:
        dos_json = json.load(f)
    create_nocomp_spectra(dos_json)
    frequencies = dos_json['frequencies']

    def is_selected(dos):
        tags = dos['tags']
        return (tags == []
                or ('comp' in tags and components)
                or ('cross' in tags and cross))

    # phase 1: a frame per moltype that only holds the frequencies
    for moltype in moltypes:
        header = pd.MultiIndex.from_arrays([['frequencies'], [0]], names=['dos', 'sample'])
        table = pd.DataFrame(columns=header)
        table[('frequencies', 0)] = frequencies
        moltype['doses'] = table

    # phase 2: empty columns, ordered by sample first and DoS second
    for sample in range(n_samples):
        for dos in dos_names:
            for moltype in moltypes:
                if is_selected(dos):
                    moltype['doses'][(dos['name'], sample)] = None

    # phase 3: fill all samples of a DoS at once
    for moltype_nr, moltype in enumerate(moltypes):
        spectra = dos_json['moltypes'][moltype_nr]['spectra']
        for dos in dos_names:
            if not is_selected(dos):
                continue
            all_samples = (dos['name'], slice(None))
            moltype['doses'].loc[:, all_samples] = np.array(spectra[dos['name']]).T

In [None]:
def show_dos_integrals(moltypes, dos_names, components=False, cross=False, temperature=None):
    """Print the integral over the frequencies of every selected DoS.

    For each moltype, each selected DoS and each sample one line
    ``<dos name> <sample> <integral>`` is printed.  The integral is computed
    with the trapezoidal rule over the ('frequencies', 0) column of the
    moltype's 'doses' data frame.

    A DoS from ``dos_names`` (dicts with 'name' and 'tags') is selected if it
    has no tags, or is tagged 'comp' while ``components`` is set, or is
    tagged 'cross' while ``cross`` is set.

    If ``temperature`` is given, every integral is divided by
    ``1/2 * oconst.k_gro * temperature``.
    """
    # np.trapz was renamed to np.trapezoid in NumPy 2.0 and the old name is
    # deprecated; use the new name where it exists, the old one otherwise
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz

    prefactor = 1
    if temperature is not None:
        prefactor = 1 / (1/2 * oconst.k_gro * temperature)

    for moltype in moltypes:
        doses = moltype['doses']
        frequencies = doses['frequencies'][0]
        for dos in dos_names:
            selected = (dos['tags'] == []
                        or ('comp' in dos['tags'] and components)
                        or ('cross' in dos['tags'] and cross))
            if not selected:
                continue
            dos_samples = doses[dos['name']]
            for sample in dos_samples.columns:
                print(dos['name'], sample,
                      prefactor * trapezoid(dos_samples[sample], frequencies))

## load dos

In [None]:
def load_dos():
    """Load DoS spectra and moments of inertia of all 2PT systems.

    For every system from sys_gen_2pt() a deep copy of its moltypes is made
    and filled from 'npt-prod-vel/dos/dos.json' inside the system directory.
    Loading is best effort: if it fails, the reason is printed and the
    system is kept with whatever was loaded up to that point ('doses' stays
    None if nothing was loaded).

    Returns a dict that maps the system name to its list of moltypes.
    """
    moltypes_with_dos_dict = {}
    for system in sys_gen_2pt():
        print(f"system {system['name']}")

        moltypes = deepcopy(system['moltypes'])
        for moltype in moltypes:
            moltype['doses'] = None

        with WorkingDir(system['name']):
            try:
                load_doses_into_moltypes(moltypes, 'npt-prod-vel/dos/dos.json', param_dos['n_samples'], dos_names, components=True)
                load_moments_of_inertia_into_moltypes(moltypes, 'npt-prod-vel/dos/dos.json', param_dos['n_samples'])
            except Exception as err:
                # not a bare except: KeyboardInterrupt and SystemExit must
                # still be able to stop the loop
                print(".. loading failed ..")
                print(f".. {type(err).__name__}: {err}")

        moltypes_with_dos_dict[system['name']] = moltypes
    return moltypes_with_dos_dict

moltypes_with_dos_dict = load_dos()

## fit ion dos with three lorentz (cauchy)

In [None]:
# weird old version
#def lorentz(f, a, f0, γ):
    #w = 2 * np.pi * f
    #w0 = 2 * np.pi * f0
    #return a / ((w**2 - w0**2)**2 + γ**2 * w0**2)

def lorentz(f, a, f0, γ):
    """Lorentz (Cauchy) line shape, evaluated at the frequency f.

    The peak sits at the frequency f0.  The shape is written in angular
    frequencies w = 2 π f, where γ is its half width at half maximum and a
    its area.
    """
    omega = 2 * np.pi * f
    omega_0 = 2 * np.pi * f0
    shape = γ / ((omega - omega_0)**2 + γ**2)
    return a / np.pi * shape


def two_lorentz(f, a0, f00, γ0, a1, f01, γ1):
    """Sum of two Lorentz line shapes, one (a, f0, γ) triple per peak."""
    first = lorentz(f, a0, f00, γ0)
    second = lorentz(f, a1, f01, γ1)
    return first + second


def three_lorentz(f, a0, f00, γ0, a1, f01, γ1, a2, f02, γ2):
    """Sum of three Lorentz line shapes, one (a, f0, γ) triple per peak."""
    first = lorentz(f, a0, f00, γ0)
    second = lorentz(f, a1, f01, γ1)
    third = lorentz(f, a2, f02, γ2)
    return first + second + third

# model function per ion kind and system type name
# the anion entry is a defaultdict: every system type gets two_lorentz
fit_func = {
    'cation': {
        'water-cacl2_':  three_lorentz,
        'water-kcl':  two_lorentz,
        'water-licl':  three_lorentz,
        'water-nacl':  two_lorentz,
    },
    'anion': collections.defaultdict(lambda: two_lorentz)
}

# start parameters for the fit, in the argument order of the model function:
# one (a, f0, γ) triple per Lorentzian
# NOTE(review): f0 is presumably in THz (fit_dos converts it with rec_cm_per_THz) - confirm
p0 = {
    'cation': {
        'water-cacl2_': (2e-2, 1, 10, 8e-2, 6, 10, 2e-2, 10, 10),
        'water-kcl': (4e-2, 1, 10, 2e-2, 5, 10),
        'water-licl': (1e-2, 1, 30, 8e-2, 4, 30, 2e-2, 10, 30),
        #'water-nacl': (1e-2, 1, 30, 8e-2, 4, 30, 2e-2, 8, 30),
        'water-nacl': (2e-2, 1, 10, 2e-2, 10, 10),
    },
    'anion': collections.defaultdict(lambda: (4e-2, 1.2, 3, 2e-2, 5, 3))
}

# upper bound of γ in the cation fits (the anion fits leave γ unbounded)
max_γ = 60
# (lower bounds, upper bounds) of the fit parameters, same order as p0
# the f0 ranges of consecutive Lorentzians adjoin each other, e.g. [0, 3], [3, 7], [7, inf)
bounds = {
    'cation': {
        'water-cacl2_': ((0, 0, 0, 0, 3, 0, 0, 7, 0), (np.inf, 3, max_γ, np.inf, 7, max_γ, np.inf, np.inf, max_γ)),
        'water-kcl': ((0, 0, 0, 0, 2.5, 0), (np.inf, 2.5, max_γ, np.inf, np.inf, max_γ)),
        'water-licl': ((0, 0, 0, 0, 2.5, 0, 0, 9, 0), (np.inf, 2.5, max_γ, np.inf, 9, max_γ, np.inf, np.inf, max_γ)),
        #'water-nacl': ((0, 0, 0, 0, 2.5, 0, 0, 7, 0), (np.inf, 2.5, max_γ, np.inf, 7, max_γ, np.inf, 12, max_γ)),
        'water-nacl': ((0, 0, 0, 0, 2.5, 0), (np.inf, 2.5, max_γ, np.inf, np.inf, max_γ)),
    },
    'anion': collections.defaultdict(lambda: ((0, 0, 0, 0, 3, 0), (np.inf, 3, np.inf, np.inf, np.inf, np.inf))),
    #'anion': collections.defaultdict(lambda: ((0, 0, 0, 0, 0, 0), (np.inf, np.inf, np.inf, np.inf, np.inf, np.inf))),
}

In [None]:
# scratch check: 2.2 THz expressed as wavenumber in cm^-1
oconst.rec_cm_per_THz * 2.2

In [None]:
def fit_dos():
    """Fit the translational DoS of cation and anion with a sum of Lorentzians.

    For every system from sys_gen_2pt() except the 'water-pure' type, the
    sample-averaged 'trn' DoS of moltype 1 ('cation') and moltype 2 ('anion')
    is fitted with the model, start values and bounds from ``fit_func``,
    ``p0`` and ``bounds``.  For cations the fitted f0 values are printed,
    multiplied by ``oconst.rec_cm_per_THz``.

    Returns ``{ion: {'opt': {system name: popt}, 'err': {system name: perr}}}``
    where perr is the square root of the diagonal of the covariance matrix.
    """
    popt_dict = {ion: {'opt': {}, 'err': {}} for ion in ('cation', 'anion')}

    for system in sys_gen_2pt():
        systype_name = system['type']['name']
        if systype_name == 'water-pure':
            continue
        print(f"system {system['name']}")
        moltypes_with_dos = moltypes_with_dos_dict[system['name']]

        for moltype_nr, ion in enumerate(('cation', 'anion'), start=1):
            doses = moltypes_with_dos[moltype_nr]['doses']
            frequencies = np.array(doses[('frequencies', 0)])
            # one row per sample after the transpose, then average the samples
            dos_mean = np.array(doses.loc[:, ('trn', slice(None))]).T.mean(axis=0)

            # unweighted fit
            popt, pcov = optimize.curve_fit(
                fit_func[ion][systype_name],
                frequencies,
                dos_mean,
                p0=p0[ion][systype_name],
                maxfev=1e5,
                sigma=None,
                bounds=bounds[ion][systype_name],
                method='trf',
            )
            perr = np.sqrt(np.diag(pcov))
            # TODO: sort popt and perr
            if ion == 'cation':
                print('f0', popt[1::3] * oconst.rec_cm_per_THz)
            popt_dict[ion]['opt'][system['name']] = popt
            popt_dict[ion]['err'][system['name']] = perr
    return popt_dict

popt_dict = fit_dos()

## plot

### experimental data

In [None]:
# experimental cation frequencies from Schwaab 2019, in cm^{-1}, keyed by system type name
freq_exp = {
    'water-cacl2_': 146,
    # I suspect this Ca2+ value is wrong. You would expect it to be at least sqrt(2) times the K+ value,
    # due to the doubled electric force. It should be even higher, because the smaller ion radius
    # increases the electric force further (from Wikipedia: r_K+ = 152 pm, r_Ca2+ = 114 pm).
    # Stronger force expected from Coulomb's law: 1/r² -> 1/114**2 / (1/152**2) = 1.7777
    # Total effect of radius and charge on the frequency -> sqrt(2 * 1.7777) = 1.886
    # Should screening be taken into account?
    'water-kcl': 151,
    'water-licl': 400,
    'water-nacl': 172,
}

# alternative experimental values; theo_freq() prefers them over freq_exp where present
freq_exp2 = {
    'water-cacl2_': 320,  # from SI Fig. 3
}

# cation radii per system type name
# NOTE(review): presumably in nm (0.152 matches the 152 pm quoted above for K+) - confirm
cation_radii = {
    'water-cacl2_': 0.114,
    'water-kcl': 0.152,
    'water-licl': 0.09,
    'water-nacl': 0.116,
}

# cation charge number per system type name
cation_charge = {
    'water-cacl2_': 2,
    'water-kcl': 1,
    'water-licl': 1,
    'water-nacl': 1,
}

# cation mass per system type name, taken from the atomtypes table
cation_mass = {
    'water-cacl2_': atomtypes['CA']['mass'],
    'water-kcl': atomtypes['K']['mass'],
    'water-licl': atomtypes['LI']['mass'],
    'water-nacl': atomtypes['NA']['mass'],
}

### frequency from charge, mass, and radius

In [None]:
# not sure if we can expect this to be valid from the theory
# at the equi. position F_e = -F_rep
def theo_freq():
    """Scatter the experimental cation frequencies against a simple estimate.

    The estimate per electrolyte type is sqrt(charge / (mass * radius**2)),
    taken from cation_charge, cation_mass and cation_radii.  The experimental
    value comes from freq_exp2 where available and from freq_exp otherwise.
    The x ticks are placed at the estimates and labelled with the estimate
    and the short name of the system type.
    """
    fig, ax = plt.subplots(constrained_layout=True)

    # electrolyte type names in order of first appearance, without duplicates
    systypes = list(OrderedSet(
        candidate['type']['name']
        for candidate in sys_gen_2pt()
        if candidate['type']['name'] != 'water-pure'
    ))

    freq_predict = {}
    for st in systypes:
        freq_predict[st] = np.sqrt(1/cation_mass[st]*cation_charge[st]/cation_radii[st]**2)
    # entries of freq_exp2 override those of freq_exp
    freq_measured = {**freq_exp, **freq_exp2}

    x = [freq_predict[st] for st in systypes]
    y = [freq_measured[st] for st in systypes]
    ax.scatter(x, y)

    tick_labels = [f"{freq_predict[st]:.1f}\n{sys_type_short_names[st]}" for st in systypes]
    ax.set_xticks(x)
    ax.set_xticklabels(tick_labels)

    ax.set_xlim(0)
    ax.set_ylim(0)
    plt.show()

theo_freq()

### show dos

In [None]:
# show ion dos
def plot_dos():
    """Plot the translational DoS of both ions, one figure per electrolyte type.

    Every figure has a cation (left) and an anion (right) panel.  For each
    force field (all of force_fields_2pt except 'netz-co0.9') the
    sample-averaged 'trn' DoS is drawn together with its fit from
    ``popt_dict`` (dashed) and with vertical lines at the f0 of the last
    (dotted) and of the second to last (dashed) Lorentzian of the fit.  The
    fitted f0 values are printed as fitted and multiplied by
    ``oconst.rec_cm_per_THz``.

    A panel is titled with the short name of the ion it shows; it gets no
    title if no system is plotted into it.  The figures are saved as
    ``../figures/dos-<system type name>.pdf``.
    """
    params = {
        'legend.handlelength': 2.0,
        'legend.fontsize': 8,
        'legend.labelspacing': 0.05,
        'figure.dpi': 96,
    }

    # x range of the cation panel per system type, in cm^-1
    xlim_cations = {
        'water-licl': (0, 800),
        'water-nacl': (0, 400),
        'water-kcl': (0, 300),
        'water-cacl2_': (0, 500),
    }
    force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn not in ('netz-co0.9',)}
    system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn not in ('water-pure',)}

    with mpl.rc_context(rc={**mpl_rc_global, **params}):
        for systype_name, systype in system_types_to_show.items():
            print(f"system type {systype_name}")
            fig, axes = plt.subplots(ncols=2, figsize=(4.6, 1.5), constrained_layout=True, dpi=200)

            # cation is moltype 1, anion is moltype 2
            for i, ion in enumerate(('cation', 'anion')):
                ax = axes[i]
                # moltype of the most recently plotted system, used for the
                # panel title; stays None if nothing is plotted
                ion_moltype = None
                systems = (sys for sys in system_generator({systype_name: systype}, force_fields_to_show) if 'dos' in sys['tags'])
                for system in systems:
                    ff_name = system['force-field']['name']
                    ion_moltype = moltypes_with_dos_dict[system['name']][i+1]
                    popt = popt_dict[ion]['opt'][system['name']]

                    frequencies = np.array(ion_moltype['doses'][('frequencies', 0)])
                    dos_samples = np.array(ion_moltype['doses'].loc[:, ('trn', slice(None))]).T
                    dos_mean = dos_samples.mean(axis=0)
                    wavenumbers = frequencies * oconst.rec_cm_per_THz

                    line, = ax.plot(wavenumbers,
                                    dos_mean / oconst.rec_cm_per_THz,
                                    linestyle='-',
                                    linewidth=1.0,
                                    color=ff_colors[ff_name],
                                    label=ff_short_names[ff_name],
                                   )
                    # fit
                    ax.plot(wavenumbers,
                            fit_func[ion][systype_name](frequencies, *popt) / oconst.rec_cm_per_THz,
                            color=line.get_color(),
                            linestyle='--',
                            linewidth=0.6,
                           )
                    # popt holds (a, f0, γ) triples: [-2] is the f0 of the
                    # last Lorentzian, [-5] the f0 of the second to last
                    ax.axvline(popt[-2] * oconst.rec_cm_per_THz,
                               color=line.get_color(), linewidth=0.6, linestyle=':')
                    ax.axvline(popt[-5] * oconst.rec_cm_per_THz,
                               color=line.get_color(), linewidth=0.5, linestyle='--')

                    print(ff_short_names[ff_name])
                    print(popt[1::3])
                    print(popt[1::3] * oconst.rec_cm_per_THz)

                if ion_moltype is not None:
                    ax.set_title(ion_short_names[ion_moltype['name']])
                ax.set_ylim(0)
                ax.set_xlabel(r'$\tilde v$ in cm$^{-1}$')

            axes[0].set_xlim(xlim_cations[systype_name])
            axes[1].set_xlim(0, 400)
            axes[1].legend(frameon=False)
            axes[0].set_ylabel(r'$D\!O\!S(\tilde v)$ in cm')
            for ax in axes:
                ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
            fig.savefig(f"../figures/dos-{systype_name}.pdf")
            plt.show()

plot_dos()

### show water dos

In [None]:
# show water dos
def plot_water_dos():
    """Plot the translational and 'roto' DoS of moltype 0, one panel per electrolyte type.

    NOTE(review): moltype 0 is presumably water, going by the function name - confirm.

    The panels of a 2x2 grid share both axes.  For each force field (all of
    force_fields_2pt except 'netz-co0.9') the sample-averaged 'trn' DoS
    (solid) and 'roto' DoS (dashed) are drawn.  Each panel is labelled once
    with the short name of its system type.  The figure is saved as
    ``../figures/dos-water.pdf``.
    """
    params = {
        'legend.handlelength': 2.0,
        'legend.fontsize': 8,
        'legend.labelspacing': 0.05,
        'figure.dpi': 96,
    }

    force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn not in ('netz-co0.9',)}
    system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn not in ('water-pure',)}

    with mpl.rc_context(rc={**mpl_rc_global, **params}):

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(4.6, 3.2), constrained_layout=True, dpi=200, sharex='all', sharey='all')

        for t, (systype_name, systype) in enumerate(system_types_to_show.items()):
            print(f"system type {systype_name}")

            ax = axes.flatten()[t]
            systems = (sys for sys in system_generator({systype_name: systype}, force_fields_to_show) if 'dos' in sys['tags'])
            for s, system in enumerate(systems):
                print(f"system {system['name']}")
                ff_name = system['force-field']['name']
                doses = moltypes_with_dos_dict[system['name']][0]['doses']

                frequencies = np.array(doses[('frequencies', 0)])
                dos_trn_mean = np.array(doses.loc[:, ('trn', slice(None))]).T.mean(axis=0)
                dos_rot_mean = np.array(doses.loc[:, ('roto', slice(None))]).T.mean(axis=0)
                wavenumbers = frequencies * oconst.rec_cm_per_THz
                color = ff_colors[ff_name]

                ax.plot(wavenumbers,
                        dos_trn_mean / oconst.rec_cm_per_THz,
                        linestyle='-',
                        linewidth=1.0,
                        color=color,
                        label=ff_short_names[ff_name],
                       )
                ax.plot(wavenumbers,
                        dos_rot_mean / oconst.rec_cm_per_THz,
                        linestyle='--',
                        linewidth=1.0,
                        color=color,
                       )
                # label the panel once, not once per force field (identical
                # texts would be stacked on top of each other)
                if s == 0:
                    ax.text(0.2, 0.8, sys_type_short_names[systype_name], transform=ax.transAxes)

        # all
        for ax in axes.flatten():
            ax.set_xlim(0, 1100)
            ax.set_ylim(0)
        # first col
        for ax in axes[:, 0]:
            ax.set_ylabel(r'$D\!O\!S(\tilde v)$ in cm')
        # bottom row
        for ax in axes[-1, :]:
            ax.set_xlabel(r'$\tilde v$ in cm$^{-1}$')
        axes[0, 0].legend(frameon=False)
        fig.savefig(f"../figures/dos-water.pdf")
        plt.show()

plot_water_dos()

### show changes in water spectra

In [None]:
# show water dos of electrolytes compared to pure water
def plot_water_dos_changes():
    """Plot the DoS of moltype 0 in electrolytes against the pure-water system.

    NOTE(review): moltype 0 is presumably water, going by the function name - confirm.

    Two figures with three stacked panels each are made, one panel per
    system type (all except 'water-kcl' and 'water-pure'):

    * the sample-averaged 'trn' (solid) and 'roto' (dashed) DoS of the
      systems with molar mixing ratio 0.1, plus the same for the pure-water
      system of the same force field in black,
    * the difference electrolyte minus pure water.

    Systems whose DoS is not loaded are skipped.  The pure-water reference
    and the panel labels are drawn together with the first system that is
    actually plotted into a panel.  The pure-water DoS is drawn over the
    frequencies of the electrolyte system.

    The figures are saved as ``../figures/dos-water.pdf`` and
    ``../figures/dos-water-differences.pdf``.
    """
    params = {
        'legend.handlelength': 2.0,
        'legend.fontsize': 8,
        'legend.labelspacing': 0.05,
        'figure.dpi': 96,
    }

    force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn in ('iff-altern5-eccr1-co1.2-nopc',)}
    #force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn in ('iff-altern5-eccr1-co1.2-nopc', 'madrid-co1.0tc', 'eccr1-co1.2', 'netz-co0.9tc')}
    #force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn in ('iff-altern5-eccr1-co1.2-nopc', 'eccr1-co1.2')}
    system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn not in ['water-kcl', 'water-pure']}
    systype_pure_name = 'water-pure'
    systype_pure = system_types_2pt[systype_pure_name]

    with mpl.rc_context(rc={**mpl_rc_global, **params}):

        # dos
        fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(5.2, 3.4), constrained_layout=True, dpi=200, sharex='all')
        # dos difference
        fig_d, axes_d = plt.subplots(nrows=3, ncols=1, figsize=(5.2, 3.4), constrained_layout=True, dpi=200, sharex='all')

        for t, (systype_name, systype) in enumerate(system_types_to_show.items()):
            print(f"system type {systype_name}")

            ax = axes[t]
            ax_d = axes_d[t]
            # True until the first system has been drawn into this panel
            panel_is_empty = True
            systems = (sys for sys in system_generator({systype_name: systype}, force_fields_to_show)
                       if ('dos' in sys['tags']) and (sys['molar-mixing-ratio'] == 0.1))
            for system in systems:
                print(f"system {system['name']}", system['molar-mixing-ratio'])
                ff_name = system['force-field']['name']
                print(ff_name)

                # load dos pure water (same force field)
                system_pure = next(system_generator({systype_pure_name: systype_pure}, {ff_name: system['force-field']}))
                doses_pure = moltypes_with_dos_dict[system_pure['name']][0]['doses']
                dos_trn_mean_pure = np.array(doses_pure.loc[:, ('trn', slice(None))]).T.mean(axis=0)
                dos_rot_mean_pure = np.array(doses_pure.loc[:, ('roto', slice(None))]).T.mean(axis=0)

                # load dos electrolyte
                doses = moltypes_with_dos_dict[system['name']][0]['doses']
                try:
                    frequencies = np.array(doses[('frequencies', 0)])
                except (TypeError, KeyError):
                    # load_dos() leaves 'doses' at None if loading failed
                    continue
                dos_trn_mean = np.array(doses.loc[:, ('trn', slice(None))]).T.mean(axis=0)
                dos_rot_mean = np.array(doses.loc[:, ('roto', slice(None))]).T.mean(axis=0)

                wavenumbers = frequencies * oconst.rec_cm_per_THz
                color = ff_colors[ff_name]
                label = ff_short_names[ff_name]

                # plot dos
                if panel_is_empty:
                    ax.plot(wavenumbers,
                            dos_trn_mean_pure / oconst.rec_cm_per_THz,
                            linestyle='-',
                            linewidth=1.0,
                            color='k',
                            label='pure water',
                           )
                    ax.plot(wavenumbers,
                            dos_rot_mean_pure / oconst.rec_cm_per_THz,
                            linestyle='--',
                            linewidth=1.0,
                            color='k',
                           )
                ax.plot(wavenumbers,
                        dos_trn_mean / oconst.rec_cm_per_THz,
                        linestyle='-',
                        linewidth=1.0,
                        color=color,
                        label=label,
                       )
                ax.plot(wavenumbers,
                        dos_rot_mean / oconst.rec_cm_per_THz,
                        linestyle='--',
                        linewidth=1.0,
                        color=color,
                       )

                # plot dos difference
                ax_d.plot(wavenumbers,
                          (dos_trn_mean - dos_trn_mean_pure) / oconst.rec_cm_per_THz,
                          linestyle='-',
                          linewidth=1.0,
                          color=color,
                          label=label,
                         )
                ax_d.plot(wavenumbers,
                          (dos_rot_mean - dos_rot_mean_pure) / oconst.rec_cm_per_THz,
                          linestyle='--',
                          linewidth=1.0,
                          color=color,
                         )

                # label each panel exactly once
                if panel_is_empty:
                    ax.text(0.2, 0.8, sys_type_short_names[systype_name], transform=ax.transAxes)
                    ax_d.text(0.1, 0.1, sys_type_short_names[systype_name], transform=ax_d.transAxes)
                    panel_is_empty = False

        # dos
        for ax in axes:
            ax.set_xlim(0, 1100)
            ax.set_ylim(0)
            ax.set_ylabel(r'$D\!O\!S(\tilde v)$ in cm')
        axes[-1].set_xlabel(r'$\tilde v$ in cm$^{-1}$')
        axes[0].legend(frameon=False)

        # difference
        for ax_d in axes_d:
            ax_d.set_xlim(0, 1100)
            #ax_d.set_ylim(-1e-3, 1e-3)
            ax_d.set_ylabel(r'$\Delta D\!O\!S(\tilde v)$ in cm')
            ax_d.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
            ax_d.axhline(0, color='gray', linewidth=0.5, linestyle=':', zorder=-10)
        axes_d[-1].set_xlabel(r'$\tilde v$ in cm$^{-1}$')
        axes_d[0].legend(frameon=False, ncol=5)

        # NOTE(review): plot_water_dos() saves to the same dos-water.pdf, so
        # this call overwrites that figure - confirm this is intended
        fig.savefig(f"../figures/dos-water.pdf")
        fig_d.savefig(f"../figures/dos-water-differences.pdf")
        plt.show()

plot_water_dos_changes()

In [None]:
# show dos of electrolytes compared to pure water, for several selections of spectra
def plot_dos_changes():
    """Plot selected DoS spectra of electrolyte systems together with pure water.

    One figure with three stacked panels (one per system type, all except
    'water-kcl' and 'water-pure') is made for every combination of a DoS set
    from ``doses_to_show_sets`` and a force field set ('ecc-imc', 'madrid').
    Only systems tagged 'dos' with molar mixing ratio 0.1 are shown.

    Entries of a DoS set with 'sum' set are not drawn individually: they are
    weighted with the 'nmols' of their moltype and added up into one 'sum'
    spectrum, drawn with the line style of the first entry of the set.

    The figures are saved as ``../figures/dos-<dos set>-<force field set>.pdf``.
    """
    params = {
        'legend.handlelength': 2.0,
        'legend.fontsize': 8,
        'legend.labelspacing': 0.05,
        'figure.dpi': 96,
    }

    system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn not in ['water-kcl', 'water-pure']}
    # NOTE(review): system_types_pure is not used below
    system_types_pure = {stn: st for stn, st in system_types_2pt.items() if stn in ['water-pure']}
    systype_pure_name = 'water-pure'
    systype_pure = system_types_2pt[systype_pure_name]
    
    # per DoS set: which spectrum ('dos') of which moltype (index into the
    # moltypes list) is shown, with which line style and legend label
    # NOTE(review): moltypes 0 / 1 / 2 are presumably water / cation / anion - confirm
    doses_to_show_sets = {
        'sum': {
            'trn': {'dos': 'trn', 'label': 'trn', 'linestyle': '-', 'moltype': 0, 'sum': True},
            'roto': {'dos': 'roto', 'label': 'rot', 'linestyle': '--', 'moltype': 0, 'sum': True},
            'trn-cat': {'dos': 'trn', 'label': 'trn', 'linestyle': '-', 'moltype': 1, 'sum': True},
            'trn-an': {'dos': 'trn', 'label': 'trn', 'linestyle': '-', 'moltype': 2, 'sum': True},
        },
        'ions': {
            'trn-cat': {'dos': 'trn', 'label': 'cation', 'linestyle': '-', 'moltype': 1, 'sum': False},
            'trn-an': {'dos': 'trn', 'label': 'anion', 'linestyle': '--', 'moltype': 2, 'sum': False},
        },
        'trn+rot': {
            'trn': {'dos': 'trn', 'label': 'trn', 'linestyle': '-', 'moltype': 0, 'sum': False},
            'roto': {'dos': 'roto', 'label': 'rot', 'linestyle': '--', 'moltype': 0, 'sum': False},
        },
        'roto-sep': {
            'roto_a': {'dos': 'roto_a', 'label': 'a', 'linestyle': '-', 'moltype': 0, 'sum': False},
            'roto_b': {'dos': 'roto_b', 'label': 'b', 'linestyle': '--', 'moltype': 0, 'sum': False},
            'roto_c': {'dos': 'roto_c', 'label': 'c', 'linestyle': ':', 'moltype': 0, 'sum': False},
        },
    }
    
    for dos_set_name, doses_to_show in doses_to_show_sets.items():
        print(dos_set_name)
    
        # one figure per force field set; each set holds a single force field
        force_fields_to_show_sets = {
            'ecc-imc': {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn in ('iff-altern5-eccr1-co1.2-nopc',)},
            'madrid': {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn in ('madrid-co1.0tc',)},
        }
        for ff_set_name, force_fields_to_show in force_fields_to_show_sets.items():
            print(ff_set_name)

            with mpl.rc_context(rc={**mpl_rc_global, **params}):

                # dos
                fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(5.2, 3.4), constrained_layout=True, dpi=200, sharex='all')
                # dos difference
                #fig_d, axes_d = plt.subplots(nrows=3, ncols=1, figsize=(5.2, 3.4), constrained_layout=True, dpi=200, sharex='all')

                for t, (systype_name, systype) in enumerate(system_types_to_show.items()):
                    #print(f"system type {systype_name}")

                    ax = axes[t]
                    #ax_d = axes_d[t]
                    for s, system in enumerate((sys for sys in system_generator({systype_name: systype}, force_fields_to_show) if ('dos' in sys['tags']) and (sys['molar-mixing-ratio'] == 0.1))):
                        #print(f"system {system['name']}", system['molar-mixing-ratio'])

                        # load dos pure water (first generated pure-water system of the same force field)
                        #print(system['force-field']['name'])
                        system_pure = next(system_generator({systype_pure_name: systype_pure}, {system['force-field']['name']: system['force-field']}))
                        moltypes_pure_with_dos = moltypes_with_dos_dict[system_pure['name']]
                        doses_mean_pure = {}
                        for dos, dos_data in doses_to_show.items():
                            # NOTE(review): bare except; presumably meant for the IndexError raised when
                            # the pure-water system has no moltype at this index - confirm and narrow
                            try:
                                moltype_pure = moltypes_pure_with_dos[dos_data['moltype']]
                            except:
                                continue
                            if dos_data['sum']:
                                # accumulate the nmols-weighted sample means under the key 'sum'
                                if 'sum' not in doses_mean_pure:
                                    doses_mean_pure['sum'] = moltype_pure['nmols'] * np.array(moltype_pure['doses'].loc[:, (dos_data['dos'], slice(None))]).T.mean(axis=0)
                                else:
                                    doses_mean_pure['sum'] += moltype_pure['nmols'] * np.array(moltype_pure['doses'].loc[:, (dos_data['dos'], slice(None))]).T.mean(axis=0)
                            else:
                                doses_mean_pure[dos] = np.array(moltype_pure['doses'].loc[:, (dos_data['dos'], slice(None))]).T.mean(axis=0)

                        # load dos electrolyte (same scheme as for pure water, but every moltype must exist)
                        moltypes_with_dos = moltypes_with_dos_dict[system['name']]
                        frequencies = np.array(moltypes_with_dos[0]['doses'][('frequencies', 0)])
                        doses_mean = {}
                        for dos, dos_data in doses_to_show.items():
                            moltype = moltypes_with_dos[dos_data['moltype']]
                            if dos_data['sum']:
                                if 'sum' not in doses_mean:
                                    doses_mean['sum'] = moltype['nmols'] * np.array(moltype['doses'].loc[:, (dos_data['dos'], slice(None))]).T.mean(axis=0)
                                else:
                                    doses_mean['sum'] += moltype['nmols'] * np.array(moltype['doses'].loc[:, (dos_data['dos'], slice(None))]).T.mean(axis=0)
                            else:
                                doses_mean[dos] = np.array(moltype['doses'].loc[:, (dos_data['dos'], slice(None))]).T.mean(axis=0)

                        # plot dos
                        color = ff_colors[system['force-field']['name']]
                        # plot pure (only together with the first system of the panel,
                        # over the frequencies of the electrolyte system)
                        if s == 0:
                            for d, (dos, dos_data) in enumerate(doses_to_show.items()):
                                if dos_data['sum']:
                                    # summed sets are drawn once, with the style of the first entry
                                    dos = 'sum'
                                    if d != 0:
                                        continue
                                if dos not in doses_mean_pure.keys():
                                    print('.. no pure water data ..')
                                    continue
                                ax.plot(frequencies * oconst.rec_cm_per_THz,
                                        doses_mean_pure[dos] / oconst.rec_cm_per_THz,
                                        linestyle=dos_data['linestyle'],
                                        linewidth=1.0,
                                        color='k',
                                        label='pure water' if d == 0 else None,
                                       )
                        # plot electrolyte
                        for d, (dos, dos_data) in enumerate(doses_to_show.items()):
                            if dos_data['sum']:
                                # summed sets are drawn once, with the style of the first entry
                                dos = 'sum'
                                if d != 0:
                                    continue
                            line_rot, = ax.plot(frequencies * oconst.rec_cm_per_THz,
                                                doses_mean[dos] / oconst.rec_cm_per_THz,
                                                linestyle=dos_data['linestyle'],
                                                linewidth=1.0,
                                                color=color,
                                                label=ff_short_names[system['force-field']['name']] if d == 0 else None,
                                           )
                        # label the panel with the system type once
                        if s == 0:
                            ax.text(0.22, 0.8, sys_type_short_names[systype_name], transform=ax.transAxes)

                        # plot dos difference (disabled: kept as an unused string)
                        """
                        for a, mol_ax in enumerate(mol_axes):
                            line_rot, = ax_d.plot(frequencies * oconst.rec_cm_per_THz,
                                            (dos_rot_ax_mean[a] - dos_rot_ax_mean_pure[a]) / oconst.rec_cm_per_THz,
                                            linestyle=linestyles[a],
                                            linewidth=1.0,
                                            color=color,
                                           )
                        #ax.set_title('foo')
                        if s == 0:
                            ax_d.text(0.1, 0.1, sys_type_short_names[systype_name], transform=ax_d.transAxes)
                        """

                # dos
                for ax in axes:
                    ax.set_xlim(0, 1100)
                    ax.set_ylim(0)
                    ax.set_ylabel(r'$D\!O\!S(\tilde v)$ in cm')
                axes[-1].set_xlabel(r'$\tilde v$ in cm$^{-1}$')
                leg1 = axes[0].legend(frameon=False, loc='upper right')
                if dos_set_name != 'sum':
                    # second legend explaining the line styles; the first legend is
                    # re-added as artist, so the second legend() call does not replace it
                    axes[0].add_artist(leg1)
                    handles = []
                    for d, (dos, dos_data) in enumerate(doses_to_show.items()):
                        # nan data: adds only a legend handle, nothing visible
                        handles.append(axes[0].plot(np.nan, np.nan, linestyle=dos_data['linestyle'], color='grey', label=dos_data['label'])[0])
                    axes[0].legend(handles=handles, loc='upper left' if dos_set_name == 'roto-sep' else 'upper center')

                # difference (disabled: kept as an unused string)
                """
                for ax_d in axes_d:
                    ax_d.set_xlim(0, 1100)
                    #ax_d.set_ylim(-1e-3, 1e-3)
                    ax_d.set_ylabel(r'$\Delta D\!O\!S(\tilde v)$ in cm')
                    ax_d.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
                    ax_d.axhline(0, color='gray', linewidth=0.5, linestyle=':', zorder=-10)
                axes_d[-1].set_xlabel(r'$\tilde v$ in cm$^{-1}$')
                axes_d[0].legend(frameon=False, ncol=5)
                """

                fig.savefig(f"../figures/dos-{dos_set_name}-{ff_set_name}.pdf")
                #fig_d.savefig(f"../figures/dos-rot-water-{ff_set_name}-differences.pdf")
                plt.show()
            
plot_dos_changes()

In [None]:
!cp -a ../figures/dos-* ~/research/output/ion-spectra/figures

### toc dos plot

In [None]:
# show ion dos
def plot_dos_toc():
    """Draw the small table-of-contents DoS figure for aqueous LiCl.

    For every 'water-licl' system that carries the 'dos' tag and uses one of
    the two selected force fields, the sample-averaged translational density
    of states of the moltype at index 1 of ``moltypes_with_dos_dict`` is
    plotted.  Curve number ``s`` is offset vertically by ``(1 - s) * 1.5e-2``
    so that the curves do not overlap.  ``freq_exp['water-licl']`` is marked
    with a dotted vertical line.  The figure is written to
    ``../figures/toc-dos.svg`` and shown.
    """
    systype_name = 'water-licl'
    systype = system_types_2pt[systype_name]
    ff_names_to_show = ('netz-co0.9tc', 'iff-altern5-netz-co0.9-nopc')
    force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn in ff_names_to_show}

    with mpl.rc_context(rc={**mpl_rc_global}):
        fig, ax = plt.subplots(figsize=(1.0, 1.8), constrained_layout=True, dpi=300)
        systems_with_dos = (
            candidate
            for candidate in system_generator({systype_name: systype}, force_fields_to_show)
            if 'dos' in candidate['tags']
        )
        for s, system in enumerate(systems_with_dos):
            print(f"system {system['name']}")
            # NOTE(review): index 1 is presumably the cation moltype - confirm
            moltype = moltypes_with_dos_dict[system['name']][1]

            frequencies = np.array(moltype['doses'][('frequencies', 0)])
            # one row per sample after the transpose; average over the samples
            dos_samples = np.array(moltype['doses'].loc[:, ('trn', slice(None))]).T
            dos_mean = dos_samples.mean(axis=0)
            ff_name = system['force-field']['name']
            # plot dos
            ax.plot(frequencies * oconst.rec_cm_per_THz,
                    dos_mean / oconst.rec_cm_per_THz + (1-s) * 1.5e-2,  # shift upward
                    linestyle='-',
                    linewidth=1.0,
                    color=ff_colors[ff_name],
                    label=ff_short_names[ff_name],
                   )
        # show exp
        ax.axvline(freq_exp[systype_name], color='k', linewidth=1.0, linestyle=':')

        ax.set_ylim(0)
        ax.set_xlim(0, 850)
        ax.set_xlabel(r'$\tilde v$ in cm$^{-1}$')
        # only the bottom axis is kept for the TOC graphic
        ax.set_yticks([])
        ax.spines['top'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')

        # arrow head at the right end of the x axis
        ax.plot(1, 0, ">k", transform=ax.get_yaxis_transform(), clip_on=False, markersize=1.0)

        fig.savefig("../figures/toc-dos.svg", dpi=300)
        plt.show()
            
plot_dos_toc()

In [None]:
!cp -a ../figures/toc-dos.svg ~/research/output/ion-shortrange-paper/figures/

### highest cation frequency

In [None]:
# show cation highest fit frequency
def highest_cat_freq(portrait=True, show_pmf_frequency=False):
    """Bar chart of one fitted cation frequency per system type and force field.

    For each system type (all of ``system_types_2pt`` except 'water-pure' and
    'water-kcl') and each force field (all of ``force_fields_2pt`` except
    'netz-co0.9') one bar shows the fit parameter
    ``popt_dict['cation']['opt'][system name][index]`` with
    ``popt_dict['cation']['err']`` as error bar, where ``index`` is -2
    (-5 for 'water-cacl2_').  A black bar per system type shows
    ``freq_exp``.  The figure is saved to
    ``../figures/cation-highest-freq.pdf`` and shown.

    Parameters
    ----------
    portrait : bool
        Selects figure size and legend position.
    show_pmf_frequency : bool
        If true, additionally mark ``pmf_frequency_dict`` values with crosses
        (NaN for systems of type 'water-cacl2_').
    """
    MEDIUM_SIZE = 10
    BIGGER_SIZE = 12
    params = {
        'font.size': MEDIUM_SIZE,          # controls default text sizes
        'axes.titlesize': MEDIUM_SIZE,     # fontsize of the axes title
        'axes.labelsize': MEDIUM_SIZE,     # fontsize of the x and y labels
        'xtick.labelsize': MEDIUM_SIZE,    # fontsize of the tick labels
        'ytick.labelsize': MEDIUM_SIZE,    # fontsize of the tick labels
        'figure.titlesize': BIGGER_SIZE,   # fontsize of the figure title
        'legend.fontsize': 8,
        'legend.handlelength': 2.0,
        'legend.labelspacing': 0.1,
        'legend.columnspacing': 1.2,
        'figure.dpi': 120,
        'text.usetex': True,
    }
    width = 0.125  # of each bar
    width_percentage = 0.85
    force_fields_to_show = {ffn: ff for ffn, ff in force_fields_2pt.items() if ffn not in ('netz-co0.9',)}
    system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn not in ('water-pure', 'water-kcl')}
    n_ff = len(force_fields_to_show)
    # one slot per force field plus a final slot for the experimental bar,
    # centred around the tick of the system type
    slot_offsets = np.linspace(0, width*n_ff, num=n_ff+1) - width*n_ff/2

    with mpl.rc_context(rc={**mpl_rc_global, **params}):

        figsize = (3.8, 3.4) if portrait else (3.8, 2.0)
        fig, ax = plt.subplots(figsize=figsize, constrained_layout=True, dpi=300)
        fig.set_constrained_layout_pads(w_pad=0.02, h_pad=0.01)
        x = np.arange(len(system_types_to_show))
        yexp = []
        y_dict = {}
        y_pmf_dict = {}
        yerr_dict = {}

        # collect the values to plot, keyed by (system type, force field)
        for systype_name, systype in system_types_to_show.items():
            yexp.append(freq_exp[systype_name])

            for system in system_generator({systype_name: systype}, force_fields_to_show):
                if 'dos' not in system['tags']:
                    continue
                # TODO: assert that the shown parameter really is the highest fitted frequency
                index_to_show = -5 if systype_name == 'water-cacl2_' else -2
                key = (systype_name, system['force-field']['name'])
                y_dict[key] = popt_dict['cation']['opt'][system['name']][index_to_show]
                yerr_dict[key] = popt_dict['cation']['err'][system['name']][index_to_show]
                if show_pmf_frequency:
                    y_pmf_dict[key] = (pmf_frequency_dict[system['name']]
                                       if system['type']['name'] != 'water-cacl2_'
                                       else np.nan)

        for f, ffn in enumerate(force_fields_to_show):
            xf = x + slot_offsets[f]
            yf = np.array([y_dict[(systype_name, ffn)] for systype_name in system_types_to_show])
            yerrf = np.array([yerr_dict[(systype_name, ffn)] for systype_name in system_types_to_show])
            # the error is an uncertainty of the same fit parameter, so it needs
            # the same unit conversion as the bar height
            ax.bar(xf, yf * oconst.rec_cm_per_THz, width=width*width_percentage,
                   yerr=yerrf * oconst.rec_cm_per_THz, color=ff_colors[ffn], label=ff_short_names[ffn])
            if show_pmf_frequency:
                yf_pmf = [y_pmf_dict[(systype_name, ffn)] for systype_name in system_types_to_show]
                ax.plot(xf, np.array(yf_pmf) * oconst.rec_cm_per_THz, color='k', linestyle='none', marker='x', markersize=3)

        # plot experimental (freq_exp is used without unit conversion)
        ax.bar(x + slot_offsets[-1], yexp, width=width*width_percentage, color='k', label='exp.')

        ax.set_xticks(x)
        ax.set_xticklabels([sys_type_short_names[stn] for stn in system_types_to_show])
        legend_loc = (0.38, 0.78) if portrait else (0.0, 0.8)
        ax.legend(ncol=2, loc=legend_loc)
        ax.set_ylim(0, 655)
        ax.set_ylabel(r'$\tilde v$ in cm$^{-1}$')
        fig.savefig('../figures/cation-highest-freq.pdf')
        plt.show()
        
highest_cat_freq(show_pmf_frequency=True)
#highest_cat_freq(show_pmf_frequency=False)

In [None]:
!cp -a ../figures/cation-highest-freq.pdf ~/research/output/ion-shortrange-paper/figures/

## 2pt entropies

### functions

In [None]:
def _calc_normalized_diffusivity(dos, moltype, temperature, pascal_2010_var=False):
    """Return the normalized diffusivity computed from the zero-frequency DoS.

    The value is ``2 dos[0] / 9 * (pi k T / m)**p * (N / V)**(1/3) * (6 / pi)**(2/3)``
    with ``m`` the summed atom masses of the moltype, ``N = moltype['nmols']``
    and ``V = moltype['partial_volume']``.  The exponent ``p`` is 1/3 when
    ``pascal_2010_var`` is true (as defined in Pascal 2010) and 1/2 otherwise
    (as defined in Lin 2003, Lin 2010, lai_2014).
    """
    partial_volume = moltype['partial_volume']
    nmols = moltype['nmols']
    mass = sum(atom['mass'] for atom in moltype['atoms'])
    thermal_exponent = 1/3 if pascal_2010_var else 1/2
    thermal_factor = (np.pi * oconst.k_gro * temperature / mass)**thermal_exponent
    density_factor = (nmols / partial_volume)**(1/3)
    return 2 * dos[0] / 9 * thermal_factor * density_factor * (6 / np.pi)**(2/3)

def _calc_f(normalized_diffusivity):
    delta = normalized_diffusivity
    def function(f):
        return (2 * delta**(-9/2) * f**(15/2) - 6 * delta**-3 * f**5 
                - delta**(-3/2) * f**(7/2) + 6 * delta**(-3/2) * f**(5/2) + 2 * f - 2)
    return optimize.brenth(function, 0, 1)

def _calc_dos_gas(dos, f, frequencies):
    return dos[0] / (1 + (np.pi * dos[0] * frequencies / (6 * f))**2)

def _calc_w_s_solid(frequencies, temperature):
    """Return the entropy weighting function of the solid-like DoS.

    With ``x = h nu / (k T)`` the weight is ``x / (exp(x) - 1) - ln(1 - exp(-x))``.
    The first frequency is excluded from the evaluation and its weight is
    set to 0, which circumvents the zero-division warning.
    """
    # reduced frequency x for every entry except the first one
    x = 1 / (oconst.k_gro * temperature) * oconst.h_gro * frequencies[1:]
    weights = x / (np.exp(x) - 1) - np.log(1 - np.exp(-x))
    return np.insert(weights, 0, 0)

def _calc_w_s_trn_gas(moltype, temperature):
    """Return the (frequency independent) entropy weight of the translational gas-like DoS.

    Reads 'partial_volume', 'nmols', 'f_trn', 'atoms' and
    'normalized_diffusivity_trn' from ``moltype``.  With
    ``y = f_trn**(5/2) / normalized_diffusivity_trn**(3/2)`` and
    ``z(y) = (1 + y + y**2 - y**3) / (1 - y)**3`` the result is
    ``1/3 * (5/2 + ln((2 pi m k T / h**2)**(3/2) V / f_trn / N * z(y))
    + y (3 y - 4) / (1 - y)**2)``.
    """
    partial_volume = moltype['partial_volume']
    nmols = moltype['nmols']
    f_trn = moltype['f_trn']
    mass_mol = sum(atom['mass'] for atom in moltype['atoms'])
    normalized_diffusivity = moltype['normalized_diffusivity_trn']

    y = f_trn**(5/2) / normalized_diffusivity**(3/2)
    # NOTE(review): presumably the hard-sphere compressibility factor - confirm
    z_of_y = (1 + y + y**2 - y**3) / (1-y)**3
    thermal_term = (2 * np.pi * mass_mol * oconst.k_gro * temperature / oconst.h_gro**2)**(3/2)
    log_argument = thermal_term * partial_volume / f_trn / nmols * z_of_y
    excess_term = y * (3 * y - 4) / (1 - y)**2
    return 1 / 3 * (5 / 2 + np.log(log_argument) + excess_term)
    
def _calc_w_s_rot_gas(moltype, temperature):
    nmols = moltype['nmols']
    f_rot = moltype['f_rot']
    normalized_diffusivity = moltype['normalized_diffusivity_rot']
    moments_of_inertia = moltype['moi']
    sigma = moltype['sigma']
    if len(moltype['atoms']) > 1:
        rotational_temperatures = oconst.h_gro**2 / (8 * np.pi**2 * moments_of_inertia * oconst.k_gro)
        return 1 / 3 * np.log(np.sqrt(np.pi) * np.exp(3/2) / sigma 
                              * np.sqrt(temperature**3 / np.prod(rotational_temperatures)))
    else:
        return 0
    
def calculate_entropy_2PTQ(moltypes_with_dos, volume, temperature, sample, norm_dos=None, pascal_2010_var=False):
    """Split each moltype's DoS into gas-like and solid-like parts and integrate entropies.

    Parameters
    ----------
    moltypes_with_dos : sequence of dict
        Each dict is read for 'nmols', 'atoms', 'doses' and
        'moments_of_inertia' (and whatever the ``_calc_*`` helpers read).
        'doses' must be indexable by ('frequencies', 0 level), ('trn', sample),
        ('roto', sample) and ('vib', sample).  The input dicts are not
        modified; deep copies are returned.
    volume : float
        Total volume; each moltype gets ``mole_fraction * volume`` as its
        partial volume.
    temperature : float
        Temperature used in all weighting functions.
    sample : hashable
        Second-level column key selecting the sample inside 'doses'.
    norm_dos : {None, 'temp'}
        With 'temp' every DoS is multiplied by ``1 / (1/2 k T)`` before use.
    pascal_2010_var : bool
        Forwarded to ``_calc_normalized_diffusivity`` for the translational
        part only.

    Returns
    -------
    (list of dict, float)
        The enriched moltype copies (moltypes with ``nmols == 0`` are left
        out) and the sum of their entropies plus the
        ``-k x ln(x)`` mole-fraction terms.

    Warns
    -----
    UserWarning
        If the integral of the (normalized) translational DoS deviates from 3
        by more than 0.1.
    """
    # Function-scope import so the warning below cannot fail with a NameError
    # when `warnings` has not been imported by another cell.
    import warnings

    # np.trapezoid is the NumPy >= 2.0 name; older NumPy only has np.trapz
    # (scipy.integrate.trapz, used before, no longer exists in recent SciPy).
    integrator = getattr(np, 'trapezoid', None) or np.trapz

    # loop invariant: total number of molecules of all moltypes
    total_nmols = sum(mt['nmols'] for mt in moltypes_with_dos)

    # total entropy of system
    entropy = 0
    moltypes_2pt = []

    for moltype in moltypes_with_dos:
        if moltype['nmols'] == 0:
            continue

        moltype = deepcopy(moltype)
        moltype['mole_fraction'] = moltype['nmols'] / total_nmols
        moltype['partial_volume'] = moltype['mole_fraction'] * volume

        # get data of sample
        doses = moltype['doses']
        frequencies = np.array(doses['frequencies'][0])
        moltype['frequencies'] = frequencies
        moltype['moi'] = np.array(moltype['moments_of_inertia'][sample])
        prefactor = 1
        if norm_dos == 'temp':
            prefactor = 1 / (1/2 * oconst.k_gro * temperature)
        moltype['dos_trn'] = prefactor * np.array(doses[('trn', sample)])
        moltype['dos_rot'] = prefactor * np.array(doses[('roto', sample)])
        moltype['dos_vib'] = prefactor * np.array(doses[('vib', sample)])

        # integrals of the three DoS
        moltype['nDoF_trn'] = integrator(moltype['dos_trn'], frequencies)
        moltype['nDoF_rot'] = integrator(moltype['dos_rot'], frequencies)
        moltype['nDoF_vib'] = integrator(moltype['dos_vib'], frequencies)

        # warn if not integral = ndof
        integral_trn = moltype['nDoF_trn']
        if abs(integral_trn - 3) > 0.1:
            warnings.warn(f'integral of dos_trn is {integral_trn} but should be close to 3!')

        # translation
        moltype["normalized_diffusivity_trn"] = _calc_normalized_diffusivity(
            moltype['dos_trn'], moltype, temperature, pascal_2010_var=pascal_2010_var)
        moltype['f_trn'] = _calc_f(moltype["normalized_diffusivity_trn"])
        moltype['dos_trn_gas'] = _calc_dos_gas(moltype["dos_trn"], moltype["f_trn"], frequencies)
        moltype['dos_trn_solid'] = moltype["dos_trn"] - moltype["dos_trn_gas"]

        # rotation
        if len(moltype['atoms']) > 1:
            # NOTE(review): pascal_2010_var is not forwarded here, so the
            # rotational part always uses the default exponent - confirm intent
            moltype['normalized_diffusivity_rot'] = _calc_normalized_diffusivity(
                moltype["dos_rot"], moltype, temperature)
            moltype['f_rot'] = _calc_f(moltype["normalized_diffusivity_rot"])
            moltype['dos_rot_gas'] = _calc_dos_gas(moltype["dos_rot"], moltype["f_rot"], frequencies)
            moltype['dos_rot_solid'] = moltype["dos_rot"] - moltype["dos_rot_gas"]
        else:
            moltype['normalized_diffusivity_rot'] = 0
            moltype['f_rot'] = 0
            moltype['dos_rot_gas'] = np.zeros(len(moltype["dos_rot"]))
            moltype['dos_rot_solid'] = np.zeros(len(moltype["dos_rot"]))

        # weighting functions (w) for entropy (_s), motion (_trn, _rot) and
        # phase (_solid, _gas); the solid weight is the same for all motions
        w_s_solid = _calc_w_s_solid(frequencies, temperature)
        w_s_trn_gas = _calc_w_s_trn_gas(moltype, temperature)
        w_s_rot_gas = _calc_w_s_rot_gas(moltype, temperature)

        moltype['entropy_trn_ho'] = oconst.k_gro * integrator(w_s_solid * moltype['dos_trn_solid'], frequencies)
        moltype['entropy_trn_hs'] = oconst.k_gro * integrator(w_s_trn_gas * moltype['dos_trn_gas'], frequencies)
        moltype['entropy_trn'] = moltype['entropy_trn_ho'] + moltype['entropy_trn_hs']

        moltype['entropy_rot_hs'] = oconst.k_gro * integrator(w_s_rot_gas * moltype['dos_rot_gas'], frequencies)
        moltype['entropy_rot_ho'] = oconst.k_gro * integrator(w_s_solid * moltype['dos_rot_solid'], frequencies)
        moltype['entropy_rot'] = moltype['entropy_rot_ho'] + moltype['entropy_rot_hs']

        moltype['entropy_vib'] = oconst.k_gro * integrator(w_s_solid * moltype['dos_vib'], frequencies)

        moltype['entropy'] = moltype['entropy_trn'] + moltype['entropy_rot'] + moltype['entropy_vib']
        entropy += moltype['entropy']
        entropy -= oconst.k_gro * moltype['mole_fraction'] * np.log(moltype['mole_fraction'])

        # for later plotting: store the decomposition next to the input DoS,
        # with the normalization prefactor removed again
        for dos_name in ['trn_gas', 'trn_solid', 'rot_gas', 'rot_solid']:
            doses[(dos_name, sample)] = 1/prefactor * moltype['dos_' + dos_name]
        moltypes_2pt.append(moltype)

    return moltypes_2pt, entropy

In [None]:
def calc_volume_npt(edr_file):
    """Return the mean of the 'Volume' column that ``gmx energy`` extracts from ``edr_file``.

    ``gmx energy`` writes ``volume.xvg`` into the current working directory;
    the file is loaded with ``gt.xvg.load`` and removed again afterwards.
    """
    run_bash(f"gmx energy -f {edr_file} -o volume.xvg <<< 'volume'")
    volume_table = gt.xvg.load("volume.xvg")[0]
    run_bash("rm volume.xvg")
    mean_volume = np.mean(volume_table['Volume'])
    return mean_volume

In [None]:
def calc_s_trn_ideal_gas(moltype, temperature, volume):
    """Return ``k * (5/2 + ln((2 pi m k T / h**2)**(3/2) * V / N))`` for a moltype.

    ``m`` is the sum of the atom masses in ``moltype['atoms']`` and ``N`` is
    ``moltype['nmols']``.
    """
    nmols = moltype['nmols']
    mass_mol = sum(atom['mass'] for atom in moltype['atoms'])
    thermal_term = (2 * np.pi * mass_mol * oconst.k_gro * temperature / oconst.h_gro**2)**(3/2)
    log_argument = thermal_term * volume / nmols
    return oconst.k_gro * (5 / 2 + np.log(log_argument))

### create dataframe

In [None]:
# Create the (initially empty) frame that collects the 2PT entropies.
# Rows:    (system type name, force-field name, sample index)
# Columns: (moltype index 0..2, entropy component 'S' / 'S_trn' / 'S_rot')
index = pd.MultiIndex.from_tuples([(sys['type']['name'], sys['force-field']['name'], sample)
                                   for sys in sys_gen_2pt()
                                   for sample in range(param_dos['n_samples'])])
columns = pd.MultiIndex.from_product((range(3), ('S', 'S_trn', 'S_rot')))
df_2pt = pd.DataFrame(index=index, columns=columns, dtype=float)
# notebook display of the first rows
df_2pt.head()

### calculate entropies

In [None]:
def calc_2pt():
    """Fill ``df_2pt`` with the 2PT entropies of every system and sample.

    For each system from ``sys_gen_2pt()`` the mean NPT volume is read once
    from ``npt-prod-vel/ener.edr`` inside the system directory.  Then, for
    each sample, ``calculate_entropy_2PTQ`` is evaluated (DoS normalized by
    temperature, Pascal 2010 variant) and the total, translational and
    rotational entropy of each returned moltype are written to ``df_2pt``.

    The enriched moltypes are written back to ``moltypes_with_dos_dict``
    after every sample, so the next sample starts from them and the
    gas/solid DoS columns of all samples accumulate in 'doses'.
    """
    columns_from_keys = (('S', 'entropy'), ('S_trn', 'entropy_trn'), ('S_rot', 'entropy_rot'))
    n_samples = param_dos['n_samples']

    for system in sys_gen_2pt():
        print(f"system {system['name']}")
        # The volume does not depend on the sample: run gmx energy once per
        # system instead of once per sample.
        with WorkingDir(system['name']):
            volume = calc_volume_npt('npt-prod-vel/ener.edr')

        for sample in range(n_samples):
            # looked up inside the loop on purpose, see docstring
            moltypes_with_dos = moltypes_with_dos_dict[system['name']]
            moltypes_2pt, _ = calculate_entropy_2PTQ(moltypes_with_dos, volume, system['temperature'], sample,
                                                     norm_dos='temp', pascal_2010_var=True)
            row = (system['type']['name'], system['force-field']['name'], sample)
            for m, moltype in enumerate(moltypes_2pt):
                # debugging aid:
                # entropy_id = calc_s_trn_ideal_gas(moltype, system['temperature'], volume)
                # print(f"    moltype: {moltype['name']}")
                # print(f"      ideal gas entropy: {entropy_id*1000:.1f} J/mol/K")
                # print(f"      entropy: {moltype['entropy']*1000:.1f} J/mol/K")
                for column, key in columns_from_keys:
                    df_2pt.at[row, (m, column)] = moltype[key]

            moltypes_with_dos_dict[system['name']] = moltypes_2pt
            
calc_2pt()

In [None]:
df_2pt

In [None]:
# Mean over the samples for every (system type, force field), scaled by 1000.
# DataFrame.mean(level=...) no longer exists in pandas >= 2.0; the groupby
# form (sort=False keeps the original row order) is the equivalent.
df_2pt.groupby(level=(0, 1), sort=False).mean() * 1000

In [None]:
# Standard deviation over the samples for every (system type, force field),
# scaled by 1000.  DataFrame.std(level=...) no longer exists in
# pandas >= 2.0; the groupby form (sort=False keeps the original row order)
# is the equivalent.
df_2pt.groupby(level=(0, 1), sort=False).std() * 1000

In [None]:
# Force-field pairs (reference, target) whose entropy difference is evaluated.
diffs = (
    #('netz-co0.9tc', 'netz-co0.9'),
    ('netz-co0.9tc', 'iff-altern5-netz-co0.9-nopc'),
    ('eccr1-co1.2', 'iff-altern5-eccr1-co1.2-nopc'),
    #('netz-co0.9', 'iff-altern5-netz-co0.9-nopc'),
)
# Empty frames for the differences and their spread: one row per
# (system type, force-field pair), same columns as df_2pt.
df_2pt_diff = pd.DataFrame(index=pd.MultiIndex.from_product((system_types_2pt.keys(), diffs)), columns=df_2pt.columns)
df_2pt_diff_std = pd.DataFrame(index=pd.MultiIndex.from_product((system_types_2pt.keys(), diffs)), columns=df_2pt.columns)
# sort the column index in place
df_2pt_diff.sort_index(axis=1, inplace=True)
df_2pt_diff_std.sort_index(axis=1, inplace=True)
df_2pt_diff.head()

In [None]:
# Sample mean / standard deviation per (system type, force field).  They do
# not depend on the loop variables, so aggregate once instead of four times
# per iteration.
df_2pt_sample_mean = df_2pt.groupby(level=(0, 1)).mean()
df_2pt_sample_std = df_2pt.groupby(level=(0, 1)).std()
all_columns = (slice(None), slice(None))

for systype in system_types_2pt.keys():
    for diff in diffs:
        # difference of the means: target (diff[1]) minus reference (diff[0])
        df_2pt_diff.loc[(systype, diff), all_columns] = (
            -df_2pt_sample_mean.loc[(systype, diff[0]), all_columns]
            +df_2pt_sample_mean.loc[(systype, diff[1]), all_columns]
        )
        # spread of the difference: the two standard deviations are added linearly
        df_2pt_diff_std.loc[(systype, diff), all_columns] = (
            +df_2pt_sample_std.loc[(systype, diff[0]), all_columns]
            +df_2pt_sample_std.loc[(systype, diff[1]), all_columns]
        )

In [None]:
df_2pt_diff * 1000

In [None]:
df_2pt_diff_std * 1000

### compare entropies

In [None]:
# Weighted sum of the entropy differences of the three moltype columns:
# moltype 0 enters with a factor 100, moltypes 1 and 2 with a factor 1;
# the result is scaled by 1000.
# NOTE(review): the factor 100 is presumably the number of moltype-0
# molecules per ion pair - confirm against the system setup.
(
    100*df_2pt_diff.loc[(slice(None), slice(None)), (0, slice('S', 'S_trn'))][0]
    +df_2pt_diff.loc[(slice(None), slice(None)), (1, slice('S', 'S_trn'))][1]
    +df_2pt_diff.loc[(slice(None), slice(None)), (2, slice('S', 'S_trn'))][2]
)* 1000

In [None]:
# Same weighted sum as for the differences, applied to their spreads
# (factor 100 for moltype 0, factor 1 for moltypes 1 and 2, result scaled by 1000).
(
    100*df_2pt_diff_std.loc[(slice(None), slice(None)), (0, slice('S', 'S_trn'))][0]
    +df_2pt_diff_std.loc[(slice(None), slice(None)), (1, slice('S', 'S_trn'))][1]
    +df_2pt_diff_std.loc[(slice(None), slice(None)), (2, slice('S', 'S_trn'))][2]
)* 1000

In [None]:
# Single-value check: weighted total-entropy difference ('S' columns only)
# for one system type and one force-field pair, scaled by 1000.
stn = 'water-kcl'
diff = ('netz-co0.9tc', 'iff-altern5-netz-co0.9-nopc')
(
    +df_2pt_diff.at[(stn, diff), (0, 'S')] * 100
    +df_2pt_diff.at[(stn, diff), (1, 'S')]
    +df_2pt_diff.at[(stn, diff), (2, 'S')]
) * 1000

### plot entropies

In [None]:
# Reference solvation entropies, keyed by system type name.
# Each entry is a (value, error) tuple; the plotting code multiplies both by
# 1000 and labels the axis J/mol/K.
# from horinek 2009 who got it from marcus 1997
exp_solv_entropies = {
    'water-licl': (-0.217, 0.004),
    'water-nacl': (-0.186, 0.004),
    'water-kcl': (-0.149, 0.004),
    # directly from marcus Ca + 2 x Cl
    'water-cacl2_': (-0.252 + 2 * -0.075, 0.004),
}
# Simulation values from the literature, keyed by (system type name,
# force-field name); same (value, error) layout as above.
# from horinek 2009
lit_solv_entropies = {
    ('water-licl', 'netz-co0.9tc'): (-0.268, 0.012),
    ('water-licl', 'netz-co0.9'): (-0.268, 0.012),  # tc assumed to have small influence
    ('water-nacl', 'netz-co0.9tc'): (-0.217, 0.012),
    ('water-nacl', 'netz-co0.9'): (-0.217, 0.012),  # tc assumed to have small influence
    ('water-kcl', 'netz-co0.9tc'): (-0.182, 0.012),
    ('water-kcl', 'netz-co0.9'): (-0.182, 0.012),  # tc assumed to have small influence
}

In [None]:
def plot_entropies(show_2pt=True, show_ti=True, show_ecc=True, include_netz_error=True, base_on_netz_lit_value=False):
    """Grouped bar chart of solvation entropies per system type.

    One group of bars is drawn for every entry of ``system_types_2pt``
    except 'water-pure' and 'water-cacl2_'.  Within a group the bars are, in
    this order: the experimental value, the literature value of
    'netz-co0.9tc', optionally the literature value plus the 2PT difference
    (``show_2pt``), and optionally one or more values obtained from the TI
    table ``df_ti2`` (``show_ti``; the two ECC paths only if ``show_ecc`` is
    also true).  All heights and errors are multiplied by 1000.

    ``include_netz_error`` decides whether the literature error is added to
    the error bars of the derived values.  ``base_on_netz_lit_value`` decides
    whether a TI bar starts from the literature value or from the
    'ΔU-mean' entry of the '2-vdW-charge' row of the first path step.
    """
    rc_overrides = {
        'hatch.linewidth': 1.2,
        'legend.labelspacing': 0.1,
    }
    lit_error_weight = int(include_netz_error)

    def weighted_total_entropy(frame, row):
        # moltype 0 counts 100 times, moltypes 1 and 2 once
        return (
            +frame.at[row, (0, 'S')] * 100
            +frame.at[row, (1, 'S')]
            +frame.at[row, (2, 'S')]
        )

    def ti_mean_term(ion_system, ti_name):
        # (ΔU - ΔG + pΔV) / 300 of the '1-linear-direct' row
        row = (ion_system, ti_name, '1-linear-direct')
        return (
            +df_ti2.at[row, 'ΔU-mean']
            -df_ti2.at[row, 'ΔG-mean']
            +df_ti2.at[row, 'pΔV-mean']
        ) / 300

    def ti_std_term(ion_system, ti_name):
        # linear sum of the three spreads, divided by 300
        row = (ion_system, ti_name, '1-linear-direct')
        return (
            +df_ti2.at[row, 'ΔU-std']
            +df_ti2.at[row, 'ΔG-std']
            +df_ti2.at[row, 'pΔV-std']
        ) / 300

    # assemble the bars of one group, in drawing order
    netz_to_iff = {'ff-ref': 'netz-co0.9tc', 'ff': 'iff-altern5-netz-co0.9-nopc', 'ti-name': "netz-to-iff-netz"}
    netz_to_eccr1 = {'ff-ref': 'netz-co0.9', 'ff': 'eccr1-co0.9', 'ti-name': "netz-to-eccr1"}
    eccr1_to_iff = {'ff-ref': 'eccr1-co1.2', 'ff': 'iff-altern5-eccr1-co1.2-nopc', 'ti-name': "eccr1-to-iff-eccr1"}
    terms_to_show = [
        {'type': 'experimental'},
        {'type': 'literature', 'ff': 'netz-co0.9tc'},
    ]
    if show_2pt:
        terms_to_show.append({'type': 'difference-2pt', 'ff-ref': 'netz-co0.9tc', 'ff': 'iff-altern5-netz-co0.9-nopc'})
    if show_ti:
        terms_to_show.append({'type': 'difference-ti', 'path': (netz_to_iff,)})
        if show_ecc:
            terms_to_show.append({'type': 'difference-ti', 'path': (dict(netz_to_eccr1),)})
            terms_to_show.append({'type': 'difference-ti', 'path': (dict(netz_to_eccr1), eccr1_to_iff)})

    with plt.rc_context({**mpl_rc_global, **rc_overrides}):

        system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn not in ('water-pure', 'water-cacl2_')}
        fig, ax = plt.subplots(figsize=(3, 3), constrained_layout=True, dpi=200)
        x_base = np.arange(len(system_types_to_show))
        # bar width and the position of every bar relative to its group centre
        n_terms = len(terms_to_show)
        width = 0.9 / n_terms - 0.02
        bar_offsets = np.linspace(-0.45+width/2, 0.45-width/2, num=n_terms)

        for s, (stn, st) in enumerate(system_types_to_show.items()):
            print(stn)
            first_group = s == 0  # only the first group contributes legend labels
            for t, term_to_show in enumerate(terms_to_show):
                x = x_base[s] + bar_offsets[t]
                term_type = term_to_show['type']

                if term_type == 'experimental':
                    label = 'exp.' if first_group else None
                    value, error = exp_solv_entropies[stn][0], exp_solv_entropies[stn][1]
                    ax.bar(x=x, height=value * 1000, yerr=error * 1000, width=width, color='#444444', label=label)

                elif term_type == 'literature':
                    label = ff_short_names[term_to_show['ff']] if first_group else None
                    lit_key = (stn, term_to_show['ff'])
                    ax.bar(x=x,
                           height=lit_solv_entropies[lit_key][0] * 1000,
                           yerr=lit_solv_entropies[lit_key][1] * 1000,
                           width=width, color=ff_colors[term_to_show['ff']], label=label)

                elif term_type == 'difference-2pt':
                    # literature value of the reference plus the 2PT difference
                    label = ff_short_names[term_to_show['ff']] + ' 2PT' if first_group else None
                    row = (stn, (term_to_show['ff-ref'], term_to_show['ff']))
                    lit_key = (stn, term_to_show['ff-ref'])
                    y = lit_solv_entropies[lit_key][0] + weighted_total_entropy(df_2pt_diff, row)
                    y_err = lit_error_weight * lit_solv_entropies[lit_key][1] + weighted_total_entropy(df_2pt_diff_std, row)
                    # this term is only present when show_2pt is true
                    ax.bar(x, y * 1000, yerr=y_err * 1000, width=width, color=ff_colors[term_to_show['ff']], label=label,
                           hatch='///', alpha=0.999)

                elif term_type == 'difference-ti':
                    path = term_to_show['path']
                    label = ff_short_names[path[-1]['ff']] + ' TI' if first_group else None
                    assert st['n_cation_anion'] == (1, 1)
                    # TI rows are keyed by the single-ion system, e.g. 'water-licl' -> 'water-li'
                    cation_system = stn[:-2]
                    if base_on_netz_lit_value:
                        lit_key = (stn, path[0]['ff-ref'])
                        y = lit_solv_entropies[lit_key][0]
                        y_err = lit_error_weight * lit_solv_entropies[lit_key][1]
                    else:
                        step = path[0]  # Right?!
                        y = df_ti2.at[(cation_system, step['ti-name'], '2-vdW-charge'), 'ΔU-mean']
                        y_err = 0
                    for step in path:
                        # cation contribution plus anion contribution of this step
                        y += ti_mean_term(cation_system, step['ti-name']) + ti_mean_term('water-cl', step['ti-name'])
                        y_err += ti_std_term(cation_system, step['ti-name']) + ti_std_term('water-cl', step['ti-name'])
                    ax.bar(x, y * 1000, yerr=y_err * 1000, width=width, color=ff_colors[path[-1]['ff']], label=label, hatch='')

                else:
                    print(".. unknown term ..", term_to_show)

        ax.set_xlim(x_base[0] - 0.6, x_base[-1] + 0.6)
        ax.set_xticks(x_base)
        ax.set_xticklabels([sys_type_short_names[stn] for stn in system_types_to_show.keys()])
        ax.set_ylabel(r'$\Delta S_\mathrm{solv}$ in J/mol/K')
        # first move the tick labels to the top, then switch the tick marks off
        ax.xaxis.set_ticks_position('top')
        ax.xaxis.set_ticks_position('none')
        ax.legend(loc='upper center', frameon=True)
        #fig.savefig('../figures/solvation-entropies.pdf')
        plt.show()

plot_entropies(True, True, show_ecc=False, include_netz_error=True, base_on_netz_lit_value=True)
#plot_entropies(True, True, True, include_netz_error=True)

In [None]:
#!cp -a ../figures/solvation-entropies.pdf ~/research/output/ion-shortrange-paper/figures/

### toc plot entropies

In [None]:
def plot_entropies_toc():
    """Draw the small horizontal solvation-entropy chart of LiCl for the TOC graphic.

    For 'water-licl' the chart shows, from bottom to top, a bar derived from
    the literature value of 'netz-co0.9tc' plus the TI contributions of the
    'netz-to-iff-netz' step, a bar for the literature value itself, and a
    dotted vertical line at the experimental value.  All values are
    multiplied by 1000 before plotting, as in ``plot_entropies``, whose axis
    is labelled J/mol/K; the x label here uses the same unit.  The figure is
    written to ``../figures/toc-solvation-entropies.svg`` and shown.
    """
    mpl_rc_local = {
        'hatch.linewidth': 1.2,
        'legend.labelspacing': 0.1,
    }
    terms_to_show = [
        {'type': 'difference-ti', 'path': (
            {'ff-ref': 'netz-co0.9tc', 'ff': 'iff-altern5-netz-co0.9-nopc', 'ti-name': "netz-to-iff-netz"},
        )},
        {'type': 'literature', 'ff': 'netz-co0.9tc'},
        {'type': 'experimental'},
    ]

    def weighted_total_entropy(frame, row):
        # moltype 0 counts 100 times, moltypes 1 and 2 once
        return (
            +frame.at[row, (0, 'S')] * 100
            +frame.at[row, (1, 'S')]
            +frame.at[row, (2, 'S')]
        )

    def ti_mean_term(ion_system, ti_name):
        # (ΔU - ΔG + pΔV) / 300 of the '1-linear-direct' row
        row = (ion_system, ti_name, '1-linear-direct')
        return (
            +df_ti2.at[row, 'ΔU-mean']
            -df_ti2.at[row, 'ΔG-mean']
            +df_ti2.at[row, 'pΔV-mean']
        ) / 300

    def ti_std_term(ion_system, ti_name):
        # linear sum of the three spreads, divided by 300
        row = (ion_system, ti_name, '1-linear-direct')
        return (
            +df_ti2.at[row, 'ΔU-std']
            +df_ti2.at[row, 'ΔG-std']
            +df_ti2.at[row, 'pΔV-std']
        ) / 300

    with plt.rc_context({**mpl_rc_global, **mpl_rc_local}):

        system_types_to_show = {stn: st for stn, st in system_types_2pt.items() if stn in ('water-licl',)}
        fig, ax = plt.subplots(figsize=(1.0, 1.8), constrained_layout=True, dpi=200)
        x_base = np.arange(len(system_types_to_show))
        n_terms = len(terms_to_show)
        width = 0.75 / n_terms - 0.02
        bar_offsets = np.linspace(-0.45+width/2, 0.45-width/2, num=n_terms)

        for s, (stn, st) in enumerate(system_types_to_show.items()):
            for t, term_to_show in enumerate(terms_to_show):
                x = x_base[s] + bar_offsets[t]

                # experimental value as a vertical line instead of a bar
                if term_to_show['type'] == 'experimental':
                    ax.axvline(x=exp_solv_entropies[stn][0] * 1000, color='k', linestyle=':', linewidth=1.0)

                # plot literature
                elif term_to_show['type'] == 'literature':
                    label = ff_short_names[term_to_show['ff']] if s == 0 else None
                    lit_key = (stn, term_to_show['ff'])
                    ax.barh(y=x,
                            width=lit_solv_entropies[lit_key][0] * 1000,
                            xerr=lit_solv_entropies[lit_key][1] * 1000,
                            height=width, color=ff_colors[term_to_show['ff']], label=label)

                # plot 2pt difference (not part of terms_to_show at the moment)
                elif term_to_show['type'] == 'difference-2pt':
                    label = ff_short_names[term_to_show['ff']] + ' 2PT' if s == 0 else None
                    row = (stn, (term_to_show['ff-ref'], term_to_show['ff']))
                    lit_key = (stn, term_to_show['ff-ref'])
                    y = lit_solv_entropies[lit_key][0] + weighted_total_entropy(df_2pt_diff, row)
                    # The literature error is always included, like in the TI
                    # branch below; this function has no switch for it.
                    y_err = lit_solv_entropies[lit_key][1] + weighted_total_entropy(df_2pt_diff_std, row)
                    ax.barh(y=x, width=y * 1000, xerr=y_err * 1000, height=width, color=ff_colors[term_to_show['ff']], label=label,
                            hatch='///', alpha=0.999)

                # plot ti difference
                elif term_to_show['type'] == 'difference-ti':
                    path = term_to_show['path']
                    label = ff_short_names[path[-1]['ff']] + ' TI' if s == 0 else None
                    assert st['n_cation_anion'] == (1, 1)
                    # TI rows are keyed by the single-ion system, e.g. 'water-licl' -> 'water-li'
                    cation_system = stn[:-2]
                    lit_key = (stn, path[0]['ff-ref'])
                    y = lit_solv_entropies[lit_key][0]
                    y_err = lit_solv_entropies[lit_key][1]
                    for step in path:
                        # cation contribution plus anion contribution of this step
                        y += ti_mean_term(cation_system, step['ti-name']) + ti_mean_term('water-cl', step['ti-name'])
                        y_err += ti_std_term(cation_system, step['ti-name']) + ti_std_term('water-cl', step['ti-name'])
                    ax.barh(y=x, width=y * 1000, xerr=y_err * 1000, height=width, color=ff_colors[path[-1]['ff']], label=label, hatch='')
                else:
                    print(".. unknown term ..", term_to_show)

        # arrow head at the left end of the x axis
        ax.plot(0, 0, "<k", transform=ax.transAxes, clip_on=False, markersize=1.0)

        ax.set_yticks([])
        ax.spines['top'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['right'].set_visible(False)
        # plotted values are the stored entropies times 1000, i.e. J/mol/K
        ax.set_xlabel(r'$\Delta S_\mathrm{solv}$ in J/mol/K')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('none')
        #ax.legend(loc='upper center', frameon=True)
        fig.savefig('../figures/toc-solvation-entropies.svg', dpi=300)
        plt.show()

plot_entropies_toc()

In [None]:
!cp -a ../figures/toc-solvation-entropies.svg ~/research/output/ion-shortrange-paper/figures/

# TI

## define TI setups

In [None]:
def gen_systemtypes_ti():
    """Return the system types of the TI runs, keyed by name.

    Each value holds its own key under 'name', the (cation, anion) site
    tuples under 'ions' and the number of cations and anions under
    'n_cation_anion'. All types contain exactly one ion.
    """
    # every ion is its own atom type followed by the 'M' site
    # M might get removed later, if not needed!
    sites = {element: tuple(atomtypes[atom] for atom in (element, 'M'))
             for element in ('CA', 'K', 'LI', 'NA', 'CL')}

    # system type -> (cation element, number of cations, number of anions)
    # 'water-cl' carries the CA sites in its cation slot although it has
    # no cation: redundancy, but that's ok
    composition = {
        'water-li': ('LI', 1, 0),
        'water-na': ('NA', 1, 0),
        'water-k': ('K', 1, 0),
        'water-ca': ('CA', 1, 0),
        'water-cl': ('CA', 0, 1),
    }
    return {
        st_name: {
            'name': st_name,
            'ions': (sites[cation], sites['CL']),
            'n_cation_anion': (n_cations, n_anions),
        }
        for st_name, (cation, n_cations, n_anions) in composition.items()
    }

system_types_ti = gen_systemtypes_ti()
pd.DataFrame(system_types_ti).T

In [None]:
# system parameters passed to system_generator for every TI system
system_params_ti = {
    'n_water': 506,
    'n_salts': (1,),
    'n_salt_all_ff': (1,),
    'n_salt_all_analyis': (1,),
    'tags_all': [],
    'tags_all_analysis': [],
}
system_params_ti

In [None]:
# extra force fields for TI, keyed by their own 'name' entry
ti_extra_ff = {
    ff['name']: ff
    for ff in (
        {
            'name': 'LJ-12-6-iff-altern5-netz-co0.9-nopc-param',
            'tags': ['fit', 'dummy'],
            'cut-off': 0.9,  # no tail-corr
            'parametric-ff': 'fit-iff-netz',
            'cations': ('CA', 'K', 'LI', 'NA'),
        },
    )
}
# Force-field pairs (A -> B) for which TI is run. Every entry holds
#   'name':             its own key
#   'ff-pair':          [{nameA: ffA}, {nameB: ffB}], looked up in force_fields
#                       (a name missing there gives an empty dict)
#   'changing-atoms':   atom types that differ between A and B
#   'ti-setting-types': which 'type' of ti_settings entries to run for the pair
#   'tags':             'A-has-dummy' marks the pairs that start from the '-dummyions' force field
force_field_pairs_ti = {
    ffp_name: {
        'name': ffp_name,
        'ff-pair': [{ffn_: force_fields[ffn_]} if ffn_ in force_fields else {} for ffn_ in ff_names],
        'changing-atoms': set('LI NA K CA CL'.split()),
        'ti-setting-types': [ti_setting_type],
        'tags': list(tags),
        # other pair parameters
    }
    for ffp_name, ff_names, ti_setting_type, tags in (
        ('dummynetz-to-netz', ('netz-co0.9tc-dummyions', 'netz-co0.9tc'), 'charge', ('A-has-dummy',)),
        ('netz-to-iff-netz', ('netz-co0.9', 'iff-altern5-netz-co0.9-nopc'), 'no-charge', ()),
        ('dummyeccr1-to-eccr1', ('eccr1-co1.2-dummyions', 'eccr1-co1.2'), 'charge', ('A-has-dummy',)),
        ('eccr1-to-iff-eccr1', ('eccr1-co1.2', 'iff-altern5-eccr1-co1.2-nopc'), 'no-charge', ()),
        ('dummyopls-to-opls', ('opls-co0.9tc-dummyions', 'opls-co0.9tc'), 'charge', ('A-has-dummy',)),
        ('dummymadrid-to-madrid', ('madrid-co1.0tc-dummyions', 'madrid-co1.0tc'), 'charge', ('A-has-dummy',)),
    )
}
# Disabled pairs, kept for the record (where given, 'changing-atoms' was
# set('LI NA K CA CL'.split()) as above):
#   'netz-to-netz'             'netz-co0.9' -> 'netz-co0.9'      # worked out, resulted in zeros
#   'netz-to-netz-tab'         'netz-co0.9' -> 'netz-co0.9-tab'  # worked out, resulted in zeros
#   'netz-to-opls'             not meaningful since different combination rules
#   'netz-to-lj-iff-netz'      'netz-co0.9' -> 'LJ-12-6-iff-altern5-netz-co0.9-nopc-param',
#                              looked up in {**force_fields, **ti_extra_ff}
#   'lj-iff-netz-to-iff-netz'  'LJ-12-6-iff-altern5-netz-co0.9-nopc-param' -> 'iff-altern5-netz-co0.9-nopc',
#                              looked up in {**force_fields, **ti_extra_ff}
#   'netz-to-eccr1'            'netz-co0.9' -> 'eccr1-co0.9', 'ti-setting-types': ['no-charge']

In [None]:
def sys_pair_gen_ti(system_types_ti, force_field_pairs_ti, verbose=False):
    """Yield (ffp, systemA, systemB) for every system type / force-field pair.

    The outer loop runs over the system types, the inner one over the
    force-field pairs. systemA and systemB are the first systems that
    system_generator produces for the A and the B force field of the pair.
    With verbose=True the system type and pair names are printed on the way.
    """
    # the fitted parametric force fields are currently not merged in:
    #parametric_force_fields = {**PARAMETRIC_FORCE_FIELDS, **parametric_fit_force_fields}
    parametric_force_fields = PARAMETRIC_FORCE_FIELDS

    def first_system(st_name, system_type, force_field):
        return next(system_generator({st_name: system_type}, force_field, system_params=system_params_ti,
                                     parametric_force_fields=parametric_force_fields))

    for st_name, st in system_types_ti.items():
        if verbose:
            print(st_name)
        for ffp_name, ffp in force_field_pairs_ti.items():
            if verbose:
                print(ffp_name)
            ffA, ffB = ffp['ff-pair'][0], ffp['ff-pair'][1]
            st_copy = deepcopy(st)
            # keep the M virtual sites only if force field B is halftabulated
            if 'halftabulated' not in next(iter(ffB.values()))['tags']:
                st_copy['ions'] = tuple(ion[:1] for ion in st_copy['ions'])
            systemA = first_system(st_name, st_copy, ffA)
            systemB = first_system(st_name, st_copy, ffB)
            yield ffp, systemA, systemB

pd.DataFrame(sys_pair_gen_ti(system_types_ti, force_field_pairs_ti, verbose=True))

In [None]:
# TI protocols; a force-field pair runs those whose 'type' it lists in 'ti-setting-types'
ti_settings = {
    # one linear λ path, no soft-core
    '1-linear-direct': {
        'type': 'no-charge',
        'lambdas': np.linspace(0, 1, num=11),
        'sc-alpha': 0,
        'n-samples': 5,
        'nstdhdl': 100,
        'nsteps-prod': 200_000,
        'nsteps-prod-long': 200_000_000,
    },
    # 21 λ states: vdW goes 0 -> 1 over the first 11 states, Coulomb over the last 11
    '2-vdW-charge': {
        'type': 'charge',
        'lambdas': np.linspace(0, 1, num=21),
        'vdw-lambdas': np.hstack((np.linspace(0, 1, num=11), np.ones(10))),
        'coul-lambdas': np.hstack((np.zeros(10), np.linspace(0, 1, num=11))),
        'sc-alpha': 0.5,
        'n-samples': 5,
        'nstdhdl': 100,
        'nsteps-prod': 200_000,
        'nsteps-prod-long': 400_000_000,
    },
}
ti_settings

### test iterators

In [None]:
# single loop for st and ffp
# Dry run: walks over every (system type, force-field pair, TI setting)
# combination and prints its working directory and λ values. Nothing is written.
for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti):
    #print(systemA, systemB)
    # first path component of the system name
    sys_comp_name = systemA['name'].split('/')[0]
    ffp_name = ffp['name']
    # only the TI settings whose 'type' is listed in the pair's 'ti-setting-types'
    for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
        working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
        print(f"working_dir: {working_dir}")
        # all λ values on one line
        for l, lambda_ in enumerate(tis['lambdas']):
            print(f"{lambda_:.2f}", end=' ')
        print()

In [None]:
# separate loops if needed
# Same dry run as with the single loop, but with explicit loops over system
# types and force-field pairs; sys_pair_gen_ti is called with one-element dicts.
for st_name, st in system_types_ti.items():
    print(f"system-type {st_name}")
    for ffp_name, ffp in force_field_pairs_ti.items():
        print(f"  force-field-pair {ffp_name}")
        # one system type and one pair must give exactly one (ffp, systemA, systemB)
        assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
        # the generator is run a second time to actually get the systems
        ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
        # first path component of the system name
        sys_comp_name = systemA['name'].split('/')[0]
        # only the TI settings whose 'type' is listed in the pair's 'ti-setting-types'
        for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
            print(f"    ti-setting {tis_name}")
            working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
            print(f"    working_dir: {working_dir}")
            # all λ values on one line
            for l, lambda_ in enumerate(tis['lambdas']):
                print(f"{lambda_:.2f}", end=' ')
            print()

## preparation

### prepare halftabulated which are not inverse results

In [None]:
def prepare_ht():
    """Write table files for halftabulated force fields that come from parameters.

    For every force field in force_field_pairs_ti that is tagged
    'halftabulated' but neither 'inverse-result' nor 'fit', the potential and
    force of each of its 'tabulated-potentials' are generated from the
    parametric force field (without Coulomb) and saved as
    template/table/<ff name>/table_<type1>_<type2>.xvg. The potentials are
    also plotted.
    """
    show_potentials = True
    if show_potentials:
        fig, ax = plt.subplots(figsize=(4, 2))

    r = np.linspace(0, 3, 1501)
    print(r[1] - r[0])  # grid spacing of the tables
    zeros = np.zeros_like(r)

    for ffp_name, ffp in force_field_pairs_ti.items():
        print(f"  force-field-pair {ffp_name}")
        for ff_entry in ffp['ff-pair']:
            # each entry is a one-element dict {name: force field}
            ((ff_name, ff),) = ff_entry.items()
            tags = ff['tags']
            #print(tags)
            if 'halftabulated' not in tags or 'inverse-result' in tags or 'fit' in tags:
                continue
            print(f"    {ff_name}")
            pff = PARAMETRIC_FORCE_FIELDS[ff['parametric-ff']]
            for interaction in ff['tabulated-potentials']:
                print(interaction)
                at1 = next(at for at in pff['atomtypes'] if at['type'] == interaction[0])
                at2 = next(at for at in pff['atomtypes'] if at['type'] == interaction[1])
                pot, force = gen_potential_and_force(at1, at2, r, pff['combining-rule'], add_coulomb=False,
                                                     nonbond_params=pff['nonbond-params'])
                # seven columns: r, four columns of zeros, potential, force
                columns = (r, zeros, zeros, zeros, zeros, pot, force)
                table1 = f"template/table/{ff_name}/table_{at1['type']}_{at2['type']}.xvg"
                np.savetxt(table1, np.stack(columns).T)

                if show_potentials:
                    ax.plot(r, pot, label=str(interaction))

    if show_potentials:
        ax.set_xlim(0.15, 1)
        ax.set_ylim(-1, 1)
        ax.legend(loc='upper right')
        plt.show()

prepare_ht()

### functions

In [None]:
def save_ti_force_field_pair_as_top(filename, ffp, systemA, systemB, system_name):
    """Write the topology file (topol.top) of a TI from systemA to systemB.

    Can only do ions from one ff to another. Every atom type X that is in
    ffp['changing-atoms'] and present in the system is written as the two
    types X_A and X_B. If force field B is tagged 'halftabulated', the types
    M_A and M_B are written as well and the ion molecule type gets its second
    atom as a virtual site.

    Not checking all stuff. Taking a lot from systemA.
    One could write a lot of assert statements.

    filename:     path of the file to write
    ffp:          force-field pair, only 'changing-atoms' is read
    systemA/B:    systems, read are 'force-field', 'moltypes' and 'parametric-ff'
    system_name:  content of the [ system ] section
    """
    ffA, ffB = systemA['force-field'], systemB['force-field']
    moltypesA, moltypesB = systemA['moltypes'], systemB['moltypes']
    parametric_ffA, parametric_ffB = systemA['parametric-ff'], systemB['parametric-ff']
    mtA_atomnames = {at['name'] for mt in moltypesA for at in mt['atoms']}
    mtB_atomnames = {at['name'] for mt in moltypesB for at in mt['atoms']}
    assert mtA_atomnames == mtB_atomnames

    # the file is collected piece by piece and written in one go at the end
    out = []

    # --- line formatters ---
    def atomtype_line(name, mass, charge, ptype, sigma, epsilon):
        return f"{name:5s}  {mass:8.5f}  {charge:7.4f}  {ptype:4s}  {sigma:18.16f}  {epsilon:18.16f}\n"

    def nonbond_line(name_i, name_j, sigma, epsilon):
        return f"{name_i:4s}  {name_j:4s}  1     {sigma:20.18f}  {epsilon:20.18f}\n"

    def atom_line(a, at, mt_name, qA, qB):
        return (f"{a+1}    {at['name']+'_A':7s}  1       {mt_name:7s}      {at['name']:7s}  {a}      "
                f"{qA:.2f}     {at['mass']:.5f}  {at['name']+'_B':7s}   {qB:.2f}     {at['mass']:.5f}\n")

    def charge_of(parametric_ff, type_name):
        return next(atom['q'] for atom in parametric_ff['atomtypes'] if atom['type'] == type_name)

    # --- defaults: have to agree between A and B ---
    out.append("[ defaults ]\n")
    out.append(";nbfunc  comb-rule  gen-pairs  fudgeLJ  fudgeQQ\n")
    assert parametric_ffA['nbfunc'] == parametric_ffB['nbfunc']
    nbfunc = {'LJ': '1'}[parametric_ffA['nbfunc']]
    assert parametric_ffA['combining-rule'] == parametric_ffB['combining-rule']
    comb_rule = {'lorentz-berthelot': '2', 'geometric': '3'}[parametric_ffA['combining-rule']]
    shared = []
    for key in ('gen-pairs', 'fudgeLJ', 'fudgeQQ'):
        assert parametric_ffA[key] == parametric_ffB[key]
        shared.append(parametric_ffA[key])
    gen_pairs, fudgeLJ, fudgeQQ = shared
    out.append(f"{nbfunc}        {comb_rule}          {gen_pairs}         {fudgeLJ}      {fudgeQQ}\n")
    out.append("\n")

    # --- atomtypes ---
    mass_dict = {
        'OW': 15.9994,
        'HW': 1.008,
        'MW': 0.0,
        'CA': 40.08,
        'K': 39.0983,
        'LI': 6.941,
        'NA': 22.98977,
        'CL': 35.45300,
    }
    ptype_dict = {
        'OW': 'A',
        'HW': 'A',
        'MW': 'V',
        'CA': 'A',
        'K':  'A',
        'LI': 'A',
        'NA': 'A',
        'CL': 'A',
    }
    out.append("[ atomtypes ]\n")
    out.append(";name  mass      charge   ptype  sigma              epsilon\n")
    for at in parametric_ffA['atomtypes']:
        if at['type'] in ffp['changing-atoms'] and at['type'] in mtA_atomnames:
            if 'halftabulated' in ffB['tags']:
                # the ion type loses its ε in state B, the M type gains it
                out.append(atomtype_line(at['type'] + '_A', mass_dict[at['type']], at['q'],
                                         ptype_dict[at['type']], at['σ'], at['ε']))
                out.append(atomtype_line(at['type'] + '_B', mass_dict[at['type']], at['q'],
                                         ptype_dict[at['type']], at['σ'], 0.0))
                out.append(atomtype_line('M_A', 0.0, 0.0, 'V', at['σ'], 0.0))
                out.append(atomtype_line('M_B', 0.0, 0.0, 'V', at['σ'], at['ε']))
            else:
                # state A from force field A, state B from force field B
                out.append(atomtype_line(at['type'] + '_A', mass_dict[at['type']], at['q'],
                                         ptype_dict[at['type']], at['σ'], at['ε']))
                atB = next(atB for atB in parametric_ffB['atomtypes'] if atB['type'] == at['type'])
                out.append(atomtype_line(atB['type'] + '_B', mass_dict[atB['type']], atB['q'],
                                         ptype_dict[atB['type']], atB['σ'], atB['ε']))
        elif at['type'] in mtA_atomnames or at['type'] in {'HW'}:
            # unchanged type (HW is always written): A and B must agree on it
            atB = next(atB for atB in parametric_ffB['atomtypes'] if atB['type'] == at['type'])
            assert atB['σ'] == at['σ']
            assert atB['ε'] == at['ε']
            out.append(atomtype_line(at['type'], mass_dict[at['type']], at['q'],
                                     ptype_dict[at['type']], at['σ'], at['ε']))
        # any other atom type is not needed
    out.append("\n")

    # --- nonbond_params ---
    #assert parametric_ffA['nonbond-params'] == {}
    out.append("[ nonbond_params ]\n")
    out.append(";i    j     func  sigma                 epsilon\n")
    # tabulated pairs get the placeholder LJ parameters (1, 0.25),
    # explicit nonbond-params of the parametric force field override them
    nb_dict2A = dict.fromkeys((frozenset(pair) for pair in ffA.get('tabulated-potentials', [])), ('LJ', 1, 0.25))
    nb_dict2B = dict.fromkeys((frozenset(pair) for pair in ffB.get('tabulated-potentials', [])), ('LJ', 1, 0.25))
    nb_dict2A.update(parametric_ffA['nonbond-params'])
    nb_dict2B.update(parametric_ffB['nonbond-params'])
    for state, nb_dict2 in (('A', nb_dict2A), ('B', nb_dict2B)):
        for nb_set, nb in nb_dict2.items():
            at1, at2 = sorted(nb_set)
            if at1 not in mtA_atomnames or at2 not in mtA_atomnames:
                continue
            if nb[0] != 'LJ':
                raise Exception('not implemented')
            if state == 'A':
                # pair of force field A: on for X_A, ε = 0 for X_B
                assert at1 in ffp['changing-atoms']
                out.append(nonbond_line(at1 + '_A', at2, nb[1], nb[2]))
                out.append(nonbond_line(at1 + '_B', at2, nb[1], 0.0))
            elif 'halftabulated' in ffB['tags']:
                # pair of force field B: ε = 0 for M_A, on for M_B
                # (pairs of a B that is not halftabulated are not written)
                assert at1 in ffp['changing-atoms']
                out.append(nonbond_line('M_A', at2, nb[1], 0.0))
                out.append(nonbond_line('M_B', at2, nb[1], nb[2]))
    out.append("\n")

    # --- moleculetypes ---
    for mt in moltypesA:
        mt_name = mt['name']
        if mt_name == 'SOL':
            # water comes from the itp template of the (common) water model
            assert parametric_ffA['water-model'] == parametric_ffB['water-model']
            itp_file = os.path.join(template_dir, 'itp', parametric_ffA['water-model'] + '.itp')
            with open(itp_file, 'r') as f:
                out.append(f.read())
        else:
            out.append("[ moleculetype ]\n")
            out.append(";molname  nrexcl\n")
            out.append(f"{mt_name:8s}  1\n")
            out.append("\n")
            out.append("[ atoms ]\n")
            out.append(";id  at_type  res_nr  residu_name  at_name  cg_nr  chargeA  massA    at_typeB  chargeB  massB\n")
            n_atoms = len(mt['atoms'])
            if n_atoms == 2:
                # ion plus M site
                assert 'halftabulated' in ffB['tags']
                for a, at in enumerate(mt['atoms']):
                    if at['name'] == 'M':
                        qA = qB = 0.0
                    else:
                        qA = charge_of(parametric_ffA, at['name'])
                        qB = charge_of(parametric_ffB, at['name'])
                    out.append(atom_line(a, at, mt_name, qA, qB))
                out.append("\n")
                out.append("[ virtual_sitesn ]\n")
                out.append(";Site  funct  from\n")
                out.append("2      2      1\n")
                out.append("\n")
                out.append("[ exclusions ]\n")
                out.append(";excl  atom1  atom2\n")
                out.append("1      2      1\n")
            elif n_atoms == 1:
                # do not use dummy atom
                assert 'halftabulated' not in ffB['tags']
                for a, at in enumerate(mt['atoms'][0:1]):
                    qA = charge_of(parametric_ffA, at['name'])
                    qB = charge_of(parametric_ffB, at['name'])
                    out.append(atom_line(a, at, mt_name, qA, qB))
            else:
                raise Exception('not implemented for non-single atom moltypes')
        out.append("\n")

    # --- system and molecules ---
    out.append("[ system ]\n")
    out.append(f"{system_name}\n")
    out.append("\n")

    out.append("[ molecules ]\n")
    for mt in moltypesA:
        out.append(f"{mt['name']}  {mt['nmols']}\n")

    with open(filename, 'w') as f:
        f.write("".join(out))

### prepare files

In [None]:
def _configure_ti_run_mdps(ffp, systemA, systemB, nsteps_prod):
    """Set the run parameters shared by the λ windows and the long end-point runs.

    Has to be called from inside a run directory that contains
    equi1/, equi2/, equi3/ and prod/, each with a grompp.mdp.

    ffp:          force-field pair, only 'changing-atoms' is read
    systemA/B:    systems of the pair; temperature and cut-off are taken from systemA
    nsteps_prod:  number of steps of the production run
    """
    run_folders = ('equi1', 'equi2', 'equi3', 'prod')
    mdp_files = [folder + '/grompp.mdp' for folder in run_folders]
    halftabulated = 'halftabulated' in systemB['tags']

    # run lengths
    gt.mdp.set_parameter("equi1/grompp.mdp", 'nsteps', int(1e4))
    gt.mdp.set_parameter("equi2/grompp.mdp", 'nsteps', int(1e5))
    gt.mdp.set_parameter("equi3/grompp.mdp", 'nsteps', int(1e5))
    gt.mdp.set_parameter("prod/grompp.mdp", 'nsteps', nsteps_prod)

    # set temperature (ref-t in all runs but equi1)
    gt.mdp.set_parameter("equi2/grompp.mdp", 'gen-temp', systemA['temperature'])
    for mdp_file in mdp_files[1:]:
        gt.mdp.set_parameter(mdp_file, 'ref-t', systemA['temperature'])

    # set pressure
    gt.mdp.set_parameter("equi3/grompp.mdp", 'ref-p', 1.0)
    gt.mdp.set_parameter("prod/grompp.mdp", 'ref-p', 1.0)

    # set cutoff scheme, cutoffs and vdwtype
    cutoff_scheme = 'group' if halftabulated else 'Verlet'
    co = systemA['force-field']['cut-off']
    vdwtype = 'User' if halftabulated else 'Cut-off'
    for mdp_file in mdp_files:
        gt.mdp.set_parameter(mdp_file, 'cutoff-scheme', cutoff_scheme)
        for key in ('rlist', 'rcoulomb', 'rvdw'):
            gt.mdp.set_parameter(mdp_file, key, co)
        gt.mdp.set_parameter(mdp_file, 'vdwtype', vdwtype)
        # if cut-off = 1.2 nm, turn off the verlet buffer because the system is only slightly larger than 2.4 nm
        if (co == 1.2) and not halftabulated:
            gt.mdp.set_parameter(mdp_file, 'verlet-buffer-tolerance', -1)

    # tail correction: A and B have to agree
    tail_corr_A = 'tail-corr' in systemA['tags']
    tail_corr_B = 'tail-corr' in systemB['tags']
    assert tail_corr_A == tail_corr_B
    if tail_corr_A and tail_corr_B:
        for mdp_file in mdp_files:
            gt.mdp.set_parameter(mdp_file, 'DispCorr', 'EnerPres')

    # set energygrps(-table)
    if halftabulated:
        pairs = tuple(pair for pair in systemB['force-field'].get('tabulated-potentials', [])
                      if pair[0] in systemB['atomtypes']
                      and pair[1] in systemB['atomtypes'])
        # change to virtual site name
        pairs = tuple(('M_A' if pair[0] in ffp['changing-atoms'] else pair[0],
                       'M_A' if pair[1] in ffp['changing-atoms'] else pair[1])
                      for pair in pairs)
        energygrps = ' '.join(OrderedSet([pair[i] for pair in pairs for i in (0, 1)]))
        energygrp_table = '  '.join(f"{pair[0]} {pair[1]}" for pair in pairs)
    else:
        energygrps = ''
        energygrp_table = ''
    for mdp_file in mdp_files:
        gt.mdp.set_parameter(mdp_file, 'energygrps', energygrps)
        gt.mdp.set_parameter(mdp_file, 'energygrp-table', energygrp_table)


def prepare_ti():
    """Create folders, topologies, tables, index and mdp files of all TI runs.

    For every (system type, force-field pair, TI setting) combination the
    working directory ti/<system>/<pair>/<setting> gets
      - topol/ with topol.top (plus tables, table.xvg and index.ndx if B is halftabulated,
        plus topol-A.top and topol-B.top for pairs tagged 'A-has-dummy'),
      - common/ with the single-*.gro files,
      - one folder per λ with equi1, equi2, equi3 and prod,
      - the folders 0.00-long and 1.00-long for long runs at the end points.
    The conf.gro files linked into equi1 are not created here.
    """
    run_folders = ('equi1', 'equi2', 'equi3', 'prod')

    for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti):
        print(ffp['name'])
        sys_comp_name = systemA['name'].split('/')[0]
        ffp_name = ffp['name']
        for tis_name, tis in ti_settings.items():
            if tis['type'] not in ffp['ti-setting-types']:
                continue
            working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
            print(f"working_dir: {working_dir}")
            halftabulated = 'halftabulated' in systemB['tags']
            a_has_dummy = 'A-has-dummy' in ffp['tags']

            with WorkingDir(working_dir):

                # make dirs
                run_bash("rm -rf topol/*")
                run_bash("mkdir -p topol common")
                all_folders = [f"{lambda_:.2f}/{folder}" for lambda_ in tis['lambdas'] for folder in run_folders]
                all_folders += [f"{lambda_:.2f}-long/{folder}" for lambda_ in (0.0, 1.0) for folder in run_folders]
                for folder in all_folders:
                    run_bash(f"mkdir -p {folder}")

                # topol.top
                save_ti_force_field_pair_as_top('topol/topol.top', ffp, systemA, systemB, sys_comp_name)

                # single-*.gro
                # (this check reads the tags of the force field, the ones below those of the system)
                for mt in systemA['moltypes']:
                    name = mt.get('type', mt['name'])
                    if name in ffp['changing-atoms'] and 'halftabulated' in systemB['force-field']['tags']:
                        run_bash(f"cp {template_dir}/gro/single-{name}-ti.gro common/single-{mt['name']}.gro")
                    else:
                        run_bash(f"cp {template_dir}/gro/single-{name}.gro common/single-{mt['name']}.gro")

                # tables_{}_{}.xvg
                assert 'halftabulated' not in systemA['tags']
                if halftabulated:
                    tables = (f"table_{pair[0]}_{pair[1]}.xvg"
                              for pair in itertools.combinations_with_replacement(systemB['atomtypes-no-h'], 2)
                              if (pair[0], pair[1]) in systemB['force-field']['tabulated-potentials'])
                    for table in tables:
                        # the table acts on the virtual site, so the ion name is replaced by M_A
                        table_ti = table
                        for ion in ffp['changing-atoms']:
                            table_ti = table_ti.replace(ion, 'M_A')
                        run_bash(f"cp {template_dir}/table/{systemB['force-field']['name']}/{table} topol/{table_ti}")
                        for folder in all_folders:
                            run_bash(f"rm -f {folder}/{table_ti}")
                        for folder in all_folders:
                            run_bash(f"ln -sf ../../topol/{table_ti} {folder}/{table_ti}")
                if 'tabulated' in systemB['tags']:
                    raise Exception('not implemented')

                # table.xvg
                if halftabulated:
                    run_bash(f"cp {template_dir}/table/table6-12.xvg topol/table.xvg")
                    for folder in all_folders:
                        run_bash(f"rm -f {folder}/table.xvg")
                        run_bash(f"ln -sf ../../topol/table.xvg {folder}/table.xvg")

                # index.ndx, with the M site renamed to M_A
                if halftabulated:
                    moltypes_renamed = []
                    for mt in systemB['moltypes']:
                        mt_new = deepcopy(mt)
                        mt_new['atoms'] = tuple([{'mass': at['mass'],
                                                  'name': at['name'] + ('_A' if at['name'] in ('M',) else '')}
                                                 for at in mt_new['atoms']])
                        moltypes_renamed.append(mt_new)
                    top = gt.top.Topology()
                    top.load_simple_top(moltypes_renamed)
                    gt.top.generate_index_file(top, 'topol/index.ndx')
                    del top

                # grompp.mdp files
                for folder in all_folders:
                    run_type = folder.split('/')[-1]
                    if 'long' not in folder:  # not endpoints
                        run_bash(f"cp {template_dir}/mdp/ti-{run_type}.mdp {folder}/grompp.mdp")
                for folder in all_folders:
                    run_type = folder.split('/')[-1]
                    if 'long' in folder:  # endpoints long
                        run_bash(f"cp {template_dir}/mdp/ti-long-{run_type}.mdp {folder}/grompp.mdp")

            # λ windows
            for l, lambda_ in enumerate(tis['lambdas']):
                with WorkingDir(f"{working_dir}/{lambda_:.2f}"):
                    # conf.gro link
                    run_bash("ln -sf ../../common/conf.gro equi1/conf.gro")

                    # mdp settings shared with the long runs
                    _configure_ti_run_mdps(ffp, systemA, systemB, int(tis['nsteps-prod']))

                    # TI stuff
                    for folder in run_folders:
                        mdp_file = folder + '/grompp.mdp'
                        gt.mdp.set_parameter(mdp_file, 'init-lambda-state', str(l))
                        gt.mdp.set_parameter(mdp_file, 'fep-lambdas', ' '.join(f"{la:.2f}" for la in tis['lambdas']))
                        if 'vdw-lambdas' in tis:
                            gt.mdp.set_parameter(mdp_file, 'vdw-lambdas', ' '.join(f"{la:.2f}" for la in tis['vdw-lambdas']))
                        if 'coul-lambdas' in tis:
                            gt.mdp.set_parameter(mdp_file, 'coul-lambdas', ' '.join(f"{la:.2f}" for la in tis['coul-lambdas']))
                        gt.mdp.set_parameter(mdp_file, 'sc-alpha', str(tis['sc-alpha']))
                        gt.mdp.set_parameter(mdp_file, 'nstdhdl', str(tis['nstdhdl']))

            # long runs for 0.0 and 1.0

            # topol-{A,B}.top
            if a_has_dummy:  # only needed/implemented for dummy -> ion
                with WorkingDir(working_dir):
                    systemA_moltypes = deepcopy(systemA['moltypes'])
                    # state A is simulated without the (dummy) ion
                    systemA_moltypes = [mt for mt in systemA_moltypes
                                        if not any(atom for atom in mt['atoms'] if atom['name'] in ffp['changing-atoms'])]
                    if len(systemA_moltypes) != len(systemA['moltypes']):
                        print(".. removed ion in topol-A.top ..")
                    save_parametric_force_field_as_top('topol/topol-A.top', systemA['force-field'], systemA['name'], systemA_moltypes)
                    save_parametric_force_field_as_top('topol/topol-B.top', systemB['force-field'], systemB['name'], systemB['moltypes'])

            for lambda_ in (tis['lambdas'][0], tis['lambdas'][-1]):
                with WorkingDir(f"{working_dir}/{lambda_:.2f}-long"):
                    # the end point that is run without the ion
                    without_ion = a_has_dummy and (lambda_ == 0.0)

                    # conf.gro link
                    if without_ion:
                        run_bash("ln -sf ../../common/conf-noion.gro equi1/conf.gro")
                    else:
                        run_bash("ln -sf ../../common/conf.gro equi1/conf.gro")

                    # mdp settings shared with the λ windows; the run without ion gets 1/5 of the steps
                    n_steps_prod_long = int(tis['nsteps-prod-long'] * (1/5 if without_ion else 1))
                    _configure_ti_run_mdps(ffp, systemA, systemB, n_steps_prod_long)

prepare_ti()

### fill box

In [None]:
def fill_boxes_ti():
    """Create the start configuration ``common/conf.gro`` for every TI working directory.

    For each (force-field pair, system A, system B) combination and each TI
    setting whose type is listed in the pair's 'ti-setting-types', a cubic box
    with edge ``systemA['volume-init']**(1/3)`` is filled: water via
    ``gmx solvate``, every other molecule type via ``gmx insert-molecules``.

    A directory is skipped when ``common/conf.gro`` already has the wanted
    number of atoms and the wanted box, unless the pair is tagged
    'A-has-dummy' and ``common/conf-noion.gro`` is missing (then everything is
    regenerated).

    Raises:
        Exception: if the final ``conf.gro`` does not contain the wanted number of atoms.
    """
    for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti):
        print(ffp['name'])
        sys_comp_name = systemA['name'].split('/')[0]
        ffp_name = ffp['name']
        for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
            working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
            print(f"working_dir: {working_dir}")
            with WorkingDir(working_dir):
                box_edge = systemA['volume-init']**(1/3)

                # check existing conf.gro; any failure to read it counts as "nothing there yet"
                # (Exception instead of a bare except, so Ctrl-C still interrupts the loop)
                try:
                    n_atoms_inserted = gt.gro.get_natoms("common/conf.gro")
                    box = gt.gro.get_box("common/conf.gro")
                except Exception:
                    n_atoms_inserted = 0
                    box = [0, 0, 0]
                n_atoms_wanted = gt.moltypes.get_natoms(systemA['moltypes'])
                if n_atoms_inserted == n_atoms_wanted and np.allclose(box, [box_edge]*3):
                    if ('A-has-dummy' in ffp['tags']) and not os.path.exists('common/conf-noion.gro'):
                        print('.. conf.gro with correct number of atoms and box existing, but conf-noion.gro, missing ..')
                    else:
                        print('.. conf.gro with correct number of atoms and box existing ..')
                        continue

                # empty box with volume
                empty_gro = f"system\n0\n{box_edge} {box_edge} {box_edge}"
                with open("common/conf.gro", 'w') as f:
                    f.write(empty_gro)

                # insert water
                n_water = sum(moltype['nmols'] for moltype in systemA['moltypes'] if moltype['name'] == 'SOL')
                # default None: a system without any SOL moltype must not raise StopIteration here
                water_type = next((moltype['type'] for moltype in systemA['moltypes'] if moltype['name'] == 'SOL'), None)
                if n_water > 0:
                    gro_file = {'water-spce': 'spc216.gro', 'water-tip4p2005': 'tip4p.gro'}[water_type]
                    run_bash(f"gmx solvate -cs {gro_file} -box {box_edge} {box_edge} {box_edge} -maxsol {n_water} -scale 0.5 -o common/conf.gro")
                    # raw string: the backslash must reach the shell to escape '#'
                    run_bash(r"rm -f common/\#conf.gro.*")

                # save conf-noion.gro (configuration before the non-water molecules are inserted)
                if 'A-has-dummy' in ffp['tags']:
                    run_bash("cp common/conf.gro common/conf-noion.gro")

                # insert other molecules
                for moltype in (moltype for moltype in systemA['moltypes'] if moltype['name'] != 'SOL'):
                    n_mols = moltype['nmols']
                    mt_name = moltype['name']
                    run_bash(f"gmx insert-molecules -f common/conf.gro -o common/conf.gro -ci common/single-{mt_name}.gro -nmol {n_mols} -try 100 -scale 0.65")
                    run_bash(r"rm -f common/\#conf.gro.*")

                # check
                n_atoms_inserted = gt.gro.get_natoms("common/conf.gro")
                if n_atoms_inserted != n_atoms_wanted:
                    print(n_atoms_inserted, n_atoms_wanted)
                    raise Exception("not enough molecules inserted")
fill_boxes_ti()

## run on cluster

### run intermediate Hamiltonian MD

In [None]:
def run_ti():
    """Submit the intermediate-Hamiltonian TI simulations as SLURM array jobs.

    For every selected force-field pair and every TI setting whose type is
    listed in the pair's 'ti-setting-types', the local input files are pushed
    to ``remote_host`` and one array job is submitted (one array task per λ
    value). Each task runs equi1 -> equi2 -> equi3 -> prod and writes
    ``rdf-o-ion.xvg`` in ``prod``; stages with an existing ``confout.gro``
    are not rerun.

    Settings for which ``dhdl.xvg`` and ``rdf-o-ion.xvg`` already exist
    locally for all λ values are skipped. Ids of submitted jobs are appended
    to the notebook-wide ``jobids`` list.
    """
    force_field_pairs_ti_to_do = {key: force_field_pairs_ti[key] for key in [
        #'dummyopls-to-opls',
        'dummymadrid-to-madrid',
    ]}

    for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti_to_do):
        print(ffp['name'])
        sys_comp_name = systemA['name'].split('/')[0]
        ffp_name = ffp['name']
        for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
            working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
            # one array task per lambda value
            array_string = f"0-{len(tis['lambdas'])-1}"
            remote_dir_base_ti, remote_header_ti, remote_footer_ti = gen_remote_stuff(remote_host, 'enzo', votca=False, array=array_string, ntasks=8)
            remote_dir = os.path.join(remote_dir_base_ti, working_dir)
            print(f"working_dir: {working_dir}")
            with WorkingDir(working_dir):
                # check if already done
                md_done = all(os.path.isfile(f"{lambda_:.2f}/prod/dhdl.xvg") for lambda_ in tis['lambdas'])
                rdf_done = all(os.path.isfile(f"{lambda_:.2f}/prod/rdf-o-ion.xvg") for lambda_ in tis['lambdas'])
                if md_done and rdf_done:
                    print('..all results present locally..')
                    continue

                # mkdir
                run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

                # delete old topology and table files
                run_bash(f"ssh {remote_host} rm -rf {remote_dir}/topol")

                # copy simulation files to remote
                filelist = "topol common/conf.gro ?.??/equi1/conf.gro ?.??/*/grompp.mdp".split()
                assert 'halftabulated' not in systemA['tags']
                if 'halftabulated' in systemB['tags']:
                    filelist.append("?.??/*/table*")
                gt.remote.push_files(filelist, remote_host, remote_dir, exclude="traj*")

                # strings for the bash script
                lambdas_string = " ".join(f"{lambda_:.2f}" for lambda_ in tis['lambdas'])
                tabulated_string = ""
                maxwarn = 1
                if 'halftabulated' in systemB['tags']:
                    # tabulated runs pass the index file to grompp and tolerate one more warning
                    tabulated_string = "-n ../../topol/index.ndx"
                    maxwarn += 1
                maxwarn_string = f"-maxwarn {maxwarn}"

                # commands to be run on compute nodes
                script = remote_header_ti + rf"""
# gromacs decides for small systems to have less
NT_ARG="-nt $SLURM_JOB_CPUS_PER_NODE"
#NT_ARG=""
# bug in gromacs with sd and gpu https://gitlab.com/gromacs/gromacs/-/issues/3473
#NB_ARG="-notunepme"
NB_ARG=""

lambdas=({lambdas_string})

pushd ${{lambdas[${{SLURM_ARRAY_TASK_ID}}]}}
    pushd equi1
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol.top {tabulated_string} {maxwarn_string}
            gmx mdrun $NT_ARG $NB_ARG
        fi
        rm -f \#*
    popd

    pushd equi2
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol.top {tabulated_string} {maxwarn_string} -c ../equi1/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        fi
        rm -f \#*
    popd

    pushd equi3
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol.top {tabulated_string} {maxwarn_string} -c ../equi2/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        fi
    popd

    pushd prod
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol.top {tabulated_string} {maxwarn_string} -c ../equi3/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        fi
    
        if [[ ! -f rdf-o-ion.xvg ]]; then
            gmx rdf -f traj_comp.xtc -s topol.tpr -ref 'name OW' -sel '3' -o rdf-o-ion.xvg
        fi
    popd
popd
""" + remote_footer_ti

                jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False)
                print(jobid)
                if jobid is not None:
                    jobids.append(jobid)
run_ti()

### run long endpoint MD

In [None]:
def run_long():
    """Submit the long MD runs at the two TI endpoints, one SLURM job per endpoint.

    For every selected force-field pair and matching TI setting the input in
    the ``0.00-long`` and ``1.00-long`` directories is pushed to
    ``remote_host``. λ=0 is run with ``topol-A.top`` and λ=1 with
    ``topol-B.top``. Each job runs equi1 -> equi2 -> equi3 -> prod; if
    ``prod`` has a checkpoint but no ``confout.gro``, the run is continued
    after setting the step count of ``topol.tpr`` to the ``nsteps`` value of
    the local ``prod/grompp.mdp``.

    Ids of submitted jobs are appended to the notebook-wide ``jobids`` list.
    """
    force_field_pairs_ti_to_do = {key: force_field_pairs_ti[key] for key in [
        'dummynetz-to-netz',
        'dummyeccr1-to-eccr1',
    ]}

    for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti_to_do):
        print(ffp['name'])
        sys_comp_name = systemA['name'].split('/')[0]
        ffp_name = ffp['name']
        for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
            working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}/"
            remote_dir_base_ti, remote_header_ti, remote_footer_ti = gen_remote_stuff(remote_host, 'enzogpu', votca=False, ntasks=10, gres='gpu:1')
            #remote_dir_base_ti, remote_header_ti, remote_footer_ti = gen_remote_stuff(remote_host, 'mammut-b', votca=False, ntasks=16)
            remote_dir = os.path.join(remote_dir_base_ti, working_dir)
            print(f"working_dir: {working_dir}")
            with WorkingDir(working_dir):
                # check if already done
                endpoints = (0.0, 1.0)
                endpoints_AB = ('A', 'B')
                md_done = all(os.path.isfile(f"{lambda_:.2f}-long/prod/ener.edr") for lambda_ in endpoints)
                if md_done:
                    print('..all results present locally..')
                    # NOTE(review): skipping is disabled here, presumably so that existing
                    # runs can be resubmitted/extended -- confirm before re-enabling
                    #continue

                # mkdir
                run_bash(f"ssh {remote_host} mkdir -p {remote_dir}")

                # delete old topology and table files
                run_bash(f"ssh {remote_host} rm -rf {remote_dir}/topol")

                # copy simulation files to remote
                filelist = "topol common/conf*.gro ?.??-long/equi1/conf.gro ?.??-long/*/grompp.mdp".split()
                assert 'halftabulated' not in systemA['tags']
                if 'halftabulated' in systemB['tags']:
                    filelist.append("?.??-long/*/table*")
                gt.remote.push_files(filelist, remote_host, remote_dir, exclude="traj*")

                for lambda_, letter in zip(endpoints, endpoints_AB):
                    print(lambda_, letter)
                    # strings for the bash script
                    tabulated_string = ""
                    maxwarn = 1
                    # step count used when an interrupted production run is continued
                    nsteps = gt.mdp.get_parameter(f"{lambda_:.2f}-long/prod/grompp.mdp", 'nsteps')
                    if 'halftabulated' in systemB['tags']:
                        # tabulated runs pass the index file to grompp and tolerate one more warning
                        tabulated_string = "-n ../../topol/index.ndx"
                        maxwarn += 1
                    maxwarn_string = f"-maxwarn {maxwarn}"

                    # commands to be run on compute nodes
                    script = remote_header_ti + rf"""
# gromacs decides for small systems to have less
NT_ARG="-nt $SLURM_NTASKS -pin on"
#NT_ARG="-nt $((SLURM_NTASKS/2)) -pin on -pinstride 2"
#NT_ARG=""
# bug in gromacs with sd and gpu https://gitlab.com/gromacs/gromacs/-/issues/3473
#NB_ARG="-notunepme"
NB_ARG="-update gpu"
NB_ARG=""

pushd {lambda_:.2f}-long
    pushd equi1
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol-{letter}.top {tabulated_string} {maxwarn_string}
            gmx mdrun $NT_ARG
        fi
        rm -f \#*
    popd

    pushd equi2
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol-{letter}.top {tabulated_string} {maxwarn_string} -c ../equi1/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        fi
        rm -f \#*
    popd

    pushd equi3
        if [[ ! -f confout.gro ]]; then
            gmx grompp -p ../../topol/topol-{letter}.top {tabulated_string} {maxwarn_string} -c ../equi2/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        fi
    popd

    pushd prod
        if [[ ( ! -f confout.gro ) && -f state.cpt ]]; then
            gmx convert-tpr -s topol.tpr -o topol.tpr -nsteps {nsteps}
            gmx mdrun -cpi state $NT_ARG $NB_ARG
        elif [[ ( ! -f confout.gro ) && ( ! -f state.cpt) ]]; then
            gmx grompp -p ../../topol/topol-{letter}.top {tabulated_string} {maxwarn_string} -c ../equi3/confout.gro
            gmx mdrun $NT_ARG $NB_ARG
        elif [[ -f confout.gro ]]; then
            echo "md done"
        else
            echo "Weird state. This should never happen."
            exit 1
        fi
    popd
popd
""" + remote_footer_ti

                    # separate sbatch file per endpoint so the two submissions do not overwrite each other
                    jobid = gt.remote.run_slurm_script(script, remote_host, remote_dir, dry_run=False, sbatch_name=f"sbatch-{lambda_:.2f}.sh")
                    print(jobid)
                    if jobid is not None:
                        jobids.append(jobid)
run_long()

### check job status

In [None]:
# Refresh the notebook-wide job list with the result of the status check.
# NOTE: the original author suspects this does not work properly for SLURM array jobs (unverified).
jobids = check_job_stati(jobids, remote_host)

### copy results from cluster

In [None]:
def copy_from_cluster():
    """Pull the TI result files of every working directory from the remote host.

    A failing transfer (``subprocess.CalledProcessError``) is reported and
    the loop goes on with the next working directory.
    """
    for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti):
        composition = systemA['name'].split('/')[0]
        pair_name = ffp['name']
        matching_settings = {
            name: setting
            for name, setting in ti_settings.items()
            if setting['type'] in ffp['ti-setting-types']
        }
        for tis_name, tis in matching_settings.items():
            working_dir = f"ti/{composition}/{pair_name}/{tis_name}"
            print(f"working_dir: {working_dir}")
            last_task = len(tis['lambdas']) - 1
            remote_stuff = gen_remote_stuff(remote_host, 'enzo', votca=False, array=f"0-{last_task}")
            # only the remote base directory is needed here; header and footer are ignored
            remote_dir = os.path.join(remote_stuff[0], working_dir)
            with WorkingDir(working_dir):
                wanted = [
                    "*/equi3/ener.edr",
                    "*/prod/dhdl.xvg",
                    "*/prod/rdf-o-ion.xvg",
                    "{0.00,1.00}{-long,}/prod/ener.edr",
                ]
                try:
                    gt.remote.pull_files(wanted, remote_host, remote_dir)
                except subprocess.CalledProcessError:
                    print('..rsync failed..')
copy_from_cluster()

## evaluation

### equilibration check

In [None]:
def equi_check_ti():
    """Run ``check_equi`` on the volume of the equi3 stage for every TI window.

    Windows for which the check raises (e.g. because no ``equi3/ener.edr`` is
    available locally) are skipped without a message.
    """
    for ffp, systemA, systemB in sys_pair_gen_ti(system_types_ti, force_field_pairs_ti):
        #print(systemA, systemB)
        sys_comp_name = systemA['name'].split('/')[0]
        ffp_name = ffp['name']
        for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
            for lambda_ in tis['lambdas']:
                working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}/{lambda_:.2f}"
                print(f"working_dir: {working_dir}")
                with WorkingDir(working_dir):
                    try:
                        check_equi(["Volume"], edr_file="equi3/ener.edr", safe_factor=2.0)
                    except Exception:
                        # deliberate best effort: a window without data must not stop the loop.
                        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
                        pass
equi_check_ti()

### create dataframes

In [None]:
# put data in DataFrame
# rows: one per (system type, force-field pair, TI setting, block sample, λ),
# plus one 'total' row per sample that later receives the integrated quantities
index = pd.MultiIndex.from_tuples([
    (st_name, ffp_name, tis_name, sample, lambda_)
    for st_name in system_types_ti
    for ffp_name, ffp in force_field_pairs_ti.items()
    for tis_name, tis in ti_settings.items()
    if tis['type'] in ffp['ti-setting-types']
    for sample in range(tis['n-samples'])  # block averaging
    for lambda_ in [*tis['lambdas'], 'total']
])
columns = ['ΔG', 'pot', 'ΔU', 'pV', 'pΔV', 'dhdl', 'dhdl-vdw', 'dhdl-coul', 'ΔG-vdw', 'ΔG-coul']
df_ti = pd.DataFrame(index=index, columns=columns, dtype=float)
df_ti = df_ti.sort_index()
df_ti.head()

In [None]:
# summary frame: one row per (system type, force-field pair, TI setting)
index = pd.MultiIndex.from_tuples([
    (st_name, ffp_name, tis_name)
    for st_name in system_types_ti
    for ffp_name, ffp in force_field_pairs_ti.items()
    for tis_name, tis in ti_settings.items()
    if tis['type'] in ffp['ti-setting-types']
])
columns = ['ΔG-mean', 'ΔG-std', 'ΔU-mean', 'ΔU-std', 'pΔV-mean', 'pΔV-std']
df_ti2 = pd.DataFrame(index=index, columns=columns, dtype=float)
df_ti2 = df_ti2.sort_index(axis=0)
df_ti2.head()

### fill dataframe, integrate ΔG, and calc pΔV

In [None]:
def fill_df(plot_dhdl=False):
    """Fill ``df_ti`` with block-averaged dH/dλ and pV, integrate ΔG and compute pΔV.

    For every system type, force-field pair and matching TI setting:

    1. each ``<λ>/prod/dhdl.xvg`` is split into ``tis['n-samples']`` equally
       long consecutive blocks; the block means of pV and dH/dλ (and of the
       separate vdw/coul dH/dλ columns when the setting defines
       'vdw-lambdas' and 'coul-lambdas') are written to ``df_ti``
    2. dH/dλ is integrated over λ with the trapezoidal rule per block sample
       and stored in the 'total' row (for split settings the vdw part is
       integrated up to the first point with vdw-λ == 1 and the coul part
       from that point on)
    3. pΔV = pV(λ=1) - pV(λ=0) is stored in the 'total' row

    Settings without a local ``0.00/prod/dhdl.xvg`` are reported and skipped.

    Args:
        plot_dhdl: if True, show one figure per TI setting with the dH/dλ
            curves of all block samples.
    """
    for st_name, st in system_types_ti.items():
        print(f"system-type {st_name}")
        for ffp_name, ffp in force_field_pairs_ti.items():
            print(f"  force-field-pair {ffp_name}")
            assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
            ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            sys_comp_name = systemA['name'].split('/')[0]
            for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
                working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
                n_samples = tis['n-samples']
                split_lambdas = ('vdw-lambdas' in tis) and ('coul-lambdas' in tis)
                if split_lambdas:
                    lambda_sets = list(zip(tis['lambdas'], tis['vdw-lambdas'], tis['coul-lambdas']))
                else:
                    lambda_sets = [(lambda_, None, None) for lambda_ in tis['lambdas']]

                # fill df
                with WorkingDir(working_dir):
                    if not os.path.exists(f"{0.0:.2f}/prod/dhdl.xvg"):
                        print(".. no data ..")
                        continue
                    for lambda_, lambda_vdw, lambda_coul in lambda_sets:
                        data, _ = gt.xvg.load(f"{lambda_:.2f}/prod/dhdl.xvg")
                        block_len = len(data) // n_samples
                        for sample in range(n_samples):
                            block_data = data.iloc[block_len*sample:block_len*(sample+1)]
                            # compute all means first so a missing column leaves the row untouched
                            means = {
                                'pV': block_data['pV (kJ/mol)'].mean(),
                                'dhdl': block_data[f'dH/dλ fep-lambda = {lambda_:.4f}'].mean(),
                            }
                            if split_lambdas:
                                means['dhdl-vdw'] = block_data[f'dH/dλ vdw-lambda = {lambda_vdw:.4f}'].mean()
                                means['dhdl-coul'] = block_data[f'dH/dλ coul-lambda = {lambda_coul:.4f}'].mean()
                            row = (st_name, ffp_name, tis_name, sample, lambda_)
                            for column, value in means.items():
                                df_ti.at[row, column] = value

                # the figure is created only once data is known to exist,
                # so settings without data do not leave empty figures behind
                if plot_dhdl:
                    fig, ax = plt.subplots()

                # integrate dhdl
                # one sorted copy per TI setting is enough: the loop below only reads the
                # dhdl columns of the λ rows and only writes to the 'total' rows
                df_sorted = df_ti.sort_index()
                for sample in range(n_samples):
                    lambda_rows = (st_name, ffp_name, tis_name, sample, slice(0.0, 1.0))
                    total_row = (st_name, ffp_name, tis_name, sample, 'total')
                    if split_lambdas:
                        # asarray so that plain lists work as well as arrays
                        lambdas_vdw = np.asarray(tis['vdw-lambdas'])
                        lambdas_coul = np.asarray(tis['coul-lambdas'])
                        # index where lamda_vdw == 1 and lambda_coul == 0
                        index_sep = (lambdas_vdw == 1.0).nonzero()[0][0]
                        dhdl_vdw = df_sorted.loc[lambda_rows, 'dhdl-vdw']
                        dhdl_coul = df_sorted.loc[lambda_rows, 'dhdl-coul']
                        x_vdw = lambdas_vdw[:index_sep+1]
                        y_vdw = dhdl_vdw[:index_sep+1]
                        x_coul = lambdas_coul[index_sep:]
                        y_coul = dhdl_coul[index_sep:]
                        ΔG_vdw = np.trapz(x=x_vdw, y=y_vdw)
                        ΔG_coul = np.trapz(x=x_coul, y=y_coul)
                        df_ti.at[total_row, 'ΔG-vdw'] = ΔG_vdw
                        df_ti.at[total_row, 'ΔG-coul'] = ΔG_coul
                        df_ti.at[total_row, 'ΔG'] = ΔG_vdw + ΔG_coul
                        if plot_dhdl:
                            if sample == 0:
                                line_vdw, = ax.plot(x_vdw, y_vdw, label=ffp_name + ' vdw', linewidth=0.5)
                                line_coul, = ax.plot(x_coul, y_coul, label=ffp_name + ' coul', linewidth=0.5)
                            else:
                                # later samples reuse the colors and carry no label,
                                # so every curve family appears once in the legend
                                ax.plot(x_vdw, y_vdw, linewidth=0.5, color=line_vdw.get_color())
                                ax.plot(x_coul, y_coul, linewidth=0.5, color=line_coul.get_color())
                    else:
                        dhdl = df_sorted.loc[lambda_rows, 'dhdl']
                        lambdas = dhdl.index.get_level_values(4)
                        ΔG = np.trapz(x=lambdas, y=dhdl)
                        df_ti.at[total_row, 'ΔG'] = ΔG
                        if plot_dhdl:
                            if sample == 0:
                                line, = ax.plot(lambdas, dhdl, label=ffp_name, linewidth=0.5)
                            else:
                                line, = ax.plot(lambdas, dhdl, linewidth=0.5, color=line.get_color())

                # difference pV
                for sample in range(n_samples):
                    pV_0 = df_ti.at[(st_name, ffp_name, tis_name, sample, 0.0), 'pV']
                    pV_1 = df_ti.at[(st_name, ffp_name, tis_name, sample, 1.0), 'pV']
                    df_ti.at[(st_name, ffp_name, tis_name, sample, 'total'), 'pΔV'] = pV_1 - pV_0

                if plot_dhdl:
                    ax.set_title(st_name)
                    fig.legend()
                    plt.show()
fill_df(plot_dhdl=False)

### fill dataframe with ΔU

In [None]:
# directly fills df_ti2
def fill_df_U(use_total_energy=True):
    """Compute ΔU = U_B - U_A from the long endpoint runs and store it in ``df_ti2``.

    Works in three passes over the system types / force-field pairs:

    1. ``gmx energy`` is run on ``<λ>-long/prod/ener.edr`` for the first and
       last λ of every TI setting. The two numbers following the energy term
       name in its output (called mean and std here) are parked temporarily
       in the 'pot' column of ``df_ti`` (sample 0 = mean, sample 1 = std).
    2. For pairs tagged 'A-has-dummy' the λ=0 values are replaced by the
       average (and sample standard deviation) over all system types and
       matching TI settings.
    3. ``ΔU-mean`` and ``ΔU-std`` are written to ``df_ti2`` and the temporary
       'pot' entries are reset to NaN.

    Args:
        use_total_energy: read the 'Total-Energy' term (True, U = TE) or the
            'Potential' term (False).
    """
    # term name as selected on stdin / as printed at the start of the output line
    key = 'Total-Energy' if use_total_energy else 'Potential'
    key2 = 'Total Energy' if use_total_energy else 'Potential'
    # 'Total Energy' splits into two words, which shifts the number columns by one
    mean_std_slice = slice(2, 4) if use_total_energy else slice(1, 3)

    # get U_A and U_B from gmx energy and put in sample=0 and sample=1 temporarily
    for st_name, st in system_types_ti.items():
        print(f"system-type {st_name}")
        for ffp_name, ffp in force_field_pairs_ti.items():
            assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
            ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            sys_comp_name = systemA['name'].split('/')[0]
            for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
                working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
                # fill df
                with WorkingDir(working_dir):
                    for lambda_ in (tis['lambdas'][0], tis['lambdas'][-1]):
                        try:
                            stdout = run_bash(f"gmx energy -f {lambda_:.2f}-long/prod/ener.edr -o /tmp/pot.xvg <<< '{key}'", print_on_error=False)
                            # raw string: the backslash must reach the shell to escape '#'
                            run_bash(r"rm -f /tmp/pot.xvg /tmp/\#pot*")
                        except subprocess.CalledProcessError:
                            print(".. no data ..")
                            continue

                        # start from NaN so a missing result line never reuses the
                        # values parsed for a previous endpoint
                        pot_mean, pot_std = np.nan, np.nan
                        for line in stdout.splitlines():
                            if line.startswith(key2):
                                pot_mean, pot_std = map(float, line.split()[mean_std_slice])
                        df_ti.at[(st_name, ffp_name, tis_name, 0, lambda_), 'pot'] = pot_mean  # temporary abuse of sample index
                        df_ti.at[(st_name, ffp_name, tis_name, 1, lambda_), 'pot'] = pot_std  # temporary abuse of sample index

    # average over pure water endpoints
    for ffp_name, ffp in force_field_pairs_ti.items():
        if 'A-has-dummy' not in ffp['tags']:
            continue
        print(f"force-field-pair {ffp_name}")
        pot_A_values = []
        rows_A = []  # every (system type, TI setting) that takes part in the average
        for st_name, st in system_types_ti.items():
            assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
            ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
                rows_A.append((st_name, tis_name))
                pot_mean = df_ti.at[(st_name, ffp_name, tis_name, 0, 0.0), 'pot']  # temporary abuse of sample index
                pot_std = df_ti.at[(st_name, ffp_name, tis_name, 1, 0.0), 'pot']  # temporary abuse of sample index
                if (not np.isnan(pot_mean)) and (not np.isnan(pot_std)):
                    pot_A_values.append(pot_mean)

        pot_A_counter = len(pot_A_values)
        if pot_A_counter == 0:
            pot_A_mean = np.nan
            pot_A_std = np.nan
        else:
            pot_A_mean = sum(pot_A_values) / pot_A_counter
            if pot_A_counter == 1:
                # sample standard deviation is undefined for a single value
                pot_A_std = np.nan
            else:
                pot_A_std = np.sqrt(np.sum((np.array(pot_A_values) - pot_A_mean)**2 / (pot_A_counter - 1)))
        # now setting the average for all system types and all matching TI settings
        # (explicit list instead of relying on the loop variable left over from above)
        for st_name, tis_name in rows_A:
            df_ti.at[(st_name, ffp_name, tis_name, 0, 0.0), 'pot'] = pot_A_mean  # temporary abuse of sample index
            df_ti.at[(st_name, ffp_name, tis_name, 1, 0.0), 'pot'] = pot_A_std  # temporary abuse of sample index

    # take ΔU = U_B - U_A
    for st_name, st in system_types_ti.items():
        print(f"system-type {st_name}")
        for ffp_name, ffp in force_field_pairs_ti.items():
            assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
            ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
                # difference: λ=1 minus λ=0
                pot_0 = df_ti.at[(st_name, ffp_name, tis_name, 0, 0.0), 'pot']  # temporary abuse of sample index
                pot_1 = df_ti.at[(st_name, ffp_name, tis_name, 0, 1.0), 'pot']  # temporary abuse of sample index
                pot_0_std = df_ti.at[(st_name, ffp_name, tis_name, 1, 0.0), 'pot']  # temporary abuse of sample index
                pot_1_std = df_ti.at[(st_name, ffp_name, tis_name, 1, 1.0), 'pot']  # temporary abuse of sample index
                df_ti.loc[(st_name, ffp_name, tis_name, slice(None), slice(None)), 'pot'] = np.nan  # end abuse of sample index
                df_ti2.at[(st_name, ffp_name, tis_name), 'ΔU-mean'] = pot_1 - pot_0
                # the two endpoint uncertainties are added in quadrature
                df_ti2.at[(st_name, ffp_name, tis_name), 'ΔU-std'] = np.sqrt(pot_1_std**2 + pot_0_std**2)

fill_df_U(use_total_energy=True)

df_ti.head()

In [None]:
# show the ΔU columns ('ΔU-mean' through 'ΔU-std') for all rows
df_ti2.loc[(slice(None), slice(None), slice(None)), slice('ΔU-mean', 'ΔU-std')]

### aggregate data

In [None]:
def aggregate():
    """Reduce the per-sample 'total' values of ``df_ti`` to mean and std in ``df_ti2``.

    For ΔG and pΔV the 'total' rows of all block samples are collected per
    (system type, force-field pair, TI setting); their mean and standard
    deviation go to the '<term>-mean' and '<term>-std' columns. Combinations
    that raise ``KeyError`` are reported and skipped.
    """
    for st_name in system_types_ti:
        for ffp_name, ffp in force_field_pairs_ti.items():
            matching_names = [
                name for name, setting in ti_settings.items()
                if setting['type'] in ffp['ti-setting-types']
            ]
            for tis_name in matching_names:
                row = (st_name, ffp_name, tis_name)
                for term in ('ΔG', 'pΔV'):
                    try:
                        samples = df_ti.loc[(*row, slice(None), 'total'), term]
                        df_ti2.at[row, f'{term}-mean'] = samples.mean()
                        df_ti2.at[row, f'{term}-std'] = samples.std()
                    except KeyError:
                        print(".. no data ..")
aggregate()
df_ti2

In [None]:
def fill_entropies_ti2():
    """Add the 'ΔS-mean' and 'ΔS-std' columns to ``df_ti2``.

    ΔS = (ΔU - ΔG + pΔV) / T with T taken from system A (asserted to equal
    the temperature of system B); the three standard deviations are added in
    quadrature and divided by T as well.
    """
    for st_name, st in system_types_ti.items():
        for ffp_name, ffp in force_field_pairs_ti.items():
            assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
            ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            assert systemA['temperature'] == systemB['temperature']
            temperature = systemA['temperature']
            for tis_name, tis in ti_settings.items():
                if tis['type'] not in ffp['ti-setting-types']:
                    continue
                row = (st_name, ffp_name, tis_name)

                mean_U = df_ti2.at[row, 'ΔU-mean']
                mean_G = df_ti2.at[row, 'ΔG-mean']
                mean_pV = df_ti2.at[row, 'pΔV-mean']
                # gromacs units kJ / mol / K
                df_ti2.at[row, 'ΔS-mean'] = (mean_U - mean_G + mean_pV) / temperature

                std_U = df_ti2.at[row, 'ΔU-std']
                std_G = df_ti2.at[row, 'ΔG-std']
                std_pV = df_ti2.at[row, 'pΔV-std']
                # gromacs units kJ / mol / K
                df_ti2.at[row, 'ΔS-std'] = np.sqrt(std_U**2 + std_G**2 + std_pV**2) / temperature
fill_entropies_ti2()
df_ti2

In [None]:
# show entropies only; the factor 1000 converts the stored kJ / mol / K to J / mol / K
df_ti2.loc[(slice(None), slice(None), slice(None)), slice('ΔS-mean', 'ΔS-std')] * 1000

## ion size data

In [None]:
def gen_ion_sizes():
    """Collect ion-size estimates from three sources.

    Returns a dict with the entries

    - ``'rdf-flank'``: ``{(force field, ion): size}``, read manually from the
      O-ion RDFs at the point where the RDF equals 1,
    - ``'lit'``: ``{ion: size}``, values from Marcus,
    - ``'rdf-peak'``: ``{(force field, ion): r}``, position of the maximum of
      the OW-ion RDF stored in ``npt_system_interaction_dict``.
    """
    sizes = {}

    # get RDF=1 of O-ion RDF
    # read manually from RDFs
    flank_by_force_field = {
        'opls-co0.9tc': {'LI': 0.188, 'NA': 0.223, 'K': 0.254, 'CA': 0.223, 'CL': 0.302},
        'eccr1-co1.2': {'LI': 0.183, 'NA': 0.214, 'K': 0.257, 'CA': 0.225, 'CL': 0.298},
        'netz-co0.9tc': {'LI': 0.181, 'NA': 0.216, 'K': 0.247, 'CA': 0.218, 'CL': 0.303},
        'madrid-co1.0tc': {'LI': 0.170, 'NA': 0.216, 'K': 0.254, 'CA': 0.225, 'CL': 0.286},
    }
    sizes['rdf-flank'] = {
        (ff_name, ion_name): size
        for ff_name, per_ion in flank_by_force_field.items()
        for ion_name, size in per_ion.items()
    }

    # from Marcus
    sizes['lit'] = {'LI': 0.071, 'NA': 0.097, 'K': 0.141, 'CA': 0.103, 'CL': 0.180}

    def rdf_peak_position(system_name, ion_name):
        # r at which the OW-ion RDF of the given system is largest
        r = npt_system_interaction_dict[(system_name, f'OW-{ion_name}', 'r')]
        g = npt_system_interaction_dict[(system_name, f'OW-{ion_name}', 'g')]
        return r[np.argmax(g)]

    # get RDF peak of O-ion RDF
    peaks = {}
    sizes['rdf-peak'] = peaks
    system_types_temp = {stn: st for stn, st in system_types.items() if stn != 'water-pure'}
    force_fields_temp = {ffn: ff for ffn, ff in force_fields.items() if 'dummy' not in ff['tags']}
    for system in system_generator(system_types_temp, force_fields_temp):
        # only the 'npt-dist' systems at molar mixing ratio 0.01 are evaluated
        if 'npt-dist' not in system['tags'] or system['molar-mixing-ratio'] != 0.01:
            continue
        ff_name = system['force-field']['name']
        ion = system['moltypes'][1]['name']
        peaks[(ff_name, ion)] = rdf_peak_position(system['name'], ion)
        if ion == 'NA':
            # the chloride peak is taken from the system whose ion is NA
            peaks[(ff_name, 'CL')] = rdf_peak_position(system['name'], 'CL')
    return sizes
    
    
ion_size_dict = gen_ion_sizes()

## make corrections

In [None]:
# not applied correction to ΔH:
# - k T² α  
# see Hess, vdV JPCB 2006 or Sordo RSC Advances 2015
oconst.k_gro * 300**2 * 5e-4  # α from Hess

In [None]:
def check_horinek_pressure():
    """Evaluate the pressure correction for a 1 mol/l ideal gas at 300 K.

    Returns a tuple ``(p_1 in bar, ΔG, ΔS * 1000)`` where
    ``p_1 = ρ_1 R T`` and ``ΔG = k T ln(p_1 / 1 atm)`` with ``k`` taken from
    ``oconst.k_gro`` and ``ΔS = -ΔG / T``.
    """
    temperature = 300
    # p_1 = ρ_1 RT
    molar_density = 1 * 1e3  # 1 mol / l expressed in mol / m^3
    pressure = molar_density * const.R * temperature  # Pascal
    free_energy = oconst.k_gro * temperature * np.log(pressure / const.atm)
    entropy = -free_energy / temperature * 1000
    return pressure / 1e5, free_energy, entropy
    
check_horinek_pressure()

In [None]:
def apply_corrections():
    """Compute the solvation corrections for every TI row and store them in ``df_ti2``.

    For each (system type, force-field pair, TI setting) row the following
    columns are written:

    - ``ΔG-fs`` / ``ΔS-fs``: finite-size correction (sum of three terms each),
    - ``ΔG-press`` / ``ΔS-press``: pressure correction,
    - ``ΔG-surf`` / ``ΔS-surf``: surface potential correction,
    - ``ΔS-enthalpy``: enthalpy correction,
    - ``ΔG-polarization-rdf-minus-water`` / ``-lit`` / ``-rdf``: polarization
      correction for scaled-charge force fields with three different radii,
    - ``ΔG-corrected`` = ``ΔG-mean`` + fs + press + surf (polarization is
      stored but NOT included),
    - ``ΔS-corrected`` = ``ΔS-mean`` + fs + press + surf + enthalpy.

    All corrections are zero unless the force-field pair carries the tag
    ``'A-has-dummy'``.  If the box length cannot be obtained from
    ``gmx energy``, both ``ΔG-fs`` and ``ΔS-fs`` are NaN.
    """
    for st_name, st in system_types_ti.items():
        print(f"system-type {st_name}")
        for ffp_name, ffp in force_field_pairs_ti.items():
            assert len(tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))) == 1
            ffp, systemA, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            sys_comp_name = systemA['name'].split('/')[0]
            print(f"  force-field-pair {ffp_name}")
            for tis_name, tis in {k: v for k, v in ti_settings.items() if v['type'] in ffp['ti-setting-types']}.items():
                working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
                row = (st_name, ffp_name, tis_name)

                # calculate corrections

                def ion_name_and_charge():
                    """Return (system B, ion name, ion charge q) for the current pair."""
                    _, _, systemB = next(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
                    # the ion is the second molecule type of system B
                    ion = systemB['moltypes'][1]['name']
                    atomtypes = PARAMETRIC_FORCE_FIELDS[systemB['force-field']['parametric-ff']]['atomtypes']
                    q = {atomtype['type']: atomtype for atomtype in atomtypes}[ion]['q']
                    return systemB, ion, q

                def read_box_length():
                    """Return the Box-X value reported by ``gmx energy``, or None if unavailable."""
                    with WorkingDir(working_dir):
                        try:
                            stdout = run_bash("gmx energy -f 1.00-long/prod/ener.edr -o /tmp/box-x.xvg <<< 'Box-X'", print_on_error=False)
                        except subprocess.CalledProcessError:
                            # fall back to the short run
                            try:
                                stdout = run_bash("gmx energy -f 1.00/prod/ener.edr -o /tmp/box-x.xvg <<< 'Box-X'", print_on_error=False)
                            except subprocess.CalledProcessError:
                                print(".. no box length data data ..")
                                return None
                            print(".. box length data taken from short run  ..")
                        # raw string: the backslash is passed on to bash to escape '#'
                        run_bash(r"rm -f /tmp/box-x.xvg /tmp/\#box-x*")
                    box_length = None
                    for line in stdout.splitlines():
                        if line.startswith("Box-X"):
                            box_length = float(line.split()[1])
                    if box_length is None:
                        print(".. could not parse box length ..")
                    return box_length

                def finite_size_correction():
                    """Return (ΔG term 1-3, ΔS term 1-3); always a 6-tuple."""
                    # only if starting with dummy, otherwise roughly 0
                    if 'A-has-dummy' not in ffp['tags']:
                        return (0,) * 6
                    # get q
                    systemB, ion, q = ion_name_and_charge()
                    # get L
                    L = read_box_length()
                    if L is None:
                        # six NaNs so that both the ΔG and the ΔS slice become NaN
                        return (np.nan,) * 6
                    # other factors
                    #prefactor_fs = 1 * q**2 / (4 * np.pi * oconst.epsilon_0_gro)
                    prefactor_fs = oconst.f_gro * q**2
                    epsilon_r = 71  # from Horinek
                    xi_ew = -2.837297  # from Horinek
                    depsilon_dT = -0.3631  # from Horinek
                    R = ion_size_dict['rdf-peak'][systemB['force-field']['name'], ion]
                    #radius = R / 2
                    radius = R
                    # correction terms
                    ΔG_fs_term1 = prefactor_fs * -xi_ew / (2 * epsilon_r * L)
                    ΔG_fs_term2 = prefactor_fs * (1 - 1 / epsilon_r) * 2 * np.pi * radius**2 / (3 * L**3)
                    ΔG_fs_term3 = -prefactor_fs * (1 - 1 / epsilon_r) * 8 * np.pi**2 * radius**5 / (45 * L**6)
                    ΔS_fs_term1 = -prefactor_fs / epsilon_r**2 * depsilon_dT * xi_ew / (2 * L)  # three minus -> minus
                    ΔS_fs_term2 = -prefactor_fs / epsilon_r**2 * depsilon_dT * 2 * np.pi * radius**2 / (3 * L**3)  # three minus -> minus
                    ΔS_fs_term3 = prefactor_fs / epsilon_r**2 * depsilon_dT * 8 * np.pi**2 * radius**5 / (45 * L**6)  # four minus -> plus
                    #print(ΔG_fs_term1, ΔG_fs_term2, ΔG_fs_term3, ΔS_fs_term1, ΔS_fs_term2)
                    return ΔG_fs_term1, ΔG_fs_term2, ΔG_fs_term3, ΔS_fs_term1, ΔS_fs_term2, ΔS_fs_term3

                def pressure_correction():
                    """Return (ΔG, ΔS) of the pressure correction."""
                    if 'A-has-dummy' not in ffp['tags']:
                        return 0, 0
                    # p_1 = ρ_1 RT
                    rho_1 = 1  # mol / l
                    rho_1 *= 1e3  # mol / m^3
                    p_1 = rho_1 * const.R * 300  # Pascal
                    Delta_G = oconst.k_gro * 300 * np.log(p_1 / const.atm)
                    Delta_S = -Delta_G / 300
                    return Delta_G, Delta_S  # from Horinek

                def surface_potential_correction():
                    """Return (ΔG, ΔS) of the surface potential correction."""
                    # only if starting with dummy, otherwise roughly 0
                    if 'A-has-dummy' not in ffp['tags']:
                        return 0, 0
                    # get q (also z)
                    _, _, q = ion_name_and_charge()
                    return q * -50.8, q * 96.5 * 1e-3  # from Hori

                def enthalpy_correction():
                    """Return the entropy contribution of the enthalpy correction."""
                    if 'A-has-dummy' not in ffp['tags']:
                        return 0
                    return - 5/2 * oconst.k_gro

                def ecc_polarization_correction():
                    """Return the polarization ΔG for three choices of the ion radius."""
                    if 'A-has-dummy' not in ffp['tags']:
                        return (0, 0, 0)
                    if ('madrid' in ffp_name) or ('ecc' in ffp_name):
                        if 'madrid' in ffp_name:
                            scaling = 0.85
                        elif 'ecc' in ffp_name:
                            scaling = 0.75
                        systemB, ion, q = ion_name_and_charge()
                        R = ion_size_dict['rdf-peak'][systemB['force-field']['name'], ion]
                        R_lit = ion_size_dict['lit'][ion]
                        radius_water = 0.138
                        q_real = q / scaling
                        #print('q_real', q_real)
                        epsilon_el = 1 / scaling**2
                        #print('epsilon_el', epsilon_el)
                        return (  # after Leontyev 2011
                            - oconst.f_gro * (1 - 1 / epsilon_el) * q_real**2 / (2 * (R - radius_water)),
                            - oconst.f_gro * (1 - 1 / epsilon_el) * q_real**2 / (2 * R_lit),
                            - oconst.f_gro * (1 - 1 / epsilon_el) * q_real**2 / (2 * R),
                        )
                    # only ecc and madrid have scaled charges
                    return (0, 0, 0)

                # finite size correction (evaluated once: it runs gmx energy)
                fs_terms = finite_size_correction()
                df_ti2.at[row, 'ΔG-fs'] = sum(fs_terms[0:3])
                df_ti2.at[row, 'ΔS-fs'] = sum(fs_terms[3:6])
                # pressure correction
                ΔG_press, ΔS_press = pressure_correction()
                df_ti2.at[row, 'ΔG-press'] = ΔG_press
                df_ti2.at[row, 'ΔS-press'] = ΔS_press
                # surface potential correction
                ΔG_surf, ΔS_surf = surface_potential_correction()
                df_ti2.at[row, 'ΔG-surf'] = ΔG_surf
                df_ti2.at[row, 'ΔS-surf'] = ΔS_surf
                # enthalpy correction (missing ghost particle in U_λ0)
                df_ti2.at[row, 'ΔS-enthalpy'] = enthalpy_correction()
                # ecc polarization correction
                (
                    df_ti2.at[row, 'ΔG-polarization-rdf-minus-water'],
                    df_ti2.at[row, 'ΔG-polarization-lit'],
                    df_ti2.at[row, 'ΔG-polarization-rdf'],
                ) = ecc_polarization_correction()
                # apply corrections
                df_ti2.at[row, 'ΔG-corrected'] = (
                    + df_ti2.at[row, 'ΔG-mean']
                    + df_ti2.at[row, 'ΔG-fs']
                    + df_ti2.at[row, 'ΔG-press']
                    + df_ti2.at[row, 'ΔG-surf']
                    #+ df_ti2.at[row, 'ΔG-polarization']
                )
                df_ti2.at[row, 'ΔS-corrected'] = (
                    + df_ti2.at[row, 'ΔS-mean']
                    + df_ti2.at[row, 'ΔS-fs']
                    + df_ti2.at[row, 'ΔS-press']
                    + df_ti2.at[row, 'ΔS-surf']
                    + df_ti2.at[row, 'ΔS-enthalpy']
                )
apply_corrections()
df_ti2

In [None]:
df_ti2.loc[('water-na', 'dummynetz-to-netz', slice(None)), slice(None)]

In [None]:
# show entropies only
df_ti2.loc[(slice(None), slice(None), slice(None)), 'ΔS-corrected'] * 1000

In [None]:
df_ti2.loc[(slice(None), slice(None), slice(None)), 'ΔS-std'] * 1000

## plot free energies and entropies of solvation

In [None]:
# Experimental solvation free energies per salt, stored as (value, error).
# They are plotted below on the axis labelled "ΔG_solv in kJ/mol".
# from horinek 2009 who got it from marcus 1997
# 3.6 is maximum error Horinek finds between data sets
exp_solv_free_energies = {
    'water-licl': (-829, 3.6),
    'water-nacl': (-722, 3.6),
    'water-kcl': (-651, 3.6),
    # from Marcus: ion properties, sum of Ca2+ + 2* Cl-
    'water-cacl2_': (-2209, 3.6),
}
# Simulated solvation free energies from the literature, keyed by
# (system type, force field) and stored as (value, error).
# from horinek 2009
lit_solv_free_energies = {
    # error is sqrt(2) ≈ 1.41 because Horinek assumes an error of 1 per TI
    # (presumably two TIs, cation and anion, enter each salt value)
    ('water-licl', 'netz-co0.9tc'): (-826, 1.41),
    ('water-licl', 'netz-co0.9'): (-826, 1.41),  # tc assumed to have small influence
    ('water-nacl', 'netz-co0.9tc'): (-720, 1.41),
    ('water-nacl', 'netz-co0.9'): (-720, 1.41),  # tc assumed to have small influence
    ('water-kcl', 'netz-co0.9tc'): (-653, 1.41),
    ('water-kcl', 'netz-co0.9'): (-653, 1.41),  # tc assumed to have small influence
}

### free energy

In [None]:
def plot_solvation_free_energy(show_ecc=True):
    """Plot solvation free energies (and entropies) of the salts as grouped bars.

    One group of bars is drawn per system type (all except 'water-pure'), one
    bar per entry of ``terms_to_show``: the experimental value and, for each
    force field, the sum of the corrected TI results along a path of TI steps,
    weighted with the number of cations and anions of the salt.  When the
    polarization correction is non-zero, it is drawn as a translucent bar on
    top and the three polarization variants are shown as markers.

    The free-energy figure is saved to '../figures/solvation-free-energies.pdf';
    the entropy figure is only shown.

    Parameters
    ----------
    show_ecc : bool
        Currently unused; kept so that existing calls keep working.
    """
    mpl_rc_local = {
        'hatch.linewidth': 0.4,
        'legend.labelspacing': 0.1,
    }

    terms_to_show = [
        {'type': "experimental", 'exclude-stn': [], },
        #{'type': "literature", 'exclude-stn': ['water-cacl2_'], 'ff': 'netz-co0.9tc'},
        # OPLS
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'opls-co0.9tc-dummyion', 'ff': 'opls-co0.9tc', 'ti-name': "dummyopls-to-opls", 'ti-type': "2-vdW-charge"},
        )},
        # ECC
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'eccr1-co1.2-dummyion', 'ff': 'eccr1-co1.2', 'ti-name': "dummyeccr1-to-eccr1", 'ti-type': "2-vdW-charge"},
        )},
        # ECC IMC
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'eccr1-co1.2-dummyion', 'ff': 'eccr1-co1.2', 'ti-name': "dummyeccr1-to-eccr1", 'ti-type': "2-vdW-charge"},
            {'ff-ref': 'eccr1-co1.2', 'ff': 'iff-altern5-eccr1-co1.2-nopc', 'ti-name': "eccr1-to-iff-eccr1", 'ti-type': "1-linear-direct"},
        )},
        # HMN
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'netz-co0.9tc-dummyion', 'ff': 'netz-co0.9tc', 'ti-name': "dummynetz-to-netz", 'ti-type': "2-vdW-charge"},
        )},
        # HMN IMC
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'netz-co0.9tc-dummyion', 'ff': 'netz-co0.9tc', 'ti-name': "dummynetz-to-netz", 'ti-type': "2-vdW-charge"},
            {'ff-ref': 'netz-co0.9', 'ff': 'iff-altern5-netz-co0.9-nopc', 'ti-name': "netz-to-iff-netz", 'ti-type': "1-linear-direct"},
        )},
        # Madrid
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'madrid-co1.0tc-dummyion', 'ff': 'madrid-co1.0tc', 'ti-name': "dummymadrid-to-madrid", 'ti-type': "2-vdW-charge"},
        )},
    ]

    def cation_system_type(stn):
        # 'water-nacl' -> 'water-na', 'water-cacl2_' -> 'water-ca'
        # (a real suffix removal; str.rstrip would strip a character set)
        return re.sub(r'cl\d*_*$', '', stn)

    with plt.rc_context({**mpl_rc_global, **mpl_rc_local}):

        system_types_to_show = {stn: st for stn, st in system_types.items() if stn != 'water-pure'}

        # Gibbs free energy
        fig, ax = plt.subplots(figsize=(2.5, 2.5), constrained_layout=True, dpi=400)
        # entropy
        figS, axS = plt.subplots(figsize=(2.5, 2.5), constrained_layout=True, dpi=400)
        x_base = np.arange(len(system_types_to_show))
        # bar geometry is the same for every group
        width = 0.9 / len(terms_to_show) - 0.02
        x_offsets = np.linspace(-0.45+width/2, 0.45-width/2, num=len(terms_to_show))

        for s, (stn, st) in enumerate(system_types_to_show.items()):
            print(s, stn)
            for t, term in enumerate(term for term in terms_to_show if stn not in term['exclude-stn']):
                print(t, term['type'])
                x = x_base[s] + x_offsets[t]
                # plot experimental
                if term['type'] == 'experimental':
                    label = 'exp.' if s == 0 else None
                    ax.bar(x=x, height=exp_solv_free_energies[stn][0], yerr=exp_solv_free_energies[stn][1], width=width, color='#444444', label=label)
                    axS.bar(x=x, height=exp_solv_entropies[stn][0] * 1000, yerr=exp_solv_entropies[stn][1] * 1000, width=width, color='#444444', label=label)
                # plot literature
                elif term['type'] == 'literature':
                    label = ff_short_names[term['ff']] + ' lit.' if s == 0 else None
                    ax.bar(x=x,
                           height=lit_solv_free_energies[(stn, term['ff'])][0],
                           yerr=lit_solv_free_energies[(stn, term['ff'])][1],
                           width=width, color=ff_colors[term['ff']], label=label,
                           hatch='\\\\\\', alpha=0.999)
                    axS.bar(x=x,
                           height=lit_solv_entropies[(stn, term['ff'])][0] * 1000,
                           yerr=lit_solv_entropies[(stn, term['ff'])][1] * 1000,
                           width=width, color=ff_colors[term['ff']], label=label,
                           hatch='\\\\\\', alpha=0.999)
                # plot ti difference
                elif term['type'] == 'difference-ti':
                    final_ff = term['path'][-1]['ff']
                    color = ff_colors[final_ff]
                    label = ff_short_names[final_ff] if s == 0 else None
                    n_cat, n_an = st['n_cation_anion']
                    #print('n_cat, n_an', n_cat, n_an)
                    G = 0
                    G_err = 0
                    G_corr1 = 0
                    G_corr2 = 0
                    G_corr3 = 0
                    S = 0
                    S_err = 0
                    # cation first, then anion, each weighted with its count
                    ion_rows = ((cation_system_type(stn), n_cat), ('water-cl', n_an))
                    for step in term['path']:
                        for ion_stn, n_ion in ion_rows:
                            row = (ion_stn, step['ti-name'], step['ti-type'])
                            G_ion = df_ti2.at[row, 'ΔG-corrected'] * n_ion
                            G += G_ion
                            G_corr1 += G_ion + df_ti2.at[row, 'ΔG-polarization-rdf'] * n_ion
                            G_corr2 += G_ion + df_ti2.at[row, 'ΔG-polarization-lit'] * n_ion
                            G_corr3 += G_ion + df_ti2.at[row, 'ΔG-polarization-rdf-minus-water'] * n_ion
                            G_err += (df_ti2.at[row, 'ΔG-std'] * n_ion)**2
                            S += df_ti2.at[row, 'ΔS-corrected'] * n_ion
                            S_err += (df_ti2.at[row, 'ΔS-std'] * n_ion)**2
                    # TODO: check error propagation for long path
                    G_err = np.sqrt(G_err)
                    S_err = np.sqrt(S_err)
                    if G == G_corr3:
                        # no polarization correction: a single plain bar
                        ax.bar(x, G, yerr=G_err, width=width, color=color, label=label, hatch='')
                    else:
                        ax.bar(x, G, width=width, color=color, label=label)  #, hatch='///////')
                        ax.bar(x, G_corr3-G, bottom=G, yerr=G_err, width=width, color=color, alpha=.25)
                        marker1, = ax.plot(x, G_corr1, marker='^', color='k', markersize=2.0, linestyle='none')
                        marker2, = ax.plot(x, G_corr2, marker='x', color='k', markersize=2.5, linestyle='none')
                        marker3, = ax.plot(x, G_corr3, marker='+', color='k', markersize=3.3, linestyle='none')
                    axS.bar(x, S * 1000, yerr=S_err * 1000, width=width, color=color, label=label, hatch='')
                else:
                    print(".. unknown term ..", term['type'])

        for ax_ in (ax, axS):
            ax_.set_xlim(x_base[0] - 0.6, x_base[-1] + 0.6)
            ax_.set_xticks(x_base)
            ax_.set_xticklabels([sys_type_short_names[stn] for stn in system_types_to_show])
            # labels go to the top, then the tick marks themselves are hidden
            ax_.xaxis.set_ticks_position('top')
            ax_.xaxis.set_ticks_position('none')
            #leg = ax_.legend(loc=(0.02, 0.21), frameon=False)
            leg = ax_.legend(loc='lower left', frameon=False)
            ax_.add_artist(leg)
            #ax_.legend(handles=[marker1, marker2, marker3], labels=['solv. shell peak', 'lit.', 'solv. shell start'], loc=(0.02, 0.0), frameon=False, #handlelength=0.1, ncol=3, columnspacing=0.5, handletextpad=0.3,
                       #title='ion radius for pol. corr.')

        ax.set_ylabel(r'$\Delta G_\mathrm{solv}$ in kJ/mol')
        axS.set_ylabel(r'$\Delta S_\mathrm{solv}$ in J/mol/K')
        fig.savefig('../figures/solvation-free-energies.pdf')
        #figS.savefig('../figures/solvation-entropies.pdf')
        plt.show()

plot_solvation_free_energy()

In [None]:
!cp -a ../figures/solvation-free-energies.pdf ~/research/output/ion-shortrange-paper/figures/

### entropy

In [None]:
def plot_solvation_entropy_new():
    """Plot solvation entropies of the salts as grouped bars.

    One group of bars is drawn per system type (all except 'water-pure'), one
    bar per entry of ``terms_to_show``:

    - 'experimental': value from ``exp_solv_entropies``,
    - 'difference-ti': sum of ``ΔS-corrected`` along the TI path, weighted
      with the number of cations and anions of the salt,
    - 'difference-2pt': the TI sum along 'path-ref-ti' plus the entropy
      difference from ``df_2pt_diff`` between 'ff-ref' and 'ff'.

    Values are multiplied by 1000 for plotting (axis label J/mol/K).  The
    figure is saved to '../figures/solvation-entropies.pdf'.
    """
    MEDIUM_SIZE = 10
    BIGGER_SIZE = 12
    mpl_rc_local = {
        'font.size': MEDIUM_SIZE,          # controls default text sizes
        'axes.titlesize': MEDIUM_SIZE,     # fontsize of the axes title
        'axes.labelsize': MEDIUM_SIZE,     # fontsize of the x and y labels
        'xtick.labelsize': MEDIUM_SIZE,    # fontsize of the tick labels
        'ytick.labelsize': MEDIUM_SIZE,    # fontsize of the tick labels
        'legend.fontsize': MEDIUM_SIZE,    # legend fontsize
        'figure.titlesize': BIGGER_SIZE,   # fontsize of the figure title
        #'hatch.linewidth': 0.4,
        'legend.labelspacing': 0.1,
    }

    terms_to_show = [
        {'type': "experimental", 'exclude-stn': [], },
        #{'type': "literature", 'exclude-stn': ['water-cacl2_'], 'ff': 'netz-co0.9tc'},
        # ECC
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'eccr1-co1.2-dummyion', 'ff': 'eccr1-co1.2', 'ti-name': "dummyeccr1-to-eccr1", 'ti-type': "2-vdW-charge"},
        )},
        # ECC IMC 2PT
        {'type': 'difference-2pt', 'exclude-stn': [], 'ff-ref': 'eccr1-co1.2', 'ff': 'iff-altern5-eccr1-co1.2-nopc', 'path-ref-ti': (
            {'ff-ref': 'eccr1-co1.2-dummyion', 'ff': 'eccr1-co1.2', 'ti-name': "dummyeccr1-to-eccr1", 'ti-type': "2-vdW-charge"},
        )},
        # ECC IMC TI
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'eccr1-co1.2-dummyion', 'ff': 'eccr1-co1.2', 'ti-name': "dummyeccr1-to-eccr1", 'ti-type': "2-vdW-charge"},
            {'ff-ref': 'eccr1-co1.2', 'ff': 'iff-altern5-eccr1-co1.2-nopc', 'ti-name': "eccr1-to-iff-eccr1", 'ti-type': "1-linear-direct"},
        )},
        # HMN
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'netz-co0.9tc-dummyion', 'ff': 'netz-co0.9tc', 'ti-name': "dummynetz-to-netz", 'ti-type': "2-vdW-charge"},
        )},
        # HMN IMC 2PT
        {'type': 'difference-2pt', 'exclude-stn': [], 'ff-ref': 'netz-co0.9tc', 'ff': 'iff-altern5-netz-co0.9-nopc', 'path-ref-ti': (
            {'ff-ref': 'netz-co0.9tc-dummyion', 'ff': 'netz-co0.9tc', 'ti-name': "dummynetz-to-netz", 'ti-type': "2-vdW-charge"},
        )},
        # HMN IMC TI
        {'type': 'difference-ti', 'exclude-stn': [], 'path': (
            {'ff-ref': 'netz-co0.9tc-dummyion', 'ff': 'netz-co0.9tc', 'ti-name': "dummynetz-to-netz", 'ti-type': "2-vdW-charge"},
            {'ff-ref': 'netz-co0.9', 'ff': 'iff-altern5-netz-co0.9-nopc', 'ti-name': "netz-to-iff-netz", 'ti-type': "1-linear-direct"},
        )},
    ]

    def cation_system_type(stn):
        # 'water-nacl' -> 'water-na', 'water-cacl2_' -> 'water-ca'
        # (a real suffix removal; str.rstrip would strip a character set)
        return re.sub(r'cl\d*_*$', '', stn)

    def sum_ti_path(path, stn, n_cat, n_an):
        """Return (ΔS, squared error) summed over all TI steps for cation and anion."""
        S = 0
        S_err = 0
        # cation first, then anion, each weighted with its count
        ion_rows = ((cation_system_type(stn), n_cat), ('water-cl', n_an))
        for step in path:
            for ion_stn, n_ion in ion_rows:
                row = (ion_stn, step['ti-name'], step['ti-type'])
                S += df_ti2.at[row, 'ΔS-corrected'] * n_ion
                S_err += (df_ti2.at[row, 'ΔS-std'] * n_ion)**2
        return S, S_err

    with plt.rc_context({**mpl_rc_global, **mpl_rc_local}):

        system_types_to_show = {stn: st for stn, st in system_types.items() if stn != 'water-pure'}

        # entropy
        fig, ax = plt.subplots(figsize=(3.8, 3.4), constrained_layout=True, dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.01, h_pad=0.01)
        x_base = np.arange(len(system_types_to_show))
        # bar geometry is the same for every group
        width = 0.9 / len(terms_to_show) - 0.02
        x_offsets = np.linspace(-0.45+width/2, 0.45-width/2, num=len(terms_to_show))

        for s, (stn, st) in enumerate(system_types_to_show.items()):
            print(s, stn)
            # ion counts belong to the system type, so they are set here and
            # not inside one of the term branches
            n_cat, n_an = st['n_cation_anion']
            #print('n_cat, n_an', n_cat, n_an)
            for t, term in enumerate(term for term in terms_to_show if stn not in term['exclude-stn']):
                print(t, term['type'])
                x = x_base[s] + x_offsets[t]
                # plot experimental
                if term['type'] == 'experimental':
                    label = 'exp.' if s == 0 else None
                    ax.bar(x=x, height=exp_solv_entropies[stn][0] * 1000, yerr=exp_solv_entropies[stn][1] * 1000, width=width, color='#444444', label=label)
                # plot literature
                elif term['type'] == 'literature':
                    label = ff_short_names[term['ff']] + ' lit.' if s == 0 else None
                    ax.bar(x=x,
                           height=lit_solv_entropies[(stn, term['ff'])][0] * 1000,
                           yerr=lit_solv_entropies[(stn, term['ff'])][1] * 1000,
                           width=width, color=ff_colors[term['ff']], label=label,
                           hatch='\\\\\\', alpha=0.999)
                # plot ti reference plus 2PT difference
                elif term['type'] == 'difference-2pt':
                    label = ff_short_names[term['ff']] + ' 2PT' if s == 0 else None
                    diff = (term['ff-ref'], term['ff'])
                    S, S_err = sum_ti_path(term['path-ref-ti'], stn, n_cat, n_an)
                    # TODO: check error propagation for long path
                    S += (
                        +df_2pt_diff.at[(stn, diff), (0, 'S')] * 100
                        +df_2pt_diff.at[(stn, diff), (1, 'S')]
                        +df_2pt_diff.at[(stn, diff), (2, 'S')]
                    )
                    S_err += (
                        +df_2pt_diff_std.at[(stn, diff), (0, 'S')] * 100
                        +df_2pt_diff_std.at[(stn, diff), (1, 'S')]
                        +df_2pt_diff_std.at[(stn, diff), (2, 'S')]
                    )** 2
                    S_err = np.sqrt(S_err)
                    ax.bar(x, S * 1000, yerr=S_err * 1000, width=width, color=ff_colors[term['ff']], label=label,
                           hatch='///', alpha=0.999)
                # plot ti difference
                elif term['type'] == 'difference-ti':
                    final_ff = term['path'][-1]['ff']
                    label = ff_short_names[final_ff] if s == 0 else None
                    S, S_err = sum_ti_path(term['path'], stn, n_cat, n_an)
                    # TODO: check error propagation for long path
                    S_err = np.sqrt(S_err)
                    ax.bar(x, S * 1000, yerr=S_err * 1000, width=width, color=ff_colors[final_ff], label=label, hatch='')
                else:
                    print(".. unknown term ..", term['type'])

        ax.set_xlim(x_base[0] - 0.6, x_base[-1] + 0.6)
        ax.set_xticks(x_base)
        ax.set_xticklabels([sys_type_short_names[stn] for stn in system_types_to_show])
        # labels go to the top, then the tick marks themselves are hidden
        ax.xaxis.set_ticks_position('top')
        ax.xaxis.set_ticks_position('none')
        ax.legend(loc='lower left', frameon=False)

        ax.set_ylabel(r'$\Delta S_\mathrm{solv}$ in J/mol/K')
        fig.savefig('../figures/solvation-entropies.pdf')
        plt.show()

plot_solvation_entropy_new()

In [None]:
!cp -a ../figures/solvation-entropies.pdf ~/research/output/ion-shortrange-paper/figures/

# numbers for the paper

In [None]:
def show_na_o_potential():
    """Plot two variants of the OW-NA pair potential of 'opls-q1.0'.

    The potential is generated twice with ``gen_potential_and_force``, once
    with the fifth argument ``False`` and once with ``True`` (judging by the
    original variable names, presumably without and with Coulomb -- confirm
    against ``gen_potential_and_force``).  Afterwards the positions of the
    two minima and ``sqrt(σ_1 σ_2)`` are printed.
    """
    force_field = PARAMETRIC_FORCE_FIELDS['opls-q1.0']
    # entries 0 and 5 of the atomtype list: OW and NA
    oxygen, sodium = (force_field['atomtypes'][idx] for idx in (0, 5))
    for atomtype in (oxygen, sodium):
        print(atomtype['type'])

    distances = np.linspace(0, 1, num=1001)
    potentials = []
    for flag in (False, True):
        potential, _ = gen_potential_and_force(
            oxygen, sodium, distances,
            force_field['combining-rule'], flag, force_field['nonbond-params'],
        )
        potentials.append(potential)

    for potential in potentials:
        plt.plot(distances, potential)
    #plt.ylim(-0.1, 0.1)
    plt.ylim(-600, 100)
    plt.show()

    # position of the minimum of each curve
    for potential in potentials:
        print(distances[np.argmin(potential)])
    # geometric mean of the two σ
    print(np.sqrt(oxygen['σ'] * sodium['σ']))
    
show_na_o_potential()

# test self interaction finite-size correction

In [None]:
# equation 10.5 in Horinek 2009
# SI units
def test_finite_size_correction():
    """Evaluate the finite-size entropy correction for one example ion.

    All quantities are in SI units, so the box length is given in meters
    like the ion size; mixing nanometers and meters would scale the two
    bracket terms inconsistently.

    Returns
    -------
    float
        The entropy correction per mole (J / mol / K, given the SI inputs
        and the factor ``N_A``).
    """
    epsilon_r = 71  # SPC/E water, Horinek 2009
    depsilon_dT = -0.3631  # Horinek 2009
    xi_ew = -2.837297  # Horinek 2009, from Lee 2007
    L = 2.48304e-9  # length of box in meter
    R = 0.4e-9  # effective size of the ion, from RDF, LI-O peak * 2
    # Coulomb prefactor per mole for a unit charge
    prefactor = (const.N_A * const.e**2) / (4 * np.pi * const.epsilon_0)
    Delta_S_fs = -prefactor * (1 / epsilon_r**2) * depsilon_dT * (
        xi_ew / 2 / L
        + 2 * np.pi * R**2 / (3 * L**3)
    )
    return Delta_S_fs

test_finite_size_correction()

# symbolic pictures

In [None]:
def gen_symbolic():
    """Draw the schematic potential/RDF sketch and save it as a PDF.

    Both panels show the same Lennard-Jones curve from
    ``gen_lj_12_6_potential`` together with a synthetic RDF-like curve that is
    built from ``exp(-pot)`` plus hand-tuned damped oscillations.  The figure
    is drawn in matplotlib's xkcd style and written to
    '../figures/symbolic-pot-rdf.pdf'.
    """
    def draw_panel(ax_pot, radii, potential, rdf, title):
        # potential on the primary axis, RDF sketch on a twin axis, no ticks
        ax_rdf = ax_pot.twinx()
        ax_rdf.plot(radii, rdf, color='mediumvioletred', zorder=1)
        ax_pot.plot(radii, potential, color='royalblue', zorder=10)
        ax_pot.set_xlim(0, 0.8)
        ax_pot.set_ylim(-3, 10)
        ax_rdf.set_ylim(0, 5)
        ax_pot.set_xticks([])
        for axis in (ax_pot, ax_rdf):
            axis.set_yticks([])
        ax_pot.set_title(title)
        return ax_rdf

    with plt.xkcd(scale=0.5):
        fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(4, 2), constrained_layout=True, dpi=200)
        r = np.linspace(0, 3, 1501)
        pot = gen_lj_12_6_potential(r, 5e-8, 5e-4)

        with np.errstate(under='ignore'):
            # left panel: Boltzmann factor of the potential plus a damped wiggle
            g = np.exp(-pot)
            start = 130
            tail = r[start:]
            shifted = tail - r[start]
            g[start:] += -2 * np.exp(-5 * tail) * np.sin(40 * shifted**1.4) + 0.3 * np.sqrt(shifted)

            # right panel: compressed and shifted first peak, padded to len(r)
            offset = 10
            before = 30
            g2 = np.concatenate([
                np.zeros(before),
                1.3 * np.exp(-pot[offset::2]),
                np.ones(offset),
                np.ones((len(pot) - offset) // 2 - before),
            ])
            start2 = 110 - offset
            tail2 = r[start2:]
            shifted2 = tail2 - r[start2]
            g2[start2:] += -5 * np.exp(-5 * tail2) * np.sin(20 * shifted2**1.3) - 0.2 * np.sqrt(shifted2)

        draw_panel(ax0, r, pot, g, 'no\nelectrostatics')
        ax0.set_ylabel('LJ potential', color='royalblue')

        ax1t = draw_panel(ax1, r, pot, g2, 'attractive\nelectrostatics')
        ax1t.set_ylabel('RDF', color='mediumvioletred')

        fig.savefig('../figures/symbolic-pot-rdf.pdf')
        plt.show()
    
gen_symbolic()

# Force-field tables for SI

In [None]:
# generate table for SI with ff data
def gen_ff_table():
    """Build and print the LaTeX force-field overview table.

    One row is created per entry of ``force_fields`` that carries neither the
    'dummy' nor the 'halftabulated' tag.  Per-atom quantities (name, σ, ε, q)
    of all non-water atom types are stacked inside one cell with
    ``\\newline{}``.  The LaTeX source is printed.

    Returns:
        The DataFrame the LaTeX table was generated from (with the final,
        LaTeX-formatted index and column labels).
    """
    water_model_short_names = {
        'water-spce': 'SPC/E',
        'water-tip4p2005': r'TIP4P\newline{}/2005',
    }
    mix_rule_short_names = {
        'geometric': 'geom.',
        'lorentz-berthelot': 'Lor.-Ber.',
    }
    water_atom_types = {'OW', 'HW', 'MW'}

    def stacked(entries):
        # one table cell holding one entry per line
        return r'\newline{}'.join(entries)

    force_fields_table = {
        ffn: ff for ffn, ff in force_fields.items()
        if not ('dummy' in ff['tags'] or 'halftabulated' in ff['tags'])
    }
    index_final = [ff_short_names[ffn] for ffn in force_fields_table]
    columns = ['cut-off', 'tail-corr', 'water-model', 'mix-rule', 'atom', 'sigma', 'epsilon', 'charge']
    columns_final = [
        r'cut-off\newline{}in \si{\nano\meter}',
        r'tail\newline{}corr.',
        r'water\newline{}model',
        r'mixing\newline{}rule',
        '{}',
        r'$\sigma$\newline{}in \si{\nano\meter}',
        r'$\epsilon$\newline{}in \si{\kilo\joule\per\mole}',
        r'$q$\newline{}in \si{\elementarycharge}',
    ]
    # paragraph columns: index column followed by the eight data columns
    column_widths = ('1', '1', '1', '1.2', '1.2', '0.5', '1.5', '1.8', '1.2')
    column_format = ''.join(f"p{{{width}cm}} " for width in column_widths)

    df = pd.DataFrame(index=list(force_fields_table), columns=columns)
    for ffn, ff in force_fields_table.items():
        pff = PARAMETRIC_FORCE_FIELDS[ff['parametric-ff']]
        df.at[ffn, 'cut-off'] = ff['cut-off']
        df.at[ffn, 'tail-corr'] = 'yes' if 'tail-corr' in ff['tags'] else 'no'
        df.at[ffn, 'water-model'] = water_model_short_names[pff['water-model']]
        df.at[ffn, 'mix-rule'] = mix_rule_short_names[pff['combining-rule']]

        atomtypes = [at for at in pff['atomtypes'] if at['type'] not in water_atom_types]
        df.at[ffn, 'atom'] = stacked(f"{at_short_names[at['type']]}" for at in atomtypes)
        df.at[ffn, 'sigma'] = stacked(f"{at['σ']:.6f}" for at in atomtypes)
        df.at[ffn, 'epsilon'] = stacked(f"{at['ε']:.6f}" for at in atomtypes)
        charge_cell = stacked(f"{at['q']:.2f}" for at in atomtypes)
        # vertical space after every row except the one for 'madrid-co1.0tc'
        if ffn != 'madrid-co1.0tc':
            charge_cell += r"\vspace{0.2cm}"
        df.at[ffn, 'charge'] = charge_cell

    df.index = index_final
    df.columns = columns_final
    # large max_colwidth so the long stacked cells are not truncated
    with pd.option_context("max_colwidth", 1000):
        latex = df.to_latex(escape=False, multicolumn=False, column_format=column_format)
    print(latex)
    return df
    
gen_ff_table()

In [None]:
# generate table for SI with the pair-specific parameters of the 'madrid' ff
def gen_madrid_table():
    """Build, print and store the LaTeX table of 'madrid' nonbond parameters.

    Each entry of ``PARAMETRIC_FORCE_FIELDS['madrid']['nonbond-params']``
    becomes one row with its σ and ε.  Every entry is asserted to be of kind
    'LJ'.  The LaTeX source is printed and also written to
    '/tmp/test-table.tex'.

    Returns:
        The DataFrame the LaTeX table was generated from.
    """
    nbp = PARAMETRIC_FORCE_FIELDS['madrid']['nonbond-params']
    atom_pairs = list(nbp)
    index_final = [' - '.join(at_short_names[at] for at in pair) for pair in atom_pairs]
    columns = ['sigma', 'epsilon']
    columns_final = [
        r'$\sigma$\newline{}in \si{\nano\meter}',
        r'$\epsilon$\newline{}in \si{\kilo\joule\per\mole}',
    ]
    # paragraph columns: index column followed by the two data columns
    column_format = ''.join(f"p{{{width}cm}} " for width in ('1.7', '1.5', '1.8'))

    df = pd.DataFrame(index=atom_pairs, columns=columns)
    for pair, param in nbp.items():
        assert param[0] == 'LJ'
        sig, eps = param[1:3]
        df.at[pair, 'sigma'] = f"{sig:.8f}"
        df.at[pair, 'epsilon'] = f"{eps:.8f}"

    df.index = index_final
    df.columns = columns_final
    # large max_colwidth so long cells are not truncated
    with pd.option_context("max_colwidth", 1000):
        latex = df.to_latex(escape=False, multicolumn=False, column_format=column_format)
    print(latex)
    with open('/tmp/test-table.tex', 'w') as f:
        f.write(latex)
    return df
    
gen_madrid_table()

# meta figure for paper

In [None]:
def plot_meta():
    """Assemble the 5x2 overview figure and save it as '../figures/meta.pdf'.

    Each row of axes is filled by one of the ``plot_sub_*`` functions (DOS,
    RDF, density, diffusion, osmotic pressure), in that order from top to
    bottom.  The notebook-wide rc settings ``mpl_rc_global`` are used, with a
    few legend/axes settings overridden for this figure.
    """
    rc_settings = dict(mpl_rc_global)
    rc_settings.update({
        'legend.handlelength': 2.0,
        'legend.fontsize': 7,
        'legend.labelspacing': 0.0,
        'figure.dpi': 96,
        'axes.labelpad': 0.1,
    })
    with mpl.rc_context(rc=rc_settings):
        fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(3.9, 5.9), constrained_layout=True, dpi=200)
        fig.set_constrained_layout_pads(w_pad=0.02, h_pad=0.01, hspace=0.01, wspace=0.0)
        row_plotters = (plot_sub_dos, plot_sub_rdf, plot_sub_rho, plot_sub_diff, plot_sub_osmp)
        for row_ndx, plot_row in enumerate(row_plotters):
            plot_row(axes[row_ndx])
        fig.savefig('../figures/meta.pdf')
        plt.show()

plot_meta()

In [None]:
!cp -a ../figures/meta.pdf ~/research/output/ion-shortrange-paper/figures/

In [None]:
# show ion dos
def plot_sub_dos(axes_row):
    """Plot the cation translational DOS and its fit for LiCl and CaCl2.

    Args:
        axes_row: indexable collection of axes; element 0 and 1 receive the
            'water-licl' and 'water-cacl2_' panels (in the iteration order of
            ``system_types_2pt``).

    For every system tagged 'dos', the sample-averaged 'trn' DOS is drawn as a
    solid line and the fitted function as a dashed line in the same color.
    Two of the fit parameters (second-to-last and fifth-to-last) are marked
    with vertical lines -- presumably characteristic frequencies of the fit
    model; confirm against the definition of ``fit_func``.
    """
    xlim_cations = {
        'water-licl': (0, 800),
        'water-nacl': (0, 400),
        'water-kcl': (0, 300),
        'water-cacl2_': (0, 500),
    }
    ff_names_to_show = (
        'eccr1-co1.2',
        'iff-altern5-eccr1-co1.2-nopc',
        'netz-co0.9tc',
        'iff-altern5-netz-co0.9-nopc',
    )
    systype_names_to_show = ('water-licl', 'water-cacl2_')
    force_fields_to_show = {ffn: force_fields_2pt[ffn] for ffn in ff_names_to_show}
    system_types_to_show = {stn: st for stn, st in system_types_2pt.items()
                            if stn in systype_names_to_show}

    ion = 'cation'
    moltype_ndx = 1  # entry of the per-system moltype list used for the cation
    to_rec_cm = oconst.rec_cm_per_THz

    for column, (systype_name, systype) in enumerate(system_types_to_show.items()):
        ax = axes_row[column]

        systems_with_dos = (
            system for system in system_generator({systype_name: systype}, force_fields_to_show)
            if 'dos' in system['tags']
        )
        for system in systems_with_dos:
            sys_name = system['name']
            ff_name = system['force-field']['name']
            moltype = moltypes_with_dos_dict[sys_name][moltype_ndx]

            frequencies = np.array(moltype['doses'][('frequencies', 0)])
            # rows are samples after the transpose; average over them
            dos_samples = np.array(moltype['doses'].loc[:, ('trn', slice(None))]).T
            dos_mean = dos_samples.mean(axis=0)
            wavenumbers = frequencies * to_rec_cm

            line, = ax.plot(wavenumbers,
                            dos_mean / to_rec_cm,
                            linestyle='-',
                            linewidth=0.8,
                            color=ff_colors[ff_name],
                            label=ff_short_names[ff_name],
                            )
            fit_color = line.get_color()

            # fit
            popt = popt_dict[ion]['opt'][sys_name]
            fit_values = fit_func[ion][systype_name](frequencies, *popt) / to_rec_cm
            ax.plot(wavenumbers, fit_values, color=fit_color, linestyle='--', linewidth=0.6)
            ax.axvline(popt[-2] * to_rec_cm, color=fit_color, linewidth=0.6, linestyle=':')
            ax.axvline(popt[-5] * to_rec_cm, color=fit_color, linewidth=0.6, linestyle='--')

        ax.set_title(sys_type_short_names[systype_name])
        ax.set_ylim(0)
        ax.set_xlabel(r'$\tilde v$ in cm$^{-1}$')
        ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
        ax.set_xlim(xlim_cations[systype_name])
    axes_row[0].set_ylabel(r'$D \! O \! S^\text{Cat.}(\tilde v)$ in cm')

In [None]:
def plot_sub_rdf(axes_row, show_lit_data=False):
    """Plot the water-oxygen/cation RDFs for LiCl (panel 0) and CaCl2 (panel 1).

    Args:
        axes_row: indexable collection of at least two axes.
        show_lit_data: if True, literature first-peak positions from
            ``first_peak_rdf_lit_data`` are added as 'x' markers, one color
            per literature source.

    For each selected system the NPT RDF is drawn (NVT is switched off in the
    configuration below), followed by the AIMD reference curve(s) that match
    the shown interaction.  A combined legend is attached to ``axes_row[1]``.
    Systems without RDF data are reported with '..no data..' and are left out
    of the legend.
    """
    def systems_named(names):
        # systems in the order produced by system_generator, filtered by name
        return [system for system in system_generator(*systems_md) if system['name'] in names]

    system_combinations_to_compare = [
        {'name': 'all LiCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': systems_named([
             'water5000-licl50/eccr1-co1.2',
             'water5000-licl50/iff-altern5-eccr1-co1.2-nopc',
             'water5000-licl50/netz-co0.9tc',
             'water5000-licl50/iff-altern5-netz-co0.9-nopc',
         ]),
         'nb-interactions-to-show': [
             'OW-LI',
         ]},
        {'name': 'all CaCl', 'show-nvt': False, 'show-npt': True,
         'system-combination': systems_named([
             'water5000-cacl2_50/eccr1-co1.2',
             'water5000-cacl2_50/iff-altern5-eccr1-co1.2-nopc',
             'water5000-cacl2_50/netz-co0.9tc',
             'water5000-cacl2_50/iff-altern5-netz-co0.9-nopc',
         ]),
         'nb-interactions-to-show': [
             'OW-CA',
         ]},
    ]

    additional_dists = [
        {'name': 'AIMD', 'path': '../received/azade-aimd/li-cl/li-o-gofr-0.04.dat', 'show-for': ['OW-LI']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/na-o-gofr-0.04.dat', 'show-for': ['OW-NA']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/k-cl/k-o-gofr-0.04.dat', 'show-for': ['OW-K']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/ca-cl2/ca-o-gofr-0.04.dat', 'show-for': ['OW-CA']},
        {'name': 'AIMD', 'path': '../received/azade-aimd/na-cl/cl-o-gofr-0.04.dat', 'show-for': ['OW-CL']},
    ]

    marker_colors = list(mpl.colors.TABLEAU_COLORS.values())
    marker_color_ndx = 0
    marker_color_dict = {}

    legend_handles, legend_labels = [], []

    def register_in_legend(handle, label):
        # keep the first handle seen for each label
        if handle is not None and label not in legend_labels:
            legend_handles.append(handle)
            legend_labels.append(label)

    for c, system_combination in enumerate(system_combinations_to_compare):
        ax = axes_row[c]

        # only the first listed interaction is shown per panel
        nb_name = system_combination['nb-interactions-to-show'][0]

        for s, system in enumerate(system_combination['system-combination']):
            ff_name = system['force-field']['name']
            label = ff_short_names.get(ff_name, ff_name)
            # reset per system: without this, a system lacking data would
            # register the previous system's line (or raise on the first one)
            line = None

            if system_combination['show-nvt']:
                try:
                    r = system_interaction_dict[(system['name'], nb_name, 'r')]
                    g = system_interaction_dict[(system['name'], nb_name, 'g')]
                    linestyle = ['-', '--', ':', '-.'][s % 4]
                    line, = ax.plot(r, g, linestyle=linestyle, label=label)
                except KeyError:
                    print('..no data..')

            if system_combination['show-npt']:
                try:
                    r = npt_system_interaction_dict[(system['name'], nb_name, 'r')]
                    g = npt_system_interaction_dict[(system['name'], nb_name, 'g')]
                    color = ff_colors[ff_name]
                    line, = ax.plot(r, g, linestyle='-', label=label, color=color, linewidth=0.8)
                except KeyError:
                    print('..no data..')

            register_in_legend(line, label)

        # plot additional
        matching_dists = (ad for ad in additional_dists if nb_name in ad['show-for'])
        for a, add_dist in enumerate(matching_dists):
            data = np.loadtxt(add_dist['path'])
            x = data.T[0]/10  # votca and vmd both give g(r) value for bin symmetrically around r
            y = data.T[1]
            linestyle = ['--', '-.'][a % 2]
            label = add_dist['name']
            line, = ax.plot(x, y, linestyle=linestyle, color='k', label=label, linewidth=0.8)
            register_in_legend(line, label)

        # plot literature
        if show_lit_data and nb_name in first_peak_rdf_lit_data:
            for data in first_peak_rdf_lit_data[nb_name]:
                peak = data['peak']
                label = data['source-short']
                zot = data['source-zotero']
                if zot in marker_color_dict:
                    marker_color_ndx_here = marker_color_dict[zot]
                else:
                    marker_color_ndx_here = marker_color_ndx
                    marker_color_dict[zot] = marker_color_ndx
                # wrap around so many literature points cannot run past the palette
                marker_color = marker_colors[marker_color_ndx_here % len(marker_colors)]
                line, = ax.plot([peak[0]], [peak[1]], label=label, marker='x', linestyle='',
                                zorder=10, color=marker_color)
                marker_color_ndx += 1
                register_in_legend(line, label)

        ax.set_ylim(0)
        ax.set_xlim(0.15, 0.53)
        ax.set_xlabel(r"$r$ / nm")
    axes_row[0].set_ylabel(r"$g^\text{O - Cat.}(r)$")

    # preferred legend order; positions that do not exist (e.g. because a
    # system had no data) are skipped instead of raising IndexError
    order = [idx for idx in (0, 2, 1, 3, 4) if idx < len(legend_handles)]
    handles = [legend_handles[idx] for idx in order]
    labels = [legend_labels[idx] for idx in order]
    axes_row[1].legend(handles, labels, ncol=1, loc='upper right')

In [None]:
# plot densities vs concentration
def plot_sub_rho(axes_row):
    """Plot density versus salt concentration for LiCl (panel 0) and CaCl2 (panel 1).

    Args:
        axes_row: indexable collection of at least two axes.

    Simulation results from ``df_dens`` are averaged over index level 2 and
    shown with the standard deviation as error bars; the literature curve from
    ``density_w_lit_dict`` is drawn in black.  The legend is attached to
    ``axes_row[0]``.
    """
    ylim_dict = {
        'water-licl': (0.99, 1.12),
        'water-nacl': (0.99, 1.2),
        'water-kcl': (0.99, 1.25),
        'water-cacl2_': (0.99, 1.45),
    }
    ff_names_to_show = (
        'eccr1-co1.2',
        'iff-altern5-eccr1-co1.2-nopc',
        'netz-co0.9tc',
        'iff-altern5-netz-co0.9-nopc',
    )

    for s, sys_type in enumerate(('water-licl', 'water-cacl2_')):
        ax = axes_row[s]

        for ff in (force_fields[ffn] for ffn in ff_names_to_show):
            index = (sys_type, ff['name'], slice(None), slice(None))
            # group by index level 2; the `axis` keyword of groupby is
            # deprecated in recent pandas and 0 is its default anyway
            x = df_dens.loc[index, 'concentration'].groupby(level=2).mean().to_numpy()
            density_by_level = df_dens.loc[index, 'density'].groupby(level=2)
            y = density_by_level.mean().to_numpy()
            yerr = density_by_level.std().to_numpy()
            ax.errorbar(x, y, yerr=yerr, marker='.', linestyle=':',
                        label=ff_short_names[ff['name']], color=ff_colors[ff['name']])

        # literature
        sys_type_data = density_w_lit_dict[sys_type]
        if 'mass-fraction' in sys_type_data:
            w = sys_type_data['mass-fraction']
        else:
            w = mass_fraction_from_molality(sys_type_data['molality'], sys_type_data['molar-mass'])
        densities = interpolate_density(sys_type_data['density'], 300)
        c = concentration_from_mass_fraction(w, sys_type_data['molar-mass'], densities)
        ax.plot(c, densities, '.-', color='k', label="exp.")

        ax.set_xlim(0, 5.1)
        ax.set_ylim(ylim_dict[sys_type])
        ax.set_xlabel(r"$c$ in mol/l")

        # less ticks on first plot
        if s == 0:
            ax.locator_params(axis='y', nbins=2)
    axes_row[0].set_ylabel(r"$\rho$ in g/ml")

    # reorder the legend so that the entry at position 0 comes last
    handles, labels = axes_row[0].get_legend_handles_labels()
    order = [1, 2, 3, 4, 0]
    handles, labels = [handles[idx] for idx in order], [labels[idx] for idx in order]
    axes_row[0].legend(handles, labels, ncol=1, loc=(0.02, 0.38))

In [None]:
def plot_sub_diff(axes_row):
    """Plot the relative diffusion coefficient D/D_0 versus salt concentration.

    Args:
        axes_row: indexable collection of at least two axes; panel 0 shows
            'water-licl', panel 1 'water-cacl2_'.

    Simulation values ('Dcorr' from ``df_diff``) are divided by the value of
    the pure-water system of the same force field.  The literature data from
    ``water_diff_lit_data`` are divided by their first entry; their molalities
    are converted to concentrations by interpolating the 'crc-liquid-data'
    table of ``osmp_lit_dict``.
    """
    ylim_dict = {
        'water-cacl2_': (0.08, 1.15),
        'water-kcl': (0.08, 1.25),
        'water-licl': (0.3, 1.15),
        'water-nacl': (0.3, 1.15),
    }
    ff_names_to_show = (
        'eccr1-co1.2',
        'iff-altern5-eccr1-co1.2-nopc',
        'netz-co0.9tc',
        'iff-altern5-netz-co0.9-nopc',
    )

    for s, sys_type in enumerate(('water-licl', 'water-cacl2_')):
        ax = axes_row[s]

        for ff in (force_fields[ffn] for ffn in ff_names_to_show):
            index = (sys_type, ff['name'], slice(None))
            index_pure = ('water-pure', ff['name'], 0.0)
            # group by index level 2; the `axis` keyword of groupby is
            # deprecated in recent pandas and 0 is its default anyway
            x = df_dens.loc[index, 'concentration'].groupby(level=2).mean().to_numpy()
            y = (df_diff.loc[index, 'Dcorr'] / df_diff.at[index_pure, 'Dcorr']).to_numpy()
            # NOTE(review): y is normalized by the pure-water value but yerr
            # is not -- looks inconsistent if 'Derr' is an absolute error;
            # confirm before relying on the error bars
            yerr = df_diff.loc[index, 'Derr'].to_numpy()
            ax.errorbar(x, y, yerr=yerr, marker='.', linestyle=':',
                        label=ff_short_names[ff['name']], color=ff_colors[ff['name']])

        # literature
        molality = water_diff_lit_data[sys_type]['molality']
        # from molality to concentration we can use crc data
        crc_data = osmp_lit_dict[sys_type]['crc-liquid-data']
        x_lit = np.interp(molality, crc_data['molality'], crc_data['concentration'],
                          left=np.nan, right=np.nan)
        # normalize a copy: an in-place division would overwrite the shared
        # literature data with the normalized values
        y_lit = np.asarray(water_diff_lit_data[sys_type]['diffusion-coefficient'], dtype=float)
        y_lit = y_lit / y_lit[0]
        ax.plot(x_lit, y_lit, '-', marker='.', color='k', label="exp.")

        ax.set_xlim(0, 5.1)
        ax.set_ylim(ylim_dict[sys_type])

        ax.set_xlabel(r"$c$ in mol/l")
    axes_row[0].set_ylabel(r"$D / D_0$")

In [None]:
def plot_sub_osmp(axes_row):
    """Plot the osmotic coefficient versus concentration for LiCl and CaCl2.

    Args:
        axes_row: indexable collection of at least two axes; panel 0 shows
            'water-licl', panel 1 'water-cacl2_'.

    The simulated osmotic pressure 'Pi' from ``df_osmp`` (rows of kind 'prod')
    and its error are divided by the van 't Hoff pressure at 300 for the
    measured inner concentration.  Literature values ('Guendouzi 2001') are
    drawn in black and a thin grey line marks the value 1.
    """
    ylim_dict = {
        'water-cacl2_': (0.0, 4.2),
        'water-kcl': (0.0, 1.5),
        'water-licl': (0.0, 1.95),
        'water-nacl': (0.0, 1.65),
    }
    ff_names_to_show = (
        'eccr1-co1.2',
        'iff-altern5-eccr1-co1.2-nopc',
        'netz-co0.9tc',
        'iff-altern5-netz-co0.9-nopc',
    )

    for column, stn in enumerate(('water-licl', 'water-cacl2_')):
        st = system_types[stn]
        ax = axes_row[column]
        n_ions = sum(st['n_cation_anion'])

        for ffn in ff_names_to_show:
            force_fields[ffn]  # lookup only: unknown force-field names fail here
            system_names = tuple(
                system['name'] for system in system_generator(*systems_osmp)
                if ffn == system['force-field']['name'] and system['type']['name'] == stn
            )
            for osmp_name in osmp_methods:
                rows = (system_names, osmp_name, 'prod')
                x = df_osmp.loc[rows, 'c_inner'].to_numpy()
                xerr = df_osmp.loc[rows, 'c_inner_err'].to_numpy()
                # ideal reference pressure used to normalize value and error
                ideal_pressure = vant_Hoff_osmotic_pressure(x, n_ions, 300)
                y = (df_osmp.loc[rows, 'Pi'] / ideal_pressure).to_numpy()
                yerr = (df_osmp.loc[rows, 'Pi_err'] / ideal_pressure).to_numpy()
                ax.errorbar(x=x, y=y, xerr=xerr, yerr=yerr, color=ff_colors[ffn],
                            label=ff_short_names[ffn], linestyle=':', marker='.')

        # lit data
        lit_x, lit_y = get_osmotic_coeff_lit(osmp_lit_dict, stn, 'Guendouzi 2001')
        ax.plot(lit_x, lit_y, marker='.', linestyle='-', color='k', label='exp.', zorder=0, markersize=4)

        # reference line at 1 (the van 't Hoff value after normalization)
        ax.axhline(1, marker='', linestyle='-', color='grey', linewidth=0.5)
        ax.set_xlim(0, 5.5)
        ax.set_ylim(ylim_dict[stn])

        ax.set_xlabel(r"$c$ in mol/l")
    axes_row[0].set_ylabel(r"$\phi$")
        

# archive

### fill dataframe with ΔU (TODO: average over pure water endpoints)

In [None]:
# directly fills df_ti and df_ti2
def fill_df_U(get_std_from_gmx_energy=True, use_total_energy=True):
    """Extract end-point energies with ``gmx energy`` and store ΔU.

    For every system type, force-field pair and matching TI setting, the
    energy of the first and the last lambda is read from
    '<lambda>-long/prod/ener.edr' inside the TI working directory.

    Args:
        get_std_from_gmx_energy: if True, the two numbers following the term
            name in the ``gmx energy`` summary line are used as mean and
            uncertainty, and 'ΔU-mean' / 'ΔU-std' are written to ``df_ti2``.
            If False, the time series is split into ``n-samples`` blocks,
            the block means are stored in ``df_ti`` and the per-sample ΔU is
            written to the 'total' rows of ``df_ti``.
        use_total_energy: select the 'Total-Energy' term instead of
            'Potential'.

    End points for which ``gmx energy`` fails or prints no summary line are
    reported with '.. no data ..' and skipped.
    """
    # total energy should be used (U = TE)
    gmx_term = 'Total-Energy' if use_total_energy else 'Potential'  # name piped into gmx energy
    energy_label = 'Total Energy' if use_total_energy else 'Potential'  # name in output/xvg
    # 'Total Energy' splits into two words, which shifts the numbers by one
    mean_std_slice = slice(2, 4) if use_total_energy else slice(1, 3)

    for st_name, st in system_types_ti.items():
        print(f"system-type {st_name}")
        for ffp_name, ffp in force_field_pairs_ti.items():
            print(f"  force-field-pair {ffp_name}")
            sys_pairs = tuple(sys_pair_gen_ti({st_name: st}, {ffp_name: ffp}))
            assert len(sys_pairs) == 1
            ffp, systemA, systemB = sys_pairs[0]
            sys_comp_name = systemA['name'].split('/')[0]
            selected_settings = {k: v for k, v in ti_settings.items()
                                 if v['type'] in ffp['ti-setting-types']}
            for tis_name, tis in selected_settings.items():
                working_dir = f"ti/{sys_comp_name}/{ffp_name}/{tis_name}"
                n_samples = tis['n-samples']

                # fill df
                with WorkingDir(working_dir):
                    for lambda_ in (tis['lambdas'][0], tis['lambdas'][-1]):
                        print(f"{lambda_:.2f}")
                        if n_samples < 1:
                            continue
                        # one gmx energy call per end point is enough; every
                        # sample would get the identical output
                        try:
                            stdout = run_bash(f"gmx energy -f {lambda_:.2f}-long/prod/ener.edr -o {lambda_:.2f}/prod/pot.xvg <<< '{gmx_term}'", print_on_error=False)
                        except subprocess.CalledProcessError:
                            print(".. no data ..")
                            continue

                        if get_std_from_gmx_energy:
                            summary = None
                            for line in stdout.splitlines():
                                if line.startswith(energy_label):
                                    summary = line.split()[mean_std_slice]
                            if summary is None:
                                # never fall back to values of a previous end point
                                print(".. no data ..")
                                continue
                            pot_mean, pot_std = map(float, summary)
                            df_ti.at[(st_name, ffp_name, tis_name, 0, lambda_), 'pot'] = pot_mean  # abuse of rows
                            df_ti.at[(st_name, ffp_name, tis_name, 1, lambda_), 'pot'] = pot_std  # abuse of rows
                        else:
                            data, _ = gt.xvg.load(f"{lambda_:.2f}/prod/pot.xvg")
                            run_bash(f"rm -f {lambda_:.2f}/prod/pot.xvg")
                            # equally sized consecutive blocks, one per sample
                            block_len = len(data) // n_samples
                            for sample in range(n_samples):
                                block_data = data.iloc[block_len * sample:block_len * (sample + 1)]
                                pot_mean = block_data[energy_label].mean()
                                df_ti.at[(st_name, ffp_name, tis_name, sample, lambda_), 'pot'] = pot_mean

                # difference: λ=1 minus λ=0
                if get_std_from_gmx_energy:
                    pot_0 = df_ti.at[(st_name, ffp_name, tis_name, 0, 0.0), 'pot']  # abuse of rows
                    pot_1 = df_ti.at[(st_name, ffp_name, tis_name, 0, 1.0), 'pot']  # abuse of rows
                    pot_0_std = df_ti.at[(st_name, ffp_name, tis_name, 1, 0.0), 'pot']  # abuse of rows
                    pot_1_std = df_ti.at[(st_name, ffp_name, tis_name, 1, 1.0), 'pot']  # abuse of rows
                    df_ti.loc[(st_name, ffp_name, tis_name, slice(None), slice(None)), 'pot'] = np.nan  # end abuse of rows
                    df_ti2.at[(st_name, ffp_name, tis_name), 'ΔU-mean'] = pot_1 - pot_0
                    # uncertainties of the two end points added in quadrature
                    df_ti2.at[(st_name, ffp_name, tis_name), 'ΔU-std'] = np.sqrt(pot_1_std**2 + pot_0_std**2)
                else:
                    for sample in range(n_samples):
                        pot_0 = df_ti.at[(st_name, ffp_name, tis_name, sample, 0.0), 'pot']
                        pot_1 = df_ti.at[(st_name, ffp_name, tis_name, sample, 1.0), 'pot']
                        ΔU = pot_1 - pot_0
                        df_ti.at[(st_name, ffp_name, tis_name, sample, 'total'), 'ΔU'] = ΔU
                
fill_df_U(use_total_energy=True, get_std_from_gmx_energy=True)

df_ti.head()