Add code for generating 15yr populations

nanograv · Feb 1, 2024 · 4a15e92 · 4a15e92
1 parent 87bde6c
commit 4a15e92
Show file tree

Hide file tree

Showing 2 changed files with 606 additions and 0 deletions.
diff --git a/analysis/gen-15yr-pops/gen_holodeck_pops.py b/analysis/gen-15yr-pops/gen_holodeck_pops.py
@@ -0,0 +1,339 @@
+"""Script to generate Holodeck populations, drawing from the 15yr analysis constraints.
+
+See `holodeck-pops.ipynb` for example usage of this data.
+
+Usage
+-----
+See `python gen_holodeck_pop.py -h` for usage information.
+
+Example:
+
+    python gen_holodeck_pops.py -t 20 -f 30 -r 100 -l 10 -m
+
+                                   |     |     |      |   |--> use maxmimum-likelihood values
+                                   |     |     |      |------> 10 loudest binaries in each frequency bin
+                                   |     |     |-------------> 100 realizations of populations
+                                   |     |-------------------> 30 frequency bins
+                                   |-------------------------> 20 years observing baseline = 1/(20yr) lowest frequency
+
+"""
+import argparse
+from pathlib import Path
+import numpy as np
+
+import holodeck as holo
+import holodeck.librarian
+from holodeck.constants import YR
+
+# Choose range of orbital periods of interest
+TDUR = 16.0   # yr
+NFREQS = 60
+NREALS = 103
+### NDRAWS = 101    # NOT IMPLEMENTED YET
+NLOUDEST = 10
+
+# Path to chains, fitting holodeck populations to data, giving parameter posteriors
+PATH_DATA = Path(
+    "/Users/lzkelley/Programs/nanograv/15yr_astro_data/"
+    "phenom/ceffyl_chains/astroprior_hdall/"
+)
+
+# Parameter space corresponding to the fit data
+PSPACE = holo.librarian.param_spaces_classic.PS_Classic_Phenom_Uniform
+
+# Path to save output data
+PATH_OUTPUT = Path(__file__).parent.joinpath("output")
+assert PATH_OUTPUT.is_dir(), f"{PATH_OUTPUT=} does not exist!"
+
+
+def main(args=None):
+    """Top level function that does all the work.
+    """
+
+    # ---- Setup / Initialization
+
+    # load arguments
+    if args is None:
+        args = setup_argparse()
+
+    # load chains (i.e. parameter posterior distributions)
+    chains = load_chains(PATH_DATA)
+
+    # select parameters for this population
+    if args.maxlike:
+        pkey = "ML"
+        pars = get_maxlike_pars_from_chains(chains)
+    else:
+        pkey = "draw"
+        pars = sample_chains(chains)
+
+    # construct output filename
+    output = Path(args.output).resolve()
+    output.mkdir(exist_ok=True)
+    # Find a unique filename, in case other populations have already been created
+    ml_warning = False
+    for num in range(10000):
+        fname = f"t{args.tdur:.1f}yr_nf{args.nfreqs}_nr{args.nreals}_nl{args.nloudest}_{pkey}_{num:04d}.npz"
+        fname = output.joinpath(fname)
+        if not fname.exists():
+            break
+        if (num > 0) and args.maxlike and (ml_warning is False):
+            holo.log.warning("")
+
+    else:
+        raise RuntimeError(f"Could not find a filename that doesn't exist!  e.g. {fname}")
+
+    # ---- Construct population and derived properties
+
+    # Build populations with holodeck
+    data = load_population_for_pars(args, pars)
+
+    # ---- Save to output file
+
+    # Save data
+    np.savez(
+        fname,
+        pspace_name=PSPACE.__name__,
+        data_path=PATH_DATA.resolve(),
+        **data
+    )
+    print(f"Saved size {holo.utils.get_file_size(fname)} : {fname}")
+
+    return
+
+
+def setup_argparse(*args, **kwargs):
+    """Setup parameters/arguments.
+
+    Note that this can be used to set parameters NOT from command-line usage,
+    but in this case the `args` argument must be set to empty.  For example:
+
+    This will load of all the default arguments (NOTE the empty string argument is typically needed):
+        ``args = gen_holodeck_pops.setup_argparse("")``
+
+    This will set the desired parameters, and otherwise load the defaults:
+        ``args = gen_holodeck_pops.setup_argparse("", nloudest=12, nreals=6, maxlike=True)``
+
+    """
+
+    # Setup ArgumentParser and parameters / arguments
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        '-o', '--output', default=PATH_OUTPUT, dest='output', type=str,
+        help="Directory to place output files (created if it doesn't already exist)."
+    )
+    parser.add_argument(
+        '-t', '--dur', default=TDUR, dest='tdur', type=float,
+        help='Observational duration in years.  Determines frequency bins.'
+    )
+    parser.add_argument(
+        '-f', '--nfreqs', default=NFREQS, dest='nfreqs', type=int,
+        help='Number of frequency bins.'
+    )
+    parser.add_argument(
+        '-r', '--nreals', default=NREALS, dest='nreals', type=int,
+        help='Number of realizations.'
+    )
+    parser.add_argument(
+        '-l', '--nloudest', default=NLOUDEST, dest='nloudest', type=int,
+        help='Number of loudest binaries in each frequency bin.'
+    )
+    parser.add_argument(
+        '-m', '--maxlike', action='store_true', default=False, dest='maxlike',
+        help='Use the maximum likelihood parameters, instead of drawing from the posteriors.'
+    )
+
+    # Set custom parameters passed through the `kwargs` argument (i.e. if this function is called from another module)
+    args = parser.parse_args(*args)
+    for kk, vv in kwargs.items():
+        if not hasattr(args, kk):
+            raise KeyError(f"Argparse {args} has no attribute {kk}!")
+        setattr(args, kk, vv)
+
+    return args
+
+
+def load_population_for_pars(args, pars):
+    """Construct a holodeck population.
+
+    Arguments
+    ---------
+    args : argparse.Namespace
+        Arguments for constructing the population and calculating properties.
+        Typically `args` should be loaded using the `setup_argparse` function.
+    pars : dict
+        Binary population parameters for the appropriate parameter space `PSPACE`.
+        Typically the `pars` should be loaded using either the `sample_chains` or the
+        `get_maxlike_pars_from_chains` function.
+
+    Returns
+    -------
+    data : dict
+        Binary population and derived properties.
+        * `number` : ndarray (M, Q, Z, F)
+          Number of binaries in the Universe in each bin.
+          The bins are total mass (M), mass ratio (Q), redshift (Z), and frequency (F).
+        * `hc_ss` : ndarray (F, R, L)
+          GW characteristic strain of the loudest L binaries in each frequency bin (F) and realization (R).
+          The GW frequencies are assumed to be 2x the orbital frequencies (i.e. circular orbits).
+        * `hc_bg` : ndarray (F, R)
+          GW characteristic strain of all binaries besides the L loudest in each frequency bin,
+          for frequency bins `F` and realizations `R`.
+          The GW frequencies are assumed to be 2x the orbital frequencies (i.e. circular orbits).
+        * `sspar` : ndarray (P, F, R, L)
+          Binary parameters of the loudest `L` binaries in each frequency bin `F` for realizations `R`.
+          The P=4 parameters included are {total mass [grams], mass ratio, initial redshift, final redshift},
+          where initial redshift is at the time of galaxy merger, and final redshift is when reaching the frequency bin.
+        * `mtot_edges` : ndarray (M+1,)
+          The edges of the total-mass dimension of the SAM grid, in units of [grams].
+          Note that there are `M+1` bin edges for `M` bins.
+        * `mrat_edges` : ndarray (Q+1,)
+          The edges of the mass-ratio dimension of the SAM grid.
+          Note that there are `Q+1` bin edges for `Q` bins.
+        * `redz_edges` : ndarray (Z+1,)
+          The edges of the redshfit dimension of the SAM grid.
+          Note that there are `Z+1` bin edges for `Z` bins.
+        * `fobs_orb_edges` : ndarray (F+1,)
+          The edges of the orbital-frequency dimension of the SAM grid.
+          Note that there are `F+1` bin edges for `F` bins.
+
+    """
+
+    # Choose the appropriate Parameter Space (from 15yr astro analysis)
+    # Load SAM and hardening model for desired parameters
+    sam, hard = PSPACE.model_for_params(pars)
+
+    fobs_orb_cents, fobs_orb_edges = holo.utils.pta_freqs(args.tdur*YR, args.nfreqs)
+    # fobs_gw_edges = fobs_orb_edges * 2.0
+    # fobs_gw_cents = fobs_orb_cents * 2.0
+
+    # calculate (differential) number of binaries
+    redz_final, diff_num = holo.sams.sam_cyutils.dynamic_binary_number_at_fobs(
+        fobs_orb_cents, sam, hard, holo.cosmo
+    )
+    # integrate to find total number of binaries in each bin
+    edges = [sam.mtot, sam.mrat, sam.redz, fobs_orb_edges]
+    number = holo.sams.sam_cyutils.integrate_differential_number_3dx1d(edges, diff_num)
+    # print(f"Loaded {number.sum():.1e} binaries across frequency range")
+
+    vals = holo.single_sources.ss_gws_redz(
+        edges, redz_final, number,
+        realize=args.nreals, loudest=args.nloudest, params=True,
+    )
+
+    # `sspar` parameters are (total mass, mass ratio, initial redshift, final redshift)
+    hc_ss, hc_bg, sspar, bgpar = vals
+
+    data = dict(
+        number=number, hc_ss=hc_ss, hc_bg=hc_bg, sspar=sspar,
+        mtot_edges=edges[0], mrat_edges=edges[1], redz_edges=edges[2], fobs_orb_edges=edges[3],
+    )
+
+    return data
+
+
+def load_chains(path_data):
+    """Load the MCMC chains from the given path.
+
+    The path must contain the expected files resulting from fitting with `ceffyl`.
+
+    Arguments
+    ---------
+    path_data : `str` or `pathlib.Path`
+        Path to directory containing the `pars.txt` and `chain_1.0.txt` files.
+
+    Returns
+    -------
+    data : dict
+        The values at each step of the MCMC chains for each parameters.
+        For example, the parameters may be:
+            ['hard_time', 'gsmf_phi0', 'gsmf_mchar0_log10',
+            'mmb_mamp_log10', 'mmb_scatter_dex', 'hard_gamma_inner'],
+        in which case each of these will be an entry in the dictionary, where the values are an
+        array of the steps in each of these parameters.
+
+    """
+    path_data = Path(path_data)
+    fname_pars = path_data.joinpath("pars.txt")
+    fname_chains = path_data.joinpath("chain_1.0.txt")
+
+    assert path_data.is_dir(), f"Path to chains '{path_data}' does not exist!"
+    assert fname_chains.is_file(), f"Could not find chain file '{fname_chains}'!"
+    assert fname_pars.is_file(), f"Could not find chain parameters file '{fname_pars}'!"
+
+    # load the names of each parameter
+    chain_pars = np.loadtxt(fname_pars, dtype=str)
+    # load the chains themselves
+    chains = np.loadtxt(fname_chains)
+    # combine names and chains into dictionary
+    data = {name: vals for name, vals in zip(chain_pars, chains.T)}
+    return data
+
+
+def sample_chains(chains):
+    """Sample randomly from the given chains (i.e. parameter posteriors).
+
+    Arguments
+    ---------
+    chains : dict
+        The MCMC parameter values for each of the parameters in this holodeck parameter-space.
+        These chains should typically be loaded using the `load_chains` function.
+
+    Returns
+    -------
+    pars : dict
+        Randomly selected parameters drawn from the `chains`.
+        This will be a single float value for each of the parameters in the holodeck parameter-space,
+        for example:
+            ['hard_time', 'gsmf_phi0', 'gsmf_mchar0_log10',
+            'mmb_mamp_log10', 'mmb_scatter_dex', 'hard_gamma_inner'],
+
+    """
+    nlinks = list(chains.values())[0].size
+    idx = np.random.choice(nlinks)
+    pars = {key: value[idx] for key, value in chains.items()}
+    return pars
+
+
+def get_maxlike_pars_from_chains(chains):
+    """Load the maximum-likelihood (ML) parameters from the given chains (i.e. parameter posteriors).
+
+    KDEs from `kalepy` are used to construct the ML parameters.
+
+    Arguments
+    ---------
+    chains : dict
+        The MCMC parameter values for each of the parameters in this holodeck parameter-space.
+        These chains should typically be loaded using the `load_chains` function.
+
+    Returns
+    -------
+    pars : dict
+        Maximum likelihood parameters drawn from the `chains`.
+        This will be a single float value for each of the parameters in the holodeck parameter-space,
+        for example:
+            ['hard_time', 'gsmf_phi0', 'gsmf_mchar0_log10',
+            'mmb_mamp_log10', 'mmb_scatter_dex', 'hard_gamma_inner'],
+
+    """
+    import kalepy as kale
+
+    # Get maximum likelihood parameters (estimate using KDE)
+    mlpars = {}
+    for name, vals in chains.items():
+        extr = holo.utils.minmax(vals)
+        xx, yy = kale.density(vals, reflect=extr)
+        idx = np.argmax(yy)
+        xmax = xx[idx]
+        mlpars[name] = xmax
+        # print(f"{name:>30s}: {xmax:+5.2f}")
+
+    return mlpars
+
+
+# ---- Run the `main` function when this file is called as a script
+
+if __name__ == "__main__":
+    main()