BEAST-Fitting · karllark · Aug 13, 2024 · Aug 13, 2024 · Aug 13, 2024 · Aug 13, 2024
diff --git a/docs/megabeast/core_api.rst b/docs/megabeast/core_api.rst
@@ -2,8 +2,6 @@
 Core
 ****
 
-.. automodapi:: megabeast.singlepop_dust_model
-
-.. automodapi:: megabeast.mbsettings
+.. automodapi:: megabeast.mbmodel
 
 .. automodapi:: megabeast.helpers
diff --git a/docs/megabeast/images/metal_small_sim_f475w_nsim_plot.png b/docs/megabeast/images/metal_small_sim_f475w_nsim_plot.png
diff --git a/docs/megabeast/images/metal_small_sim_f475w_prior_plot.png b/docs/megabeast/images/metal_small_sim_f475w_prior_plot.png
diff --git a/docs/megabeast/simulate.rst b/docs/megabeast/simulate.rst
@@ -1,11 +1,182 @@
-#############################
-Simulations for the MegaBEAST
-#############################
+###########
+Simulations
+###########
 
-To test that the MegaBEAST is behaving as expected, it is important to
-simulate data and evaluate if the results match the input.
+Simulations of observations can be made based on already computed physics and
+observation model grids.  Uses for such simulations include testing the
+sensitivity of a specific set of observations to BEAST model parameters and
+testing ensemble fitters like the MegaBEAST.
 
-Simulations should be done with using the BEAST tool to simulate the
-fluxes for an ensemble of stars using a BEAST physics+observation model.
-Then the BEAST should be used to fit the simulated fluxes and and the
-resulting BEAST outputs (especially lnp files) used for testing the MegaBEAST.
+Simulations are done using the
+`megabeast.simujlate_obs.gen_SimObs_from_sedgrid` function. The
+script `tools/simulate_obs.py` provides a command line interface and can be run
+using the `mb_simulate_obs` command once the megabeast has been installed.
+Simulations require already created BEAST physics and observation model grids.
+
+The physics model grid includes the ensemble parameters as these are the same as
+the BEAST (see `beast priors <https://beast.readthedocs.io/en/latest/beast_priors.html>`_).
+The module uses already created BEAST physics and
+observation model grids by sampling the full nD prior function that is part of
+the physics model grid.  The observation model grid provides the information on
+the flux uncertainties and biases as well as the completeness.
+
+The ensemble parameters can be changed from those in the BEAST physics model grid
+using ...to be added...
+
+The number of observations to simulate can be explicitly specified by using the `nsim`
+parameter or calculated from the age and mass prior/ensemble models.
+To do the latter, pass the `beastinfo` ASDF file to the `simulate_obs` script.
+This is done by
+computing the mass formed for each unique age in the physics grid  (combination
+of age and mass priors) accounting for stars that have already disappeared and
+dividing by the average mass (mass prior model).  This number of stars is
+simulated at each age and then the completeness function is applied to remove
+all the simulated stars that would not be observed.
+
+The `mb_simulate_obs` also can be used to just simulate extra ASTs. This
+simulation is replacing generating additional input artificial stars and running
+those through a photomtery pipeline. Thus, it is very powerful if you need more
+ASTs for generating MATCH-satisfied ASTs in addition to what you already have
+from the real ASTs. This means that you can avoid extensive/additional real ASTs
+as long as your initial/existing real ASTs properly cover the entire CMD space,
+i.e., a combination of BEAST-optimized ASTs supplemented by MATCH-friendly ASTs
+should be enough. This simulation provides the information on the flux uncertainties,
+biases, and the completeness for the additionally selected model SEDs.
+
+*********
+Toothpick
+*********
+
+The files for the physicsgrid and obsgrid files are required inputs to
+the script.  The output filename is also required.  Note that the extension
+of this file will determine the type of file output (e.g. filebase.fits for
+a FITS file).
+The filter to use for the completeness function is given by the
+`--compl_filter` parameter (default=F475W).
+Set `compl_filter=max` to use the max completeness value across all the filters.
+The SEDs are picked weighted by the product of the grid+prior weights
+and the completeness from the noisemodel.  The grid+prior weights can be replaced
+with either grid, prior, or uniform weights by explicitly setting the `--weight_to_use`
+parameter. With the uniform weight, the SEDs are randomly selected from the given
+physicsgrid, and this option is only valid when `nsim` is explicitly set and useful
+for simulating ASTs. 
+
+.. code-block:: console
+
+   $ mb_simulate_obs -p physicsgrid -n obsgrid -o outfile --nsim 200 --compl_filter=F475W
+
+There are two optional keyword augments specific to the AST simulations: `--complcut`
+and `--magcut`. `--complcut` allows you to exclude model SEDs below the supplied
+completeness cut in the `--compl_filter`. Note that the brightest models will be exlcuded
+as well because of the rapid drop in completeness at the bright end. `--magcut` allows
+you to exclude model SEDs fainter than the supplied magnitude in the `--compl_filter`.
+
+.. code-block:: console
+
+   $ mb_simulate_obs -p physicsgrid -n obsgrid -o outfile --nsim 100000 --compl_filter=F475W \
+                        --magcut 30 --weight_to_use uniform
+
+The output file gives the simulated data in the observed data columns
+identified in the physicsgrid file along with all the model parameters
+from the physicsgrid file.  The simulated observations in each band are given
+as `band_flux` in physical units (ergs cm^-2 s^-1 A^-1),
+`band_rate` as normalized Vega fluxes (`band_flux`/vega_flux to match how
+the observed data are given), and `band_vega` as vega magnitudes with zero and
+negative fluxes given as -99.999.
+The physicsgrid values without noise/bias are given as `band_input_flux`,
+`band_input_rate`, and `band_input_vega`.
+
+Examples
+--------
+
+Example for the `metal_small` example for a simulation based on the prior model.
+Plot created with `beast plot_cmd`.  Prior model has a flat star formation history
+with a SFR=1e-5 M_sun/year and a Kroupa IMF.
+
+.. code-block:: console
+
+   $ mb_simulate_obs -p beast_metal_small_seds.grid.hd5 \
+                     -n beast_metal_small_noisemodel.grid.hd5 \
+                     -o sim_475w_prior.fits \
+                     --beastinfo_list beast_metal_small_beast_info.asdf
+
+.. image:: images/metal_small_sim_f475w_prior_plot.png
+
+Example for the `metal_small` example for a simulation based on input number of
+stars.  Plot created with `beast plot_cmd`.  Note that not all 1000 simulated
+sources are shown as the simulated sources include those with negative fluxes
+that cannot be plotted on a standard CMD.
+
+.. code-block:: console
+
+   $ mb_simulate_obs -p beast_metal_small_seds.grid.hd5 \
+                     -n beast_metal_small_noisemodel.grid.hd5 \
+                     -o sim_475w_nsim.fits \
+                     --nsim=1000
+
+.. image:: images/metal_small_sim_f475w_nsim_plot.png
+
+High-mass star biased simulations
+---------------------------------
+
+When creating simulated observations, using the standard IMF mass prior will
+skew your catalog to lower-mass stars.  If you wish to have similar weights for
+stars of all masses, use a flat IMF and a log-flat age prior.  To do this,
+set the mass prior to `{'name': 'flat'}` and the age prior to
+`{'name': 'flat_log'}` in `beast_settings.txt` before creating the model grid.
+
+*********
+Truncheon
+*********
+
+The code does not handle the truncheon model at this point.  While this model
+is doable in the BEAST, it has not been done yet due to several potentially
+complex modeling questions for actually using it that might impact how the model
+is implemented.
+
+********
+Plotting
+********
+
+To plot a color-magnitude diagram of the simulated observations, a
+sample call from the command line may be:
+
+.. code-block:: console
+
+   $ beast plot_cmd outfile.fits --mag1 F475W --mag2 F814W --mag3 F475W
+
+where `outfile.fits` may be the output from `simulate_obs`.
+`mag1`-`mag2` is the color, and `mag3` the magnitude.  If you would like to save
+(rather than simply display) the figure, include ``--savefig png`` (or another
+preferred file extension), and the figure will be saved as `outfile_plot.png` in
+the directory of `outfile.fits`.
+
+**************
+Remove Filters
+**************
+
+.. note::
+   This needs to be updated.  remove_filters.py in the beast repository.
+   Maybe move to megabeast or incorporate in to the `beast script` paradigm.
+
+One use case for simulations is to test the impact of specific filters
+on the BEAST results.  One solution is to create multiple physics/observation
+model grids, create simulations from each set of grids, and then fit the
+simulations with the BEAST.  A quicker way to do this is to create the
+physics/observation grid set with the full set of desired filters, create
+the desired simulations, remove filters from the model and simulations as
+needed, and then fit with the BEAST.  This has the benefit of the simulations
+with different filter sets are exactly the same except for the removed filters.
+
+As an example, to remove the filters F275W and F336W from the simulated
+observations contained in 'catfile.fits' and the 'physgrid.hd5'/'obsgrid.hd5'
+set of models use the following command.
+
+.. code-block:: console
+
+   $ python remove_filters.py catfile.fits --physgrid physgrid.hd5 \
+        --obsgrid obsgrid.hd5 --outbase outbase --rm_filters F275W F336W
+
+New physics/observation model grids and simulated observation files are
+created as 'outbase_seds.grid.hd5', 'outbase_noisemodel.grid.hd5', and
+'outbase_cat.fits'.
diff --git a/megabeast/fit_single.py b/megabeast/fit_single.py
@@ -4,9 +4,8 @@
 
 from beast.physicsmodel.grid import SEDGrid
 
-from megabeast.mbsettings import mbsettings
-from megabeast.helpers import get_likelihoods
-from megabeast.singlepop_dust_model import MB_Model, fit_ensemble, lnprob
+from megabeast.helpers import get_likelihoods, read_mbmodel
+from megabeast.mbmodel import MBModel, fit_ensemble, lnprob
 
 
 def main():
@@ -17,12 +16,9 @@
     )
     args = parser.parse_args()
 
-    # read the parameters into a class
-    mbparams = mbsettings(args.settings_file)
-    # should validate the inputs - make sure the required info is provided
-
     # define the model to fit
-    megabeast_model = MB_Model(mbparams)
+    mbparams = read_mbmodel(args.settings_file)
+    megabeast_model = MBModel(mbparams.stellar_model, mbparams.dust_model)
     # print(megabeast_model.physics_model)
 
     # BEAST files used by MegaBEAST

diff --git a/megabeast/helpers.py b/megabeast/helpers.py
@@ -1,17 +1,96 @@
 import numpy as np
 from numpy.random import default_rng
+import h5py
 
+from beast.physicsmodel.grid import SEDGrid
 from beast.tools.read_beast_data import read_lnp_data
 from beast.physicsmodel.grid_weights_stars import compute_bin_boundaries
 
 __all__ = [
+    "read_mbmodel",
+    "read_beast_moddata",
     "get_likelihoods",
     "precompute_mass_multipliers",
     "get_predicted_num_stars",
     "get_predicted_num_stars_simulate",
 ]
 
 
+def read_mbmodel(settings_file):
+    """
+    Read in the settings file and set parameters
+
+    Parameters
+    ----------
+    settings_file : string
+        filename that has the stellar_model and dust_model dictionaries
+    """
+
+    # read everything in as strings
+    with open(settings_file, "r") as f:
+        temp_data = f.readlines()
+    # remove empty lines and comments
+    input_data = [
+        line.strip()
+        for line in temp_data
+        if line.strip() != "" and line.strip()[0] != "#"
+    ]
+    # remove comments that are mid-line (e.g., "x = 5 #comment")
+    for i, line in enumerate(input_data):
+        try:
+            input_data[i] = line[: line.index("#")]
+        except ValueError:
+            pass
+    # if parameters are defined over multiple lines, combine lines
+    for i in reversed(range(len(input_data))):
+        if ("import " not in input_data[i]) and ("=" not in input_data[i]):
+            input_data[i - 1] += input_data[i]
+            del input_data[i]
+
+    # parse it into a dictionary
+    params = {}
+
+    for i in range(len(input_data)):
+        # execute imports
+        if "import " in input_data[i]:
+            exec(input_data[i])
+
+        # extract parameter and value (as strings)
+        else:
+            param = input_data[i].split("=")[0].strip()
+            # exec the string to get parameter values accessible
+            exec(input_data[i])
+            params[param] = eval(param)
+
+    return params
+
+
+def read_beast_moddata(physmodfile, obsmodfile, params):
+    """
+    Read in the BEAST model data.  Only read the physics and observation
+    information needed for the fitting.
+    """
+
+    # get the BEAST physics model info needed
+    #  using SEDGrid as it is faster than beast.tools.read_beast_data.read_sed_data
+    #  only read in the columns specifically needed
+    beast_moddata = {}
+    beast_physmod_param_list = params + ["prior_weight", "grid_weight"]
+
+    sgrid = SEDGrid(physmodfile, backend="disk")
+    for cparam in beast_physmod_param_list:
+        beast_moddata[cparam] = sgrid.grid[cparam]
+
+    # get the completeness from the BEAST observation model
+    #   use the maximum completeness across the bands as the correct obsmodel
+    #   would only have one completeness value per model
+    #   max is the best approximation for the toothpick model (maybe???  average??)
+    with h5py.File(obsmodfile, "r") as obs_hdf:
+        beast_moddata["completeness"] = np.max(obs_hdf["completeness"], axis=1)
+
+    return beast_moddata
+
+
 def get_likelihoods(ppdf_file, beast_model_data):
     """
     Read in the saved BEAST sparse posterior PDFs and divide by the BEAST
@@ -30,7 +109,7 @@
     # BEAST saves posterior PDFs labeled as log(pPDF)
     lnpdata = read_lnp_data(ppdf_file)
 
-    # divide by the BEAST prior weights to return to recover the likelihoods
+    # divide by the BEAST prior weights to recover the likelihoods
     n_lnps, n_stars = lnpdata["indxs"].shape
     for i in range(n_stars):
         indxs = lnpdata["indxs"][:, i]
@@ -102,9 +181,7 @@
     }
 
 
-def get_predicted_num_stars(
-    massmult_info, bphysparams, bphysmod, physmodage
-):
+def get_predicted_num_stars(massmult_info, bphysparams, bphysmod, physmodage):
     """
     Calculate the expected number of stars based on the physics model as
     given on the BEAST model grid including completeness.
@@ -164,9 +241,7 @@
     return n_totstars
 
 
-def get_predicted_num_stars_simulate(
-    massmult_info, bphysparams, bphysmod, physmodage
-):
+def get_predicted_num_stars_simulate(massmult_info, bphysparams, bphysmod, physmodage):
     """
     Calculate the expected number of stars based on the physics model as
     given on the BEAST model grid including completeness.