diff --git a/docs/megabeast/images/metal_small_sim_f475w_nsim_plot.png b/docs/megabeast/images/metal_small_sim_f475w_nsim_plot.png new file mode 100644 index 0000000..af78ffa Binary files /dev/null and b/docs/megabeast/images/metal_small_sim_f475w_nsim_plot.png differ diff --git a/docs/megabeast/images/metal_small_sim_f475w_prior_plot.png b/docs/megabeast/images/metal_small_sim_f475w_prior_plot.png new file mode 100644 index 0000000..212ec59 Binary files /dev/null and b/docs/megabeast/images/metal_small_sim_f475w_prior_plot.png differ diff --git a/docs/megabeast/simulate.rst b/docs/megabeast/simulate.rst index 32e4801..b87b6f5 100644 --- a/docs/megabeast/simulate.rst +++ b/docs/megabeast/simulate.rst @@ -1,11 +1,181 @@ -############################# -Simulations for the MegaBEAST -############################# +########### +Simulations +########### -To test that the MegaBEAST is behaving as expected, it is important to -simulate data and evaluate if the results match the input. +Simulations of observations can be made based on already computed physics and +observation model grids. Uses for such simulations include testing the +sensitivity of a specific set of observations to BEAST model parameters and +testing ensemble fitters like the MegaBEAST. -Simulations should be done with using the BEAST tool to simulate the -fluxes for an ensemble of stars using a BEAST physics+observation model. -Then the BEAST should be used to fit the simulated fluxes and and the -resulting BEAST outputs (especially lnp files) used for testing the MegaBEAST. +Simulations are done using the +`megabeast.simujlate_obs.gen_SimObs_from_sedgrid` function. The +script `tools/simulate_obs.py` provides a command line interface and can be run +using the `mb_simulate_obs` command once the megabeast has been installed. +Simulations require already created BEAST physics and observation model grids. + +The physics model grid includes the ensemble parameters as these are the same as +the BEAST :ref:`beast_priors`. The module uses already created BEAST physics and +observation model grids by sampling the full nD prior function that is part of +the physics model grid. The observation model grid provides the information on +the flux uncertainties and biases as well as the completeness. + +The ensemble parameters can be changed from those in the BEAST physics model grid +using ...to be added... + +The number of observations to simulate can be explicitly specified by using the `nsim` +parameter or calculated from the age and mass prior/ensemble models. +To do the latter, pass the `beastinfo` ASDF file to the `simulate_obs` script. +This is done by +computing the mass formed for each unique age in the physics grid (combination +of age and mass priors) accounting for stars that have already disappeared and +dividing by the average mass (mass prior model). This number of stars is +simulated at each age and then the completeness function is applied to remove +all the simulated stars that would not be observed. + +The `mb_simulate_obs` also can be used to just simulate extra ASTs. This +simulation is replacing generating additional input artificial stars and running +those through a photomtery pipeline. Thus, it is very powerful if you need more +ASTs for generating MATCH-satisfied ASTs in addition to what you already have +from the real ASTs. This means that you can avoid extensive/additional real ASTs +as long as your initial/existing real ASTs properly cover the entire CMD space, +i.e., a combination of BEAST-optimized ASTs supplemented by MATCH-friendly ASTs +should be enough. This simulation provides the information on the flux uncertainties, +biases, and the completeness for the additionally selected model SEDs. + +********* +Toothpick +********* + +The files for the physicsgrid and obsgrid files are required inputs to +the script. The output filename is also required. Note that the extension +of this file will determine the type of file output (e.g. filebase.fits for +a FITS file). +The filter to use for the completeness function is given by the +`--compl_filter` parameter (default=F475W). +Set `compl_filter=max` to use the max completeness value across all the filters. +The SEDs are picked weighted by the product of the grid+prior weights +and the completeness from the noisemodel. The grid+prior weights can be replaced +with either grid, prior, or uniform weights by explicitly setting the `--weight_to_use` +parameter. With the uniform weight, the SEDs are randomly selected from the given +physicsgrid, and this option is only valid when `nsim` is explicitly set and useful +for simulating ASTs. + +.. code-block:: console + + $ mb_simulate_obs -p physicsgrid -n obsgrid -o outfile --nsim 200 --compl_filter=F475W + +There are two optional keyword augments specific to the AST simulations: `--complcut` +and `--magcut`. `--complcut` allows you to exclude model SEDs below the supplied +completeness cut in the `--compl_filter`. Note that the brightest models will be exlcuded +as well because of the rapid drop in completeness at the bright end. `--magcut` allows +you to exclude model SEDs fainter than the supplied magnitude in the `--compl_filter`. + +.. code-block:: console + + $ mb_simulate_obs -p physicsgrid -n obsgrid -o outfile --nsim 100000 --compl_filter=F475W \ + --magcut 30 --weight_to_use uniform + +The output file gives the simulated data in the observed data columns +identified in the physicsgrid file along with all the model parameters +from the physicsgrid file. The simulated observations in each band are given +as `band_flux` in physical units (ergs cm^-2 s^-1 A^-1), +`band_rate` as normalized Vega fluxes (`band_flux`/vega_flux to match how +the observed data are given), and `band_vega` as vega magnitudes with zero and +negative fluxes given as -99.999. +The physicsgrid values without noise/bias are given as `band_input_flux`, +`band_input_rate`, and `band_input_vega`. + +Examples +-------- + +Example for the `metal_small` example for a simulation based on the prior model. +Plot created with `beast plot_cmd`. Prior model has a flat star formation history +with a SFR=1e-5 M_sun/year and a Kroupa IMF. + +.. code-block:: console + + $ mb_simulate_obs -p beast_metal_small_seds.grid.hd5 \ + -n beast_metal_small_noisemodel.grid.hd5 \ + -o sim_475w_prior.fits \ + --beastinfo_list beast_metal_small_beast_info.asdf + +.. image:: images/metal_small_sim_f475w_prior_plot.png + +Example for the `metal_small` example for a simulation based on input number of +stars. Plot created with `beast plot_cmd`. Note that not all 1000 simulated +sources are shown as the simulated sources include those with negative fluxes +that cannot be plotted on a standard CMD. + +.. code-block:: console + + $ mb_simulate_obs -p beast_metal_small_seds.grid.hd5 \ + -n beast_metal_small_noisemodel.grid.hd5 \ + -o sim_475w_nsim.fits \ + --nsim=1000 + +.. image:: images/metal_small_sim_f475w_nsim_plot.png + +High-mass star biased simulations +--------------------------------- + +When creating simulated observations, using the standard IMF mass prior will +skew your catalog to lower-mass stars. If you wish to have similar weights for +stars of all masses, use a flat IMF and a log-flat age prior. To do this, +set the mass prior to `{'name': 'flat'}` and the age prior to +`{'name': 'flat_log'}` in `beast_settings.txt` before creating the model grid. + +********* +Truncheon +********* + +The code does not handle the truncheon model at this point. While this model +is doable in the BEAST, it has not been done yet due to several potentially +complex modeling questions for actually using it that might impact how the model +is implemented. + +******** +Plotting +******** + +To plot a color-magnitude diagram of the simulated observations, a +sample call from the command line may be: + +.. code-block:: console + + $ beast plot_cmd outfile.fits --mag1 F475W --mag2 F814W --mag3 F475W + +where `outfile.fits` may be the output from `simulate_obs`. +`mag1`-`mag2` is the color, and `mag3` the magnitude. If you would like to save +(rather than simply display) the figure, include ``--savefig png`` (or another +preferred file extension), and the figure will be saved as `outfile_plot.png` in +the directory of `outfile.fits`. + +************** +Remove Filters +************** + +.. note:: + This needs to be updated. remove_filters.py in the beast repository. + Maybe move to megabeast or incorporate in to the `beast script` paradigm. + +One use case for simulations is to test the impact of specific filters +on the BEAST results. One solution is to create multiple physics/observation +model grids, create simulations from each set of grids, and then fit the +simulations with the BEAST. A quicker way to do this is to create the +physics/observation grid set with the full set of desired filters, create +the desired simulations, remove filters from the model and simulations as +needed, and then fit with the BEAST. This has the benefit of the simulations +with different filter sets are exactly the same except for the removed filters. + +As an example, to remove the filters F275W and F336W from the simulated +observations contained in 'catfile.fits' and the 'physgrid.hd5'/'obsgrid.hd5' +set of models use the following command. + +.. code-block:: console + + $ python remove_filters.py catfile.fits --physgrid physgrid.hd5 \ + --obsgrid obsgrid.hd5 --outbase outbase --rm_filters F275W F336W + +New physics/observation model grids and simulated observation files are +created as 'outbase_seds.grid.hd5', 'outbase_noisemodel.grid.hd5', and +'outbase_cat.fits'. diff --git a/megabeast/tools/simulate_obs.py b/megabeast/tools/simulate_obs.py index 759b92b..aef353f 100644 --- a/megabeast/tools/simulate_obs.py +++ b/megabeast/tools/simulate_obs.py @@ -2,13 +2,265 @@ import argparse import asdf +from numpy.random import default_rng + +from astropy.table import Table, Column + +from beast.observationmodel.vega import Vega from beast.physicsmodel.grid import SEDGrid +from beast.physicsmodel.priormodel import PriorAgeModel, PriorMassModel +from beast.physicsmodel.grid_weights_stars import compute_bin_boundaries import beast.observationmodel.noisemodel.generic_noisemodel as noisemodel -from beast.observationmodel.observations import gen_SimObs_from_sedgrid +#from beast.observationmodel.observations import gen_SimObs_from_sedgrid from astropy.table import vstack +def gen_SimObs_from_sedgrid( + sedgrid, + sedgrid_noisemodel, + nsim=100, + compl_filter="max", + complcut=None, + magcut=None, + ranseed=None, + vega_fname=None, + weight_to_use="weight", + age_prior_model=None, + mass_prior_model=None, +): + """ + Generate simulated observations using the physics and observation grids. + The priors are sampled as they give the ensemble model for the stellar + and dust distributions (IMF, Av distribution etc.). + The physics model gives the SEDs based on the priors. + The observation model gives the noise, bias, and completeness all of + which are used in simulating the observations. + + Currently written to only work for the toothpick noisemodel. + + Parameters + ---------- + sedgrid: grid.SEDgrid instance + model grid + + sedgrid_noisemodel: beast noisemodel instance + noise model data + + nsim : int + number of observations to simulate + + compl_filter : str + Filter to use for completeness (required for toothpick model). + Set to max to use the max value in all filters. + + complcut : float (defualt=None) + completeness cut for only including model seds above the cut + where the completeness cut ranges between 0 and 1. + + magcut : float (defualt=None) + faint-end magnitude cut for only including model seds brighter + than the given magnitude in compl_filter. + + ranseed : int + used to set the seed to make the results reproducable, + useful for testing + + vega_fname : string + filename for the vega info, useful for testing + + weight_to_use : string (default='weight') + Set to either 'weight' (prior+grid), 'prior_weight', 'grid_weight', + or 'uniform' (this option is valid only when nsim is supplied) to + choose the weighting for SED selection. + + age_prior_model : dict + age prior model in the BEAST dictonary format + + mass_prior_model : dict + mass prior model in the BEAST dictonary format + + Returns + ------- + simtable : astropy Table + table giving the simulated observed fluxes as well as the + physics model parmaeters + """ + n_models, n_filters = sedgrid.seds.shape + flux = sedgrid.seds + + # get the vega fluxes for the filters + _, vega_flux, _ = Vega(source=vega_fname).getFlux(sedgrid.filters) + + # cache the noisemodel values + model_bias = sedgrid_noisemodel["bias"] + model_unc = np.fabs(sedgrid_noisemodel["error"]) + model_compl = sedgrid_noisemodel["completeness"] + + # only use models that have non-zero completeness in all filters + # zero completeness means the observation model is not defined for that filters/flux + # if complcut is provided, only use models above that completeness cut + if complcut is not None: + finalcomplcut = complcut + else: + finalcomplcut = 0.0 + + ast_defined = model_compl > finalcomplcut + sum_ast_defined = np.sum(ast_defined, axis=1) + goodobsmod = sum_ast_defined >= n_filters + + # completeness from toothpick model so n band completeness values + # require only 1 completeness value for each model + # max picked to best "simulate" how the photometry detection is done + if compl_filter.lower() == "max": + model_compl = np.max(model_compl, axis=1) + else: + short_filters = [ + filter.split(sep="_")[-1].upper() for filter in sedgrid.filters + ] + if compl_filter.upper() not in short_filters: + raise NotImplementedError( + "Requested completeness filter not present:" + + compl_filter.upper() + + "\nPossible filters:" + + "\n".join(short_filters) + ) + + filter_k = short_filters.index(compl_filter.upper()) + print("Completeness from %s" % sedgrid.filters[filter_k]) + model_compl = model_compl[:, filter_k] + + # if magcut is provided, only use models brighter than the magnitude cut + # in addition to the non-zero completeness criterion + if magcut is not None: + fluxcut_compl_filter = 10 ** (-0.4 * magcut) * vega_flux[filter_k] + goodobsmod = (goodobsmod) & (flux[:, filter_k] >= fluxcut_compl_filter) + + # initialize the random number generator + rangen = default_rng(ranseed) + + # if the age and mass prior models are given, use them to determine the + # total number of stars to simulate + model_indx = np.arange(n_models) + if (age_prior_model is not None) and (mass_prior_model is not None): + nsim = 0 + # logage_range = [min(sedgrid["logA"]), max(sedgrid["logA"])] + mass_range = [min(sedgrid["M_ini"]), max(sedgrid["M_ini"])] + + # compute the total mass and average mass of a star given the mass_prior_model + nmass = 100 + masspts = np.logspace(np.log10(mass_range[0]), np.log10(mass_range[1]), nmass) + mass_prior = PriorMassModel(mass_prior_model) + massprior = mass_prior(masspts) + totmass = np.trapz(massprior, masspts) + avemass = np.trapz(masspts * massprior, masspts) / totmass + + # compute the mass of the remaining stars at each age and + # simulate the stars assuming everything is complete + gridweights = sedgrid[weight_to_use] + gridweights = gridweights / np.sum(gridweights) + + grid_ages = np.unique(sedgrid["logA"]) + age_prior = PriorAgeModel(age_prior_model) + ageprior = age_prior(grid_ages) + bin_boundaries = compute_bin_boundaries(grid_ages) + bin_widths = np.diff(10 ** (bin_boundaries)) + totsim_indx = np.array([], dtype=int) + for cage, cwidth, cprior in zip(grid_ages, bin_widths, ageprior): + gmods = sedgrid["logA"] == cage + cur_mass_range = [ + min(sedgrid["M_ini"][gmods]), + max(sedgrid["M_ini"][gmods]), + ] + gmass = (masspts >= cur_mass_range[0]) & (masspts <= cur_mass_range[1]) + curmasspts = masspts[gmass] + curmassprior = massprior[gmass] + totcurmass = np.trapz(curmassprior, curmasspts) + + # compute the mass remaining at each age -> this is the mass to simulate + simmass = cprior * cwidth * totcurmass / totmass + nsim_curage = int(round(simmass / avemass)) + + # simluate the stars at the current age + curweights = gridweights[gmods] + if np.sum(curweights) > 0: + curweights /= np.sum(curweights) + cursim_indx = rangen.choice( + model_indx[gmods], size=nsim_curage, p=curweights + ) + + totsim_indx = np.concatenate((totsim_indx, cursim_indx)) + + nsim += nsim_curage + # totsimcurmass = np.sum(sedgrid["M_ini"][cursim_indx]) + # print(cage, totcurmass / totmass, simmass, totsimcurmass, nsim_curage) + + totsimmass = np.sum(sedgrid["M_ini"][totsim_indx]) + print(f"number total simulated stars = {nsim}; mass = {totsimmass}") + compl_choice = rangen.random(nsim) + compl_indx = model_compl[totsim_indx] >= compl_choice + sim_indx = totsim_indx[compl_indx] + totcompsimmass = np.sum(sedgrid["M_ini"][sim_indx]) + print(f"number of simulated stars w/ completeness = {len(sim_indx)}; mass = {totcompsimmass}") + + else: # total number of stars to simulate set by command line input + + if weight_to_use == "uniform": + # sample to get the indices of the picked models + sim_indx = rangen.choice(model_indx[goodobsmod], nsim) + + else: + gridweights = sedgrid[weight_to_use][goodobsmod] * model_compl[goodobsmod] + gridweights = gridweights / np.sum(gridweights) + + # sample to get the indexes of the picked models + sim_indx = rangen.choice(model_indx[goodobsmod], size=nsim, p=gridweights) + + print(f"number of simulated stars = {nsim}") + + # setup the output table + ot = Table() + qnames = list(sedgrid.keys()) + # simulated data + for k, filter in enumerate(sedgrid.filters): + simflux_wbias = flux[sim_indx, k] + model_bias[sim_indx, k] + + simflux = rangen.normal(loc=simflux_wbias, scale=model_unc[sim_indx, k]) + + bname = filter.split(sep="_")[-1].upper() + fluxname = f"{bname}_FLUX" + colname = f"{bname}_RATE" + magname = f"{bname}_VEGA" + ot[fluxname] = Column(simflux) + ot[colname] = Column(ot[fluxname] / vega_flux[k]) + pindxs = ot[colname] > 0.0 + nindxs = ot[colname] <= 0.0 + ot[magname] = Column(ot[colname]) + ot[magname][pindxs] = -2.5 * np.log10(ot[colname][pindxs]) + ot[magname][nindxs] = 99.999 + + # add in the physical model values in a form similar to + # the output simulated (physics+obs models) values + # useful if using the simulated data to interpolate ASTs + # (e.g. for MATCH) + fluxname = f"{bname}_INPUT_FLUX" + ratename = f"{bname}_INPUT_RATE" + magname = f"{bname}_INPUT_VEGA" + ot[fluxname] = Column(flux[sim_indx, k]) + ot[ratename] = Column(ot[fluxname] / vega_flux[k]) + pindxs = ot[ratename] > 0.0 + nindxs = ot[ratename] <= 0.0 + ot[magname] = Column(ot[ratename]) + ot[magname][pindxs] = -2.5 * np.log10(ot[ratename][pindxs]) + ot[magname][nindxs] = 99.999 + + # model parmaeters + for qname in qnames: + ot[qname] = Column(sedgrid[qname][sim_indx]) + + return ot + + def simulate_obs( physgrid_list, noise_model_list,