From 7c8aa203b0c9297c9ba23750c83448204c1e07f1 Mon Sep 17 00:00:00 2001 From: bikegeek <3753118+bikegeek@users.noreply.github.com> Date: Tue, 25 Jul 2023 13:28:46 -0600 Subject: [PATCH] Feature 213 db load instructions (#214) * Issue #213 initial commit of script and config files * support added to create and delete database, and modify xml specification file * added missing database name to the db drop and create commands * delete database before creating * fix syntax on drop and create database, terminating single quote should enclose the db name * refactored to only handle database prep * first commit of code that specifically updates the XML specification file * rename dataclass, remove call to non-existent method * update logging messages * fix syntax for granting access * another fix syntax for granting access * yet another fix syntax for granting access * fix grant command by cleaning up spaces and quotations * fix grant command with terminating quote * fix schema command * fix schema command-missing space * replace check_output with run for subprocess * use correct path to schema file * make entries more generic * Completed instructions, first commit * fixed syntax for first code block * Added text to use the existing example for subsetting the data. * Added explicit directions for running the met_db_load script. * Fixed formatting of list. 
* fix xml_specification setting example * fix instructions in Load data section to use consistent language * add troubleshooting content when database is non-existent * attempt to reformat troubleshooting table * additional content to troubleshooting using original formatting * clean up formatting in troubleshooting table * fix grammar in troubleshooting * RST syntax errors --- .../sql/scripts/data_loading_config.yaml | 32 ++++ .../sql/scripts/db_load_specification.xml | 35 ++++ METdbLoad/sql/scripts/db_prep.py | 150 ++++++++++++++++++ METdbLoad/sql/scripts/generate_xml_spec.py | 121 ++++++++++++++ docs/Users_Guide/load_data.rst | 148 +++++++++++++++-- 5 files changed, 476 insertions(+), 10 deletions(-) create mode 100644 METdbLoad/sql/scripts/data_loading_config.yaml create mode 100644 METdbLoad/sql/scripts/db_load_specification.xml create mode 100644 METdbLoad/sql/scripts/db_prep.py create mode 100644 METdbLoad/sql/scripts/generate_xml_spec.py diff --git a/METdbLoad/sql/scripts/data_loading_config.yaml b/METdbLoad/sql/scripts/data_loading_config.yaml new file mode 100644 index 00000000..528e6cdf --- /dev/null +++ b/METdbLoad/sql/scripts/data_loading_config.yaml @@ -0,0 +1,32 @@ + +# Configuration file used to load MET ASCII data into +# a database. + +dbname: dummy_dbname +username: dbuser +password: dbpassword +host: localhost +port: 1234 + +# Location (full path and schema file) to the sql schema. Replace with the location +# of your METdataio source code. PROVIDE THE FULL PATH TO THE SCHEMA FILE, NO RELATIVE PATHS OR +# ENVIRONMENT VARIABLES. +schema_location: /full-path-to/mv_mysql.sql + +# Name and location of the XML specification file +xml_specification: /full-path-to/db_load_specification.xml + +# Databases are grouped, select an existing group. +group: Testing +description: My test database + +# Directory (full path) to where the MET data resides. 
+data_dir: /path-to-met-data + +# Set the appropriate setting to True to indicate what type of data is +# being loaded. +load_stat: True +load_mode: False +load_mtd: False +load_mpr: False +load_orank: False diff --git a/METdbLoad/sql/scripts/db_load_specification.xml b/METdbLoad/sql/scripts/db_load_specification.xml new file mode 100644 index 00000000..25bd8c93 --- /dev/null +++ b/METdbLoad/sql/scripts/db_load_specification.xml @@ -0,0 +1,35 @@ + + + mysql + localhost:3306 + mv_integrating_fire + mvadmin + 160GiltVa0D5M + + +/scratch/vdunham/ + + true + 1 + true + false + false + false + false + + true + false + false + false + false + + + + point_stat + + + + RAL Projects + MET output generated for SOARS research. + + diff --git a/METdbLoad/sql/scripts/db_prep.py b/METdbLoad/sql/scripts/db_prep.py new file mode 100644 index 00000000..eee7f496 --- /dev/null +++ b/METdbLoad/sql/scripts/db_prep.py @@ -0,0 +1,150 @@ +''' + Creates the METviewer database to store MET output. Requires a YAML configuration + file (data_loading_config.yaml) with relevant database information (i.e. username, + password, etc.). +''' +import os.path +import subprocess + +import yaml +import argparse +from dataclasses import dataclass +import logging + +logging.basicConfig(encoding='utf-8', level=logging.DEBUG) + + +@dataclass +class DatabaseInfo: + ''' + Data class for keeping the relevant information for loading the + METviewer database. + ''' + + db_name: str + user_name: str + password: str + host_name: str + port_number: int + schema_path: str + config_file_dir: str + + def __init__(self, config_obj: dict, config_file_dir: str): + ''' + + Args: + config_obj: A dictionary containing the + settings to be used in creating the database. 
+ ''' + + self.db_name = config_obj['dbname'] + self.user_name = config_obj['username'] + self.password = config_obj['password'] + self.host_name = config_obj['host'] + self.port_number = config_obj['port'] + self.schema_path = config_obj['schema_location'] + self.config_file_dir = config_file_dir + + def create_database(self): + ''' + Create the commands to create the database. + + Returns: None + + ''' + # Command to create the database, set up permissions, and load the schema. + uname_pass_list = ['-u', self.user_name, ' -p', self.password, ' -e '] + uname_pass = ''.join(uname_pass_list) + create_list = ["'create database ", self.db_name, "'"] + create_str = ''.join(create_list) + create_cmd = uname_pass + create_str + logging.debug(f'database create string: {create_cmd}') + + # Permissions + perms_list = ['"',"GRANT INSERT, DELETE, UPDATE, INDEX, DROP ON " , + self.db_name, + '.* to ', "'mvuser'", "@'%'", '"'] + + perms_str = ''.join(perms_list) + perms_cmd = uname_pass + perms_str + logging.debug(f'database grant permissions string: {perms_cmd}') + + + # Schema + schema_full_path = os.path.join(self.schema_path, + 'METdataio/METdbLoad/sql/mv_mysql.sql') + schema_list = [ "-umvadmin -p",self.password, " ", self.db_name, ' < ', + schema_full_path] + schema_cmd = ''.join(schema_list) + logging.debug(f'Schema command: {schema_cmd}') + + + + try: + self.delete_database() + except subprocess.CalledProcessError: + logging.info("Database doesn't exist. Ignoring this error.") + pass + + try: + create_db = subprocess.run(['mysql', create_cmd]) + db_permissions = subprocess.run(['mysql', perms_cmd]) + db_schema = subprocess.run(['mysql', schema_cmd]) + except subprocess.CalledProcessError: + logging.error('Error in executing mysql commands') + + def delete_database(self): + ''' + Create the commands to delete a database. 
+ Returns: None + + ''' + + # Command to delete the database + uname_pass_list = ['-u', self.user_name, ' -p', self.password, ' -e '] + uname_pass = ''.join(uname_pass_list) + drop_list = ["'drop database ", self.db_name, "'"] + drop_str = ''.join(drop_list) + drop_cmd = uname_pass + drop_str + logging.debug(f'Drop database command: {drop_cmd}') + + try: + _ = subprocess.run(['mysql', drop_cmd]) + + except subprocess.CalledProcessError: + logging.error('Error in executing mysql commands') + + +if __name__ == "__main__": + + # Create a parser + parser = argparse.ArgumentParser() + + # Add arguments to the parser + parser.add_argument('action') + parser.add_argument('config_file') + + # Parse the arguments + args = parser.parse_args() + + # Get arguments value + action = args.action + config_file = args.config_file + + action_requested = str(action).lower() + logging.debug(f'Action requested: {action_requested}') + logging.debug(f'YAML Config file to use: {str(config_file)}') + config_file_dir = os.path.dirname(config_file) + logging.debug(f'Directory of config file: {config_file_dir}') + + with open(config_file, 'r') as cf: + db_config_info = yaml.safe_load(cf) + db_loader = DatabaseInfo(db_config_info, config_file_dir) + if action_requested == 'create': + db_loader.create_database() + elif action_requested == 'delete': + db_loader.delete_database() + else: + logging.warning( + f'{action_requested} is not a supported option. Only "create" and ' + f'"delete" are supported options.') diff --git a/METdbLoad/sql/scripts/generate_xml_spec.py b/METdbLoad/sql/scripts/generate_xml_spec.py new file mode 100644 index 00000000..423f1913 --- /dev/null +++ b/METdbLoad/sql/scripts/generate_xml_spec.py @@ -0,0 +1,121 @@ +''' + Creates the METviewer database to store MET output. 
+''' +import os.path +import subprocess + +import yaml +import argparse +from dataclasses import dataclass +import logging + +logging.basicConfig(encoding='utf-8', level=logging.DEBUG) + + +@dataclass +class DatabaseLoadingInfo: + ''' + Data class for keeping the relevant information for loading the + METviewer database. + ''' + + db_name: str + user_name: str + password: str + host_name: str + port_number: int + group: str + schema_path: str + data_dir: str + xml_spec_file: str + load_stat: bool + load_mode: bool + load_mtd: bool + load_mpr: bool + load_orank: bool + config_file_dir: str + + def __init__(self, config_obj: dict, config_file_dir:str): + ''' + + Args: + config_obj: A dictionary containing the + settings to be used in creating the database. + ''' + + self.db_name = config_obj['dbname'] + self.user_name = config_obj['username'] + self.password = config_obj['password'] + self.host_name = config_obj['host'] + self.port_number = config_obj['port'] + self.group = config_obj['group'] + self.schema_path = config_obj['schema_location'] + self.data_dir = config_obj['data_dir'] + self.xml_spec_file = config_obj['xml_specification'] + self.load_stat = config_obj['load_stat'] + self.load_mode = config_obj['load_mode'] + self.load_mtd = config_obj['load_mtd'] + self.load_mpr = config_obj['load_mpr'] + self.load_orank = config_obj['load_orank'] + self.description = config_obj['description'] + self.config_file_dir = config_file_dir + + + def update_spec_file(self): + ''' + Edit the XML specification file to reflect the settings in the + YAML configuration file. 
+ ''' + + # Assign the host with the host and port assigned in the YAML config file + import xml.etree.ElementTree as et + tree = et.parse(self.xml_spec_file) + root = tree.getroot() + + for host in root.iter('host'): + host.text = self.host_name + ":" + str(self.port_number) + + for dbname in root.iter('database'): + dbname.text = self.db_name + + for user in root.iter('user'): + user.text = self.user_name + + for password in root.iter('password'): + password.text = self.password + + for data_folder in root.iter('folder_tmpl'): + data_folder.text = self.data_dir + + for group in root.iter('group'): + group.text = self.group + + for desc in root.iter('description'): + desc.text = self.description + + tree.write(os.path.join(self.config_file_dir, 'load_met.xml')) + + + +if __name__ == "__main__": + + # Create a parser + parser = argparse.ArgumentParser() + + # Add arguments to the parser + parser.add_argument('config_file') + + # Parse the arguments + args = parser.parse_args() + + # Get arguments value + config_file = args.config_file + + logging.debug(f'Config file to use: {str(config_file)}') + config_file_dir = os.path.dirname(config_file) + logging.debug(f'Directory of config file: {config_file_dir}') + + with open(config_file, 'r') as cf: + db_config_info = yaml.safe_load(cf) + db_loader = DatabaseLoadingInfo(db_config_info, config_file_dir) + db_loader.update_spec_file() diff --git a/docs/Users_Guide/load_data.rst b/docs/Users_Guide/load_data.rst index 71162a32..0de91ceb 100644 --- a/docs/Users_Guide/load_data.rst +++ b/docs/Users_Guide/load_data.rst @@ -1,8 +1,123 @@ -Load Data -========= +Background +========== -METdbload is used to insert MET output data into the database. The usage -statement: +The METdbLoad module provides support for inserting MET output data into the database. + +Before using the METdbLoad module, the database must exist and have the proper permissions +(i.e. grant privileges to insert, delete, update, and index). 
A schema file, *mv_mysql.sql*, is available in the +METdataio/METdbLoad/sql/ directory for importing into the database. + +The METdbLoad script *met_db_load.py* performs the loading of data based on settings in an XML specification file. + +In the METdataio/METdbLoad/sql/scripts directory, there are two configuration files: + + * db_load_specification.xml + + * data_loading_config.yaml + +The *db_load_specification.xml* is a template XML specification file, and *data_loading_config.yaml* +is a template YAML configuration file. The *data_loading_config.yaml* file contains +information about the database (username, password, database name, etc.). This information is used by the +*generate_xml_spec.py* script to generate the XML specification file, which is then used to load data into the database. + +Generate the XML specification file +----------------------------------- + +Copy the *data_loading_config.yaml* file to a secure location in your workspace, as this file will contain the username +and password to the database. **Do not put this file where it can be read by anyone who should not have access to this +information.** + +.. code-block:: ini + + cp data_loading_config.yaml /path-to-your-dir/ + +Replace the *path-to-your-dir* with the actual path to where this file is to be saved. + +Change directory to the location where the *data_loading_config.yaml* file was copied. + +Open the data_loading_config.yaml file: + +.. 
literalinclude:: ../../METdbLoad/sql/scripts/data_loading_config.yaml + +Update the database information with information relevant to the database you are using: + + * dbname + + * username + + * password + + * host + + * port + + +Update the path to the schema location; provide the full path to the *mv_mysql.sql* schema file: + + * schema_location + +Provide the name and full path to the *db_load_specification.xml* template file, this will be updated +with the settings in this YAML configuration to create a new XML specification file using these settings: + + * xml_specification + +Provide the group and description. The databases in METviewer are grouped, provide the name of the appropriate +group and a brief description of the database in which the data is to be loaded: + + * group + + * description + +Provide the full path to the directory where the MET data to be loaded is saved: + + * data_dir + +Indicate which data types are to be loaded by setting the appropriate settings to True: + + * load_stat + + * load_mode + + * load_mtd + + * load_mpr + + * load_orank + +Generate the new XML specification file by running the following: + +.. code-block:: ini + + cd path-to-METdataio-source/METdataio/METdbLoad/sql/scripts + + *Replace path-to-METdataio-source with the location where the METdataio source code is saved. + + python generate_xml_spec.py path-to/data_loading_config.yaml + + *Replace the path-to with the path to the directory you created to store the copy of the data_loading_config.yaml + file as specified earlier. + +A new XML specification file, *load_met.xml*, will be generated and saved in the +same directory where the YAML configuration file was copied. + +Load data +--------- + +Now the MET data can be loaded in the database using the *met_db_load.py* script in the path-to-METdataio-source/METdataio/METdbLoad/ush +directory. The *path-to-METdataio-source* is the directory where the METdataio source code is saved. + +.. 
code-block:: ini + + cd /path-to-METdataio-source/METdataio/METdbLoad/ush + + * Replace path-to-METdataio-source with the location where the METdataio source code is saved. + + python met_db_load.py /path-to/load_met.xml + + * Replace the path-to with the location where the load_met.xml file was saved. This is the same directory + you created to save the copy of the data_loading_config.yaml file. + +The usage statement: .. code-block:: ini @@ -18,7 +133,7 @@ statement: -h, --help show this help message and exit -index Only process index, do not load data -The **xmlfile** passes information about the MET output files to load +The **xmlfile** is the XML specification file that passes information about the MET output files to load into the database to METdbload. It is an XML file whose top-level tag is and it contains the following elements, divided into functional sections: @@ -138,8 +253,15 @@ functional sections: -Example -_______ +Additional Loading Options +__________________________ + +The load_met.xml specification file created above loads the entire dataset specified in the data_dir setting in the +YAML config file, data_loading_config.yaml. + +A subset of the data can be selected by date and field names (e.g. by model, valid_time, vx_mask). +The load_met.xml specification file can be further modified to accomplish this by adding the date_list and +field_name elements to the XML specification file. Here is a simple example: @@ -213,11 +335,12 @@ _______________ .. list-table:: * - Error: - - ** ERROR: Caught class + - **ERROR: Caught class com.mysql.jdbc.exceptions.MySQLIntegrityConstraintViolationException: Duplicate entry 'CT07-NMM-LIN-R2-0-2005-07-15 12:00:00-2005-07-15 12:00:00-0-2005' - for key 2 + for key 2** + * - Solution: - This error is caused by trying to insert a stat_header record into the database when an identical one already exists. If identical @@ -227,5 +350,10 @@ _______________ stat_header each time a row is inserted. 
However, if a stat_header row already exists in the table with the insert information, then the existing record will be used instead of trying to insert a - dupilcate. + duplicate. + * - Error: + - **ERROR:root: (1049, "Unknown database 'mv_test'") in run_sql Error when connecting to database** + + * - Solution: + - This error is caused when attempting to load data into a database that does not exist. You will need to create the database, set up the appropriate privileges as outlined above, and load the schema using the mv_mysql.sql file.