From 7c8aa203b0c9297c9ba23750c83448204c1e07f1 Mon Sep 17 00:00:00 2001
From: bikegeek <3753118+bikegeek@users.noreply.github.com>
Date: Tue, 25 Jul 2023 13:28:46 -0600
Subject: [PATCH] Feature 213 db load instructions (#214)
* Issue #213 initial commit of script and config files
* support added to create and delete database, and modify xml specification file
* added missing database name to the db drop and create commands
* delete database before creating
* fix syntax on drop and create database, terminating single quote should enclose the db name
* refactored to only handle database prep
* first commit of code that specifically updates the XML specification file
* rename dataclass, remove call to non-existent method
* update logging messages
* fix syntax for granting access
* another fix syntax for granting access
* yet another fix syntax for granting access
* fix grant command by cleaning up spaces and quotations
* fix grant command with terminating quote
* fix schema command
* fix schema command-missing space
* replace check_output with run for subprocess
* use correct path to schema file
* make entries more generic
* Completed instructions, first commit
* fixed syntax for first code block
* Added text to use the existing example for subsetting the data.
* Added explicit directions for running the met_db_load script.
* Fixed formatting of list.
* fix xml_specification setting example
* fix instructions in Load data section to use consistent language
* add troubleshooting content when database is non-existent
* attempt to reformat troubleshooting table
* additional content to troubleshooting using original formatting
* clean up formatting in troubleshooting table
* fix grammar in troubleshooting
* RST syntax errors
---
.../sql/scripts/data_loading_config.yaml | 32 ++++
.../sql/scripts/db_load_specification.xml | 35 ++++
METdbLoad/sql/scripts/db_prep.py | 150 ++++++++++++++++++
METdbLoad/sql/scripts/generate_xml_spec.py | 121 ++++++++++++++
docs/Users_Guide/load_data.rst | 148 +++++++++++++++--
5 files changed, 476 insertions(+), 10 deletions(-)
create mode 100644 METdbLoad/sql/scripts/data_loading_config.yaml
create mode 100644 METdbLoad/sql/scripts/db_load_specification.xml
create mode 100644 METdbLoad/sql/scripts/db_prep.py
create mode 100644 METdbLoad/sql/scripts/generate_xml_spec.py
diff --git a/METdbLoad/sql/scripts/data_loading_config.yaml b/METdbLoad/sql/scripts/data_loading_config.yaml
new file mode 100644
index 00000000..528e6cdf
--- /dev/null
+++ b/METdbLoad/sql/scripts/data_loading_config.yaml
@@ -0,0 +1,32 @@
+
+# Configuration file used to load MET ASCII data into
+# a database.
+
+dbname: dummy_dbname
+username: dbuser
+password: dbpassword
+host: localhost
+port: 1234
+
+# Location (full path and schema file) to the sql schema. Replace with the location
+# of your METdataio source code. PROVIDE THE FULL PATH TO THE SCHEMA FILE, NO RELATIVE PATHS OR
+# ENVIRONMENT VARIABLES.
+schema_location: /full-path-to/mv_mysql.sql
+
+# Name and location of the XML specification file
+xml_specification: /full-path-to/db_load_specification.xml
+
+# Databases are grouped, select an existing group.
+group: Testing
+description: My test database
+
+# Directory (full path) to where the MET data resides.
+data_dir: /path-to-met-data
+
+# Set the appropriate setting to True to indicate what type of data is
+# being loaded.
+load_stat: True
+load_mode: False
+load_mtd: False
+load_mpr: False
+load_orank: False
diff --git a/METdbLoad/sql/scripts/db_load_specification.xml b/METdbLoad/sql/scripts/db_load_specification.xml
new file mode 100644
index 00000000..25bd8c93
--- /dev/null
+++ b/METdbLoad/sql/scripts/db_load_specification.xml
@@ -0,0 +1,35 @@
+
+
+ mysql
+ localhost:3306
+ mv_integrating_fire
+ mvadmin
+ 160GiltVa0D5M
+
+
+/scratch/vdunham/
+
+ true
+ 1
+ true
+ false
+ false
+ false
+ false
+
+ true
+ false
+ false
+ false
+ false
+
+
+
+ point_stat
+
+
+
+ RAL Projects
+ MET output generated for SOARS research.
+
+
diff --git a/METdbLoad/sql/scripts/db_prep.py b/METdbLoad/sql/scripts/db_prep.py
new file mode 100644
index 00000000..eee7f496
--- /dev/null
+++ b/METdbLoad/sql/scripts/db_prep.py
@@ -0,0 +1,150 @@
+'''
+ Creates the METviewer database to store MET output. Requires a YAML configuration
+ file (data_loading_config.yaml) with relevant database information (i.e. username,
+ password, etc.).
+'''
+import os.path
+import subprocess
+
+import yaml
+import argparse
+from dataclasses import dataclass
+import logging
+
+logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
+
+
+@dataclass
+class DatabaseInfo:
+ '''
+ Data class for keeping the relevant information for loading the
+ METviewer database.
+ '''
+
+ db_name: str
+ user_name: str
+ password: str
+ host_name: str
+ port_number: int
+ schema_path: str
+ config_file_dir: str
+
+ def __init__(self, config_obj: dict, config_file_dir: str):
+ '''
+
+ Args:
+ config_obj: A dictionary containing the
+ settings to be used in creating the database.
+ '''
+
+ self.db_name = config_obj['dbname']
+ self.user_name = config_obj['username']
+ self.password = config_obj['password']
+ self.host_name = config_obj['host']
+ self.port_number = config_obj['port']
+ self.schema_path = config_obj['schema_location']
+ self.config_file_dir = config_file_dir
+
+ def create_database(self):
+ '''
+ Create the commands to create the database.
+
+ Returns: None
+
+ '''
+ # Command to create the database, set up permissions, and load the schema.
+ uname_pass_list = ['-u', self.user_name, ' -p', self.password, ' -e ']
+ uname_pass = ''.join(uname_pass_list)
+ create_list = ["'create database ", self.db_name, "'"]
+ create_str = ''.join(create_list)
+ create_cmd = uname_pass + create_str
+ logging.debug(f'database create string: {create_cmd}')
+
+ # Permissions
+ perms_list = ['"',"GRANT INSERT, DELETE, UPDATE, INDEX, DROP ON " ,
+ self.db_name,
+ '.* to ', "'mvuser'", "@'%'", '"']
+
+ perms_str = ''.join(perms_list)
+ perms_cmd = uname_pass + perms_str
+ logging.debug(f'database grant permissions string: {perms_cmd}')
+
+
+ # Schema
+ schema_full_path = os.path.join(self.schema_path,
+ 'METdataio/METdbLoad/sql/mv_mysql.sql')
+ schema_list = [ "-umvadmin -p",self.password, " ", self.db_name, ' < ',
+ schema_full_path]
+ schema_cmd = ''.join(schema_list)
+ logging.debug(f'Schema command: {schema_cmd}')
+
+
+
+ try:
+ self.delete_database()
+ except subprocess.CalledProcessError:
+ logging.info("Database doesn't exist. Ignoring this error.")
+ pass
+
+ try:
+ create_db = subprocess.run(['mysql', create_cmd])
+ db_permissions = subprocess.run(['mysql', perms_cmd])
+ db_schema = subprocess.run(['mysql', schema_cmd])
+ except subprocess.CalledProcessError:
+ logging.error('Error in executing mysql commands')
+
+ def delete_database(self):
+ '''
+ Create the commands to delete a database.
+ Returns: None
+
+ '''
+
+ # Command to delete the database
+ uname_pass_list = ['-u', self.user_name, ' -p', self.password, ' -e ']
+ uname_pass = ''.join(uname_pass_list)
+ drop_list = ["'drop database ", self.db_name, "'"]
+ drop_str = ''.join(drop_list)
+ drop_cmd = uname_pass + drop_str
+ logging.debug(f'Drop database command: {drop_cmd}')
+
+ try:
+ _ = subprocess.run(['mysql', drop_cmd])
+
+ except subprocess.CalledProcessError:
+ logging.error('Error in executing mysql commands')
+
+
+if __name__ == "__main__":
+
+ # Create a parser
+ parser = argparse.ArgumentParser()
+
+ # Add arguments to the parser
+ parser.add_argument('action')
+ parser.add_argument('config_file')
+
+ # Parse the arguments
+ args = parser.parse_args()
+
+ # Get arguments value
+ action = args.action
+ config_file = args.config_file
+
+ action_requested = str(action).lower()
+ logging.debug(f'Action requested: {action_requested}')
+ logging.debug(f'YAML Config file to use: {str(config_file)}')
+ config_file_dir = os.path.dirname(config_file)
+ logging.debug(f'Directory of config file: {config_file_dir}')
+
+ with open(config_file, 'r') as cf:
+ db_config_info = yaml.safe_load(cf)
+ db_loader = DatabaseInfo(db_config_info, config_file_dir)
+ if action_requested == 'create':
+ db_loader.create_database()
+ elif action_requested == 'delete':
+ db_loader.delete_database()
+ else:
+ logging.warning(
+ f'{action_requested} is not a supported option. Only "create" and '
+ f'"delete" are supported options.')
diff --git a/METdbLoad/sql/scripts/generate_xml_spec.py b/METdbLoad/sql/scripts/generate_xml_spec.py
new file mode 100644
index 00000000..423f1913
--- /dev/null
+++ b/METdbLoad/sql/scripts/generate_xml_spec.py
@@ -0,0 +1,121 @@
+'''
+ Generates load_met.xml, the XML specification file for met_db_load.py, from a YAML configuration file.
+'''
+import os.path
+import subprocess
+
+import yaml
+import argparse
+from dataclasses import dataclass
+import logging
+
+logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
+
+
+@dataclass
+class DatabaseLoadingInfo:
+ '''
+ Data class for keeping the relevant information for loading the
+ METviewer database.
+ '''
+
+ db_name: str
+ user_name: str
+ password: str
+ host_name: str
+ port_number: int
+ group: str
+ schema_path: str
+ data_dir: str
+ xml_spec_file: str
+ load_stat: bool
+ load_mode: bool
+ load_mtd: bool
+ load_mpr: bool
+ load_orank: bool
+ config_file_dir: str
+
+ def __init__(self, config_obj: dict, config_file_dir:str):
+ '''
+
+ Args:
+ config_obj: A dictionary containing the
+ settings to be used in creating the database.
+ '''
+
+ self.db_name = config_obj['dbname']
+ self.user_name = config_obj['username']
+ self.password = config_obj['password']
+ self.host_name = config_obj['host']
+ self.port_number = config_obj['port']
+ self.group = config_obj['group']
+ self.schema_path = config_obj['schema_location']
+ self.data_dir = config_obj['data_dir']
+ self.xml_spec_file = config_obj['xml_specification']
+ self.load_stat = config_obj['load_stat']
+ self.load_mode = config_obj['load_mode']
+ self.load_mtd = config_obj['load_mtd']
+ self.load_mpr = config_obj['load_mpr']
+ self.load_orank = config_obj['load_orank']
+ self.description = config_obj['description']
+ self.config_file_dir = config_file_dir
+
+
+ def update_spec_file(self):
+ '''
+ Edit the XML specification file to reflect the settings in the
+ YAML configuration file.
+ '''
+
+ # Assign the host with the host and port assigned in the YAML config file
+ import xml.etree.ElementTree as et
+ tree = et.parse(self.xml_spec_file)
+ root = tree.getroot()
+
+ for host in root.iter('host'):
+ host.text = self.host_name + ":" + str(self.port_number)
+
+ for dbname in root.iter('database'):
+ dbname.text = self.db_name
+
+ for user in root.iter('user'):
+ user.text = self.user_name
+
+ for password in root.iter('password'):
+ password.text = self.password
+
+ for data_folder in root.iter('folder_tmpl'):
+ data_folder.text = self.data_dir
+
+ for group in root.iter('group'):
+ group.text = self.group
+
+ for desc in root.iter('description'):
+ desc.text = self.description
+
+ tree.write(os.path.join(self.config_file_dir, 'load_met.xml'))
+
+
+
+if __name__ == "__main__":
+
+ # Create a parser
+ parser = argparse.ArgumentParser()
+
+ # Add arguments to the parser
+ parser.add_argument('config_file')
+
+ # Parse the arguments
+ args = parser.parse_args()
+
+ # Get arguments value
+ config_file = args.config_file
+
+ logging.debug(f'Config file to use: {str(config_file)}')
+ config_file_dir = os.path.dirname(config_file)
+ logging.debug(f'Directory of config file: {config_file_dir}')
+
+ with open(config_file, 'r') as cf:
+ db_config_info = yaml.safe_load(cf)
+ db_loader = DatabaseLoadingInfo(db_config_info, config_file_dir)
+ db_loader.update_spec_file()
diff --git a/docs/Users_Guide/load_data.rst b/docs/Users_Guide/load_data.rst
index 71162a32..0de91ceb 100644
--- a/docs/Users_Guide/load_data.rst
+++ b/docs/Users_Guide/load_data.rst
@@ -1,8 +1,123 @@
-Load Data
-=========
+Background
+==========
-METdbload is used to insert MET output data into the database. The usage
-statement:
+The METdbLoad module provides support for inserting MET output data into the database.
+
+Before using the METdbLoad module, the database must exist and have the proper permissions
+(i.e. grant privileges to insert, delete, update, and index). A schema file, *mv_mysql.sql*, is available in the
+METdataio/METdbLoad/sql/ directory for importing into the database.
+
+The METdbLoad script *met_db_load.py* performs the loading of data based on settings in an XML specification file.
+
+In the METdataio/METdbLoad/sql/scripts directory, there are two configuration files:
+
+ * db_load_specification.xml
+
+ * data_loading_config.yaml
+
+The *db_load_specification.xml* is a template XML specification file, and *data_loading_config.yaml*
+is a template YAML configuration file. The *data_loading_config.yaml* file contains
+information about the database (username, password, database name, etc.). This information is used by the
+*generate_xml_spec.py* script to generate the XML specification file which is then used to load data into the database.
+
+Generate the XML specification file
+-----------------------------------
+
+Copy the *data_loading_config.yaml* file to a secure location in your workspace, as this file will contain the username
+and password to the database. **Do not put this file where it can be read by anyone who should not have access to this
+information.**
+
+.. code-block:: ini
+
+ cp data_loading_config.yaml /path-to-your-dir/
+
+Replace the *path-to-your-dir* with the actual path to where this file is to be saved.
+
+Change directory to the location where the *data_loading_config.yaml* file was copied.
+
+Open the data_loading_config.yaml file:
+
+.. literalinclude:: ../../METdbLoad/sql/scripts/data_loading_config.yaml
+
+Update the database information with information relevant to the database you are using:
+
+ * dbname
+
+ * username
+
+ * password
+
+ * host
+
+ * port
+
+
+Update the schema location by providing the full path to the *mv_mysql.sql* schema file:
+
+ * schema_location
+
+Provide the name and full path to the *db_load_specification.xml* template file. This template is combined
+with the settings in this YAML configuration to create a new XML specification file:
+
+ * xml_specification
+
+Provide the group and description. The databases in METviewer are grouped, provide the name of the appropriate
+group and a brief description of the database in which the data is to be loaded:
+
+ * group
+
+ * description
+
+Provide the full path to the directory where the MET data to be loaded is saved:
+
+ * data_dir
+
+Indicate which data types are to be loaded by setting the appropriate settings to True:
+
+ * load_stat
+
+ * load_mode
+
+ * load_mtd
+
+ * load_mpr
+
+ * load_orank
+
+Generate the new XML specification file by running the following:
+
+.. code-block:: ini
+
+ cd path-to-METdataio-source/METdataio/METdbLoad/sql/scripts
+
+ *Replace path-to-METdataio-source with the location where the METdataio source code is saved.
+
+ python generate_xml_spec.py path-to/data_loading_config.yaml
+
+ *Replace the path-to with the path to the directory you created to store the copy of the data_loading_config.yaml
+ file as specified earlier.
+
+A new XML specification file, *load_met.xml*, will be generated and saved in the
+same directory where the YAML configuration file was copied.
+
+Load data
+---------
+
+Now the MET data can be loaded in the database using the *met_db_load.py* script in the path-to-METdataio-source/METdataio/METdbLoad/ush
+directory. The *path-to-METdataio-source* is the directory where the METdataio source code is saved.
+
+.. code-block:: ini
+
+ cd /path-to-METdataio-source/METdataio/METdbLoad/ush
+
+ * Replace path-to-METdataio-source with the location where the METdataio source code is saved.
+
+ python met_db_load.py /path-to/load_met.xml
+
+ * Replace the path-to with the location where the load_met.xml file was saved. This is the same directory
+ you created to save the copy of the data_loading_config.yaml file.
+
+The usage statement:
.. code-block:: ini
@@ -18,7 +133,7 @@ statement:
-h, --help show this help message and exit
-index Only process index, do not load data
-The **xmlfile** passes information about the MET output files to load
+The **xmlfile** is the XML specification file that passes information about the MET output files to load
into the database to METdbload. It is an XML file whose top-level
tag is <load_spec> and it contains the following elements, divided into
functional sections:
@@ -138,8 +253,15 @@ functional sections:
-Example
-_______
+Additional Loading Options
+__________________________
+
+The load_met.xml specification file created above loads the entire dataset specified in the data_dir setting in the
+YAML config file, data_loading_config.yaml.
+
+A subset of the data can be selected by date and field names (e.g. by model, valid_time, vx_mask, etc.).
+The load_met.xml specification file can be further modified to accomplish this by adding the date_list and
+field_name elements to the XML specification file.
Here is a simple example:
@@ -213,11 +335,12 @@ _______________
.. list-table::
* - Error:
- - ** ERROR: Caught class
+ - **ERROR: Caught class
com.mysql.jdbc.exceptions.MySQLIntegrityConstraintViolationException:
Duplicate entry
'CT07-NMM-LIN-R2-0-2005-07-15 12:00:00-2005-07-15 12:00:00-0-2005'
- for key 2
+ for key 2**
+
* - Solution:
- This error is caused by trying to insert a stat_header record into
the database when an identical one already exists. If identical
@@ -227,5 +350,10 @@ _______________
stat_header each time a row is inserted. However, if a stat_header
row already exists in the table with the insert information, then
the existing record will be used instead of trying to insert a
- dupilcate.
+ duplicate.
+ * - Error:
+ - **ERROR:root: (1049, "Unknown database 'mv_test'") in run_sql Error when connecting to database**
+
+ * - Solution:
+ - This error is caused when attempting to load data into a database that does not exist. You will need to create the database, set up the appropriate privileges as outlined above, and load the schema using the mv_mysql.sql file.