Merge branch 'master' of https://github.com/e-mission/e-mission-server into entire_reverse_geocode
JGreenlee committed Aug 28, 2024
2 parents 2a8f5bb + dea6ff4 commit 3edfc38
Showing 22 changed files with 250 additions and 158 deletions.
23 changes: 0 additions & 23 deletions .docker/docker_start_script.sh
@@ -1,27 +1,4 @@
#!/usr/bin/env bash
#Configure web server

# cd /usr/src/app/e-mission-server

#set database URL using environment variable
echo ${DB_HOST}
if [ -z ${DB_HOST} ] ; then
local_host=`hostname -i`
jq --arg db_host "$local_host" '.timeseries.url = $db_host' conf/storage/db.conf.sample > conf/storage/db.conf
else
jq --arg db_host "$DB_HOST" '.timeseries.url = $db_host' conf/storage/db.conf.sample > conf/storage/db.conf
fi
cat conf/storage/db.conf

#set Web Server host using environment variable
echo ${WEB_SERVER_HOST}
if [ -z ${WEB_SERVER_HOST} ] ; then
local_host=`hostname -i`
sed "s_localhost_${local_host}_" conf/net/api/webserver.conf.sample > conf/net/api/webserver.conf
else
sed "s_localhost_${WEB_SERVER_HOST}_" conf/net/api/webserver.conf.sample > conf/net/api/webserver.conf
fi
cat conf/net/api/webserver.conf

if [ -z ${LIVERELOAD_SRC} ] ; then
echo "Live reload disabled, "
56 changes: 45 additions & 11 deletions .github/workflows/image_build_push.yml
@@ -1,29 +1,22 @@
# This is a basic workflow to help you get started with Actions

name: docker image

# Controls when the action will run. Triggers the workflow on push or pull request
# events but only for the master branch
on:
push:
branches: [ master, gis-based-mode-detection ]


# Env variable
#Dockerhub credentials are set as environment variables
env:
DOCKER_USER: ${{secrets.DOCKER_USER}}
DOCKER_PASSWORD: ${{secrets.DOCKER_PASSWORD}}

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest

# Steps represent a sequence of tasks that will be executed as part of the job
outputs:
date: ${{ steps.date.outputs.date }}

steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
- name: docker login
run: | # log into docker hub account
@@ -46,3 +39,44 @@ jobs:
- name: push docker image
run: |
docker push $DOCKER_USER/${GITHUB_REPOSITORY#*/}:${GITHUB_REF##*/}_${{ steps.date.outputs.date }}
- name: Create a text file
run: |
echo ${{ steps.date.outputs.date }} > tag_file.txt
echo "Created tag text file"
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
name: docker-image-tag
path: tag_file.txt
overwrite: true

dispatch:
needs: build
runs-on: ubuntu-latest

env:
DOCKER_IMAGE_TAG: ${{ needs.build.outputs.date }}

strategy:
matrix:
include:
- repo: e-mission/op-admin-dashboard
branch: master
- repo: e-mission/em-public-dashboard
branch: main

steps:
- uses: actions/checkout@v4

- name: Trigger workflow in admin-dash, public-dash
# TODO: Create Fine-grained token with "Actions: write" permissions
run: |
curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.GH_FG_PAT_TAGS }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/${{ matrix.repo }}/actions/workflows/image_build_push.yml/dispatches \
-d '{"ref":"${{ matrix.branch }}", "inputs": {"docker_image_tag" : "${{ env.DOCKER_IMAGE_TAG }}"}}'
2 changes: 1 addition & 1 deletion .github/workflows/nominatim-docker-test.yml
@@ -25,6 +25,6 @@ jobs:

# Passes the geofabrik key into the docker-compose.yml file.
- name: Test nominatim.py
run: GFBK_KEY=${{ secrets.GEOFABRIK_API }} docker-compose -f emission/integrationTests/docker-compose.yml up --exit-code-from web-server
run: GFBK_KEY=${{ secrets.GEOFABRIK_API }} docker compose -f emission/integrationTests/docker-compose.yml up --exit-code-from web-server


2 changes: 1 addition & 1 deletion .github/workflows/test-with-docker.yml
@@ -30,4 +30,4 @@ jobs:
run: echo Smoke test

- name: Run the tests using docker-compose
run: docker-compose -f setup/docker-compose.tests.yml up --exit-code-from web-server
run: docker compose -f setup/docker-compose.tests.yml up --exit-code-from web-server
2 changes: 2 additions & 0 deletions .gitignore
@@ -16,6 +16,8 @@ CFC_DataCollector/moves_collect.log
webapp/www/lib
conf/**/*.json
!conf/**/*.schema.json
!conf/analysis/debug.conf.dev.json
!conf/analysis/debug.conf.prod.json

*.ipynb_checkpoints*

4 changes: 2 additions & 2 deletions Dockerfile
@@ -28,8 +28,8 @@ RUN chmod u+x ./.docker/setup_config.sh
RUN bash -c "./.docker/setup_config.sh"

# #declare environment variables
ENV DB_HOST=''
ENV WEB_SERVER_HOST=''
ENV DB_HOST='db'
ENV WEB_SERVER_HOST=0.0.0.0

ENV LIVERELOAD_SRC=''
ENV STUDY_CONFIG=''
@@ -10,5 +10,5 @@
"section.startStopRadius": 150,
"section.endStopRadius": 150,
"analysis.result.section.key": "analysis/inferred_section",
"userinput.keylist": ["manual/mode_confirm", "manual/purpose_confirm", "manual/trip_user_input", "manual/place_user_input"]
"userinput.keylist": ["manual/mode_confirm", "manual/purpose_confirm", "manual/replaced_mode", "manual/trip_user_input", "manual/place_user_input"]
}
14 changes: 14 additions & 0 deletions conf/analysis/debug.conf.prod.json
@@ -0,0 +1,14 @@
{
"intake.segmentation.section_segmentation.sectionValidityAssertions": true,
"intake.cleaning.clean_and_resample.speedDistanceAssertions": false,
"intake.cleaning.clean_and_resample.sectionValidityAssertions": false,
"intake.cleaning.filter_accuracy.enable": false,
"classification.inference.mode.useAdvancedFeatureIndices": true,
"classification.inference.mode.useBusTrainFeatureIndices": true,
"classification.validityAssertions": true,
"output.conversion.validityAssertions": true,
"section.startStopRadius": 150,
"section.endStopRadius": 150,
"analysis.result.section.key": "analysis/inferred_section",
"userinput.keylist": ["manual/mode_confirm", "manual/purpose_confirm", "manual/replaced_mode", "manual/trip_user_input", "manual/place_user_input"]
}
10 changes: 8 additions & 2 deletions emission/analysis/config.py
@@ -1,11 +1,17 @@
import json
import os

def get_config_data():
try:
print("Trying to open debug.conf.json")
config_file = open('conf/analysis/debug.conf.json')
except:
print("analysis.debug.conf.json not configured, falling back to sample, default configuration")
config_file = open('conf/analysis/debug.conf.json.sample')
if os.getenv("PROD_STAGE") == "TRUE":
print("In production environment, config not overridden, using default production debug.conf")
config_file = open('conf/analysis/debug.conf.prod.json')
else:
print("analysis.debug.conf.json not configured, falling back to sample, default configuration")
config_file = open('conf/analysis/debug.conf.dev.json')
ret_val = json.load(config_file)
config_file.close()
return ret_val
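A small usage sketch (not part of this commit): when conf/analysis/debug.conf.json is absent, setting PROD_STAGE selects the production defaults; otherwise the dev defaults are used.

import os
os.environ["PROD_STAGE"] = "TRUE"  # set before the config module is imported

import emission.analysis.config as eac

# Assumes conf/analysis/debug.conf.json does not exist, so the fallback branch
# runs and conf/analysis/debug.conf.prod.json is loaded.
print(eac.get_config_data()["intake.cleaning.filter_accuracy.enable"])  # -> False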
42 changes: 42 additions & 0 deletions emission/core/backwards_compat_config.py
@@ -0,0 +1,42 @@
import json
import logging
import os
import numpy as np
import pandas as pd

# if there is a config file and the environment variable is set, we need to
# decide which one wins. I would argue for the environment variable, to allow
# for a migration to the new model and for us to remove the obsolete code.
# Although arguably, the converse will also work, since we can set the
# variable while the file is present, and then remove the file in a second
# round of changes. Let's keep the order unchanged for now for simplicity, and
# modify as needed later.

def get_config(config_file_name, var_path_mapping):
# Since a `config_data` field would be at the module level, and we want
# the module to be reusable, we are not going to cache the result. It is
# not clear that we need to cache the result anyway, given that we
# typically initialize the config variables at the beginning of the
# modules in which they are used. If we feel like this is an issue, we can
# switch to creating a class instead.
ret_val = {}
try:
config_file = open(config_file_name)
# we only have a single entry in the config json, not an array
# and there is no way for json_normalize to return a series
# so we will just take the first row of the dataframe
loaded_val = pd.json_normalize(json.load(config_file)).iloc[0]
for var, path in var_path_mapping.items():
ret_val[var] = loaded_val[path]
# Ensure that the returned values are regular ints
# https://github.com/e-mission/e-mission-server/pull/961#issuecomment-2282206511
if type(ret_val[var]) is np.int64:
ret_val[var] = int(ret_val[var])
config_file.close()
except Exception as e:
if isinstance(e, KeyError) or isinstance(e, json.decoder.JSONDecodeError):
logging.exception(e)
print("Config file not found, returning a copy of the environment variables instead...")
# https://github.com/e-mission/e-mission-server/pull/961#issuecomment-2282209006
ret_val = dict(os.environ)
return ret_val
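A usage sketch (not part of this commit) showing both paths: values come from the JSON config file when it exists, and from a copy of os.environ when it does not.

import os
import emission.core.backwards_compat_config as ecbc

# Hypothetical override for illustration; with no conf/storage/db.conf present,
# get_config falls back to a copy of the environment variables.
os.environ["DB_HOST"] = "mongodb://db/openpath"

config = ecbc.get_config('conf/storage/db.conf',
    {"DB_HOST": "timeseries.url", "DB_RESULT_LIMIT": "timeseries.result_limit"})
print(config.get("DB_HOST", "localhost"))       # -> "mongodb://db/openpath"
print(config.get("DB_RESULT_LIMIT", 250000))    # -> 250000 unless DB_RESULT_LIMIT is set in the environment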
16 changes: 7 additions & 9 deletions emission/core/get_database.py
@@ -10,16 +10,14 @@
import os
import json

try:
config_file = open('conf/storage/db.conf')
except:
print("storage not configured, falling back to sample, default configuration")
config_file = open('conf/storage/db.conf.sample')
import emission.core.backwards_compat_config as ecbc

config = ecbc.get_config('conf/storage/db.conf',
{"DB_HOST": "timeseries.url", "DB_RESULT_LIMIT": "timeseries.result_limit"})

config_data = json.load(config_file)
url = config_data["timeseries"]["url"]
result_limit = config_data["timeseries"]["result_limit"]
config_file.close()
print("Retrieved config %s" % config)
url = config.get("DB_HOST", "localhost")
result_limit = config.get("DB_RESULT_LIMIT", 250000)

try:
parsed=pymongo.uri_parser.parse_uri(url)
10 changes: 1 addition & 9 deletions emission/integrationTests/start_integration_tests.sh
@@ -2,15 +2,7 @@
# Using an automated install
cd /src/e-mission-server

#set database URL using environment variable
echo ${DB_HOST}
if [ -z ${DB_HOST} ] ; then
local_host=`hostname -i`
sed "s_localhost_${local_host}_" conf/storage/db.conf.sample > conf/storage/db.conf
else
sed "s_localhost_${DB_HOST}_" conf/storage/db.conf.sample > conf/storage/db.conf
fi
cat conf/storage/db.conf

echo "Setting up conda..."
source setup/setup_conda.sh Linux-x86_64
@@ -25,4 +17,4 @@ echo "Adding permissions for the runIntegrationTests.sh script"
chmod +x runIntegrationTests.sh
echo "Permissions added for the runIntegrationTests.sh script"

./runIntegrationTests.sh
./runIntegrationTests.sh
24 changes: 17 additions & 7 deletions emission/integrationTests/storageTests/TestMongodbAuth.py
@@ -47,10 +47,15 @@ def setUp(self):
self.uuid = uuid.uuid4()
self.testUserId = self.uuid
self.db_conf_file = "conf/storage/db.conf"
self.originalDBEnvVars = {}
self.createAdmin()

def tearDown(self):
self.admin_auth.command({"dropAllUsersFromDatabase": 1})
logging.debug("Deleting test db environment variables")
ecc.restoreOriginalEnvVars(self.originalDBEnvVars, self.modifiedEnvVars)
logging.debug("Finished restoring original db environment variables")
logging.debug("Restored original values are = %s" % self.originalDBEnvVars)
try:
os.remove(self.db_conf_file)
except FileNotFoundError as e:
@@ -67,14 +72,19 @@ def createAdmin(self):
self.admin_auth = pymongo.MongoClient(self.getURL(self.test_username, self.test_password)).admin

def configureDB(self, url):
config = {
"timeseries": {
"url": url,
"result_limit": 250000
}
self.testModifiedEnvVars = {
'DB_HOST' : url
}
with open(self.db_conf_file, "w") as fp:
json.dump(config, fp, indent=4)

self.orginalDBEnvVars = dict(os.environ)

for env_var_name, env_var_value in self.testModifiedEnvVars.items():
# Setting db environment variables with test values
os.environ[env_var_name] = env_var_value

logging.debug("Finished setting up test db environment variables")
logging.debug("Current original values are = %s" % self.originalDBEnvVars)
logging.debug("Current modified values are = %s" % self.testModifiedEnvVars)

def getURL(self, username, password, dbname="admin"):
return "mongodb://%s:%s@localhost/%s?authSource=admin&authMechanism=SCRAM-SHA-1" % (username, password, dbname)
37 changes: 15 additions & 22 deletions emission/net/api/cfc_webapp.py
@@ -51,27 +51,22 @@
import emission.storage.timeseries.cache_series as esdc
import emission.core.timer as ect
import emission.core.get_database as edb
import emission.core.backwards_compat_config as ecbc

try:
config_file = open('conf/net/api/webserver.conf')
except:
logging.debug("webserver not configured, falling back to sample, default configuration")
config_file = open('conf/net/api/webserver.conf.sample')

OPENPATH_URL="https://www.nrel.gov/transportation/openpath.html"
STUDY_CONFIG = os.getenv('STUDY_CONFIG', "stage-program")

config_data = json.load(config_file)
config_file.close()
static_path = config_data["paths"]["static_path"]
python_path = config_data["paths"]["python_path"]
server_host = config_data["server"]["host"]
server_port = config_data["server"]["port"]
socket_timeout = config_data["server"]["timeout"]
log_base_dir = config_data["paths"]["log_base_dir"]
auth_method = config_data["server"]["auth"]
aggregate_call_auth = config_data["server"]["aggregate_call_auth"]
not_found_redirect = config_data["paths"].get("404_redirect", OPENPATH_URL)
# Constants that we don't read from the configuration
WEBSERVER_STATIC_PATH="webapp/www"
WEBSERVER_HOST="0.0.0.0"

config = ecbc.get_config('conf/net/api/webserver.conf',
{"WEBSERVER_PORT": "server.port", "WEBSERVER_TIMEOUT": "server.timeout",
"WEBSERVER_AUTH": "server.auth", "WEBSERVER_AGGREGATE_CALL_AUTH": "server.aggregate_call_auth"})
server_port = config.get("WEBSERVER_PORT", 8080)
socket_timeout = config.get("WEBSERVER_TIMEOUT", 3600)
auth_method = config.get("WEBSERVER_AUTH", "skip")
aggregate_call_auth = config.get("WEBSERVER_AGGREGATE_CALL_AUTH", "no_auth")
not_found_redirect = config.get("WEBSERVER_NOT_FOUND_REDIRECT", "https://nrel.gov/openpath")

BaseRequest.MEMFILE_MAX = 1024 * 1024 * 1024 # Allow the request size to be 1G
# to accomodate large section sizes
@@ -89,7 +84,7 @@
#Simple path that serves up a static landing page with javascript in it
@route('/')
def index():
return static_file("index.html", static_path)
return static_file("index.html", WEBSERVER_STATIC_PATH)

# Backward compat to handle older clients
# Remove in 2023 after everybody has upgraded
@@ -603,6 +598,4 @@ def resolve_auth(auth_method):
else:
# Non SSL option for testing on localhost
print("Running with HTTPS turned OFF - use a reverse proxy on production")
run(host=server_host, port=server_port, server='cheroot', debug=True)

# run(host="0.0.0.0", port=server_port, server='cherrypy', debug=True)
run(host=WEBSERVER_HOST, port=server_port, server='cheroot', debug=True)
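The same pattern applies to the web server settings; a brief sketch (not part of this commit) of overriding the port through the environment when conf/net/api/webserver.conf is absent:

import os
os.environ["WEBSERVER_PORT"] = "8081"  # hypothetical override for illustration

import emission.core.backwards_compat_config as ecbc

config = ecbc.get_config('conf/net/api/webserver.conf',
    {"WEBSERVER_PORT": "server.port", "WEBSERVER_TIMEOUT": "server.timeout",
     "WEBSERVER_AUTH": "server.auth", "WEBSERVER_AGGREGATE_CALL_AUTH": "server.aggregate_call_auth"})
print(config.get("WEBSERVER_PORT", 8080))  # -> "8081" (a string, since the env fallback copies os.environ)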
6 changes: 5 additions & 1 deletion emission/net/auth/secret.py
@@ -4,7 +4,11 @@

class SecretMethod(object):
def __init__(self):
key_file = open('conf/net/auth/secret_list.json')
try:
key_file = open('conf/net/auth/secret_list.json')
except:
print("secret_list.json not configured, falling back to sample, default configuration")
key_file = open('conf/net/auth/secret_list.json.sample')
key_data = json.load(key_file)
key_file.close()
self.client_secret_list = key_data["client_secret_list"]