Add CI workflows, lint configuration and PDM packaging metadata.

Review notes (text before the first "diff --git" header is ignored by
git-apply and patch):

  * Line structure and indentation were restored, and the mis-encoded
    emoji in the workflow/job/step names were repaired.
  * builds.yaml: removed the job-level
    "if: github.event.pull_request.merged == true". For the configured
    opened/reopened/edited events, and for manual dispatch, it never
    evaluates to true, so the build matrix was always skipped.
  * linting.yaml: the "Problem report" step now uses "always() && ...".
    Without a status function an implicit success() is applied, so the
    report was skipped whenever the actionlint step failed.
  * security.yaml: the header comment now describes what the workflow
    does (pip-audit) rather than tests and linting.
  * test-release.yaml: the tarball lookup uses dist/*.tar.gz, matching the
    signing and publishing globs in the same file, instead of dist/*.tgz.
  * pyproject.toml:
      - fixed the mismatched quotes on the xlrd requirement (TOML syntax
        error);
      - removed the fused, bogus requirements "numpyrequests~=2.31.0" and
        "pandastraitlets==5.4" (numpy, requests and traitlets are listed
        separately);
      - removed the duplicate fuzzywuzzy entry, the unpinned "flask"
        duplicate of "Flask==2.3.2" and the duplicated classifier;
      - kept a single pandas pin. "pandas==1.5.2" and "pandas~=2.0.3"
        cannot both be satisfied; 1.5.2 was kept to match the other exact
        pins. TODO(review): confirm the intended pandas series.
      - testpaths now points at "tests/", the directory used by
        testing.yaml and excluded in .flake8.
  * Hunk line counts were updated for builds.yaml and pyproject.toml. The
    "index" blob ids are carried over from the original patch and no longer
    match the modified file contents.

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..eaa0406
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,5 @@
+[flake8]
+#ignore = E226,E302,E41
+max-line-length = 120
+exclude = tests/*
+max-complexity = 10
diff --git a/.github/workflows/builds.yaml b/.github/workflows/builds.yaml
new file mode 100644
index 0000000..c813a3c
--- /dev/null
+++ b/.github/workflows/builds.yaml
@@ -0,0 +1,54 @@
+---
+name: "🧪 Test builds (matrix)"
+
+on:  # yamllint disable-line rule:truthy
+  workflow_dispatch:
+  pull_request:
+    types:
+      [opened, reopened, edited]
+
+jobs:
+  pre-release:
+    runs-on: "ubuntu-latest"
+    continue-on-error: true
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11']
+    steps:
+
+      - name: "Populate environment variables"
+        id: setenv
+        run: |
+          echo "Action triggered by user: ${GITHUB_TRIGGERING_ACTOR}"
+          set -x
+          datetime=$(date +'%Y%m%d%H%M')
+          export datetime
+          echo "datetime=${datetime}" >> "$GITHUB_OUTPUT"
+          vernum="${{ matrix.python-version }}.${datetime}"
+          echo "vernum=${vernum}" >> "$GITHUB_OUTPUT"
+
+      - name: "Checkout repository"
+        uses: actions/checkout@v3
+
+      - name: "Set up Python ${{ matrix.python-version }}"
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: "Install dependencies"
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox tox-gh-actions
+
+      - name: "Tag for test release"
+        # Delete all local tags, then create a synthetic tag for testing
+        # Use the date/time to avoid conflicts uploading to Test PyPI
+        run: |
+          scripts/dev-versioning.sh "${{ steps.setenv.outputs.vernum }}"
+          git tag | xargs -L 1 | xargs git tag --delete
+          git tag "v${{ steps.setenv.outputs.vernum }}"
+          git checkout "tags/v${{ steps.setenv.outputs.vernum }}"
+          grep version pyproject.toml
+
+      - name: "Build with TOX"
+        run: |
+          tox -e build
diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml
new file mode 100644
index 0000000..b864529
--- /dev/null
+++ b/.github/workflows/linting.yaml
@@ -0,0 +1,51 @@
+---
+name: "🧪 Linting checks"
+
+# yamllint disable-line rule:truthy
+on:
+  workflow_dispatch:
+  pull_request:
+    types:
+      [opened, reopened, edited]
+
+jobs:
+  lint:
+    name: "Check repository content"
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: "Checkout repository"
+        uses: actions/checkout@v3
+
+      - name: "Checking YAML files (yamllint)"
+        if: always()
+        run: |
+          pip install yamllint
+          yamllint .
+
+#      - name: "Checking TOML files (gh-action-toml-linter)"
+#        if: always()
+#        uses: yisonPylkita/gh-action-toml-linter@0.1.3
+
+      - name: "Checking GitHub Actions (actionlint)"
+        if: always()
+        id: actionlint
+        uses: raven-actions/actionlint@v1
+        with:
+          matcher: true  # optional
+          cache: true  # optional
+          fail-on-error: true  # optional
+          files: ".github/workflows/*.yaml, .github/workflows/*.yml"
+          # flags: "-ignore SC2086"  # optional
+
+      - name: "Problem report (conditional step)"
+        if: ${{ always() && steps.actionlint.outputs.exit-code != 0 }}
+        # yamllint disable rule:line-length
+        run: |
+          echo "Used actionlint version ${{ steps.actionlint.outputs.version-semver }}"
+          echo "Used actionlint release ${{ steps.actionlint.outputs.version-tag }}"
+          echo "actionlint ended with ${{ steps.actionlint.outputs.exit-code }} exit code"
+          echo "actionlint ended because '${{ steps.actionlint.outputs.exit-message }}'"
+          echo "actionlint found ${{ steps.actionlint.outputs.total-errors }} errors"
+          echo "actionlint checked ${{ steps.actionlint.outputs.total-files }} files"
+          echo "actionlint cache used: ${{ steps.actionlint.outputs.cache-hit }}"
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..cd8e9d7
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,172 @@
+---
+name: "🐍📦 Production build and release"
+
+# GitHub/PyPI trusted publisher documentation:
+# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
+
+# yamllint disable-line rule:truthy
+on:
+  # workflow_dispatch:
+  push:
+    # Only invoked on release tag pushes
+    tags:
+      - v*.*.*
+
+env:
+  python-version: "3.10"
+
+
+### BUILD ###
+
+jobs:
+  build:
+    name: "🐍 Build packages"
+    runs-on: ubuntu-latest
+    permissions:
+      # IMPORTANT: mandatory for Sigstore
+      id-token: write
+    steps:
+
+      ### BUILDING ###
+
+      - name: "Checkout repository"
+        uses: actions/checkout@v4
+
+      - name: "Setup PDM for build commands"
+        uses: pdm-project/setup-pdm@v3
+
+      - name: "Setup Python 3.10"
+        uses: actions/setup-python@v4.7.0
+        with:
+          python-version: ${{ env.python-version }}
+
+      - name: "Update version from tags for production release"
+        run: |
+          echo "Github versioning: ${{ github.ref_name }}"
+          scripts/release-versioning.sh
+
+      - name: "Build with PDM backend"
+        run: |
+          pdm build
+
+      ### SIGNING ###
+
+      - name: "Sign packages with Sigstore"
+        uses: sigstore/gh-action-sigstore-python@v1.2.3
+        with:
+          inputs: >-
+            ./dist/*.tar.gz
+            ./dist/*.whl
+
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ github.ref_name }}
+          path: dist/
+
+
+  ### PUBLISH GITHUB ###
+
+  github:
+    name: "📦 Publish to GitHub"
+    # Only publish on tag pushes
+    if: startsWith(github.ref, 'refs/tags/')
+    needs:
+      - build
+    runs-on: ubuntu-latest
+    permissions:
+      # IMPORTANT: mandatory to publish artefacts
+      contents: write
+    steps:
+
+      - name: "⬇ Download build artefacts"
+        uses: actions/download-artifact@v3
+        with:
+          name: ${{ github.ref_name }}
+          path: dist/
+
+      - name: "📦 Publish release to GitHub"
+        uses: ModeSevenIndustrialSolutions/action-automatic-releases@latest
+        with:
+          # Valid inputs are:
+          # repo_token, automatic_release_tag, draft, prerelease, title, files
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          prerelease: false
+          automatic_release_tag: ${{ github.ref_name }}
+          title: ${{ github.ref_name }}
+          files: |
+            dist/*.tar.gz
+            dist/*.whl
+
+
+  ### PUBLISH PYPI TEST ###
+
+  testpypi:
+    name: "📦 Publish to PyPi Test"
+    # Only publish on tag pushes
+    if: startsWith(github.ref, 'refs/tags/')
+    needs:
+      - build
+    runs-on: ubuntu-latest
+    environment:
+      name: testpypi
+    permissions:
+      # IMPORTANT: mandatory for trusted publishing
+      id-token: write
+    steps:
+
+      - name: "⬇ Download build artefacts"
+        uses: actions/download-artifact@v3
+        with:
+          name: ${{ github.ref_name }}
+          path: dist/
+
+      - name: "Remove files unsupported by PyPi"
+        run: |
+          if [ -f dist/buildvars.txt ]; then
+            rm dist/buildvars.txt
+          fi
+          rm dist/*.crt dist/*.sig*
+
+      - name: Publish distribution to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          verbose: true
+
+  ### PUBLISH PYPI ###
+
+  pypi:
+    name: "📦 Publish to PyPi"
+    # Only publish on tag pushes
+    if: startsWith(github.ref, 'refs/tags/')
+    needs:
+      - testpypi
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+    permissions:
+      # IMPORTANT: mandatory for trusted publishing
+      id-token: write
+    steps:
+
+      - name: "⬇ Download build artefacts"
+        uses: actions/download-artifact@v3
+        with:
+          name: ${{ github.ref_name }}
+          path: dist/
+
+      - name: "Remove files unsupported by PyPi"
+        run: |
+          if [ -f dist/buildvars.txt ]; then
+            rm dist/buildvars.txt
+          fi
+          rm dist/*.crt dist/*.sig*
+
+      - name: "Setup PDM for build commands"
+        uses: pdm-project/setup-pdm@v3
+
+      - name: "Publish release to PyPI"
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
diff --git a/.github/workflows/security.yaml b/.github/workflows/security.yaml
new file mode 100644
index 0000000..ea0aa82
--- /dev/null
+++ b/.github/workflows/security.yaml
@@ -0,0 +1,41 @@
+---
+# This workflow installs the Python dependencies it finds and audits
+# them with pip-audit across a variety of Python versions
+# For more information see:
+# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: "⛔️ Security auditing"
+
+on:  # yamllint disable-line rule:truthy
+  workflow_dispatch:
+  pull_request:
+    types:
+      [opened, reopened, edited]
+
+jobs:
+  build:
+    name: "Audit Python dependencies"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11']
+    steps:
+
+      - name: "Checkout repository"
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: "Install dependencies"
+        run: |
+          pip install --upgrade pip
+          find ./* -name requirements.txt -exec pip install --upgrade -r {} \;
+
+      - name: "Run: pip-audit"
+        uses: pypa/gh-action-pip-audit@v1.0.8
+        with:
+          ignore-vulns: |
+            PYSEC-2023-163
diff --git a/.github/workflows/test-release.yaml b/.github/workflows/test-release.yaml
new file mode 100644
index 0000000..de3afba
--- /dev/null
+++ b/.github/workflows/test-release.yaml
@@ -0,0 +1,156 @@
+---
+name: "🐍📦 Test build and release"
+
+# GitHub/PyPI trusted publisher documentation:
+# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
+
+# yamllint disable-line rule:truthy
+on:
+  workflow_dispatch:
+
+env:
+  python-version: "3.10"
+
+
+### BUILD ###
+
+jobs:
+  build:
+    name: "🐍 Build packages"
+    runs-on: ubuntu-latest
+    permissions:
+      # IMPORTANT: mandatory for Sigstore
+      id-token: write
+    steps:
+
+      ### BUILDING ###
+
+      - name: "Checkout repository"
+        uses: actions/checkout@v4
+
+      - name: "Setup PDM for build commands"
+        uses: pdm-project/setup-pdm@v3
+
+      - name: "Populate environment variables"
+        id: setenv
+        run: |
+          vernum="${{ env.python-version }}.$(date +'%Y%m%d%H%M')"
+          echo "vernum=${vernum}" >> "$GITHUB_OUTPUT"
+          echo "vernum=${vernum}" >> buildvars.txt
+
+      - name: "Setup Python 3.10"
+        uses: actions/setup-python@v4.7.0
+        with:
+          python-version: ${{ env.python-version }}
+
+      - name: "Tag for test release"
+        # Delete all local tags, then create a synthetic tag for testing
+        # Use the date/time to avoid conflicts uploading to Test PyPI
+        run: |
+          scripts/dev-versioning.sh "${{ steps.setenv.outputs.vernum }}"
+          git tag | xargs -L 1 | xargs git tag --delete
+          git tag "v${{ steps.setenv.outputs.vernum }}"
+          git checkout "tags/v${{ steps.setenv.outputs.vernum }}"
+          grep version pyproject.toml
+
+      - name: "Build with PDM backend"
+        run: |
+          pdm build
+          # Need to save the build environment for subsequent steps
+          mv buildvars.txt dist/buildvars.txt
+
+      ### SIGNING ###
+
+      - name: "Sign packages with Sigstore"
+        uses: sigstore/gh-action-sigstore-python@v1.2.3
+        with:
+          inputs: >-
+            ./dist/*.tar.gz
+            ./dist/*.whl
+
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v3
+        with:
+          name: Development
+          path: dist/
+
+
+  ### PUBLISH GITHUB ###
+
+  github:
+
+    name: "📦 Test publish to GitHub"
+    needs:
+      - build
+    runs-on: ubuntu-latest
+    permissions:
+      # IMPORTANT: mandatory to publish artefacts
+      contents: write
+    steps:
+
+      - name: "⬇ Download build artefacts"
+        uses: actions/download-artifact@v3
+        with:
+          name: Development
+          path: dist/
+
+      - name: "Source environment variables"
+        id: setenv
+        run: |
+          if [ -f dist/buildvars.txt ]; then
+            source dist/buildvars.txt
+            echo "vernum=${vernum}" >> "$GITHUB_OUTPUT"
+          else
+            echo "Build environment variables could not be sourced"
+          fi
+          echo "tarball=$(ls dist/*.tar.gz)" >> "$GITHUB_OUTPUT"
+          echo "wheel=$(ls dist/*.whl)" >> "$GITHUB_OUTPUT"
+
+      - name: "📦 Publish packages to GitHub"
+        uses: ModeSevenIndustrialSolutions/action-automatic-releases@latest
+        with:
+          # Valid inputs are:
+          # repo_token, automatic_release_tag, draft, prerelease, title, files
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          prerelease: true
+          automatic_release_tag: ${{ steps.setenv.outputs.vernum }}
+          title:
+            "Development Build \
+            ${{ steps.setenv.outputs.vernum }}"
+          files: |
+            dist/*.tar.gz
+            dist/*.whl
+
+
+  ### PUBLISH TEST PYPI ###
+
+  testpypi:
+    name: "📦 Test publish to PyPi"
+    needs:
+      - build
+    runs-on: ubuntu-latest
+    environment:
+      name: testpypi
+    permissions:
+      # IMPORTANT: mandatory for trusted publishing
+      id-token: write
+    steps:
+
+      - name: "⬇ Download build artefacts"
+        uses: actions/download-artifact@v3
+        with:
+          name: Development
+          path: dist/
+
+      - name: "Remove files unsupported by PyPi"
+        run: |
+          if [ -f dist/buildvars.txt ]; then
+            rm dist/buildvars.txt
+          fi
+          rm dist/*.crt dist/*.sig*
+
+      - name: Publish distribution to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
+          repository-url: https://test.pypi.org/legacy/
diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml
new file mode 100644
index 0000000..9e8e620
--- /dev/null
+++ b/.github/workflows/testing.yaml
@@ -0,0 +1,37 @@
+---
+name: "🧪 Unit tests"
+
+on:  # yamllint disable-line rule:truthy
+  pull_request:
+    types:
+      [opened, reopened, edited]
+
+jobs:
+  build:
+    name: "Run unit tests"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11']
+    steps:
+
+      - name: "Checkout repository"
+        uses: actions/checkout@v3
+
+      - name: "Setup PDM for build commands"
+        uses: pdm-project/setup-pdm@v3
+
+      - name: "Setup Python ${{ matrix.python-version }}"
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: "Install dependencies"
+        run: |
+          python -m pip install --upgrade pip
+          pdm export -o requirements.txt
+          pip install -r requirements.txt
+          pip install .
+
+      - name: "Run unit tests: pytest"
+        run: python -m pytest tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f7d4ab1
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,51 @@
+---
+repos:
+  # Do not allow direct push to main/master branches
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: no-commit-to-branch
+
+  - repo: https://github.com/adrienverge/yamllint.git
+    rev: v1.32.0
+    hooks:
+      - id: yamllint
+        args: [--strict]
+
+  - repo: https://github.com/Mateusz-Grzelinski/actionlint-py
+    rev: v1.6.26.11
+    hooks:
+      - id: actionlint
+
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 23.9.1
+    hooks:
+      - id: black
+        # It is recommended to specify the latest version of Python
+        # supported by your project here, or alternatively use
+        # pre-commit's default_language_version, see
+        # https://pre-commit.com/#top_level-default_language_version
+        language_version: python3.11
+
+  - repo: https://github.com/pre-commit/mirrors-eslint
+    rev: 'v8.51.0'
+    hooks:
+      - id: eslint
+        files: \^*.toml
+        types: [file]
+        additional_dependencies:
+          - eslint
+          - eslint-config-google
+          - eslint-loader
+          - eslint-plugin-react
+          - babel-eslint
+
+  - repo: https://github.com/pycqa/flake8
+    rev: '6.1.0'
+    hooks:
+      - id: flake8
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: 'v1.6.1'
+    hooks:
+      - id: mypy
diff --git a/License b/LICENSE.txt
similarity index 100%
rename from License
rename to LICENSE.txt
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2485a94
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,106 @@
+[project]
+name = "osc-data-extraction"
+version = "v0.9.9"
+description = "OS-Climate data extraction toolkit."
+authors = [ { name = "David Besslich", email = "72577720+MichaelTiemannOSC@users.noreply.github.com" } ]
+requires-python = ">=3.9"
+readme = "README.md"
+license = { text = "Apache-2.0" }
+keywords = [
+  "Climate",
+  "Finance"
+]
+classifiers = [
+  "Intended Audience :: Developers",
+  "Intended Audience :: Science/Research",
+  "License :: OSI Approved :: Apache Software License",
+  "Operating System :: MacOS",
+  "Operating System :: Microsoft :: Windows",
+  "Operating System :: Unix",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.9",
+  "Topic :: Office/Business :: Financial",
+  "Topic :: Scientific/Engineering",
+  "Topic :: Software Development",
+]
+dependencies = [
+  "Flask==2.3.2",
+  "MarkupSafe==2.1.1",
+  "Pillow==7.2.0",
+  "Werkzeug==2.2.2",
+  "boto3~=1.28.8",
+  "cryptography==40.0.2",
+  "farm==0.5.0",
+  "fuzzywuzzy==0.18.0",
+  "gdown==3.11.1",
+  "ipython==7.23.1",
+  "jinja2==3.0",
+  "jsonpickle==1.2",
+  "jupyter",
+  "numpy==1.23.5",
+  "openpyxl~=3.1.2",
+  "optuna==2.0.0",
+  "pandas==1.5.2",
+  "pdf2image==1.13.1",
+  "pdfminer.six==20221105",
+  "protobuf==3.20.0",
+  "pyOpenSSL==23.2.0",
+  "pyspellchecker==0.5.5",
+  "python-Levenshtein==0.12.0",
+  "pyyaml~=6.0.1",
+  "requests~=2.31.0",
+  "scikit-learn==0.24.1",
+  "scipy==1.3.0",
+  "spacy==2.3.2",
+  "tabula-py==2.1.1",
+  "tqdm==4.48.0",
+  "traitlets==5.4",
+  "urllib3==1.26.7",
+  "xlrd==1.2.0"
+]
+
+[project.urls]
+Homepage = "https://github.com/os-climate/data-extraction"
+Repository = "https://github.com/os-climate/data-extraction"
+Downloads = "https://github.com/os-climate/data-extraction/releases"
+"Bug Tracker" = "https://github.com/os-climate/data-extraction/issues"
+Documentation = "https://github.com/os-climate/data-extraction/tree/main/docs"
+"Source Code" = "https://github.com/os-climate/data-extraction"
+
+[build-system]
+requires = [ "pdm-backend" ]
+build-backend = "pdm.backend"
+
+[tool.pdm.scripts]
+pre_release = "scripts/dev-versioning.sh"
+release = "scripts/release-versioning.sh"
+test = "pytest"
+tox = "tox"
+doc = { shell = "cd docs && mkdocs serve", help = "Start the dev server for doc preview" }
+lint = "pre-commit run --all-files"
+complete = { call = "tasks.complete:main", help = "Create autocomplete files for bash and fish" }
+
+[tool.pdm.dev-dependencies]
+test = [
+  "pdm[pytest]",
+  "pdm[publish]",
+  "pytest-cov"
+]
+tox = [
+  "tox",
+  "tox-pdm>=0.5"
+]
+doc = [ "sphinx" ]
+dev = [
+  "tox>=4.11.3",
+  "tox-pdm>=0.7.0"
+]
+
+[tool.pytest.ini_options]
+testpaths = [ "tests/" ]
+
+[tool.black]
+line-length = 120