# Get news from websites #1080
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow overview:
#   1. `click-here-to-see-data` runs the TelevisionNewsAnalyser sbt main for
#      France 2 and TF1, moves the freshly produced partitions from the
#      output-*-tv-news-json folders into data-news-json, runs the UpdateNews
#      sbt main, then commits and pushes the result to `main`.
#   2. `data-quality` runs the DataQuality sbt main once the first job is done.
name: Get news from websites

on:
  # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_dispatch
  workflow_dispatch:
  # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule
  schedule:
    - cron: '26 15 * * *'

jobs:
  click-here-to-see-data:
    # ubuntu-20.04 is a retired runner label. 22.04 is pinned rather than
    # `ubuntu-latest` because the steps call `sbt` directly.
    # NOTE(review): presumably sbt comes from the runner image — confirm.
    runs-on: ubuntu-22.04
    permissions:
      # Required by the final `git push origin main`.
      contents: write
    steps:
      - uses: actions/checkout@v4
      # - uses: coursier/cache-action@v6
      - name: Set up JDK
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: 'sbt'
      - name: Set date partitions as env variables
        # Exports every partition path used by the later steps:
        #   PATH_YEAR_PARTITION           year=YYYY               (yesterday)
        #   PATH_PARTITION                year=YYYY/month=M       (yesterday)
        #   PATH_DAY_PARTITION            year=YYYY/month=M/day=D (yesterday)
        #   PATH_DAY_2_DAY_AGO_PARTITION  year=YYYY/month=M/day=D (2 days ago)
        #   PATH_DAY_3_DAY_AGO_PARTITION  year=YYYY/month=M/day=D (3 days ago)
        #   PATH_LAST_MONTH_PARTITION     year=YYYY/month=M       (last month)
        run: |
          {
            echo "PATH_PARTITION=$(date -d "yesterday" +'year=%Y/month=%-m')"
            echo "PATH_DAY_PARTITION=$(date -d "yesterday" +'year=%Y/month=%-m/day=%-d')"
            echo "PATH_DAY_2_DAY_AGO_PARTITION=$(date -d "2 days ago" +'year=%Y/month=%-m/day=%-d')"
            echo "PATH_DAY_3_DAY_AGO_PARTITION=$(date -d "3 days ago" +'year=%Y/month=%-m/day=%-d')"
            echo "PATH_YEAR_PARTITION=$(date -d "yesterday" +'year=%Y')"
            # Anchor on the 15th: a bare "last month" evaluated on the 29th-31st
            # can normalize to a date in the current month.
            echo "PATH_LAST_MONTH_PARTITION=$(date -d "$(date +%Y-%m-15) -1 month" +'year=%Y/month=%-m')"
          } >> "$GITHUB_ENV"
      - name: List France 2 news urls containing global warming (see end)
        run: sbt "runMain com.github.polomarcus.main.TelevisionNewsAnalyser 1 france2"
      - name: Organize and refresh data for France 2
        run: |
          ls -R output-france2-tv-news-json/
      - name: List data for France 2
        run: ls -R data-news-json/media=France\ 2/
      - name: List TF1 news urls containing global warming (see end)
        run: sbt "runMain com.github.polomarcus.main.TelevisionNewsAnalyser 2 tf1"
      - name: Organize and refresh new data for TF1
        run: |
          ls -R output-tf1-tv-news-json/
      - name: List new data for TF1
        run: |
          ls -R data-news-json/media=TF1/
      - name: Check out again without cleaning the workspace
        # `clean: false` skips the git clean/reset that checkout performs by
        # default, so the files generated by the steps above are kept.
        uses: actions/checkout@v4
        with:
          clean: false
      - name: Create website data and save data with git
        run: |
          # Replace the last three day partitions of one media inside
          # data-news-json with the ones just written to its output folder.
          #   $1: media name as used in the "media=<name>" directory
          #   $2: output folder produced for that media
          refresh_media() {
            local data_root="data-news-json/media=$1"
            local output_root="$2/media=$1"

            mkdir -p "${data_root}/${PATH_YEAR_PARTITION:?}" \
                     "${data_root}/${PATH_PARTITION:?}" \
                     "${data_root}/${PATH_LAST_MONTH_PARTITION:?}"

            # ${VAR:?} aborts on an empty value instead of letting rm -rf
            # remove the whole media directory.
            rm -rf "${data_root}/${PATH_DAY_PARTITION:?}" \
                   "${data_root}/${PATH_DAY_2_DAY_AGO_PARTITION:?}" \
                   "${data_root}/${PATH_DAY_3_DAY_AGO_PARTITION:?}"

            mkdir -p "${output_root}/${PATH_PARTITION}" \
                     "${output_root}/${PATH_LAST_MONTH_PARTITION}"

            # Best effort: the glob matches nothing when a month has no new
            # data, in which case mv fails and the failure is ignored.
            mv -vn "${output_root}/${PATH_PARTITION}"/* "${data_root}/${PATH_PARTITION}" || true
            mv -vn "${output_root}/${PATH_LAST_MONTH_PARTITION}"/* "${data_root}/${PATH_LAST_MONTH_PARTITION}" || true
          }

          refresh_media "France 2" output-france2-tv-news-json
          refresh_media "TF1" output-tf1-tv-news-json

          sbt "runMain com.github.polomarcus.main.UpdateNews"

          git config user.name polomarcus-github-actions
          # NOTE(review): the original address was lost to e-mail obfuscation
          # in the source; the GitHub Actions bot noreply address is used
          # instead — replace with the intended address if different.
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git status
          git add .
          # Committing with nothing staged exits non-zero and would fail the job.
          if git diff --cached --quiet; then
            echo "No data changes to commit"
          else
            git commit -m "ci(data): saved date ${PATH_DAY_PARTITION} and ${PATH_DAY_2_DAY_AGO_PARTITION} and ${PATH_DAY_3_DAY_AGO_PARTITION}"
            git push origin main
          fi

  data-quality:
    runs-on: ubuntu-22.04
    needs: click-here-to-see-data
    steps:
      - uses: actions/checkout@v4
        with:
          # Without an explicit ref, checkout uses the commit that triggered
          # the run, which predates the data pushed to main by the job above.
          ref: main
      # - uses: coursier/cache-action@v6
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: 'sbt'
      - name: Check for duplicates to delete inside data-news-json folder
        run: sbt "runMain com.github.polomarcus.main.DataQuality"