Skip to content

Commit

Permalink
Merge pull request #108 from Guovin/dev
Browse files Browse the repository at this point in the history
Release: v1.1.3
  • Loading branch information
Guovin authored May 8, 2024
2 parents 0b4518c + 048d0ad commit 595143c
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
steps:
- name: Set branch name
id: vars
run: echo ::set-output name=branch::${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}
run: echo "branch=${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}" >> $GITHUB_ENV
- uses: actions/checkout@v3
with:
ref: ${{ steps.vars.outputs.branch }}
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# 更新日志(Changelog)

## v1.1.3

### 2024/5/8

- 优化频道接口不对应问题(#99)(Optimize the mismatch problem of the channel interface (#99))
- 处理 tqdm 安全问题(Handle the security issue of tqdm)
- 修改即将被废弃的命令(Modify the commands that are about to be deprecated)

## v1.1.2

### 2024/5/7
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ selenium = "4.19.0"
selenium-stealth = "1.0.6"
aiohttp = ">=3.9.4"
bs4 = "0.0.2"
tqdm = "4.66.2"
tqdm = ">=4.66.3"
async-timeout = "4.0.3"

[requires]
Expand Down
8 changes: 4 additions & 4 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 3 additions & 22 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup, NavigableString
from bs4 import BeautifulSoup
from utils import (
getChannelItems,
updateChannelUrlsTxt,
updateFile,
getChannelUrl,
getChannelInfo,
getResultsFromSoup,
sortUrlsBySpeedAndResolution,
getTotalUrls,
filterUrlsByPatterns,
Expand Down Expand Up @@ -124,25 +123,7 @@ async def visitPage(self, channelItems):
)
soup = BeautifulSoup(source, "html.parser")
if soup:
results = []
for element in soup.descendants:
if isinstance(element, NavigableString):
url = getChannelUrl(element)
if url and not any(
item[0] == url for item in results
):
url_element = soup.find(
lambda tag: tag.get_text(strip=True)
== url
)
if url_element:
info_element = (
url_element.find_next_sibling()
)
date, resolution = getChannelInfo(
info_element
)
results.append((url, date, resolution))
results = getResultsFromSoup(soup, name)
for result in results:
url, date, resolution = result
if url and checkUrlByPatterns(url):
Expand Down
56 changes: 46 additions & 10 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from urllib.parse import urlparse
import requests
import re
from bs4 import NavigableString


def getChannelItems():
Expand Down Expand Up @@ -44,16 +45,12 @@ def getChannelItems():
# This is a url, add it to the list of urls for the current channel.
match = re.search(pattern, line)
if match is not None:
if match.group(1) not in channels[current_category]:
channels[current_category][match.group(1)] = [match.group(2)]
elif (
match.group(2)
and match.group(2)
not in channels[current_category][match.group(1)]
):
channels[current_category][match.group(1)].append(
match.group(2)
)
name = match.group(1).strip()
url = match.group(2).strip()
if name not in channels[current_category]:
channels[current_category][name] = [url]
elif url and url not in channels[current_category][name]:
channels[current_category][name].append(url)
return channels
finally:
f.close()
Expand Down Expand Up @@ -171,6 +168,45 @@ def getChannelInfo(element):
return date, resolution


def checkNameMatch(name, result_name):
    """
    Check whether a scraped channel name matches the requested channel name.

    Identifier-like names — anything containing Latin letters followed by
    '_', '-' or '+', or containing "cctv" (case-insensitive) — must match
    *name* exactly, ignoring case, so that e.g. "CCTV1" does not pair with
    "CCTV11". Any other name (e.g. a pure CJK channel name) is accepted.

    Args:
        name: The channel name being searched for.
        result_name: The channel name found on the page.

    Returns:
        bool: True if the two names are considered a match.
    """
    # Removed the leftover debug print that logged every comparison.
    pattern = r"[a-zA-Z]+[_\-+]|cctv"
    if re.search(pattern, result_name, re.IGNORECASE):
        # Strict case-insensitive equality for identifier-like names.
        return name.lower() == result_name.lower()
    # Non-identifier names are not strictly compared; treat as matching.
    return True


def getResultsFromSoup(soup, name):
    """
    Extract (url, date, resolution) tuples for a channel from a parsed page.

    Walks every text node in the soup, pulls candidate channel urls from
    them, and keeps a url only when the element naming it (the previous
    sibling of the url's tag) matches *name* via checkNameMatch.

    Args:
        soup: A BeautifulSoup document to scan.
        name: The channel name being searched for.

    Returns:
        list: Unique (url, date, resolution) tuples, in discovery order.
    """
    collected = []
    for node in soup.descendants:
        # Only raw text nodes can carry a channel url.
        if not isinstance(node, NavigableString):
            continue
        url = getChannelUrl(node)
        if not url:
            continue
        # Skip urls that were already recorded.
        if any(url == entry[0] for entry in collected):
            continue
        # Locate the tag whose visible text is exactly this url.
        url_tag = soup.find(lambda tag: tag.get_text(strip=True) == url)
        if not url_tag:
            continue
        preceding = url_tag.find_previous_sibling()
        if not preceding:
            continue
        # The preceding sibling holds the channel name shown on the page.
        if not checkNameMatch(name, preceding.get_text(strip=True)):
            continue
        following = url_tag.find_next_sibling()
        date, resolution = getChannelInfo(following)
        collected.append((url, date, resolution))
    return collected


async def getSpeed(url, urlTimeout=5):
"""
Get the speed of the url
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "1.1.2"
"version": "1.1.3"
}

0 comments on commit 595143c

Please sign in to comment.