Skip to content

Commit

Permalink
added m3u8 parser library
Browse files Browse the repository at this point in the history
  • Loading branch information
mx3L committed Mar 21, 2016
1 parent efeaa92 commit ced3ed0
Show file tree
Hide file tree
Showing 8 changed files with 1,202 additions and 0 deletions.
1 change: 1 addition & 0 deletions build/plugin/src/engine/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from Plugins.Extensions.archivCZSK.engine.tools.util import toString, toUnicode
from Plugins.Extensions.archivCZSK.gui.captcha import Captcha
from Plugins.Extensions.archivCZSK.resources.libraries import simplejson as json
from Plugins.Extensions.archivCZSK.resources.libraries import m3u8


GItem_lst = VideoAddonContentProvider.get_shared_itemlist()
Expand Down
1 change: 1 addition & 0 deletions build/plugin/src/resources/libraries/iso8601/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .iso8601 import *
209 changes: 209 additions & 0 deletions build/plugin/src/resources/libraries/iso8601/iso8601.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
"""ISO 8601 date time string parsing
Basic usage:
>>> import iso8601
>>> iso8601.parse_date("2007-01-25T12:00:00Z")
datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.iso8601.Utc ...>)
>>>
"""

from datetime import (
datetime,
timedelta,
tzinfo
)
from decimal import Decimal
import sys
import re

# Names exported by ``from iso8601 import *``.
__all__ = ["parse_date", "ParseError", "UTC"]

# Base string type used for input validation: ``str`` on Python 3,
# ``basestring`` on Python 2 (the name only exists there, so it is only
# evaluated on that branch).
_basestring = str if sys.version_info >= (3, 0, 0) else basestring


# Adapted from http://delete.me.uk/2005/03/iso8601.html
#
# Verbose-mode pattern for an ISO 8601 date with an optional time and zone.
# Named groups:
#   year                      -- exactly four digits
#   monthdash / month         -- "-M" or "-MM" form vs. undashed two-digit form
#   daydash / day             -- "-D" or "-DD" form vs. undashed two-digit form
#   separator                 -- " " or "T" between the date and the time
#   hour, minute, second      -- the ":" between them is optional
#   second_fraction           -- digits following "." or ","
#   timezone                  -- "Z" or a signed offset, split further into
#                                tz_sign, tz_hour and (optional) tz_minute
# The final "$" rejects any trailing text after the date.
ISO8601_REGEX = re.compile(
r"""
(?P<year>[0-9]{4})
(
(
(-(?P<monthdash>[0-9]{1,2}))
|
(?P<month>[0-9]{2})
(?!$) # Don't allow YYYYMM
)
(
(
(-(?P<daydash>[0-9]{1,2}))
|
(?P<day>[0-9]{2})
)
(
(
(?P<separator>[ T])
(?P<hour>[0-9]{2})
(:{0,1}(?P<minute>[0-9]{2})){0,1}
(
:{0,1}(?P<second>[0-9]{1,2})
([.,](?P<second_fraction>[0-9]+)){0,1}
){0,1}
(?P<timezone>
Z
|
(
(?P<tz_sign>[-+])
(?P<tz_hour>[0-9]{2})
:{0,1}
(?P<tz_minute>[0-9]{2}){0,1}
)
){0,1}
){0,1}
)
){0,1} # YYYY-MM
){0,1} # YYYY only
$
""",
re.VERBOSE
)

class ParseError(Exception):
    """Error raised when a date string cannot be parsed."""

# Zero-length offset returned by the tzinfo classes in this module
# (pattern yoinked from the Python docs).
ZERO = timedelta(0)


class Utc(tzinfo):
    """tzinfo for UTC: zero offset, zero DST, named "UTC"."""

    def __repr__(self):
        return "<iso8601.Utc>"

    def tzname(self, dt):
        return "UTC"

    def utcoffset(self, dt):
        return ZERO

    def dst(self, dt):
        return ZERO


# Module-level UTC instance.
UTC = Utc()

class FixedOffset(tzinfo):
    """Time zone with a fixed offset from UTC, given in hours and minutes.

    :param offset_hours: Hours part of the offset
    :param offset_minutes: Minutes part of the offset; it is added to the
                           hours, so for negative offsets it is negative too
    :param name: Label returned by ``tzname``
    """

    def __init__(self, offset_hours, offset_minutes, name):
        # The raw constructor arguments are kept so that __getinitargs__ can
        # hand them back later.
        self.__offset_hours = offset_hours
        self.__offset_minutes = offset_minutes
        self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes)
        self.__name = name

    def __eq__(self, other):
        """Two FixedOffsets are equal when both offset and name match."""
        if isinstance(other, FixedOffset):
            return (
                other.__offset == self.__offset
                and other.__name == self.__name
            )
        # Let Python try the other operand and then fall back to its default.
        # Evaluating ``other == self`` here instead would bounce between the
        # two operands forever when ``other`` defines no __eq__ of its own.
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defining __eq__ drops the inherited hash on Python 3; restore one
        # that agrees with equality.
        return hash((self.__offset, self.__name))

    def __getinitargs__(self):
        return (self.__offset_hours, self.__offset_minutes, self.__name)

    def utcoffset(self, dt):
        return self.__offset

    def tzname(self, dt):
        return self.__name

    def dst(self, dt):
        return ZERO

    def __repr__(self):
        return "<FixedOffset %r %r>" % (self.__name, self.__offset)

def to_int(d, key, default_to_zero=False, default=None, required=True):
    """Look up ``key`` in ``d`` and return the value converted to int.

    :param d: Dict of values to read from
    :param key: Name of the entry to convert
    :param default_to_zero: If the resolved value is None or empty, return 0
    :param default: Used when the entry is missing (or falsy) in the dict
    :param required: If true, a value that is still None raises ParseError;
                     otherwise None is returned
    """
    raw = d.get(key) or default
    if (raw in ("", None)) and default_to_zero:
        return 0
    if raw is not None:
        return int(raw)
    if required:
        raise ParseError("Unable to read %s from %s" % (key, d))
    return None

def parse_timezone(matches, default_timezone=UTC):
    """Turn the timezone groups of a parsed date into a tzinfo.

    :param matches: Dict with the "timezone", "tz_sign", "tz_hour" and
                    "tz_minute" entries
    :param default_timezone: Returned when no timezone was present
    :returns: UTC for "Z", ``default_timezone`` when the timezone is None,
              otherwise a FixedOffset
    """
    zone = matches["timezone"]
    if zone == "Z":
        return UTC
    if zone is None:
        # This isn't strictly correct, but dates without timezones are
        # common, so assume the default (which defaults to UTC).
        # Addresses issue 4.
        return default_timezone
    sign = matches["tz_sign"]
    hours = to_int(matches, "tz_hour")
    minutes = to_int(matches, "tz_minute", default_to_zero=True)
    label = "%s%02d:%02d" % (sign, hours, minutes)
    if sign == "-":
        return FixedOffset(-hours, -minutes, label)
    return FixedOffset(hours, minutes, label)

def parse_date(datestring, default_timezone=UTC):
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    :param datestring: The date to parse as a string
    :param default_timezone: A datetime tzinfo instance to use when no timezone
                             is specified in the datestring. If this is set to
                             None then a naive datetime object is returned.
    :returns: A datetime.datetime instance
    :raises: ParseError when there is a problem parsing the date or
             constructing the datetime instance.
    """
    if not isinstance(datestring, _basestring):
        # Wrap the value in a 1-tuple: with a bare tuple argument the "%"
        # operator would unpack it and raise TypeError instead of ParseError.
        raise ParseError("Expecting a string %r" % (datestring,))
    m = ISO8601_REGEX.match(datestring)
    if not m:
        raise ParseError("Unable to parse date string %r" % datestring)
    groups = m.groupdict()

    tz = parse_timezone(groups, default_timezone=default_timezone)

    # Convert the fractional-second digits (if any) into whole microseconds.
    fraction = Decimal("0.%s" % (groups["second_fraction"] or 0))
    groups["second_fraction"] = int(fraction * Decimal("1000000.0"))

    try:
        return datetime(
            year=to_int(groups, "year"),
            # The undashed group wins; otherwise fall back to the dashed
            # group, and to 1 when the component is absent altogether.
            month=to_int(groups, "month", default=to_int(groups, "monthdash", required=False, default=1)),
            day=to_int(groups, "day", default=to_int(groups, "daydash", required=False, default=1)),
            hour=to_int(groups, "hour", default_to_zero=True),
            minute=to_int(groups, "minute", default_to_zero=True),
            second=to_int(groups, "second", default_to_zero=True),
            microsecond=groups["second_fraction"],
            tzinfo=tz,
        )
    except Exception as e:
        # Report any construction failure through the module's own
        # exception type.
        raise ParseError(e)
98 changes: 98 additions & 0 deletions build/plugin/src/resources/libraries/iso8601/test_iso8601.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# coding=UTF-8
from __future__ import absolute_import

import copy
import datetime
import pickle

import pytest

from iso8601 import iso8601

def test_iso8601_regex():
    """The module pattern matches a full timestamp ending in Z."""
    matched = iso8601.ISO8601_REGEX.match("2006-10-11T00:14:33Z")
    assert matched

def test_parse_no_timezone_different_default():
    """A string without a timezone picks up the supplied default."""
    default_tz = iso8601.FixedOffset(2, 0, "test offset")
    parsed = iso8601.parse_date("2007-01-01T08:00:00", default_timezone=default_tz)
    expected = datetime.datetime(2007, 1, 1, 8, 0, 0, 0, default_tz)
    assert parsed == expected
    assert parsed.tzinfo == default_tz

def test_parse_utc_different_default():
    """Z should mean 'UTC', not 'default'."""
    default_tz = iso8601.FixedOffset(2, 0, "test offset")
    parsed = iso8601.parse_date("2007-01-01T08:00:00Z", default_timezone=default_tz)
    expected = datetime.datetime(2007, 1, 1, 8, 0, 0, 0, iso8601.UTC)
    assert parsed == expected

@pytest.mark.parametrize("invalid_date, error_string", [
    ("2013-10-", "Unable to parse date string"),
    ("2013-", "Unable to parse date string"),
    ("", "Unable to parse date string"),
    (None, "Expecting a string"),
    ("wibble", "Unable to parse date string"),
    ("23", "Unable to parse date string"),
    ("131015T142533Z", "Unable to parse date string"),
    ("131015", "Unable to parse date string"),
    ("20141", "Unable to parse date string"),
    ("201402", "Unable to parse date string"),
    ("2007-06-23X06:40:34.00Z", "Unable to parse date string"),  # https://code.google.com/p/pyiso8601/issues/detail?id=14
    ("2007-06-23 06:40:34.00Zrubbish", "Unable to parse date string"),  # https://code.google.com/p/pyiso8601/issues/detail?id=14
    ("20114-01-03T01:45:49", "Unable to parse date string"),
])
def test_parse_invalid_date(invalid_date, error_string):
    """Each invalid input raises ParseError with the expected message prefix."""
    # Sanity check on the parametrize table itself.
    assert invalid_date is None or isinstance(invalid_date, str)
    with pytest.raises(iso8601.ParseError) as excinfo:
        iso8601.parse_date(invalid_date)
    assert excinfo.errisinstance(iso8601.ParseError)
    message = str(excinfo.value)
    assert message.startswith(error_string)

@pytest.mark.parametrize("valid_date,expected_datetime,isoformat", [
    ("2007-06-23 06:40:34.00Z", datetime.datetime(2007, 6, 23, 6, 40, 34, 0, iso8601.UTC), "2007-06-23T06:40:34+00:00"),  # Handle a separator other than T
    ("1997-07-16T19:20+01:00", datetime.datetime(1997, 7, 16, 19, 20, 0, 0, iso8601.FixedOffset(1, 0, "+01:00")), "1997-07-16T19:20:00+01:00"),  # Parse with no seconds
    ("2007-01-01T08:00:00", datetime.datetime(2007, 1, 1, 8, 0, 0, 0, iso8601.UTC), "2007-01-01T08:00:00+00:00"),  # Handle timezone-less dates. Assumes UTC. http://code.google.com/p/pyiso8601/issues/detail?id=4
    ("2006-10-20T15:34:56.123+02:30", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.FixedOffset(2, 30, "+02:30")), None),
    ("2006-10-20T15:34:56Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 0, iso8601.UTC), "2006-10-20T15:34:56+00:00"),
    ("2007-5-7T11:43:55.328Z", datetime.datetime(2007, 5, 7, 11, 43, 55, 328000, iso8601.UTC), "2007-05-07T11:43:55.328000+00:00"),  # http://code.google.com/p/pyiso8601/issues/detail?id=6
    ("2006-10-20T15:34:56.123Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.UTC), "2006-10-20T15:34:56.123000+00:00"),
    ("2013-10-15T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC), "2013-10-15T18:30:00+00:00"),
    ("2013-10-15T22:30+04", datetime.datetime(2013, 10, 15, 22, 30, 0, 0, iso8601.FixedOffset(4, 0, "+04:00")), "2013-10-15T22:30:00+04:00"),  # <time>±hh:mm
    ("2013-10-15T1130-0700", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(-7, 0, "-07:00")), "2013-10-15T11:30:00-07:00"),  # <time>±hhmm
    ("2013-10-15T1130+0700", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(+7, 0, "+07:00")), "2013-10-15T11:30:00+07:00"),  # <time>±hhmm
    ("2013-10-15T1130+07", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(+7, 0, "+07:00")), "2013-10-15T11:30:00+07:00"),  # <time>±hh
    ("2013-10-15T1130-07", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(-7, 0, "-07:00")), "2013-10-15T11:30:00-07:00"),  # <time>±hh
    ("2013-10-15T15:00-03:30", datetime.datetime(2013, 10, 15, 15, 0, 0, 0, iso8601.FixedOffset(-3, -30, "-03:30")), "2013-10-15T15:00:00-03:30"),
    ("2013-10-15T183123Z", datetime.datetime(2013, 10, 15, 18, 31, 23, 0, iso8601.UTC), "2013-10-15T18:31:23+00:00"),  # hhmmss
    ("2013-10-15T1831Z", datetime.datetime(2013, 10, 15, 18, 31, 0, 0, iso8601.UTC), "2013-10-15T18:31:00+00:00"),  # hhmm
    ("2013-10-15T18Z", datetime.datetime(2013, 10, 15, 18, 0, 0, 0, iso8601.UTC), "2013-10-15T18:00:00+00:00"),  # hh
    ("2013-10-15", datetime.datetime(2013, 10, 15, 0, 0, 0, 0, iso8601.UTC), "2013-10-15T00:00:00+00:00"),  # YYYY-MM-DD
    ("20131015T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC), "2013-10-15T18:30:00+00:00"),  # YYYYMMDD
    ("2012-12-19T23:21:28.512400+00:00", datetime.datetime(2012, 12, 19, 23, 21, 28, 512400, iso8601.FixedOffset(0, 0, "+00:00")), "2012-12-19T23:21:28.512400+00:00"),  # https://code.google.com/p/pyiso8601/issues/detail?id=21
    ("2006-10-20T15:34:56.123+0230", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.FixedOffset(2, 30, "+02:30")), "2006-10-20T15:34:56.123000+02:30"),  # https://code.google.com/p/pyiso8601/issues/detail?id=18
    ("19950204", datetime.datetime(1995, 2, 4, tzinfo=iso8601.UTC), "1995-02-04T00:00:00+00:00"),  # https://code.google.com/p/pyiso8601/issues/detail?id=1
    ("2010-07-20 15:25:52.520701+00:00", datetime.datetime(2010, 7, 20, 15, 25, 52, 520701, iso8601.FixedOffset(0, 0, "+00:00")), "2010-07-20T15:25:52.520701+00:00"),  # https://code.google.com/p/pyiso8601/issues/detail?id=17
    ("2010-06-12", datetime.datetime(2010, 6, 12, tzinfo=iso8601.UTC), "2010-06-12T00:00:00+00:00"),  # https://code.google.com/p/pyiso8601/issues/detail?id=16
    ("1985-04-12T23:20:50.52-05:30", datetime.datetime(1985, 4, 12, 23, 20, 50, 520000, iso8601.FixedOffset(-5, -30, "-05:30")), "1985-04-12T23:20:50.520000-05:30"),  # https://bitbucket.org/micktwomey/pyiso8601/issue/8/015-parses-negative-timezones-incorrectly
    ("1997-08-29T06:14:00.000123Z", datetime.datetime(1997, 8, 29, 6, 14, 0, 123, iso8601.UTC), "1997-08-29T06:14:00.000123+00:00"),  # https://bitbucket.org/micktwomey/pyiso8601/issue/9/regression-parsing-microseconds
    ("2014-02", datetime.datetime(2014, 2, 1, 0, 0, 0, 0, iso8601.UTC), "2014-02-01T00:00:00+00:00"),  # https://bitbucket.org/micktwomey/pyiso8601/issue/14/regression-yyyy-mm-no-longer-parses
    ("2014", datetime.datetime(2014, 1, 1, 0, 0, 0, 0, iso8601.UTC), "2014-01-01T00:00:00+00:00"),  # YYYY
    ("1997-08-29T06:14:00,000123Z", datetime.datetime(1997, 8, 29, 6, 14, 0, 123, iso8601.UTC), "1997-08-29T06:14:00.000123+00:00"),  # Use , as decimal separator
])
def test_parse_valid_date(valid_date, expected_datetime, isoformat):
    """Each valid input parses to the expected datetime and round-trips."""
    result = iso8601.parse_date(valid_date)
    # Compare field by field first so a failure names the offending part.
    for field in ("year", "month", "day", "hour", "minute", "second",
                  "microsecond", "tzinfo"):
        assert getattr(result, field) == getattr(expected_datetime, field)
    assert result == expected_datetime
    rendered = result.isoformat()
    assert rendered == expected_datetime.isoformat()
    copy.deepcopy(result)  # ensure it's deep copy-able
    pickle.dumps(result)  # ensure it pickles
    if isoformat:
        assert result.isoformat() == isoformat
    assert iso8601.parse_date(result.isoformat()) == result  # Test round trip
75 changes: 75 additions & 0 deletions build/plugin/src/resources/libraries/m3u8/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# coding: utf-8
# Copyright 2014 Globo.com Player authors. All rights reserved.
# Use of this source code is governed by a MIT License
# license that can be found in the LICENSE file.

import sys
# Despite the name, this holds the whole sys.version_info tuple; it is
# compared against (3,) to choose between the Python 2 and Python 3 readers.
PYTHON_MAJOR_VERSION = sys.version_info

import os
import posixpath

try:
import urlparse as url_parser
import urllib2
urlopen = urllib2.urlopen
except ImportError:
import urllib.parse as url_parser
from urllib.request import urlopen as url_opener
urlopen = url_opener


from m3u8.model import M3U8, Playlist, IFramePlaylist, Media, Segment
from m3u8.parser import parse, is_url

# Names exported by ``from m3u8 import *``.
__all__ = ('M3U8', 'Playlist', 'IFramePlaylist', 'Media',
'Segment', 'loads', 'load', 'parse')

def loads(content):
    '''
    Builds a M3U8 object from a string holding m3u8 content.
    Raises ValueError if invalid content
    '''
    playlist = M3U8(content)
    return playlist

def load(uri):
    '''
    Fetches the content behind a URI (a URL or a local file path) and
    returns it as a M3U8 object.
    Raises ValueError if invalid content or IOError if request fails.
    '''
    if not is_url(uri):
        return _load_from_file(uri)
    return _load_from_uri(uri)

# Support for python3 inspired by https://github.com/szemtiv/m3u8/
def _load_from_uri(uri):
    '''
    Downloads the playlist at the given URL and returns a M3U8 object whose
    base_uri is derived from the URL the response was retrieved from.
    '''
    resource = urlopen(uri)
    try:
        # Ask this response for its URL rather than opening the URI a second
        # time just to call geturl() on it.
        base_uri = _parsed_url(resource.geturl())
        if PYTHON_MAJOR_VERSION < (3,):
            content = _read_python2x(resource)
        else:
            content = _read_python3x(resource)
    finally:
        # Always release the connection, even if reading or decoding fails.
        resource.close()
    return M3U8(content, base_uri=base_uri)

def _url_for(uri):
    '''
    Opens the given URI and returns the URL reported by the response.
    '''
    response = urlopen(uri)
    try:
        return response.geturl()
    finally:
        # The response is only needed for its URL; close it so the
        # connection is not leaked.
        response.close()

def _parsed_url(url):
    '''
    Returns scheme://netloc plus the parent of the URL's path, i.e. the
    URL with its last path component (and any query) removed.
    '''
    parts = url_parser.urlparse(url)
    origin = '%s://%s' % (parts.scheme, parts.netloc)
    # Appending '/..' and normalising drops the last path component.
    parent_path = posixpath.normpath('%s/..' % parts.path)
    return url_parser.urljoin(origin, parent_path)

def _read_python2x(resource):
    '''
    Reads everything from the resource and strips surrounding whitespace.
    '''
    body = resource.read()
    return body.strip()

def _read_python3x(resource):
    '''
    Reads everything from the resource and decodes it with the charset
    given by its headers, using utf-8 when none is declared.
    '''
    raw = resource.read()
    charset = resource.headers.get_content_charset(failobj="utf-8")
    return raw.decode(charset)

def _load_from_file(uri):
    '''
    Reads a playlist from a local path and returns a M3U8 object whose
    base_uri is the directory part of that path.
    '''
    with open(uri) as handle:
        text = handle.read()
    return M3U8(text.strip(), base_uri=os.path.dirname(uri))

Loading

0 comments on commit ced3ed0

Please sign in to comment.