Skip to content

Commit

Permalink
replace DocsGL with Khronos refpages
Browse files Browse the repository at this point in the history
  • Loading branch information
paoloose committed May 26, 2024
1 parent 5b73a9f commit 13e26fd
Showing 1 changed file with 77 additions and 63 deletions.
140 changes: 77 additions & 63 deletions glad/documentation.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,69 @@
import re
import glad.util
from lxml import etree
from glad.parse import DocumentationSet, SpecificationDocs, CommandDocs, xml_parse
from glad.util import prefix, suffix, raw_text

class OpenGLRefpages(SpecificationDocs):
DOCS_NAME = 'opengl_refpages'

class DocsGL(SpecificationDocs):
DOCS_NAME = 'docs.gl'

URL = 'https://github.com/BSVino/docs.gl/archive/refs/heads/mainline.zip'
URL = 'https://github.com/KhronosGroup/OpenGL-Refpages/archive/refs/heads/main.zip'
SPEC = 'gl'

def select(self, feature_set):
current_major = list(feature_set.info)[0].version.major
commands = dict()

# As the time of writing DocsGL offers documentation from gl4 to gl2.
# If say we are targeting gl3, we will try to get the command documentation from gl3,
# otherwise we'll try from gl2 and so on. If more than one version is available only the
# most recent one will be used.
for version in range(current_major, 1, -1):
version_dir = self.docs_dir / f'{self.SPEC}{version}'
# At the time of writing Khronos hosts documentations for gl4 and gl2.1.
available_versions = ['gl2.1']
if current_major >= 4:
available_versions.append('gl4')

for version_dir in available_versions:
version_dir = self.docs_dir / f'{version_dir}'
if not version_dir.exists():
break

for html_file in version_dir.glob('*.xhtml'):
for command, docs in DocsGL.docs_from_html_file(html_file).items():
for html_file in version_dir.glob('*.xml'):
for command, docs in OpenGLRefpages.docs_from_html_file(html_file).items():
commands.setdefault(command, docs)

return DocumentationSet(commands=commands)

@classmethod
def docs_from_html_file(cls, path):
commands_parsed = dict()
version = path.parent.name
tree = xml_parse(path, recover=True)

tree = xml_parse(path)
sections = tree.findall('.//*[@class="refsect1"]')

# Some pages are just a redirect script
if tree.tag == 'script':
# gl4 files contain a namespace that polutes the tags, so we clean it up.
for elem in tree.getiterator():
try:
redirect = tree.text.split('window.location.replace("')[1].split('")')[0]
path = path.parent / f'{redirect}.xhtml'
tree = xml_parse(path)
if elem.tag.startswith('{'):
elem.tag = etree.QName(elem).localname
except:
return dict()
pass
etree.cleanup_namespaces(tree)

sections = tree.findall('.//refsect1')

# Brief parsing
# Command brief description appears in the first 'refnamediv' block
brief_block = cls.xml_text(tree.find('.//div[@class="refnamediv"]/p'))
brief = f'[{path.stem}](https://docs.gl/{path.parent.name}/{path.stem}) — ' \
f'{suffix(".", brief_block.split("—")[1])}'
brief_block = tree.find('.//refnamediv//refpurpose')

if brief_block is None:
return dict()

if version == 'gl2.1':
url = f'https://registry.khronos.org/OpenGL-Refpages/{version}/xhtml/{path.stem}.xml'
else:
url = f'https://registry.khronos.org/OpenGL-Refpages/{version}/html/{path.stem}.xhtml'
brief = f'[{path.stem}]({url}) — {suffix(".", cls.xml_text(brief_block))}'

# Description parsing
description = []
description_blocks = next(
(s for s in sections if raw_text(s.find('h2')) == 'Description'),
(s for s in sections if raw_text(s.find('title')) == 'Description'),
None,
)
if description_blocks is not None:
Expand All @@ -68,7 +77,7 @@ def docs_from_html_file(cls, path):

# Notes parsing
notes = []
notes_blocks = next((s for s in sections if raw_text(s.find('h2')) == 'Notes'), None)
notes_blocks = next((s for s in sections if raw_text(s.find('title')) == 'Notes'), None)
if notes_blocks is not None:
blocks = notes_blocks.findall('./*')
notes = list(
Expand All @@ -79,43 +88,49 @@ def docs_from_html_file(cls, path):
)

# Parameters parsing
# DocsGL puts all the function definitions inside .funcsynopsis.funcdef blocks.
# Khronos specs puts all the function definitions inside funcsynopsis/funcdef blocks.
#
# However, instead of describing each function on a separate file, DocsGL combines
# multiple related function definitions, whose parameters may be different, into a single
# file. This means that we have to find the correct block of parameters for each definition.
# However, instead of describing each function on a separate file, they group multiple
# related function definitions, whose parameters may be different, into a single file.
# This means that we have to find the correct block of parameters for each definition.
funcdefs = [
d for d in tree.findall('.//*[@class="funcsynopsis"]/*')
if d.find('.//*[@class="funcdef"]') is not None
d for d in tree.findall('.//funcsynopsis/*')
if d.find('.//funcdef') is not None
]

for func_def in funcdefs:
func_name = func_def.find('.//*[@class="fsfunc"]').text
func_params = [raw_text(s) for s in func_def.findall('.//var[@class="pdparam"]')]
func_name = func_def.find('.//function').text
func_params = [raw_text(s) for s in func_def.findall('.//parameter')]

# Params are defined in a separate section, often called 'Parameters for <func_name>'
# Params are defined in a separate section, called 'Parameters for <func_name>'
# or just 'Parameters'.
params_block = next(
(s for s in sections if raw_text(s.find('h2')) == f'Parameters for {func_name}'),
(s for s in sections if raw_text(s.find('title')) == f'Parameters for {func_name}'),
None,
)
if not params_block is not None:
for p in list(s for s in sections if raw_text(s.find('h2')) == 'Parameters'):
block_params = [raw_text(n) for n in p.findall('.//dt//code')]
if all(p in block_params for p in func_params):
if params_block is None:
for p in list(s for s in sections if raw_text(s.find('title')) == 'Parameters'):
block_params = [raw_text(n) for n in p.findall('.//term//parameter')]
if all(func_param in block_params for func_param in func_params):
params_block = p
break

# At this point we interpret params_block=None as a void parameter list.

params = []
if params_block is not None:
for names, desc in zip(
params_block.findall('.//dl//dt'),
params_block.findall('.//dl//dd'),
):
for name in names.findall('.//code'):
param_name = raw_text(name)
# A description can apply for more than one param (term), so we stack them until
# we find a listitem, which is a description of a param.
terms_stack = []
for param_or_desc in params_block.findall('.//varlistentry/*') if params_block is not None else []:
if param_or_desc.tag == 'term':
terms_stack.append(param_or_desc)
continue
if param_or_desc.tag == 'listitem':
for term in terms_stack:
param_name = raw_text(term.find('.//parameter'))
if param_name in func_params:
params.append(CommandDocs.Param(param_name, cls.xml_text(desc)))
# We interpret params_block=None as a void parameter list.
params.append(CommandDocs.Param(param_name, cls.xml_text(param_or_desc)))
terms_stack.clear()

commands_parsed[func_name] = CommandDocs(
brief, params, description, notes, None, None,
Expand Down Expand Up @@ -146,31 +161,30 @@ def format(e, is_tail=False):
return f'\n{CommandDocs.BREAK}-{e.text}'
return re.sub(r'\n+', '', e.text)

@staticmethod
def xml_text(e):
@classmethod
def xml_text(cls, e):
def paren(expr):
if re.match(r'^[a-zA-Z0-9_]+$', expr):
return expr
return f'({expr})'

def mfenced(e):
if e.attrib['close']:
return f'{e.attrib["open"]}{", ".join(DocsGL.xml_text(c) for c in e)}{e.attrib["close"]}'
return f'{e.attrib["open"]}{" ".join(DocsGL.xml_text(c) for c in e)}'
return f'{e.attrib["open"]}{", ".join(cls.xml_text(c) for c in e)}{e.attrib["close"]}'
return f'{e.attrib["open"]}{" ".join(cls.xml_text(c) for c in e)}'

text = ''.join(glad.util.itertext(
e,
convert={
'table': lambda _: f'(table omitted)',
'pre': lambda _: f'(code omitted)',
'mfrac': lambda e, : f'{paren(DocsGL.xml_text(e[0]))}/{paren(DocsGL.xml_text(e[1]))}',
'msup': lambda e: f'{paren(DocsGL.xml_text(e[0]))}^{paren(DocsGL.xml_text(e[1]))}',
'msub': lambda e: f'{paren(DocsGL.xml_text(e[0]))}_{paren(DocsGL.xml_text(e[1]))}',
'mtd': lambda e: f'{DocsGL.xml_text(e[0])}; ',
'mfenced': mfenced,
'informaltable': lambda _: f'(table omitted)',
'programlisting': lambda _: f'(code omitted)',
'mml:mfrac': lambda e, : f'{paren(cls.xml_text(e[0]))}/{paren(cls.xml_text(e[1]))}', #
'mml:msup': lambda e: f'{paren(cls.xml_text(e[0]))}^{paren(cls.xml_text(e[1]))}', #
'mml:msub': lambda e: f'{paren(cls.xml_text(e[0]))}_{paren(cls.xml_text(e[1]))}', #
'mml:mtd': lambda e: f'{cls.xml_text(e[0])}; ', #
'mml:mfenced': mfenced, #
},
format=DocsGL.format,
format=cls.format,
))
# \u00a0, \u2062, \u2062,
# are invisible characters used by docs.gl to separate words.
return re.sub(r'\n?[ \u00a0\u2062\u2061]+', ' ', text.strip())
return re.sub(r'\n? +', ' ', text.strip())

0 comments on commit 13e26fd

Please sign in to comment.