-
Notifications
You must be signed in to change notification settings - Fork 11
/
domtemplate.php
641 lines (591 loc) · 27.1 KB
/
domtemplate.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
<?php declare( strict_types=1 );
// DOM Templating classes v20 © copyright (cc-by) Kroc Camen 2012-2021
// you may do whatever you want with this code as long as you give credit
// documentation at <camendesign.com/dom_templating>
/* Basic API:
new DOMTemplate (source, [namespaces])
(string)
to output the HTML / XML, cast the DOMTemplate object to a string,
e.g. `echo $template;`, or `$output = (string) $template;`
query (query)
make an XPath query
set (queries, [asHTML])
change HTML by specifying an array of ('XPath' => 'value')
setValue (query, value, [asHTML])
change a single HTML value with an XPath query
addClass (query, new_class)
add a class to an HTML element
append (query, content)
append content to the end of the *inside* an element
remove (query)
remove one or more HTML elements, attributes or classes
repeat (query)
return one (or more) elements as sub-templates:
next ()
append the sub-template to the list and reset its content
*/
namespace kroc;
// class DOMTemplateNode
//==============================================================================
// these methods are shared between the base `DOMTemplate` and the repeater
// `DOMTemplateRepeater`. for a good description of 'abstract', see
// <php.net/manual/en/language.oop5.abstract.php#95404>
//
abstract class DOMTemplateNode {
// the node every query and edit of this object is applied to; assigned by
// the constructor (subclasses such as the repeater re-assign it later)
protected $DOMNode; // reference to the `DOMNode` being operated upon
// built by the constructor from the owner document of `$DOMNode`
private $DOMXPath; // an internal XPath object,
// so you don't have to manage one externally
// `'prefix' => 'URI'` pairs exactly as given to the constructor; they are
// registered on the XPath object and handed on to repeaters by `repeat`
protected $namespaces; // optional XML namespaces
// NOTE: methods of this class also read `$this->type`, which is not declared
// here -- the concrete subclasses declare it
// create_fragment : given plain-text, turn it into a small DOM tree
//--------------------------------------------------------------------------
private function create_fragment (string $text) : \DOMDocumentFragment {
    // HTML input has to be converted to well-formed XML before it can be
    // parsed into the fragment; XML input is used verbatim.
    // NOTE: the comparison is deliberately loose -- `DOMTemplateRepeater`
    // declares `$type` without assigning it, and `null == 0` keeps
    // repeaters on the HTML path
    if ($this->type == DOMTemplate::HTML) {
        $markup = self::toXML ($text);
    } else {
        $markup = $text;
    }
    // the fragment is created by the document that owns our node,
    // because nodes from different documents cannot be mixed
    $fragment = $this->DOMNode->ownerDocument->createDocumentFragment ();
    $fragment->appendXML ($markup);
    return $fragment;
}
// html_entity_decode : convert HTML entities back to UTF-8
//--------------------------------------------------------------------------
public function html_entity_decode (string $html): string {
    // purpose: replace HTML named entities (e.g. `&copy;`) with the real
    // UTF-8 characters. everything is handled as XML internally and XML
    // does not know these names, so they would otherwise cause blank output
    //
    // $html    the text to convert
    // returns  the text with named entities replaced; `&amp;`, `&lt;` and
    //          `&gt;` are left untouched

    // lookup table of `'&name;' => 'character'`; generating it is slow,
    // so it is built on first use and kept for all later calls
    static $htmlentities = null;

    if ($htmlentities === null) {
        // `get_html_translation_table` returns `'character' => '&name;'`
        $table = get_html_translation_table (
            HTML_ENTITIES, ENT_NOQUOTES | ENT_HTML401, 'UTF-8'
        );
        // the three markup-significant entities must stay encoded so that
        // user text is never turned into working HTML. `array_diff` compares
        // *values*, which in this table are the entity names, so the names
        // (not the raw characters) have to be listed for them to be removed
        $table = array_diff ($table, ['&amp;', '&lt;', '&gt;']);
        // reverse the direction of the table: entity name => character
        $htmlentities = array_flip ($table);
    }

    return str_replace (
        array_keys ($htmlentities),
        array_values ($htmlentities),
        $html
    );
}
// toXML : convert string input to safe XML for importing into DOM
//--------------------------------------------------------------------------
// TODO: even though this isn't static, we seem to be able to call it
// statically!?
public function toXML (string $text): string {
    // converts a string of HTML into text that can be parsed as XML,
    // in four passes; returns the converted string

    // [1] swap HTML named entities (e.g. `&copy;`) for the real UTF-8
    //     characters, which XML accepts as-is
    $xml = $this->html_entity_decode ($text);

    // [2] give void elements written the HTML way (`<br>`) a closing
    //     slash; tags that already end in `/>` or that are immediately
    //     followed by their own end tag are not touched
    $void_element =
        '/<(area|base|basefont|br|col|embed|hr|img|input|keygen|link|'.
        'menuitem|meta|param|source|track|wbr)\b([^>]*)(?<!\/)>(?!<\/\1>)'.
        '/is';
    $xml = preg_replace ($void_element, '<$1$2 />', $xml);

    // [3] expand value-less HTML attributes (`<a attr>`) into the XML
    //     form (`<a attr="attr">`). each pass rewrites the first bare
    //     attribute found, so repeat until the pattern stops matching
    $bare_attribute =
        '/(?>(<(?!!)[a-z-]+(?:\s|[a-z-]+="[^"]*")+))([a-z-]+)(?=[>\s])/is';
    while (preg_match ($bare_attribute, $xml, $found, PREG_OFFSET_CAPTURE)) {
        // every capture is a `[text, byte-offset]` pair
        $tag_start = $found[1][0];   // tag text preceding the attribute
        $attribute = $found[2][0];   // the bare attribute name
        $xml = substr_replace (
            $xml,
            $tag_start.$attribute.'="'.$attribute.'"',
            $found[0][1],
            strlen ($found[0][0])
        );
    }

    // [4] wrap the content of `<script>` elements in CDATA so that
    //     JavaScript does not have to be valid XML
    return preg_replace (
        '/(<script[^>]*>)(.*?)(<\/script>)/is',
        "$1<![CDATA[$2]]>$3", $xml
    );
}
// shorthand2xpath : convert our shorthand XPath syntax to full XPath
//--------------------------------------------------------------------------
// actions are performed on elements using xpath, but for brevity
// a shorthand is also recognised in the format of:
//
// #id find an element with a particular ID
// (instead of writing `.//*[@id="…"]`)
// .class find an element with a particular class
// element#id enforce a particular element type
// (ID or class supported)
// #id@attr select the named attribute of the found element
// element#id@attr a fuller example
//
// note also:
// - you can test the value of attributes (e.g. '#id@attr="test"')
// this selects the element, not the attribute
// - sub-trees in shorthand can be expressed with '/',
// e.g. '#id/li/a@attr'
// - an index-number can be provided after the element name,
// e.g. 'li[1]'
//
public static function shorthand2xpath (
    // a string to convert
    string $query,
    // by default, the converted XPath uses a relative prefix
    // -- ".//" -- to work around a bug in XPath matching.
    // see <php.net/manual/en/domxpath.query.php#99760> for details
    bool $use_relative = true
) : string {
    // returns the full XPath for a shorthand query; a query that does not
    // fit the shorthand format is returned unchanged (it is assumed to be
    // real XPath already)

    // results are cached (this doubles the speed of repeat loops).
    // the output depends on *both* arguments, so the cache is keyed on
    // both -- otherwise the prefixed form of "li" would be reused for the
    // sub-step of "ul/li", or the other way around
    static $cache = [];
    $relative = (int) $use_relative;
    if (isset ($cache[$relative][$query])) return $cache[$relative][$query];

    // match the allowed format of shorthand. capture groups:
    //  1  element name, *including* the optional "[n]" index (2)
    //  3  "." or "#",  4  the class name / ID
    //  5  "@attr", including the optional '="value"' test (6)
    //  7  everything after the first "/" (a sub-tree in shorthand)
    // groups that did not take part in the match are empty or missing
    if (!preg_match (
        '/^(?!\/)([a-z0-9:-]+(\[\d+\])?)?(?:([\.#])([a-z0-9:_-]+))?'.
        '(@[a-z-]+(="[^"]+")?)?(?:\/(.*))?$/i',
        $query, $m
    )) return $cache[$relative][$query] = $query;

    $element   = $m[1] ?? '';
    $name      = $m[4] ?? '';
    $attribute = $m[5] ?? '';
    $subtree   = $m[7] ?? '';

    // apply the relative prefix
    $xpath = $use_relative ? './/' : '';
    // the element name, if specified, otherwise "*". group 1 already
    // carries the index, so group 2 must not be appended again:
    // "li[2][2]" would never match anything
    $xpath .= $element !== '' ? $element : '*';

    if ($name !== '') {
        $xpath .= $m[3] === '#'
            // an ID
            ? "[@id=\"{$name}\"]"
            // a class. class attributes can contain multiple classes,
            // separated by spaces, so we have to test for the whole-word,
            // and not a partial-match
            : "[contains(concat(' ', @class, ' '),\" {$name} \")]";
    }
    if ($attribute !== '') {
        $xpath .= ($m[6] ?? '') !== ''
            ? "[{$attribute}]"   // an attribute test: selects the element
            : "/{$attribute}";   // no test: selects the attribute itself
    }
    // the remainder is converted the same way, but without the prefix
    if ($subtree !== '') {
        $xpath .= '/'.self::shorthand2xpath ($subtree, false);
    }
    return $cache[$relative][$query] = $xpath;
}
// new DOMTemplateNode : instantiation
//--------------------------------------------------------------------------
// you cannot instantiate this class yourself, _always_ work through
// DOMTemplate! why? because you cannot mix nodes from different documents!
// DOMTemplateNodes _must_ come from DOMDocument kept privately inside
// DOMTemplate
//
public function __construct (
    \DOMNode $DOMNode,
    array $namespaces=[]
) {
    // the given node is the base point for every XPath query run through
    // this object; the namespaces are kept so they can be passed on later
    $this->DOMNode = $DOMNode;
    $this->namespaces = $namespaces;

    // XPath works on the document that owns the node. if the template has
    // an XMLNS then queries only match once the namespace is registered
    // here *and* element names in the query carry the same prefix
    $xpath = new \DOMXPath ($DOMNode->ownerDocument);
    foreach ($namespaces as $prefix => $uri) {
        $xpath->registerNamespace ($prefix, $uri);
    }
    $this->DOMXPath = $xpath;
}
// query : find node(s)
//--------------------------------------------------------------------------
// note that this method returns a PHP DOMNodeList, not a DOMTemplateNode!
// you cannot use `query` and then use other DOMTemplateNode methods off
// of the result. the reason for this is because you cannot yet extend
// DOMNodeList and therefore can't create APIs that affect all the nodes
// returned by an XPath query
//
// TODO: we could use the "decorator" pattern, given as a response
// to me here: <https://bugs.php.net/bug.php?id=48352>
//
public function query (
    // an XPath/shorthand (see `shorthand2xpath`) to search for nodes;
    // several targets can be given at once, separated by ", "
    string $query
) : \DOMNodeList {
    // returns the matching nodes, searched relative to this object's node.
    // throws `\Exception` when the final expression cannot be evaluated

    // split the query into its targets. only a ", " outside of quotes,
    // parentheses and square brackets separates targets, so that XPath
    // such as `//a[contains(@href, "x")]` is left in one piece
    $targets = [];
    $current = '';
    $depth   = 0;       // nesting level of "(" and "["
    $quote   = null;    // the quote character we are inside of, if any
    $length  = strlen ($query);
    for ($i = 0; $i < $length; $i++) {
        $char = $query[$i];
        if ($quote !== null) {
            // XPath string literals have no escapes;
            // the same quote character ends the literal
            if ($char === $quote) $quote = null;
        } elseif ($char === '"' || $char === "'") {
            $quote = $char;
        } elseif ($char === '(' || $char === '[') {
            $depth++;
        } elseif ($char === ')' || $char === ']') {
            if ($depth > 0) $depth--;
        } elseif (
            $char === ',' && $depth === 0 &&
            $i + 1 < $length && $query[$i + 1] === ' '
        ) {
            $targets[] = $current;
            $current = '';
            $i++;       // step over the space of the separator
            continue;
        }
        $current .= $char;
    }
    $targets[] = $current;

    // convert each target to real XPath and combine them into a union
    $converted = [];
    foreach ($targets as $target) {
        $converted[] = self::shorthand2xpath ($target);
    }
    $xpath = implode ('|', $converted);

    // run the real XPath query. the warning is suppressed because
    // a failed query is reported through the exception instead
    $result = @$this->DOMXPath->query ($xpath, $this->DOMNode);
    if (!$result) {
        throw new \Exception ("Invalid XPath Expression: $xpath");
    }
    return $result;
}
// set : change multiple nodes in a simple fashion
//--------------------------------------------------------------------------
public function set (
    // a map of `'xpath' => 'text'`: where to write, and what
    array $queries,
    // false (the default) encodes the text so it is shown as written;
    // true inserts the text as real HTML
    bool $asHTML = false
) {
    // every entry is handed to `setValue` unchanged, in array order
    foreach (array_keys ($queries) as $target) {
        $this->setValue ($target, $queries[$target], $asHTML);
    }
    // allow chaining
    return $this;
}
// setValue : set the text on the results of a single xpath query
//--------------------------------------------------------------------------
public function setValue (
    // an XPath/shorthand (see `shorthand2xpath`) to search for nodes
    string $query,
    // what text to replace the node's contents with
    string $value,
    // if the text should be safety encoded or inserted as HTML
    bool $asHTML = false
) {
    // applies `$value` to every node the query finds; what that means
    // depends on the kind of node. returns `$this` for chaining
    foreach ($this->query ($query) as $node) {
        if ($node->nodeType == XML_ATTRIBUTE_NODE) {
            if ($node->nodeName == 'class') {
                // a "class" attribute is added to, not overwritten
                $this->setClassNode ($node, $value);
            } else {
                // any other attribute has its value replaced
                $node->nodeValue = htmlspecialchars ($value, ENT_QUOTES);
            }
        } elseif ($asHTML) {
            // remove the element's existing content
            $node->nodeValue = '';
            // you can't append a blank! only the empty string counts as
            // blank though -- a plain truthiness test would also throw
            // away the text "0"
            if ($value !== '') {
                // attach the HTML/XML fragment to the node
                $node->appendChild ($this->create_fragment ($value));
            }
        } else {
            // encode the text to display as-is
            $node->nodeValue = htmlspecialchars ($value, ENT_NOQUOTES);
        }
    }
    return $this;
}
// addClass : add a className to an element,
// appending it to existing classes if they exist
//--------------------------------------------------------------------------
public function addClass (
    // an XPath/shorthand (see `shorthand2xpath`) selecting the element(s)
    string $query,
    // the class name to add
    string $new_class
) {
    // adds the class to every element found; returns `$this` for chaining
    foreach ($this->query ($query) as $node) {
        // is there a 'class' attribute already?
        $class_node = $node->hasAttributes ()
            ? $node->attributes->getNamedItem ('class')
            : null
        ;
        // the test is against the empty string, not truthiness:
        // an existing `class="0"` has to be kept and added to
        if ($class_node !== null && $class_node->nodeValue !== '') {
            // add the new class if it is not already in the list
            $this->setClassNode ($class_node, $new_class);
        } else {
            // no class attribute (or an empty one): set it outright
            $node->setAttribute ('class', $new_class);
        }
    }
    return $this;
}
// add a className to an existing class attribute
// (this is shared between `setValue` & `addClass`)
private function setClassNode (
    // the "class" attribute node to add to
    \DOMNode $DOMNode,
    // the class name to add
    string $class
) : void {
    $current = $DOMNode->nodeValue;
    // don't add a class twice. the comparison must be strict: compared
    // loosely, numeric-looking names such as "1" and "01" count as equal
    if (in_array ($class, explode (' ', $current), true)) return;
    // an empty attribute takes the class as-is, without a leading space.
    // NOTE(review): the `@` is kept from the original code; presumably it
    // hides warnings raised for values the DOM dislikes -- confirm before
    // removing it
    @$DOMNode->nodeValue = ($current === '') ? $class : $current." $class";
}
// append: append content to the end of the *inside* an element
//--------------------------------------------------------------------------
public function append(
    string $query,          // node query to select element(s)
    string $content         // content to append
) {
    // appends `$content` as the last child of every element found;
    // returns `$this` for chaining

    // you can't append a blank!
    if ($content === '') return $this;
    // the text is converted to a DOM fragment separately for every node:
    // appending a fragment moves its children into the target and leaves
    // the fragment empty, so one shared fragment would only ever reach
    // the first node
    foreach ($this->query ($query) as $node) {
        $node->appendChild ($this->create_fragment ($content));
    }
    // chain...
    return $this;
}
// remove : remove all the elements / attributes that match an xpath query
//--------------------------------------------------------------------------
public function remove (
    // what to remove; either
    // - a single XPath/shorthand string, or
    // - an array of `'xpath' => value`, where a false-like value skips
    //   the entry and a true-like value runs it. this lets you pass the
    //   same array every time, with the decisions computed beforehand
    //
    // for an entry that selects a class attribute (e.g. 'a@class') the
    // value may also be a class name: only that name is taken out of the
    // attribute, and the attribute itself is kept while other names remain
    //
    //      $DOMTemplate->remove (['a@class' => 'undesired']);
    //
    $query  // TODO: use union type or extra methods for string|array
) {
    // a lone string is treated as a one-entry array that is switched on
    $targets = is_string ($query) ? [$query => true] : $query;

    foreach ($targets as $xpath => $logic) {
        // entries that are switched off are not even queried
        if (!$logic) continue;

        foreach ($this->query ($xpath) as $node) {
            // anything that is not an attribute is detached from its parent
            if ($node->nodeType != XML_ATTRIBUTE_NODE) {
                $node->parentNode->removeChild ($node);
                continue;
            }
            // a class attribute, with a class name given to take out?
            if ($node->nodeName == 'class' && is_string ($logic)) {
                // rebuild the attribute value without that name
                $remaining = array_diff (
                    explode (' ', $node->nodeValue), [$logic]
                );
                $node->nodeValue = implode (' ', $remaining);
                // names are left over: the attribute stays
                if ($node->nodeValue) continue;
            }
            // remove the whole attribute
            $node->parentNode->removeAttributeNode ($node);
        }
    }
    return $this;
}
// output the source code (cast the object to a string)
//--------------------------------------------------------------------------
public function __toString (): string {
    // returns the source code of this object's node: XML templates are
    // returned as the DOM writes them, HTML templates are converted back
    // from the internal XML into HTML first

    // the template root outputs the whole document: when a node is
    // specified the DOCTYPE / XML prolog is not included. the class is
    // tested with `instanceof`, because inside a namespace `get_class`
    // returns the qualified name and never equals a bare 'DOMTemplate'
    $is_root = $this instanceof DOMTemplate;
    $source = $this->DOMNode->ownerDocument->saveXML (
        $is_root ? null : $this->DOMNode,
        // expand all self-closed tags if for HTML
        $this->type == 0 ? LIBXML_NOEMPTYTAG : 0
    );

    // XML is already used for the internal representation;
    // if outputting XML no filtering is needed.
    // NOTE: the type is compared loosely against the literal values of
    // `DOMTemplate::HTML` (0) and `DOMTemplate::XML` (1); a repeater
    // leaves `$type` unassigned and `null == 0` treats it as HTML
    if ($this->type == 1) return $source;

    // fix and clean DOM's XML into HTML:
    //----------------------------------------------------------------------
    // self-close void HTML elements, which were written out as
    // `<br></br>` due to the expansion requested above
    // <https://html.spec.whatwg.org/#void-elements>
    $source = preg_replace (
        '/<(area|base|basefont|br|col|embed|hr|img|input|keygen|link|'.
        'menuitem|meta|param|source|track|wbr)\b([^>]*)(?<!\/)><\/\1>/is',
        '<$1$2 />', $source
    );

    // convert XML-style attributes (`<a attr="attr">`) to HTML-style
    // attributes (`<a attr>`), this needs to be repeated until none are
    // left as we must anchor each to the opening bracket of the element,
    // otherwise content text might be hit too
    while (preg_match (
        '/(<(?!!)[^>]+\s)([a-z-]+)=([\'"]?)\2\3/im',
        $source, $m, PREG_OFFSET_CAPTURE
    )) {
        // captures are `[text, byte-offset]` pairs: keep the start of the
        // tag (1) and the attribute name (2), drop the `="name"` part
        $source = substr_replace (
            $source, $m[1][0].$m[2][0], $m[0][1], strlen ($m[0][0])
        );
    }

    // strip out CDATA sections, keeping their content
    $source = preg_replace ('/<!\[CDATA\[(.*?)\]\]>/s', '$1', $source);
    return $source;
}
// repeat : iterate a node
//--------------------------------------------------------------------------
// this will return a DOMTemplateRepeaterArray class that allows you to
// modify the contents the same as with the base template but also append
// the changed sub-template to the end of the list and reset its content
// to go again. this makes creating a list stunningly simple! e.g.
/*
$item = $DOMTemplate->repeat ('.list-item');
foreach ($data as $value) $item->setValue ('.', $value)->next ();
*/
public function repeat (
    // an XPath/shorthand (see `shorthand2xpath`) selecting the element(s)
    // to use as the repeating sub-template(s)
    string $query
) : DOMTemplateRepeaterArray
{
    // the query may well find more than one element, so the result is
    // always wrapped in a `DOMTemplateRepeaterArray`; it is given our
    // namespaces so that queries inside the repeaters work the same
    $matches = $this->query ($query);
    return new DOMTemplateRepeaterArray ($matches, $this->namespaces);
}
}
// class DOMTemplate : the overall template controller
//==============================================================================
class DOMTemplate extends DOMTemplateNode {
    // internal reference to the PHP `DOMDocument` for the template's XML
    private $DOMDocument;

    // what type of data are we processing?
    // (decided from the source text by the constructor)
    protected $type = self::HTML;
    public const HTML = 0;
    public const XML  = 1;

    // new DOMTemplate : instantiation
    //--------------------------------------------------------------------------
    public function __construct (
        // a string of the HTML or XML to form the template
        string $source,
        // an array of XML namespaces if your document uses them,
        // in the format of `'namespace' => 'namespace URI'`
        array $namespaces=[]
    ) {
        // detect the content type; HTML or XML,
        // HTML will need filtering during input and output
        // -- does this source have an XML prolog? all five characters of
        // "<?xml" are compared, case-insensitively
        $this->type = substr_compare ($source, '<?xml', 0, 5, true) === 0
            ? self::XML : self::HTML
        ;

        // if the source is HTML add an XML prolog
        // to avoid mangling unicode characters, see
        // <php.net/manual/en/domdocument.loadxml.php#94291>,
        // also convert it to XML for PHP DOM use
        $xml = $this->type == self::HTML
            ? "<?xml version=\"1.0\" encoding=\"utf-8\"?>".$this->toXML ($source)
            : $source
        ;

        // load the template file to work with,
        // it _must_ have only one root (wrapping) element; e.g. `<html>`
        $this->DOMDocument = new \DOMDocument ();
        if (!$this->DOMDocument->loadXML (
            $xml,
            // <https://www.php.net/manual/en/libxml.constants.php>
            LIBXML_COMPACT |    // libxml >= 2.6.21
            LIBXML_NONET        // do not connect to external resources
        )) trigger_error (
            "Source is invalid XML", E_USER_ERROR
        );

        // set the root node for all XPath searching
        // (handled all internally by `DOMTemplateNode`)
        parent::__construct ($this->DOMDocument->documentElement, $namespaces);
    }

    // output the document (cast the object to a string, i.e. `echo $template;`)
    //--------------------------------------------------------------------------
    public function __toString (): string {
        // we defer to DOMTemplateNode which returns the source for any node,
        // the top-level template only needs to consider the prolog
        $output = parent::__toString ();
        // XML templates are returned untouched
        if ($this->type != self::HTML) return $output;
        // if the input was HTML, remove the XML prolog on output
        return preg_replace ('/^<\?xml[^<]*>\n/', '', $output);
    }
}
// class DOMTemplateRepeaterArray : allow repetition over multiple nodes
//==============================================================================
// this is just a wrapper to handle that `repeat` might be executed on more
// than one element simultaneously; for example, if you are producing a list
// that occurs more than once on a page (e.g. page number links in a forum)
//
class DOMTemplateRepeaterArray {
    // the `DOMTemplateRepeater`s, one per matched node. this starts as an
    // empty array so that a query without matches leaves a list that can
    // still be looped and counted
    private $nodes = [];

    public function __construct (
        // the nodes to repeat (the result of an XPath query)
        \DOMNodeList $DOMNodeList,
        // XML namespaces, passed on to each repeater
        array $namespaces=[]
    ) {
        // convert the XPath query result into extended `DOMTemplateNode`s
        // (`DOMTemplateRepeater`) so that you can modify the HTML with
        // the same usual DOMTemplate API
        foreach ($DOMNodeList as $DOMNode) {
            $this->nodes[] = new DOMTemplateRepeater ($DOMNode, $namespaces);
        }
    }

    // refer to `DOMTemplateRepeater->next`
    public function next () {
        // `$this->nodes` is a plain PHP array that `next` does not alter
        // (only the document changes), so it can be looped directly
        foreach ($this->nodes as $node) $node->next ();
        return $this;
    }

    // refer to `DOMTemplateNode->set`
    public function set (
        // an array of `'xpath' => 'text'`, the same as the method this
        // forwards to requires
        array $queries,
        bool $asHTML = false
    ) {
        foreach ($this->nodes as $node) $node->set ($queries, $asHTML);
        return $this;
    }

    // refer to `DOMTemplateNode->setValue`
    public function setValue (
        string $query,
        string $value,
        bool $asHTML = false
    ) {
        foreach ($this->nodes as $node) {
            $node->setValue ($query, $value, $asHTML);
        }
        return $this;
    }

    // refer to `DOMTemplateNode->addClass`
    public function addClass (
        string $query,
        string $new_class
    ) {
        foreach ($this->nodes as $node) $node->addClass ($query, $new_class);
        return $this;
    }

    // refer to `DOMTemplateNode->remove`
    public function remove (
        // a string, or an array of `'xpath' => true|false|'class name'`;
        // left untyped like the method this forwards to
        $query
    ) {
        foreach ($this->nodes as $node) $node->remove ($query);
        return $this;
    }
}
// class DOMTemplateRepeater : the business-end of `DOMTemplateNode->repeat`!
//==============================================================================
class DOMTemplateRepeater extends DOMTemplateNode {
    private $refNode;       // the templated node will be added after this node
    private $template;      // a copy of the original node to work from each time

    // the content type, as read by the shared `DOMTemplateNode` methods.
    // nothing assigns it in this class; the explicit default is the value
    // those methods' loose comparisons already treated the unset property
    // as. NOTE(review): repeaters of an XML template are therefore
    // handled as HTML too -- confirm whether that is intended
    protected $type = DOMTemplate::HTML;

    public function __construct (
        // the element to repeat
        \DOMNode $DOMNode,
        // XML namespaces to register for XPath queries
        array $namespaces=[]
    ) {
        // we insert the templated item after the reference node,
        // which will always be the last item that was templated
        $this->refNode = $DOMNode;
        // take a copy of the original node that we will use
        // as a starting point each time we iterate
        $this->template = $DOMNode->cloneNode (true);
        // initialise the template with the current, original node
        parent::__construct ($DOMNode, $namespaces);
    }

    // put the current item into the document and start a fresh one;
    // returns this repeater so that calls can be chained
    public function next (): self {
        $parent = $this->refNode->parentNode;
        // when we insert the newly templated item,
        // use it as the reference node for the next item and so on
        $this->refNode = ($parent->lastChild === $this->DOMNode)
            ? $parent->appendChild ($this->DOMNode)
            // if there's some kind of HTML after the reference node, we can
            // use that to insert our item inbetween. this means that the list
            // you are templating doesn't have to be wrapped in an element!
            : $parent->insertBefore (
                $this->DOMNode, $this->refNode->nextSibling
            )
        ;
        // reset the template: further changes go to a fresh copy
        // of the original node
        $this->DOMNode = $this->template->cloneNode (true);
        return $this;
    }
}
?>