-
Notifications
You must be signed in to change notification settings - Fork 12
/
html-tools.red
169 lines (156 loc) · 3.75 KB
/
html-tools.red
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
Red[
Title: "HTML Tools"
Author: "Boleslav Březovský"
]
#include %xml.red
probe-xml: func [
data
] [
foreach [tag content attributes] data [
print [tag length? content length? attributes]
]
]
foreach-node: func [
data
code
/local tag content attributes
] [
z: data
; FN takes three parameters: [tag content attribute]
foreach [tag content attributes] data [
do bind code 'tag
if block? content [
foreach-node content code
]
]
]
select-by: func [
data
type ; tag, class, content, attribute name
value
; TODO: How to support /only ? There some binding problems
/local action ret
] [
action: probe compose switch/default type [
tag [[equal? tag (to lit-word! value)]]
class [[find select attributes "class" (value)]]
content [[all [string? content find content (value)]]]
] [[equal? (value) select attributes (type)]]
ret: copy []
foreach-node data compose [
if (action) [
append ret reduce [tag content attributes]
]
]
ret
]
context [
parent: none ; TODO: make a closure
set 'parent? func [
data
value
/inner
/local tag content attributes
] [
unless inner [parent: none]
foreach [tag content attributes] data [
if equal? value reduce [tag content attributes] [
return parent
]
if block? content [
parent: reduce [tag content attributes]
if parent?/inner content value [return parent]
]
]
none
]
]
children?: func [
"Return children tag names"
data
/local tag content attributes
] [
collect [foreach [tag content attributes] data [keep tag]]
]
get-text: function [
data
] [
if any [not data string? data char? data] [return data]
ret: copy {}
foreach-node data compose/deep [
all [
string? content
append (ret) content
]
]
ret
]
show-h: does [
page: xml/decode read http://www.red-lang.org
headings: select-by page "post-title" 'class
foreach [t c a] headings [print c/a/2]
]
google: func [value] [
debug "Loading page"
page: rejoin [http://www.google.cz/search?q= replace/all value space #"+"]
page: read/binary probe page
write %goog.html page
debug "Decoding page"
page: load-non-utf page
debug "Page read"
page: xml/decode page
results: select-by page 'h3 'tag
result: collect [
foreach [t c a] results [keep reduce [get-text c/a rejoin [http://www.google.com select c/3 "href"]]]
]
new-line/all/skip result true 2
]
get-table: func [
"Convert <table> to block! of block!s"
table
/trim
/header "Get headers and return them as first row"
] [
table: any [table/table table]
if header [
headers: any [table/thead/tr (also remove/part table 3 table/tr)]
headers: collect [
foreach [t col a] headers [
col: get-text col
if trim [col: system/words/trim/lines form col]
keep col
]
]
]
table: any [table/tbody table]
data: collect/into [
foreach [t row a] table [ ; row
keep/only collect [
foreach [t cell a] row [
if cell: get-text cell [
if trim [cell: system/words/trim/lines form cell]
keep cell
]
]
]
]
] clear []
if header [insert/only data headers]
new-line/all/skip data true 1
copy data
]
; Using `get-table`:
;
; page: xml/decode read https://coinmarketcap.com/
; table: select-by page 'table 'tag
; t: get-table/trim table/table/tbody ; TODO: `get-table` should find this automatically
; probe copy/part t 5
;
; >>
; [
; ["1" "Bitcoin" "$46,856,630,435" "$2843.13" "16,480,650 BTC" "$748,864,000" "3.85%" "" ""]
; ["2" "Ethereum" "$18,906,132,157" "$201.78" "93,695,367 ETH" "$522,577,000" "0.44%" "" ""]
; ["3" "Ripple" "$6,482,777,296" "$0.169117" "38,333,090,674 XRP *" "$52,118,400" "1.42%" "" ""]
; ["4" "Litecoin" "$2,248,733,073" "$43.03" "52,255,407 LTC" "$211,233,000" "5.29%" "" ""]
; ["5" "NEM" "$1,499,805,000" "$0.166645" "8,999,999,999 XEM *" "$2,905,890" "1.11%" "" ""]
; ]