Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add handling for HTML void tags #181

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/document/epub/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl EpubDocument {
let mut zf = archive.by_name("META-INF/container.xml")?;
let mut text = String::new();
zf.read_to_string(&mut text)?;
let root = XmlParser::new(&text).parse();
let root = XmlParser::new(&text, false).parse();
root.find("rootfile")
.and_then(|e| e.attr("full-path"))
.map(String::from)
Expand All @@ -79,7 +79,7 @@ impl EpubDocument {
text
};

let info = XmlParser::new(&text).parse();
let info = XmlParser::new(&text, false).parse();
let mut spine = Vec::new();

{
Expand Down Expand Up @@ -269,7 +269,7 @@ impl EpubDocument {
let mut zf = self.archive.by_name(name).ok()?;
zf.read_to_string(&mut text).ok()?;
}
let root = XmlParser::new(&text).parse();
let root = XmlParser::new(&text, false).parse();
self.cache_uris(&root, name, start_offset, cache);
cache.get(uri).cloned()
} else {
Expand Down Expand Up @@ -328,7 +328,7 @@ impl EpubDocument {
}
}

let mut root = XmlParser::new(&text).parse();
let mut root = XmlParser::new(&text, false).parse();
root.wrap_lost_inlines();

let mut stylesheet = Vec::new();
Expand Down Expand Up @@ -636,7 +636,7 @@ impl Document for EpubDocument {
return None;
}

let root = XmlParser::new(&text).parse();
let root = XmlParser::new(&text, false).parse();
root.find("navMap").map(|map| {
let mut cache = FxHashMap::default();
let mut index = 0;
Expand Down
10 changes: 7 additions & 3 deletions src/document/html/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ impl HtmlDocument {
let size = file.metadata()?.len() as usize;
let mut text = String::new();
file.read_to_string(&mut text)?;
let mut content = XmlParser::new(&text).parse();
let mut content = XmlParser::new(&text, true).parse();
println!("Parsed content is {:#?}", content);
content.wrap_lost_inlines();
println!("Wrapped content is {:#?}", content);
let parent = path.as_ref().parent().unwrap_or_else(|| Path::new(""));

Ok(HtmlDocument {
Expand All @@ -77,8 +79,10 @@ impl HtmlDocument {

pub fn new_from_memory(text: &str) -> HtmlDocument {
let size = text.len();
let mut content = XmlParser::new(text).parse();
let mut content = XmlParser::new(text, true).parse();
println!("Parsed content is {:#?}", content);
content.wrap_lost_inlines();
println!("Wrapped content is {:#?}", content);

HtmlDocument {
text: text.to_string(),
Expand All @@ -95,7 +99,7 @@ impl HtmlDocument {

pub fn update(&mut self, text: &str) {
self.size = text.len();
self.content = XmlParser::new(text).parse();
self.content = XmlParser::new(text, true).parse();
self.content.wrap_lost_inlines();
self.text = text.to_string();
self.pages.clear();
Expand Down
4 changes: 2 additions & 2 deletions src/document/html/style.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ mod tests {

#[test]
fn simple_style() {
let xml1 = XmlParser::new("<a class='c x y' style='c: 7'/>").parse();
let xml2 = XmlParser::new("<a id='e' class='x y'/>").parse();
let xml1 = XmlParser::new("<a class='c x y' style='c: 7'/>", false).parse();
let xml2 = XmlParser::new("<a id='e' class='x y'/>", false).parse();
let (mut css1, _) = CssParser::new("a { b: 23 }").parse(RuleKind::Viewer);
let (mut css2, _) = CssParser::new(".c.x.y { b: 6 }").parse(RuleKind::Document);
let (mut css3, _) = CssParser::new(".y { b: 2 }").parse(RuleKind::Document);
Expand Down
35 changes: 26 additions & 9 deletions src/document/html/xml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ use super::dom::{Node, Attributes, text, element, whitespace};
/// Parser state over a borrowed markup string.
///
/// The same parser handles strict XML and HTML input; the `html` flag
/// selects which element-closing rules are applied while parsing.
pub struct XmlParser<'a> {
    /// The text being parsed; borrowed for the lifetime of the parser.
    pub input: &'a str,
    /// Current position within `input`. The constructor starts it at 0.
    // NOTE(review): presumably a byte offset moved forward by `advance` — confirm.
    pub offset: usize,
    /// When `true`, an opening tag whose name is an HTML void tag (`br`,
    /// `img`, `hr`, ...) is recorded as an element with no children instead
    /// of having its content parsed as child nodes.
    html: bool,
}

impl<'a> XmlParser<'a> {
pub fn new(input: &str) -> XmlParser {
pub fn new(input: &str, html: bool) -> XmlParser {
XmlParser {
input,
offset: 0,
html
}
}

Expand Down Expand Up @@ -88,9 +90,24 @@ impl<'a> XmlParser<'a> {
nodes.push(element(name, offset - 1, attributes, Vec::new()));
},
Some('>') => {
self.advance(1);
let children = self.parse_nodes();
nodes.push(element(name, offset - 1, attributes, children));
if self.html {
match name {
"area"|"base"|"br"|"col"|"command"|"embed"|"hr"|"img"|"input"|"keygen"|"link"|"meta"|"param"|"source"|"track"|"wbr" => {
self.advance(1);
nodes.push(element(name, offset - 1, attributes, Vec::new()));
},
_ => {
self.advance(1);
let children = self.parse_nodes();
nodes.push(element(name, offset - 1, attributes, children));
}

}
} else {
self.advance(1);
let children = self.parse_nodes();
nodes.push(element(name, offset - 1, attributes, children));
}
}
_ => (),
}
Expand Down Expand Up @@ -167,23 +184,23 @@ mod tests {
// A lone self-closing element: the parse result reports offset 0 and tag name "a".
#[test]
fn test_simple_element() {
let text = "<a/>";
// NOTE(review): the next two lines appear to be the before/after forms of the
// same call as rendered by the diff view; the two-argument form passes
// `html = false`, i.e. plain XML rules.
let xml = XmlParser::new(text).parse();
let xml = XmlParser::new(text, false).parse();
assert_eq!(xml.offset(), 0);
assert_eq!(xml.tag_name(), Some("a"));
}

// Attribute values may be double- or single-quoted; a single-quoted value
// can itself contain a double quote, which is kept verbatim.
#[test]
fn test_attributes() {
let text = r#"<a b="c" d='e"'/>"#;
// NOTE(review): the next two lines appear to be the before/after forms of the
// same call as rendered by the diff view.
let xml = XmlParser::new(text).parse();
let xml = XmlParser::new(text, false).parse();
assert_eq!(xml.attr("b"), Some("c"));
assert_eq!(xml.attr("d"), Some("e\""));
}

#[test]
fn test_text() {
let text = "<a>bcd</a>";
let xml = XmlParser::new(text).parse();
let xml = XmlParser::new(text, false).parse();
let child = xml.child(0);
assert_eq!(child.map(|c| c.offset()), Some(3));
assert_eq!(child.and_then(|c| c.text()), Some("bcd"));
Expand All @@ -192,15 +209,15 @@ mod tests {
// Whitespace between two sibling elements is kept: the child at index 1
// (between <b> and <c>) has the text " ".
#[test]
fn test_inbetween_space() {
let text = "<a><b>x</b> <c>y</c></a>";
// NOTE(review): the next two lines appear to be the before/after forms of the
// same call as rendered by the diff view.
let xml = XmlParser::new(text).parse();
let xml = XmlParser::new(text, false).parse();
let child = xml.child(1);
assert_eq!(child.and_then(|c| c.text()), Some(" "));
}

// A single space that is the only content of a nested element is preserved:
// `text()` on the parse result yields " ".
#[test]
fn test_central_space() {
let text = "<a><b> </b></a>";
// NOTE(review): the next two lines appear to be the before/after forms of the
// same call as rendered by the diff view.
let xml = XmlParser::new(text).parse();
let xml = XmlParser::new(text, false).parse();
assert_eq!(xml.text(), Some(" "));
}
}