Documentation
¶
Overview ¶
Package xhtml makes golang.org/x/net/html easier to use.
Index ¶
- func AbsolutizeURLs(n *html.Node, base *url.URL)
- func AdoptChildren(dst, src *html.Node)
- func AppendText(n *html.Node, text string)
- func Attr(n *html.Node, name string) string
- func Clone(n *html.Node) *html.Node
- func Closest(n *html.Node, match Selector) *html.Node
- func DeepEqual(a, b *html.Node) bool
- func DeleteAttr(n *html.Node, key string)
- func DescendantsDepth(n *html.Node) iter.Seq2[int, *html.Node]
- func InnerHTML(n *html.Node) string
- func InnerHTMLBlocks(n *html.Node) string
- func IsBalanced(s string) bool
- func LastChildOrNew(p *html.Node, tag string, attrs ...string) *html.Node
- func New(tag string, attrs ...string) *html.Node
- func OuterHTML(n *html.Node) string
- func RemoveAll(nodes []*html.Node)
- func ReplaceWith(old, new *html.Node)
- func Select(n *html.Node, match Selector) *html.Node
- func SelectAll(n *html.Node, match Selector) iter.Seq[*html.Node]
- func SelectSlice(n *html.Node, match Selector) []*html.Node
- func SetAttr(n *html.Node, key, value string)
- func SetInnerHTML(n *html.Node, s string) error
- func ShallowEqual(a, b *html.Node) bool
- func TextContent(n *html.Node) string
- func ToBuffer(n *html.Node) *bytes.Buffer
- func UnnestChildren(n *html.Node)
- type Selector
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AbsolutizeURLs ¶ added in v0.25.3
AbsolutizeURLs mutates the document so that any href or src attributes on appropriate elements are absolute URLs as resolved by base.
Doesn't parse JavaScript or CSS.
func AdoptChildren ¶
AdoptChildren removes all of the children of src and makes them the children of dst.
func AppendText ¶
AppendText adds an html.TextNode with the specified text as a child of n.
func Closest ¶
Closest traverses the node and its parents until it finds a node that matches the Selector.
Example ¶
package main
import (
"fmt"
"strings"
"github.com/earthboundkid/xhtml"
"golang.org/x/net/html"
)
// main demonstrates Closest: it selects a button by ID, walks from the button
// up through its ancestors to the first node that has a data-server-id
// attribute, and prints that attribute's value.
func main() {
	doc, err := html.Parse(strings.NewReader(`
<div data-server-id="abc123">
<button id="theButton"></button>
</div>`))
	if err != nil {
		panic(err)
	}
	// Find #theButton
	bttnEl := xhtml.Select(doc, xhtml.WithID("theButton"))
	// Find the nearest node (the button itself or an ancestor) with data-server-id
	serverIDEl := xhtml.Closest(bttnEl, xhtml.WithDataset("server-id"))
	serverID := xhtml.Attr(serverIDEl, "data-server-id")
	fmt.Println(serverID)
}
Output: abc123
func DeepEqual ¶
DeepEqual returns true if a and b are ShallowEqual and all of their descendants are ShallowEqual as well.
func DeleteAttr ¶
DeleteAttr removes any attributes of n with the named key.
func DescendantsDepth ¶ added in v0.26.1
DescendantsDepth is like html.Node.Descendants(), except it also yields the depth of each node relative to n, with the direct children of n at depth 1.
Example ¶
package main
import (
"fmt"
"strings"
"github.com/earthboundkid/xhtml"
"golang.org/x/net/html"
)
func main() {
{
s := "<div><span></span><span>a</span></div>"
fmt.Printf("DescendantsDepth(%q)\n", s)
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
panic(err)
}
body := doc.FirstChild.FirstChild.NextSibling
for depth, n := range xhtml.DescendantsDepth(body) {
ntype := "element"
if n.Type != html.ElementNode {
ntype = "text"
}
fmt.Println("depth:", depth, "type:", ntype, "data:", n.Data)
}
}
{
s := "<div><span><span>a</span></span></div>"
fmt.Printf("DescendantsDepth(%q)\n", s)
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
panic(err)
}
body := doc.FirstChild.FirstChild.NextSibling
for depth, n := range xhtml.DescendantsDepth(body) {
ntype := "element"
if n.Type != html.ElementNode {
ntype = "text"
}
fmt.Println("depth:", depth, "type:", ntype, "data:", n.Data)
}
}
}
Output:
DescendantsDepth("<div><span></span><span>a</span></div>")
depth: 1 type: element data: div
depth: 2 type: element data: span
depth: 2 type: element data: span
depth: 3 type: text data: a
DescendantsDepth("<div><span><span>a</span></span></div>")
depth: 1 type: element data: div
depth: 2 type: element data: span
depth: 3 type: element data: span
depth: 4 type: text data: a
func InnerHTMLBlocks ¶
InnerHTMLBlocks is the same as InnerHTML, but it separates top level nodes with a line break.
func IsBalanced ¶
IsBalanced reports whether every opening tag in s has a matching closing tag.
func LastChildOrNew ¶
LastChildOrNew returns the last child of p if it is ShallowEqual to a new *html.Node with tag and attrs. Otherwise, it appends a new *html.Node with tag and attrs and returns that. For why this operation is useful, see Converting docx to clean HTML.
func New ¶
New creates a new html.Node with the specified tag and attributes. It handles properly setting the Node.Type and Node.DataAtom. New panics if the length of attrs is not even.
func RemoveAll ¶
RemoveAll orphans the nodes it is passed. It ignores a node if the node is nil or already an orphan.
func ReplaceWith ¶
ReplaceWith removes old from its Parent and inserts new in its place.
func SelectAll ¶
SelectAll returns an iterator yielding matching nodes in n.Descendants().
Example ¶
package main
import (
"fmt"
"strings"
"github.com/earthboundkid/xhtml"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// main parses a short list of links, selects every <a> element in the
// document, and prints each link's href attribute followed by its text.
func main() {
	const page = `
<ul>
<li><a href="https://example.com/en-us">Hello, World!</a></li>
<li><a href="https://example.com/ja-jp">こんにちは世界!</a></li>
</ul>`
	doc, err := html.Parse(strings.NewReader(page))
	if err != nil {
		panic(err)
	}
	// Iterate over the anchor elements and report the target and label of each.
	anchors := xhtml.SelectAll(doc, xhtml.WithAtom(atom.A))
	for a := range anchors {
		href := xhtml.Attr(a, "href")
		label := xhtml.TextContent(a)
		fmt.Println(href, label)
	}
}
Output:
https://example.com/en-us Hello, World!
https://example.com/ja-jp こんにちは世界!
func SelectSlice ¶
SelectSlice returns a slice of descendant nodes matched by the Selector.
func SetInnerHTML ¶
SetInnerHTML parses s in the context of n and makes the resulting Nodes the sole children of n.
func ShallowEqual ¶
ShallowEqual returns true if a and b have the same Type, DataAtom, Data, Namespace, and Attr. It does not look at parents, children, or siblings. nil *html.Nodes are never equal to each other.
func TextContent ¶
TextContent joins and trims the text node children of n.
func UnnestChildren ¶
UnnestChildren has all of the children of n adopted by n's parent, and then it removes n.
Types ¶
type Selector ¶
Selector is a function that matches html.Nodes.
func WithClass ¶
WithClass returns a Selector that matches nodes with classname.
Example ¶
package main
import (
"fmt"
"strings"
"github.com/earthboundkid/xhtml"
"golang.org/x/net/html"
)
// main parses a short list of links, selects the elements whose class list
// contains "english", and prints each match's href attribute followed by its
// text.
func main() {
	const page = `
<ul>
<li><a class="bttn english" href="https://example.com/en-us">Hello, World!</a></li>
<li><a class="bttn japanese" href="https://example.com/ja-jp">こんにちは世界!</a></li>
</ul>`
	doc, err := html.Parse(strings.NewReader(page))
	if err != nil {
		panic(err)
	}
	// Iterate over the .english elements and report the target and label of each.
	matches := xhtml.SelectAll(doc, xhtml.WithClass("english"))
	for el := range matches {
		href := xhtml.Attr(el, "href")
		label := xhtml.TextContent(el)
		fmt.Println(href, label)
	}
}
Output: https://example.com/en-us Hello, World!
func WithDataset ¶
WithDataset returns a Selector that matches html.Nodes with the given data attribute set. The attribute should be in kebab-case, not camelCase, without the "data-" prefix.