xhtml

package module
v0.26.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 14, 2026 License: MIT Imports: 8 Imported by: 1

README

xhtml GoDoc Go Report Card

Utilities for working with Go's x/net/html package.

It requires Go 1.23 and a version of x/net/html with iterators.

Documentation

Overview

Package xhtml makes x/net/html easier to use.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func AbsolutizeURLs added in v0.25.3

func AbsolutizeURLs(n *html.Node, base *url.URL)

AbsolutizeURLs mutates the document so that any href or src attributes on appropriate elements are absolute URLs as resolved by base.

Doesn't parse JavaScript or CSS.

func AdoptChildren

func AdoptChildren(dst, src *html.Node)

AdoptChildren removes all of the children of src and makes them the children of dst.

func AppendText

func AppendText(n *html.Node, text string)

AppendText adds an html.TextNode with the specified text as a child of n.

func Attr

func Attr(n *html.Node, name string) string

Attr returns the value of the first attribute of n with the named key.

func Clone

func Clone(n *html.Node) *html.Node

Clone n and all of its children.

func Closest

func Closest(n *html.Node, match Selector) *html.Node

Closest traverses the node and its parents until it finds a node that matches the Selector.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/earthboundkid/xhtml"
	"golang.org/x/net/html"
)

// main demonstrates xhtml.Closest: it locates the button by id, finds the
// nearest node (the button itself or one of its ancestors) that carries a
// data-server-id attribute, and prints that attribute's value.
func main() {
	// Keep the sample markup well-formed so the example does not depend on
	// the HTML parser's error recovery.
	doc, err := html.Parse(strings.NewReader(`
	<div data-server-id="abc123">
		<button id="theButton"></button>
	</div>`))
	if err != nil {
		panic(err)
	}
	// Find #theButton
	bttnEl := xhtml.Select(doc, xhtml.WithID("theButton"))
	// Find its data-server-id
	serverIDEl := xhtml.Closest(bttnEl, xhtml.WithDataset("server-id"))
	serverID := xhtml.Attr(serverIDEl, "data-server-id")
	fmt.Println(serverID)
}
Output:

abc123

func DeepEqual

func DeepEqual(a, b *html.Node) bool

DeepEqual returns true if a and b are ShallowEqual and all of their descendants are ShallowEqual as well.

func DeleteAttr

func DeleteAttr(n *html.Node, key string)

DeleteAttr removes any attributes of n with the named key.

func DescendantsDepth added in v0.26.1

func DescendantsDepth(n *html.Node) iter.Seq2[int, *html.Node]

DescendantsDepth is like html.Node.Descendants(), except it also yields the depth of each node relative to n.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/earthboundkid/xhtml"
	"golang.org/x/net/html"
)

// main walks two small documents with xhtml.DescendantsDepth and prints the
// depth, kind, and data of every node found beneath the <body> element.
func main() {
	inputs := []string{
		"<div><span></span><span>a</span></div>",
		"<div><span><span>a</span></span></div>",
	}
	for _, src := range inputs {
		fmt.Printf("DescendantsDepth(%q)\n", src)
		root, err := html.Parse(strings.NewReader(src))
		if err != nil {
			panic(err)
		}
		// Step from the document root to <html>, then <head>, then its
		// sibling <body>.
		bodyEl := root.FirstChild.FirstChild.NextSibling
		for level, node := range xhtml.DescendantsDepth(bodyEl) {
			// Anything that is not an element is reported as text.
			kind := "text"
			if node.Type == html.ElementNode {
				kind = "element"
			}
			fmt.Println("depth:", level, "type:", kind, "data:", node.Data)
		}
	}
}
Output:

DescendantsDepth("<div><span></span><span>a</span></div>")
depth: 1 type: element data: div
depth: 2 type: element data: span
depth: 2 type: element data: span
depth: 3 type: text data: a
DescendantsDepth("<div><span><span>a</span></span></div>")
depth: 1 type: element data: div
depth: 2 type: element data: span
depth: 3 type: element data: span
depth: 4 type: text data: a

func InnerHTML

func InnerHTML(n *html.Node) string

InnerHTML returns the serialized markup contained within n.

func InnerHTMLBlocks

func InnerHTMLBlocks(n *html.Node) string

InnerHTMLBlocks is the same as InnerHTML, but it separates top level nodes with a line break.

func IsBalanced

func IsBalanced(s string) bool

IsBalanced reports whether every opening tag has a closing pair.

func LastChildOrNew

func LastChildOrNew(p *html.Node, tag string, attrs ...string) *html.Node

LastChildOrNew returns the last child of p if it is ShallowEqual to a new *html.Node with tag and attrs. Otherwise, it appends a new *html.Node with tag and attrs and returns that. For why this operation is useful, see Converting docx to clean HTML.

func New

func New(tag string, attrs ...string) *html.Node

New creates a new html.Node with the specified tag and attributes. It handles properly setting the Node.Type and Node.DataAtom. New panics if the length of attrs is not even.

func OuterHTML

func OuterHTML(n *html.Node) string

OuterHTML returns a serialized node.

func RemoveAll

func RemoveAll(nodes []*html.Node)

RemoveAll orphans the nodes it is passed. It ignores a node if the node is nil or already an orphan.

func ReplaceWith

func ReplaceWith(old, new *html.Node)

ReplaceWith removes old from its Parent and inserts new in its place.

func Select

func Select(n *html.Node, match Selector) *html.Node

Select returns the first descendant node matched by the Selector or nil.

func SelectAll

func SelectAll(n *html.Node, match Selector) iter.Seq[*html.Node]

SelectAll returns an iterator yielding matching nodes in n.Descendants().

Example
package main

import (
	"fmt"
	"strings"

	"github.com/earthboundkid/xhtml"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

// main parses a short list of links and prints the href and text content of
// every <a> element it contains.
func main() {
	const page = `
	<ul>
		<li><a href="https://example.com/en-us">Hello, World!</a></li>
		<li><a href="https://example.com/ja-jp">こんにちは世界!</a></li>
	</ul>`
	root, err := html.Parse(strings.NewReader(page))
	if err != nil {
		panic(err)
	}
	// Match anchors by atom, then report each link's target and label.
	isAnchor := xhtml.WithAtom(atom.A)
	for a := range xhtml.SelectAll(root, isAnchor) {
		href := xhtml.Attr(a, "href")
		label := xhtml.TextContent(a)
		fmt.Println(href, label)
	}
}
Output:

https://example.com/en-us Hello, World!
https://example.com/ja-jp こんにちは世界!

func SelectSlice

func SelectSlice(n *html.Node, match Selector) []*html.Node

SelectSlice returns a slice of descendant nodes matched by the Selector.

func SetAttr

func SetAttr(n *html.Node, key, value string)

SetAttr adds or replaces the given attribute key and value on n.

func SetInnerHTML

func SetInnerHTML(n *html.Node, s string) error

SetInnerHTML parses s in the context of n and makes the resulting Nodes the sole children of n.

func ShallowEqual

func ShallowEqual(a, b *html.Node) bool

ShallowEqual returns true if a and b have the same Type, DataAtom, Data, Namespace, and Attr. It does not look at parents, children, or siblings. nil *html.Nodes are never equal to each other.

func TextContent

func TextContent(n *html.Node) string

TextContent joins and trims the text node children of n.

func ToBuffer

func ToBuffer(n *html.Node) *bytes.Buffer

ToBuffer returns a *bytes.Buffer containing the outerHTML of n.

func UnnestChildren

func UnnestChildren(n *html.Node)

UnnestChildren has all of the children of node adopted by its parent, and then it removes the node.

Types

type Selector

type Selector = func(n *html.Node) bool

Selector is a function that matches html.Nodes.

func WithAtom

func WithAtom(a atom.Atom) Selector

WithAtom returns a Selector that matches html.Nodes with the given atom.Atom.

func WithClass

func WithClass(classname string) Selector

WithClass returns a Selector that matches nodes with classname.

Example
package main

import (
	"fmt"
	"strings"

	"github.com/earthboundkid/xhtml"
	"golang.org/x/net/html"
)

// main parses a short list of links and prints the href and text content of
// each element whose class list includes "english".
func main() {
	const page = `
	<ul>
		<li><a class="bttn english" href="https://example.com/en-us">Hello, World!</a></li>
		<li><a class="bttn japanese" href="https://example.com/ja-jp">こんにちは世界!</a></li>
	</ul>`
	root, err := html.Parse(strings.NewReader(page))
	if err != nil {
		panic(err)
	}
	// Match .english elements, then report each one's target and label.
	isEnglish := xhtml.WithClass("english")
	for el := range xhtml.SelectAll(root, isEnglish) {
		href := xhtml.Attr(el, "href")
		label := xhtml.TextContent(el)
		fmt.Println(href, label)
	}
}
Output:

https://example.com/en-us Hello, World!

func WithDataset

func WithDataset(attr string) Selector

WithDataset returns a Selector that matches html.Nodes with the given data attribute set. The attribute should be in kebab-case, not camelCase, without the "data-" prefix.

func WithID

func WithID(id string) Selector

WithID returns a Selector that matches html.Nodes with the given id= attribute.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL