Documentation
¶
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var ( A = Tag("A") B = Tag("b") Body = Tag("body") Div = Tag("div") Em = Tag("em") Form = Tag("form") H1 = Tag("h1") H2 = Tag("h2") Head = Tag("head") I = Tag("i") Img = Tag("img") Input = Tag("input") Label = Tag("label") Li = Tag("li") Option = Tag("option") P = Tag("p") Select = Tag("select") Span = Tag("span") Svg = Tag("svg") Table = Tag("table") Td = Tag("td") Th = Tag("th") Title = Tag("title") Tr = Tag("tr") Ul = Tag("ul") )
These variables are used to represent common tags.
// True is a special value that matches any node.
var True everything
True is a special value that matches any node.
Functions ¶
This section is empty.
Types ¶
type Attributes ¶
// Attributes describes a node's attributes, offering lookup by key and
// iteration over every key-value pair.
type Attributes interface {
// Range calls the provided function for each key-value pair in the Attributes.
// Iteration stops as soon as the function returns false for a pair.
Range(func(key, value string) bool)
// Get returns the value associated with the specified key and
// a boolean indicating whether the key exists in the Attributes.
Get(key string) (value string, exists bool)
}
Attributes is an interface that describes a node's attributes with methods for getting and iterating over key-value pairs.
type Filter ¶
// Filter describes a predicate that can be used to select nodes.
type Filter interface {
// IsAttribute reports whether the filter represents an attribute filter.
IsAttribute() bool
// IsMatch reports whether the filter matches the given node.
IsMatch(node Node) bool
}
Filter is an interface that describes a filter that can be used to select nodes.
func Attr ¶
Attr returns a new attribute filter with the specified name and value.
Example ¶
// Locate an element through an attribute filter.
node, err := ParseHTML(`<div data-foo="value">foo!</div>`)
if err != nil {
	log.Fatal(err)
}
nodes := node.FindAll(Descendant, nil, Attr("data-foo", "value"))
if len(nodes) != 1 {
	log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
}
fmt.Println(nodes[0].Readable())

// The same kind of lookup expressed as a CSS selector and as an XPath expression.
node, err = ParseHTML(`<input name="email"/>`)
if err != nil {
	log.Fatal(err)
}
nodes = node.SelectAll(`[name="email"]`)
if len(nodes) != 1 {
	log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
}
fmt.Println(nodes[0].Readable())

nodes = node.XPath(`//*[@name="email"]`)
if len(nodes) != 1 {
	log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
}
fmt.Println(nodes[0].Readable())
Output: <div data-foo="value">foo!</div> <input name="email"/> <input name="email"/>
func Class ¶
Class returns a new class filter with the specified value. This filter is an attribute filter.
Example ¶
node, err := ParseHTML(`<p class="body strikeout"></p>`)
if err != nil {
	log.Fatal(err)
}

// Class matches regardless of the order in which the class names are given.
nodes := node.FindAll(Descendant, nil, Class("body strikeout"))
if len(nodes) != 1 {
	log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
}
fmt.Println(nodes[0].Readable())

nodes = node.FindAll(Descendant, nil, Class("strikeout body"))
if len(nodes) != 1 {
	log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
}
fmt.Println(nodes[0].Readable())

// ClassStrict only matches when the string equals the class attribute as written.
nodes = node.FindAll(Descendant, nil, ClassStrict("body strikeout"))
if len(nodes) != 1 {
	log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
}
fmt.Println(nodes[0].Readable())

nodes = node.FindAll(Descendant, nil, ClassStrict("strikeout body"))
if len(nodes) != 0 {
	log.Fatalf("expected nodes %d; got %d", 0, len(nodes))
}
fmt.Println(nodes)
Output: <p class="body strikeout"></p> <p class="body strikeout"></p> <p class="body strikeout"></p> []
func ClassStrict ¶
ClassStrict returns a new strict class filter with the specified string. This filter is an attribute filter.
type FindMethod ¶
// FindMethod represents the method used to search for nodes in the parse tree.
type FindMethod int
FindMethod represents the method used to search for nodes in the parse tree.
const ( // Descendant represents a search for nodes that are descendants of the current node. Descendant FindMethod = iota // NoRecursive represents a search for nodes that are direct children of the current node. NoRecursive // Parent represents a search for the parent node of the current node. Parent // PrevSibling represents a search for the previous sibling node of the current node. PrevSibling // NextSibling represents a search for the next sibling node of the current node. NextSibling // Previous represents a search for the previous node in the parse tree. Previous // Next represents a search for the next node in the parse tree. Next )
type Finder ¶
// Finder groups the methods used to locate nodes.
type Finder interface {
// Find searches for the first matching node in the parse tree based on the specified find method and filters.
Find(FindMethod, TagFilter, ...Filter) Node
// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
FindN(FindMethod, int, TagFilter, ...Filter) []Node
// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
FindAll(FindMethod, TagFilter, ...Filter) []Node
// FindString searches for the first matching text node in the parse tree based on the specified find method and filter.
FindString(FindMethod, StringFilter) TextNode
// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filter.
FindStringN(FindMethod, int, StringFilter) []TextNode
// FindAllString searches for all text nodes in the parse tree based on the specified find method and filter.
FindAllString(FindMethod, StringFilter) []TextNode
// Select searches for the first matching node in the parse tree based on the CSS selector.
// It panics if the selector cannot be parsed.
Select(string) Node
// SelectAll searches for all nodes in the parse tree based on the CSS selector.
// It panics if the selector cannot be parsed.
SelectAll(string) []Node
// XPath searches for all nodes that match the specified XPath expression. It panics if the expression cannot be parsed.
XPath(string) []Node
// Evaluate returns the result of the XPath expression.
// The result type of the expression is one of the following: bool, float64, string, *xpath.NodeIterator.
Evaluate(string) (any, error)
}
Finder represents a set of methods for finding nodes.
type HtmlNode ¶
// HtmlNode is an interface representing an HTML node: access to the underlying
// *html.Node, rendering, navigation through the parse tree, and the Finder methods.
type HtmlNode interface {
// Raw returns the original *html.Node.
Raw() *html.Node
// ToNode converts HtmlNode to Node.
ToNode() Node
// ToTextNode converts HtmlNode to TextNode.
// It panics if the node is not a text node.
ToTextNode() TextNode
// Type returns the node's html.NodeType.
Type() html.NodeType
// Data returns the tag name for an element node or the content for a text node.
Data() string
// Attrs returns an Attributes interface for an element node.
Attrs() Attributes
// HasAttr reports whether the node has the given attribute.
HasAttr(string) bool
// HTML renders the node's parse tree as HTML code.
HTML() string
// Readable renders unescaped HTML code.
Readable() string
// Parent returns the parent of this node.
Parent() Node
// FirstChild returns the first child of this node.
FirstChild() Node
// LastChild returns the last child of this node.
LastChild() Node
// PrevSibling returns the previous node that is on the same level of the parse tree.
PrevSibling() Node
// NextSibling returns the next node that is on the same level of the parse tree.
NextSibling() Node
// PrevNode returns the node that was parsed immediately before this node.
PrevNode() Node
// NextNode returns the node that was parsed immediately after this node.
NextNode() Node
// Parents returns all of this node's parents, collected recursively.
Parents() []Node
// Children returns all of this node's direct children.
Children() []Node
// Descendants returns all of this node's children, collected recursively.
Descendants() []Node
// PrevSiblings returns all of this node's previous nodes that are on the same level of the parse tree.
PrevSiblings() []Node
// NextSiblings returns all of this node's next nodes that are on the same level of the parse tree.
NextSiblings() []Node
// PrevNodes returns all of the nodes that were parsed before this node.
PrevNodes() []Node
// NextNodes returns all of the nodes that were parsed after this node.
NextNodes() []Node
// AncestorNodes returns an iterator over the ancestors of n,
// starting with n.Parent.
AncestorNodes() iter.Seq[Node]
// ChildNodes returns an iterator over the immediate children of n,
// starting with n.FirstChild.
ChildNodes() iter.Seq[Node]
// DescendantNodes returns an iterator over all nodes recursively
// beneath n, excluding n itself. Nodes are visited in depth-first preorder.
DescendantNodes() iter.Seq[Node]
// Finder includes a set of find methods.
Finder
}
HtmlNode is an interface representing an HTML node.
Example ¶
node, err := ParseHTML("<a><b>text1</b><c>text2</c></a>")
if err != nil {
	log.Fatal(err)
}

// Look up each element once and navigate from there.
b := node.Find(Descendant, B)
c := node.Find(Descendant, Tag("c"))

fmt.Println(b.NextSibling().Readable())
fmt.Println(c.PrevSibling().Readable())
fmt.Println(b.PrevSibling())
fmt.Println(c.NextSibling())
fmt.Println(b.String().String())
fmt.Println(b.String().NextSibling())
Output: <c>text2</c> <b>text1</b> <nil> <nil> text1 <nil>
type Node ¶
// Node is an HtmlNode extended with accessors for the text it contains.
type Node interface {
HtmlNode
// String returns a TextNode if the node has only one child and that child is a text node; otherwise it returns nil.
String() TextNode
// Strings returns all of the text nodes inside this node.
Strings() []TextNode
// StrippedStrings returns a list of strings generated by Strings, where strings consisting entirely of
// whitespace are ignored, and whitespace at the beginning and end of strings is removed.
StrippedStrings() []string
// GetText concatenates the content of all of the text nodes.
GetText() string
}
Node is an interface representing an HTML node.
func ParseWithOptions ¶
ParseWithOptions is like Parse, with options.
type StringFilter ¶
StringFilter interface extends the Filter interface and defines a method for checking if the filter represents a string filter.
func String ¶
func String[T Value](t T) StringFilter
String returns a StringFilter with the specified value.
type TagFilter ¶
TagFilter represents an interface that can be used to filter nodes based on an element's tag.
type Value ¶
// Value is a type constraint listing the kinds of value that can be used as a filter.
type Value interface {
// Value can be one of the following types:
// - string: a simple string value
// - []string: a slice of strings
// - *regexp.Regexp: a regular expression
// - everything: a special value that matches any node
// - func(string, Node) bool: a function that takes a string and a node and reports whether they match
string | []string | *regexp.Regexp | everything | func(string, Node) bool
}
Value is an interface that represents a value that can be used as a filter.