processing

package
v0.0.0-...-4e71f0c Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 27, 2018 License: MIT Imports: 10 Imported by: 2

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Count

func Count(doc *documents.Document, tokeniser Tokeniser) *indices.InfoAndTerms

func CountInDocuments

func CountInDocuments(
	docs <-chan *documents.Document,
	tokeniser Tokeniser,
	idocs chan<- *indices.InfoAndTerms,
	includeClassless bool,
	includeClassy bool,
)

Types

type EnglishTokeniser

type EnglishTokeniser struct {
	// contains filtered or unexported fields
}

func NewEnglishTokeniser

func NewEnglishTokeniser(stopWordList io.Reader) (*EnglishTokeniser, error)

func NewEnglishTokeniserFromFile

func NewEnglishTokeniserFromFile(stopWordFile string) (*EnglishTokeniser, error)

func (*EnglishTokeniser) GetTerms

func (e *EnglishTokeniser) GetTerms(text string, operation func(string))

func (*EnglishTokeniser) IsStopWord

func (e *EnglishTokeniser) IsStopWord(word string) bool

func (*EnglishTokeniser) Normalise

func (e *EnglishTokeniser) Normalise(token string) string

func (*EnglishTokeniser) NormaliseMany

func (e *EnglishTokeniser) NormaliseMany(tokens []string) []string

func (*EnglishTokeniser) Tokenise

func (e *EnglishTokeniser) Tokenise(text string) []string

type Tokeniser

type Tokeniser interface {
	Tokenise(text string) []string
	Normalise(token string) string
	IsStopWord(word string) bool
	GetTerms(text string, operation func(string))
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL