updated dependencies
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Paul 2020-12-04 20:36:22 +01:00
parent 49dda13e02
commit 6b740c6882
75 changed files with 10290 additions and 6883 deletions

15
go.mod
View File

@ -3,10 +3,13 @@ module git.paulbsd.com/paulbsd/fuelprices
go 1.13 go 1.13
require ( require (
github.com/antchfx/xmlquery v1.0.0 github.com/antchfx/xmlquery v1.3.3
github.com/antchfx/xpath v1.0.0 // indirect github.com/antchfx/xpath v1.1.11 // indirect
github.com/influxdata/influxdb1-client v0.0.0-20190402204710-8ff2fc3824fc github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 // indirect
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a // indirect github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab
golang.org/x/net v0.0.0-20190607181551-461777fb6f67 // indirect github.com/smartystreets/assertions v1.2.0 // indirect
gopkg.in/ini.v1 v1.42.0 github.com/smartystreets/goconvey v1.6.4 // indirect
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb // indirect
golang.org/x/text v0.3.4 // indirect
gopkg.in/ini.v1 v1.62.0
) )

29
go.sum
View File

@ -1,24 +1,53 @@
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM= github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk= github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
github.com/antchfx/xmlquery v1.3.3 h1:HYmadPG0uz8CySdL68rB4DCLKXz2PurCjS3mnkVF4CQ=
github.com/antchfx/xmlquery v1.3.3/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk= github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/xpath v1.1.11 h1:WOFtK8TVAjLm3lbgqeP0arlHpvCEeTANeWZ/csPpJkQ=
github.com/antchfx/xpath v1.1.11/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 h1:l5lAOZEym3oK3SQ2HBHWsJUfbNBiTXJDeW2QDxw9AQ0=
github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/influxdata/influxdb1-client v0.0.0-20190402204710-8ff2fc3824fc h1:KpMgaYJRieDkHZJWY3LMafvtqS/U8xX6+lUN+OKpl/Y= github.com/influxdata/influxdb1-client v0.0.0-20190402204710-8ff2fc3824fc h1:KpMgaYJRieDkHZJWY3LMafvtqS/U8xX6+lUN+OKpl/Y=
github.com/influxdata/influxdb1-client v0.0.0-20190402204710-8ff2fc3824fc/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/influxdata/influxdb1-client v0.0.0-20190402204710-8ff2fc3824fc/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab h1:HqW4xhhynfjrtEiiSGcQUd6vrK23iMam1FO8rI7mwig=
github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/assertions v1.2.0 h1:42S6lae5dvLc7BrLu/0ugRtcFVjoJNMC/N3yZFZkDFs=
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a h1:pa8hGb/2YqsZKovtsgrwcDH1RZhVbTKCjLp47XpqCDs= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a h1:pa8hGb/2YqsZKovtsgrwcDH1RZhVbTKCjLp47XpqCDs=
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00= golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00=
golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
gopkg.in/ini.v1 v1.42.0 h1:7N3gPTt50s8GuLortA00n8AqRTk75qOP98+mTPpgzRk= gopkg.in/ini.v1 v1.42.0 h1:7N3gPTt50s8GuLortA00n8AqRTk75qOP98+mTPpgzRk=
gopkg.in/ini.v1 v1.42.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.42.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU=
gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=

View File

@ -1,14 +1,17 @@
language: go language: go
go: go:
- 1.6 - 1.9.x
- 1.7 - 1.12.x
- 1.8 - 1.13.x
- 1.14.x
- 1.15.x
install: install:
- go get golang.org/x/net/html/charset - go get golang.org/x/net/html/charset
- go get github.com/antchfx/xpath - go get github.com/antchfx/xpath
- go get github.com/mattn/goveralls - go get github.com/mattn/goveralls
- go get github.com/golang/groupcache
script: script:
- $HOME/gopath/bin/goveralls -service=travis-ci - $HOME/gopath/bin/goveralls -service=travis-ci

View File

@ -8,45 +8,104 @@ xmlquery
Overview Overview
=== ===
xmlquery is an XPath query package for XML document, lets you extract data or evaluate from XML documents by an XPath expression. `xmlquery` is an XPath query package for XML documents, allowing you to extract
data or evaluate from XML documents with an XPath expression.
`xmlquery` has a built-in query object caching feature that caches recently used
XPATH query strings. Enabling caching can avoid recompile XPath expression for
each query.
Change Logs Change Logs
=== ===
**2018-12-23** 2020-08-??
* added XML output will including comment node. [#9](https://github.com/antchfx/xmlquery/issues/9) - Add XML stream loading and parsing support.
**2018-12-03** 2019-11-11
* added support attribute name with namespace prefix and XML output. [#6](https://github.com/antchfx/xmlquery/issues/6) - Add XPath query caching.
2019-10-05
- Add new methods compatible with invalid XPath expression error: `QueryAll` and `Query`.
- Add `QuerySelector` and `QuerySelectorAll` methods, support for reused query objects.
- PR [#12](https://github.com/antchfx/xmlquery/pull/12) (Thanks @FrancescoIlario)
- PR [#11](https://github.com/antchfx/xmlquery/pull/11) (Thanks @gjvnq)
2018-12-23
- Added XML output including comment nodes. [#9](https://github.com/antchfx/xmlquery/issues/9)
2018-12-03
- Added support to attribute name with namespace prefix and XML output. [#6](https://github.com/antchfx/xmlquery/issues/6)
Installation Installation
==== ====
```
> $ go get github.com/antchfx/xmlquery $ go get github.com/antchfx/xmlquery
```
Getting Started Getting Started
=== ===
#### Parse a XML from URL. ### Find specified XPath query.
```go
list, err := xmlquery.QueryAll(doc, "a")
if err != nil {
panic(err)
}
```
#### Parse an XML from URL.
```go ```go
doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml") doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml")
``` ```
#### Parse a XML from string. #### Parse an XML from string.
```go ```go
s := `<?xml version="1.0" encoding="utf-8"?><rss version="2.0"></rss>` s := `<?xml version="1.0" encoding="utf-8"?><rss version="2.0"></rss>`
doc, err := xmlquery.Parse(strings.NewReader(s)) doc, err := xmlquery.Parse(strings.NewReader(s))
``` ```
#### Parse a XML from io.Reader. #### Parse an XML from io.Reader.
```go ```go
f, err := os.Open("../books.xml") f, err := os.Open("../books.xml")
doc, err := xmlquery.Parse(f) doc, err := xmlquery.Parse(f)
``` ```
#### Parse an XML in a stream fashion (simple case without elements filtering).
```go
f, err := os.Open("../books.xml")
p, err := xmlquery.CreateStreamParser(f, "/bookstore/book")
for {
n, err := p.Read()
if err == io.EOF {
break
}
if err != nil {
...
}
}
```
#### Parse an XML in a stream fashion (simple case advanced element filtering).
```go
f, err := os.Open("../books.xml")
p, err := xmlquery.CreateStreamParser(f, "/bookstore/book", "/bookstore/book[price>=10]")
for {
n, err := p.Read()
if err == io.EOF {
break
}
if err != nil {
...
}
}
```
#### Find authors of all books in the bookstore. #### Find authors of all books in the bookstore.
```go ```go
@ -61,25 +120,25 @@ list := xmlquery.Find(doc, "//author")
book := xmlquery.FindOne(doc, "//book[2]") book := xmlquery.FindOne(doc, "//book[2]")
``` ```
#### Find all book elements and only get `id` attribute self. (New Feature) #### Find all book elements and only get `id` attribute. (New Feature)
```go ```go
list := xmlquery.Find(doc,"//book/@id") list := xmlquery.Find(doc,"//book/@id")
``` ```
#### Find all books with id is bk104. #### Find all books with id `bk104`.
```go ```go
list := xmlquery.Find(doc, "//book[@id='bk104']") list := xmlquery.Find(doc, "//book[@id='bk104']")
``` ```
#### Find all books that price less than 5. #### Find all books with price less than 5.
```go ```go
list := xmlquery.Find(doc, "//book[price<5]") list := xmlquery.Find(doc, "//book[price<5]")
``` ```
#### Evaluate the total price of all books. #### Evaluate total price of all books.
```go ```go
expr, err := xpath.Compile("sum(//book/price)") expr, err := xpath.Compile("sum(//book/price)")
@ -87,13 +146,30 @@ price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
fmt.Printf("total price: %f\n", price) fmt.Printf("total price: %f\n", price)
``` ```
#### Evaluate the number of all books element. #### Evaluate number of all book elements.
```go ```go
expr, err := xpath.Compile("count(//book)") expr, err := xpath.Compile("count(//book)")
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
``` ```
FAQ
====
#### `Find()` vs `QueryAll()`, which is better?
`Find` and `QueryAll` both do the same thing: searches all of matched XML nodes.
`Find` panics if provided with an invalid XPath query, while `QueryAll` returns
an error.
#### Can I save my query expression object for the next query?
Yes, you can. We provide `QuerySelector` and `QuerySelectorAll` methods; they
accept your query expression object.
Caching a query expression object avoids recompiling the XPath query
expression, improving query performance.
#### Create XML document. #### Create XML document.
```go ```go
@ -175,11 +251,11 @@ func main(){
List of supported XPath query packages List of supported XPath query packages
=== ===
|Name |Description | | Name | Description |
|--------------------------|----------------| | ------------------------------------------------- | ----------------------------------------- |
|[htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document| | [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for HTML documents |
|[xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document| | [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for XML documents |
|[jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document| | [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for JSON documents |
Questions Questions
=== ===

43
vendor/github.com/antchfx/xmlquery/cache.go generated vendored Normal file
View File

@ -0,0 +1,43 @@
package xmlquery
import (
"sync"
"github.com/golang/groupcache/lru"
"github.com/antchfx/xpath"
)
// DisableSelectorCache will disable caching for the query selector if value is true.
var DisableSelectorCache = false
// SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50.
// Will disable caching if SelectorCacheMaxEntries <= 0.
var SelectorCacheMaxEntries = 50
var (
cacheOnce sync.Once
cache *lru.Cache
cacheMutex sync.Mutex
)
func getQuery(expr string) (*xpath.Expr, error) {
if DisableSelectorCache || SelectorCacheMaxEntries <= 0 {
return xpath.Compile(expr)
}
cacheOnce.Do(func() {
cache = lru.New(SelectorCacheMaxEntries)
})
cacheMutex.Lock()
defer cacheMutex.Unlock()
if v, ok := cache.Get(expr); ok {
return v.(*xpath.Expr), nil
}
v, err := xpath.Compile(expr)
if err != nil {
return nil, err
}
cache.Add(expr, v)
return v, nil
}

69
vendor/github.com/antchfx/xmlquery/cached_reader.go generated vendored Normal file
View File

@ -0,0 +1,69 @@
package xmlquery
import (
"bufio"
)
type cachedReader struct {
buffer *bufio.Reader
cache []byte
cacheCap int
cacheLen int
caching bool
}
func newCachedReader(r *bufio.Reader) *cachedReader {
return &cachedReader{
buffer: r,
cache: make([]byte, 4096),
cacheCap: 4096,
cacheLen: 0,
caching: false,
}
}
func (c *cachedReader) StartCaching() {
c.cacheLen = 0
c.caching = true
}
func (c *cachedReader) ReadByte() (byte, error) {
if !c.caching {
return c.buffer.ReadByte()
}
b, err := c.buffer.ReadByte()
if err != nil {
return b, err
}
if c.cacheLen < c.cacheCap {
c.cache[c.cacheLen] = b
c.cacheLen++
}
return b, err
}
func (c *cachedReader) Cache() []byte {
return c.cache[:c.cacheLen]
}
func (c *cachedReader) StopCaching() {
c.caching = false
}
func (c *cachedReader) Read(p []byte) (int, error) {
n, err := c.buffer.Read(p)
if err != nil {
return n, err
}
if c.caching && c.cacheLen < c.cacheCap {
for i := 0; i < n; i++ {
c.cache[c.cacheLen] = p[i]
c.cacheLen++
if c.cacheLen >= c.cacheCap {
break
}
}
}
return n, err
}

9
vendor/github.com/antchfx/xmlquery/go.mod generated vendored Normal file
View File

@ -0,0 +1,9 @@
module github.com/antchfx/xmlquery
go 1.14
require (
github.com/antchfx/xpath v1.1.10
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc
)

14
vendor/github.com/antchfx/xmlquery/go.sum generated vendored Normal file
View File

@ -0,0 +1,14 @@
github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc h1:zK/HqS5bZxDptfPJNq8v7vJfXtkU7r9TLIoSr1bXaP4=
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View File

@ -3,13 +3,8 @@ package xmlquery
import ( import (
"bytes" "bytes"
"encoding/xml" "encoding/xml"
"errors"
"fmt" "fmt"
"io"
"net/http"
"strings" "strings"
"golang.org/x/net/html/charset"
) )
// A NodeType is the type of a Node. // A NodeType is the type of a Node.
@ -19,13 +14,15 @@ const (
// DocumentNode is a document object that, as the root of the document tree, // DocumentNode is a document object that, as the root of the document tree,
// provides access to the entire XML document. // provides access to the entire XML document.
DocumentNode NodeType = iota DocumentNode NodeType = iota
// DeclarationNode is the document type declaration, indicated by the following // DeclarationNode is the document type declaration, indicated by the
// tag (for example, <!DOCTYPE...> ). // following tag (for example, <!DOCTYPE...> ).
DeclarationNode DeclarationNode
// ElementNode is an element (for example, <item> ). // ElementNode is an element (for example, <item> ).
ElementNode ElementNode
// TextNode is the text content of a node. // TextNode is the text content of a node.
TextNode TextNode
// CharDataNode node <![CDATA[content]]>
CharDataNode
// CommentNode a comment (for example, <!-- my comment --> ). // CommentNode a comment (for example, <!-- my comment --> ).
CommentNode CommentNode
// AttributeNode is an attribute of element. // AttributeNode is an attribute of element.
@ -51,36 +48,56 @@ func (n *Node) InnerText() string {
var output func(*bytes.Buffer, *Node) var output func(*bytes.Buffer, *Node)
output = func(buf *bytes.Buffer, n *Node) { output = func(buf *bytes.Buffer, n *Node) {
switch n.Type { switch n.Type {
case TextNode: case TextNode, CharDataNode:
buf.WriteString(n.Data) buf.WriteString(n.Data)
return
case CommentNode: case CommentNode:
return default:
}
for child := n.FirstChild; child != nil; child = child.NextSibling { for child := n.FirstChild; child != nil; child = child.NextSibling {
output(buf, child) output(buf, child)
} }
} }
}
var buf bytes.Buffer var buf bytes.Buffer
output(&buf, n) output(&buf, n)
return buf.String() return buf.String()
} }
func outputXML(buf *bytes.Buffer, n *Node) { func (n *Node) sanitizedData(preserveSpaces bool) string {
if n.Type == TextNode { if preserveSpaces {
xml.EscapeText(buf, []byte(strings.TrimSpace(n.Data))) return strings.Trim(n.Data, "\n\t")
return
} }
if n.Type == CommentNode { return strings.TrimSpace(n.Data)
}
func calculatePreserveSpaces(n *Node, pastValue bool) bool {
if attr := n.SelectAttr("xml:space"); attr == "preserve" {
return true
} else if attr == "default" {
return false
}
return pastValue
}
func outputXML(buf *bytes.Buffer, n *Node, preserveSpaces bool) {
preserveSpaces = calculatePreserveSpaces(n, preserveSpaces)
switch n.Type {
case TextNode:
xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces)))
return
case CharDataNode:
buf.WriteString("<![CDATA[")
xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces)))
buf.WriteString("]]>")
return
case CommentNode:
buf.WriteString("<!--") buf.WriteString("<!--")
buf.WriteString(n.Data) buf.WriteString(n.Data)
buf.WriteString("-->") buf.WriteString("-->")
return return
} case DeclarationNode:
if n.Type == DeclarationNode {
buf.WriteString("<?" + n.Data) buf.WriteString("<?" + n.Data)
} else { default:
if n.Prefix == "" { if n.Prefix == "" {
buf.WriteString("<" + n.Data) buf.WriteString("<" + n.Data)
} else { } else {
@ -90,10 +107,13 @@ func outputXML(buf *bytes.Buffer, n *Node) {
for _, attr := range n.Attr { for _, attr := range n.Attr {
if attr.Name.Space != "" { if attr.Name.Space != "" {
buf.WriteString(fmt.Sprintf(` %s:%s="%s"`, attr.Name.Space, attr.Name.Local, attr.Value)) buf.WriteString(fmt.Sprintf(` %s:%s=`, attr.Name.Space, attr.Name.Local))
} else { } else {
buf.WriteString(fmt.Sprintf(` %s="%s"`, attr.Name.Local, attr.Value)) buf.WriteString(fmt.Sprintf(` %s=`, attr.Name.Local))
} }
buf.WriteByte('"')
xml.EscapeText(buf, []byte(attr.Value))
buf.WriteByte('"')
} }
if n.Type == DeclarationNode { if n.Type == DeclarationNode {
buf.WriteString("?>") buf.WriteString("?>")
@ -101,7 +121,7 @@ func outputXML(buf *bytes.Buffer, n *Node) {
buf.WriteString(">") buf.WriteString(">")
} }
for child := n.FirstChild; child != nil; child = child.NextSibling { for child := n.FirstChild; child != nil; child = child.NextSibling {
outputXML(buf, child) outputXML(buf, child, preserveSpaces)
} }
if n.Type != DeclarationNode { if n.Type != DeclarationNode {
if n.Prefix == "" { if n.Prefix == "" {
@ -116,17 +136,18 @@ func outputXML(buf *bytes.Buffer, n *Node) {
func (n *Node) OutputXML(self bool) string { func (n *Node) OutputXML(self bool) string {
var buf bytes.Buffer var buf bytes.Buffer
if self { if self {
outputXML(&buf, n) outputXML(&buf, n, false)
} else { } else {
for n := n.FirstChild; n != nil; n = n.NextSibling { for n := n.FirstChild; n != nil; n = n.NextSibling {
outputXML(&buf, n) outputXML(&buf, n, false)
} }
} }
return buf.String() return buf.String()
} }
func addAttr(n *Node, key, val string) { // AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'.
func AddAttr(n *Node, key, val string) {
var attr xml.Attr var attr xml.Attr
if i := strings.Index(key, ":"); i > 0 { if i := strings.Index(key, ":"); i > 0 {
attr = xml.Attr{ attr = xml.Attr{
@ -143,10 +164,13 @@ func addAttr(n *Node, key, val string) {
n.Attr = append(n.Attr, attr) n.Attr = append(n.Attr, attr)
} }
func addChild(parent, n *Node) { // AddChild adds a new node 'n' to a node 'parent' as its last child.
func AddChild(parent, n *Node) {
n.Parent = parent n.Parent = parent
n.NextSibling = nil
if parent.FirstChild == nil { if parent.FirstChild == nil {
parent.FirstChild = n parent.FirstChild = n
n.PrevSibling = nil
} else { } else {
parent.LastChild.NextSibling = n parent.LastChild.NextSibling = n
n.PrevSibling = parent.LastChild n.PrevSibling = parent.LastChild
@ -155,148 +179,48 @@ func addChild(parent, n *Node) {
parent.LastChild = n parent.LastChild = n
} }
func addSibling(sibling, n *Node) { // AddSibling adds a new node 'n' as a sibling of a given node 'sibling'.
// Note it is not necessarily true that the new node 'n' would be added
// immediately after 'sibling'. If 'sibling' isn't the last child of its
// parent, then the new node 'n' will be added at the end of the sibling
// chain of their parent.
func AddSibling(sibling, n *Node) {
for t := sibling.NextSibling; t != nil; t = t.NextSibling { for t := sibling.NextSibling; t != nil; t = t.NextSibling {
sibling = t sibling = t
} }
n.Parent = sibling.Parent n.Parent = sibling.Parent
sibling.NextSibling = n sibling.NextSibling = n
n.PrevSibling = sibling n.PrevSibling = sibling
n.NextSibling = nil
if sibling.Parent != nil { if sibling.Parent != nil {
sibling.Parent.LastChild = n sibling.Parent.LastChild = n
} }
} }
// LoadURL loads the XML document from the specified URL. // RemoveFromTree removes a node and its subtree from the document
func LoadURL(url string) (*Node, error) { // tree it is in. If the node is the root of the tree, then it's no-op.
resp, err := http.Get(url) func RemoveFromTree(n *Node) {
if err != nil { if n.Parent == nil {
return nil, err return
} }
defer resp.Body.Close() if n.Parent.FirstChild == n {
return parse(resp.Body) if n.Parent.LastChild == n {
} n.Parent.FirstChild = nil
n.Parent.LastChild = nil
func parse(r io.Reader) (*Node, error) { } else {
var ( n.Parent.FirstChild = n.NextSibling
decoder = xml.NewDecoder(r) n.NextSibling.PrevSibling = nil
doc = &Node{Type: DocumentNode} }
space2prefix = make(map[string]string) } else {
level = 0 if n.Parent.LastChild == n {
) n.Parent.LastChild = n.PrevSibling
// http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml. n.PrevSibling.NextSibling = nil
space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml" } else {
decoder.CharsetReader = charset.NewReaderLabel n.PrevSibling.NextSibling = n.NextSibling
prev := doc n.NextSibling.PrevSibling = n.PrevSibling
for { }
tok, err := decoder.Token() }
switch { n.Parent = nil
case err == io.EOF: n.PrevSibling = nil
goto quit n.NextSibling = nil
case err != nil:
return nil, err
}
switch tok := tok.(type) {
case xml.StartElement:
if level == 0 {
// mising XML declaration
node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
addChild(prev, node)
level = 1
prev = node
}
// https://www.w3.org/TR/xml-names/#scoping-defaulting
for _, att := range tok.Attr {
if att.Name.Local == "xmlns" {
space2prefix[att.Value] = ""
} else if att.Name.Space == "xmlns" {
space2prefix[att.Value] = att.Name.Local
}
}
if tok.Name.Space != "" {
if _, found := space2prefix[tok.Name.Space]; !found {
return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
}
}
for i := 0; i < len(tok.Attr); i++ {
att := &tok.Attr[i]
if prefix, ok := space2prefix[att.Name.Space]; ok {
att.Name.Space = prefix
}
}
node := &Node{
Type: ElementNode,
Data: tok.Name.Local,
Prefix: space2prefix[tok.Name.Space],
NamespaceURI: tok.Name.Space,
Attr: tok.Attr,
level: level,
}
//fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, level))
if level == prev.level {
addSibling(prev, node)
} else if level > prev.level {
addChild(prev, node)
} else if level < prev.level {
for i := prev.level - level; i > 1; i-- {
prev = prev.Parent
}
addSibling(prev.Parent, node)
}
prev = node
level++
case xml.EndElement:
level--
case xml.CharData:
node := &Node{Type: TextNode, Data: string(tok), level: level}
if level == prev.level {
addSibling(prev, node)
} else if level > prev.level {
addChild(prev, node)
}
case xml.Comment:
node := &Node{Type: CommentNode, Data: string(tok), level: level}
if level == prev.level {
addSibling(prev, node)
} else if level > prev.level {
addChild(prev, node)
} else if level < prev.level {
for i := prev.level - level; i > 1; i-- {
prev = prev.Parent
}
addSibling(prev.Parent, node)
}
case xml.ProcInst: // Processing Instruction
if prev.Type != DeclarationNode {
level++
}
node := &Node{Type: DeclarationNode, Data: tok.Target, level: level}
pairs := strings.Split(string(tok.Inst), " ")
for _, pair := range pairs {
pair = strings.TrimSpace(pair)
if i := strings.Index(pair, "="); i > 0 {
addAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`))
}
}
if level == prev.level {
addSibling(prev, node)
} else if level > prev.level {
addChild(prev, node)
}
prev = node
case xml.Directive:
}
}
quit:
return doc, nil
}
// Parse returns the parse tree for the XML from the given Reader.
func Parse(r io.Reader) (*Node, error) {
return parse(r)
} }

334
vendor/github.com/antchfx/xmlquery/parse.go generated vendored Normal file
View File

@ -0,0 +1,334 @@
package xmlquery
import (
"bufio"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"github.com/antchfx/xpath"
"golang.org/x/net/html/charset"
)
var xmlMIMERegex = regexp.MustCompile(`(?i)((application|image|message|model)/((\w|\.|-)+\+?)?|text/)(wb)?xml`)
// LoadURL loads the XML document from the specified URL.
func LoadURL(url string) (*Node, error) {
resp, err := http.Get(url)
if err != nil {
return nil, err
}
defer resp.Body.Close()
// Make sure the Content-Type has a valid XML MIME type
if xmlMIMERegex.MatchString(resp.Header.Get("Content-Type")) {
return Parse(resp.Body)
}
return nil, fmt.Errorf("invalid XML document(%s)", resp.Header.Get("Content-Type"))
}
// Parse returns the parse tree for the XML from the given Reader.
func Parse(r io.Reader) (*Node, error) {
p := createParser(r)
for {
_, err := p.parse()
if err == io.EOF {
return p.doc, nil
}
if err != nil {
return nil, err
}
}
}
type parser struct {
decoder *xml.Decoder
doc *Node
space2prefix map[string]string
level int
prev *Node
streamElementXPath *xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s).
streamElementFilter *xpath.Expr // If specified, it provides further filtering on the target element.
streamNode *Node // Need to remember the last target node So we can clean it up upon next Read() call.
streamNodePrev *Node // Need to remember target node's prev so upon target node removal, we can restore correct prev.
reader *cachedReader // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA.
}
func createParser(r io.Reader) *parser {
reader := newCachedReader(bufio.NewReader(r))
p := &parser{
decoder: xml.NewDecoder(reader),
doc: &Node{Type: DocumentNode},
space2prefix: make(map[string]string),
level: 0,
reader: reader,
}
// http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
p.space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
p.decoder.CharsetReader = charset.NewReaderLabel
p.prev = p.doc
return p
}
func (p *parser) parse() (*Node, error) {
var streamElementNodeCounter int
for {
tok, err := p.decoder.Token()
if err != nil {
return nil, err
}
switch tok := tok.(type) {
case xml.StartElement:
if p.level == 0 {
// mising XML declaration
node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
AddChild(p.prev, node)
p.level = 1
p.prev = node
}
// https://www.w3.org/TR/xml-names/#scoping-defaulting
for _, att := range tok.Attr {
if att.Name.Local == "xmlns" {
p.space2prefix[att.Value] = ""
} else if att.Name.Space == "xmlns" {
p.space2prefix[att.Value] = att.Name.Local
}
}
if tok.Name.Space != "" {
if _, found := p.space2prefix[tok.Name.Space]; !found {
return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
}
}
for i := 0; i < len(tok.Attr); i++ {
att := &tok.Attr[i]
if prefix, ok := p.space2prefix[att.Name.Space]; ok {
att.Name.Space = prefix
}
}
node := &Node{
Type: ElementNode,
Data: tok.Name.Local,
Prefix: p.space2prefix[tok.Name.Space],
NamespaceURI: tok.Name.Space,
Attr: tok.Attr,
level: p.level,
}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
// If we're in the streaming mode, we need to remember the node if it is the target node
// so that when we finish processing the node's EndElement, we know how/what to return to
// caller. Also we need to remove the target node from the tree upon next Read() call so
// memory doesn't grow unbounded.
if p.streamElementXPath != nil {
if p.streamNode == nil {
if QuerySelector(p.doc, p.streamElementXPath) != nil {
p.streamNode = node
p.streamNodePrev = p.prev
streamElementNodeCounter = 1
}
} else {
streamElementNodeCounter++
}
}
p.prev = node
p.level++
p.reader.StartCaching()
case xml.EndElement:
p.level--
// If we're in streaming mode, and we already have a potential streaming
// target node identified (p.streamNode != nil) then we need to check if
// this is the real one we want to return to caller.
if p.streamNode != nil {
streamElementNodeCounter--
if streamElementNodeCounter == 0 {
// Now we know this element node is the at least passing the initial
// p.streamElementXPath check and is a potential target node candidate.
// We need to have 1 more check with p.streamElementFilter (if given) to
// ensure it is really the element node we want.
// The reason we need a two-step check process is because the following
// situation:
// <AAA><BBB>b1</BBB></AAA>
// And say the p.streamElementXPath = "/AAA/BBB[. != 'b1']". Now during
// xml.StartElement time, the <BBB> node is still empty, so it will pass
// the p.streamElementXPath check. However, eventually we know this <BBB>
// shouldn't be returned to the caller. Having a second more fine-grained
// filter check ensures that. So in this case, the caller should really
// setup the stream parser with:
// streamElementXPath = "/AAA/BBB["
// streamElementFilter = "/AAA/BBB[. != 'b1']"
if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil {
return p.streamNode, nil
}
// otherwise, this isn't our target node, clean things up.
// note we also remove the underlying *Node from the node tree, to prevent
// future stream node candidate selection error.
RemoveFromTree(p.streamNode)
p.prev = p.streamNodePrev
p.streamNode = nil
p.streamNodePrev = nil
}
}
case xml.CharData:
p.reader.StopCaching()
// First, normalize the cache...
cached := strings.ToUpper(string(p.reader.Cache()))
nodeType := TextNode
if strings.HasPrefix(cached, "<![CDATA[") {
nodeType = CharDataNode
}
node := &Node{Type: nodeType, Data: string(tok), level: p.level}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
p.reader.StartCaching()
case xml.Comment:
node := &Node{Type: CommentNode, Data: string(tok), level: p.level}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
case xml.ProcInst: // Processing Instruction
if p.prev.Type != DeclarationNode {
p.level++
}
node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level}
pairs := strings.Split(string(tok.Inst), " ")
for _, pair := range pairs {
pair = strings.TrimSpace(pair)
if i := strings.Index(pair, "="); i > 0 {
AddAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`))
}
}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
}
p.prev = node
case xml.Directive:
}
}
}
// StreamParser enables loading and parsing an XML document in a streaming
// fashion.
type StreamParser struct {
p *parser
}
// CreateStreamParser creates a StreamParser. Argument streamElementXPath is
// required.
// Argument streamElementFilter is optional and should only be used in advanced
// scenarios.
//
// Scenario 1: simple case:
// xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB")
// if err != nil {
// panic(err)
// }
// for {
// n, err := sp.Read()
// if err != nil {
// break
// }
// fmt.Println(n.OutputXML(true))
// }
// Output will be:
// <BBB>b1</BBB>
// <BBB>b2</BBB>
//
// Scenario 2: advanced case:
// xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB", "/AAA/BBB[. != 'b1']")
// if err != nil {
// panic(err)
// }
// for {
// n, err := sp.Read()
// if err != nil {
// break
// }
// fmt.Println(n.OutputXML(true))
// }
// Output will be:
// <BBB>b2</BBB>
//
// As the argument names indicate, streamElementXPath should be used for
// providing xpath query pointing to the target element node only, no extra
// filtering on the element itself or its children; while streamElementFilter,
// if needed, can provide additional filtering on the target element and its
// children.
//
// CreateStreamParser returns an error if either streamElementXPath or
// streamElementFilter, if provided, cannot be successfully parsed and compiled
// into a valid xpath query.
func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) {
elemXPath, err := getQuery(streamElementXPath)
if err != nil {
return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %s", streamElementXPath, err.Error())
}
elemFilter := (*xpath.Expr)(nil)
if len(streamElementFilter) > 0 {
elemFilter, err = getQuery(streamElementFilter[0])
if err != nil {
return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %s", streamElementFilter[0], err.Error())
}
}
sp := &StreamParser{
p: createParser(r),
}
sp.p.streamElementXPath = elemXPath
sp.p.streamElementFilter = elemFilter
return sp, nil
}
// Read returns a target node that satisfies the XPath specified by caller at
// StreamParser creation time. If there is no more satisfying target nodes after
// reading the rest of the XML document, io.EOF will be returned. At any time,
// any XML parsing error encountered will be returned, and the stream parsing
// stopped. Calling Read() after an error is returned (including io.EOF) results
// undefined behavior. Also note, due to the streaming nature, calling Read()
// will automatically remove any previous target node(s) from the document tree.
func (sp *StreamParser) Read() (*Node, error) {
// Because this is a streaming read, we need to release/remove last
// target node from the node tree to free up memory.
if sp.p.streamNode != nil {
RemoveFromTree(sp.p.streamNode)
sp.p.prev = sp.p.streamNodePrev
sp.p.streamNode = nil
sp.p.streamNodePrev = nil
}
return sp.p.parse()
}

View File

@ -44,7 +44,8 @@ func (n *Node) SelectAttr(name string) string {
var _ xpath.NodeNavigator = &NodeNavigator{} var _ xpath.NodeNavigator = &NodeNavigator{}
// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node. // CreateXPathNavigator creates a new xpath.NodeNavigator for the specified
// XML Node.
func CreateXPathNavigator(top *Node) *NodeNavigator { func CreateXPathNavigator(top *Node) *NodeNavigator {
return &NodeNavigator{curr: top, root: top, attr: -1} return &NodeNavigator{curr: top, root: top, attr: -1}
} }
@ -57,6 +58,7 @@ func getCurrentNode(it *xpath.NodeIterator) *Node {
Data: n.Value(), Data: n.Value(),
} }
return &Node{ return &Node{
Parent: n.curr,
Type: AttributeNode, Type: AttributeNode,
Data: n.LocalName(), Data: n.LocalName(),
FirstChild: childNode, FirstChild: childNode,
@ -66,13 +68,50 @@ func getCurrentNode(it *xpath.NodeIterator) *Node {
return n.curr return n.curr
} }
// Find searches the Node that matches by the specified XPath expr. // Find is like QueryAll but panics if `expr` is not a valid XPath expression.
// See `QueryAll()` function.
func Find(top *Node, expr string) []*Node { func Find(top *Node, expr string) []*Node {
exp, err := xpath.Compile(expr) nodes, err := QueryAll(top, expr)
if err != nil { if err != nil {
panic(err) panic(err)
} }
t := exp.Select(CreateXPathNavigator(top)) return nodes
}
// FindOne is like Query but panics if `expr` is not a valid XPath expression.
// See `Query()` function.
func FindOne(top *Node, expr string) *Node {
node, err := Query(top, expr)
if err != nil {
panic(err)
}
return node
}
// QueryAll searches the XML Node that matches by the specified XPath expr.
// Returns an error if the expression `expr` cannot be parsed.
func QueryAll(top *Node, expr string) ([]*Node, error) {
exp, err := getQuery(expr)
if err != nil {
return nil, err
}
return QuerySelectorAll(top, exp), nil
}
// Query searches the XML Node that matches by the specified XPath expr,
// and returns first matched element.
func Query(top *Node, expr string) (*Node, error) {
exp, err := getQuery(expr)
if err != nil {
return nil, err
}
return QuerySelector(top, exp), nil
}
// QuerySelectorAll searches all of the XML Node that matches the specified
// XPath selectors.
func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node {
t := selector.Select(CreateXPathNavigator(top))
var elems []*Node var elems []*Node
for t.MoveNext() { for t.MoveNext() {
elems = append(elems, getCurrentNode(t)) elems = append(elems, getCurrentNode(t))
@ -80,32 +119,27 @@ func Find(top *Node, expr string) []*Node {
return elems return elems
} }
// FindOne searches the Node that matches by the specified XPath expr, // QuerySelector returns the first matched XML Node by the specified XPath
// and returns first element of matched. // selector.
func FindOne(top *Node, expr string) *Node { func QuerySelector(top *Node, selector *xpath.Expr) *Node {
exp, err := xpath.Compile(expr) t := selector.Select(CreateXPathNavigator(top))
if err != nil {
panic(err)
}
t := exp.Select(CreateXPathNavigator(top))
var elem *Node
if t.MoveNext() { if t.MoveNext() {
elem = getCurrentNode(t) return getCurrentNode(t)
} }
return elem return nil
} }
// FindEach searches the html.Node and calls functions cb. // FindEach searches the html.Node and calls functions cb.
// Important: this method has deprecated, recommend use for .. = range Find(){}. // Important: this method is deprecated, instead, use for .. = range Find(){}.
func FindEach(top *Node, expr string, cb func(int, *Node)) { func FindEach(top *Node, expr string, cb func(int, *Node)) {
for i, n := range Find(top, expr) { for i, n := range Find(top, expr) {
cb(i, n) cb(i, n)
} }
} }
// FindEachWithBreak functions the same as FindEach but allows you // FindEachWithBreak functions the same as FindEach but allows to break the loop
// to break the loop by returning false from your callback function, cb. // by returning false from the callback function `cb`.
// Important: this method has deprecated, recommend use for .. = range Find(){}. // Important: this method is deprecated, instead, use .. = range Find(){}.
func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) { func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) {
for i, n := range Find(top, expr) { for i, n := range Find(top, expr) {
if !cb(i, n) { if !cb(i, n) {
@ -127,7 +161,7 @@ func (x *NodeNavigator) NodeType() xpath.NodeType {
switch x.curr.Type { switch x.curr.Type {
case CommentNode: case CommentNode:
return xpath.CommentNode return xpath.CommentNode
case TextNode: case TextNode, CharDataNode:
return xpath.TextNode return xpath.TextNode
case DeclarationNode, DocumentNode: case DeclarationNode, DocumentNode:
return xpath.RootNode return xpath.RootNode
@ -158,6 +192,10 @@ func (x *NodeNavigator) Prefix() string {
return x.curr.Prefix return x.curr.Prefix
} }
func (x *NodeNavigator) NamespaceURL() string {
return x.curr.NamespaceURI
}
func (x *NodeNavigator) Value() string { func (x *NodeNavigator) Value() string {
switch x.curr.Type { switch x.curr.Type {
case CommentNode: case CommentNode:

View File

@ -138,12 +138,15 @@ Supported Features
`lang()`| ✗ | `lang()`| ✗ |
`last()`| ✓ | `last()`| ✓ |
`local-name()`| ✓ | `local-name()`| ✓ |
`matches()`| ✓ |
`name()`| ✓ | `name()`| ✓ |
`namespace-uri()`| ✓ | `namespace-uri()`| ✓ |
`normalize-space()`| ✓ | `normalize-space()`| ✓ |
`not()`| ✓ | `not()`| ✓ |
`number()`| ✓ | `number()`| ✓ |
`position()`| ✓ | `position()`| ✓ |
`replace()`| ✓ |
`reverse()`| ✓ |
`round()`| ✓ | `round()`| ✓ |
`starts-with()`| ✓ | `starts-with()`| ✓ |
`string()`| ✓ | `string()`| ✓ |
@ -160,6 +163,9 @@ Supported Features
Changelogs Changelogs
=== ===
2019-03-19
- optimize XPath `|` operation performance. [#33](https://github.com/antchfx/xpath/issues/33). Tips: suggest split into multiple subquery if you have a lot of `|` operations.
2019-01-29 2019-01-29
- improvement `normalize-space` function. [#32](https://github.com/antchfx/xpath/issues/32) - improvement `normalize-space` function. [#32](https://github.com/antchfx/xpath/issues/32)

View File

@ -77,7 +77,18 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) {
} else { } else {
qyGrandInput = &contextQuery{} qyGrandInput = &contextQuery{}
} }
qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: predicate, Self: true} // fix #20: https://github.com/antchfx/htmlquery/issues/20
filter := func(n NodeNavigator) bool {
v := predicate(n)
switch root.Prop {
case "text":
v = v && n.NodeType() == TextNode
case "comment":
v = v && n.NodeType() == CommentNode
}
return v
}
qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: filter, Self: true}
return qyOutput, nil return qyOutput, nil
} }
} }
@ -182,8 +193,23 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)} qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)}
case "matches":
//matches(string , pattern)
if len(root.Args) != 2 {
return nil, errors.New("xpath: matches function must have two parameters")
}
var (
arg1, arg2 query
err error
)
if arg1, err = b.processNode(root.Args[0]); err != nil {
return nil, err
}
if arg2, err = b.processNode(root.Args[1]); err != nil {
return nil, err
}
qyOutput = &functionQuery{Input: b.firstInput, Func: matchesFunc(arg1, arg2)}
case "substring": case "substring":
//substring( string , start [, length] ) //substring( string , start [, length] )
if len(root.Args) < 2 { if len(root.Args) < 2 {
@ -243,6 +269,25 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
return nil, err return nil, err
} }
qyOutput = &functionQuery{Input: argQuery, Func: normalizespaceFunc} qyOutput = &functionQuery{Input: argQuery, Func: normalizespaceFunc}
case "replace":
//replace( string , string, string )
if len(root.Args) != 3 {
return nil, errors.New("xpath: replace function must have three parameters")
}
var (
arg1, arg2, arg3 query
err error
)
if arg1, err = b.processNode(root.Args[0]); err != nil {
return nil, err
}
if arg2, err = b.processNode(root.Args[1]); err != nil {
return nil, err
}
if arg3, err = b.processNode(root.Args[2]); err != nil {
return nil, err
}
qyOutput = &functionQuery{Input: b.firstInput, Func: replaceFunc(arg1, arg2, arg3)}
case "translate": case "translate":
//translate( string , string, string ) //translate( string , string, string )
if len(root.Args) != 3 { if len(root.Args) != 3 {
@ -272,27 +317,27 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
} }
qyOutput = &functionQuery{Input: argQuery, Func: notFunc} qyOutput = &functionQuery{Input: argQuery, Func: notFunc}
case "name", "local-name", "namespace-uri": case "name", "local-name", "namespace-uri":
inp := b.firstInput
if len(root.Args) > 1 { if len(root.Args) > 1 {
return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName)
} }
var (
arg query
err error
)
if len(root.Args) == 1 { if len(root.Args) == 1 {
argQuery, err := b.processNode(root.Args[0]) arg, err = b.processNode(root.Args[0])
if err != nil { if err != nil {
return nil, err return nil, err
} }
inp = argQuery
} }
f := &functionQuery{Input: inp}
switch root.FuncName { switch root.FuncName {
case "name": case "name":
f.Func = nameFunc qyOutput = &functionQuery{Input: b.firstInput, Func: nameFunc(arg)}
case "local-name": case "local-name":
f.Func = localNameFunc qyOutput = &functionQuery{Input: b.firstInput, Func: localNameFunc(arg)}
case "namespace-uri": case "namespace-uri":
f.Func = namespaceFunc qyOutput = &functionQuery{Input: b.firstInput, Func: namespaceFunc(arg)}
} }
qyOutput = f
case "true", "false": case "true", "false":
val := root.FuncName == "true" val := root.FuncName == "true"
qyOutput = &functionQuery{ qyOutput = &functionQuery{
@ -379,6 +424,15 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
args = append(args, q) args = append(args, q)
} }
qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)} qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)}
case "reverse":
if len(root.Args) == 0 {
return nil, fmt.Errorf("xpath: reverse(node-sets) function must with have parameters node-sets")
}
argQuery, err := b.processNode(root.Args[0])
if err != nil {
return nil, err
}
qyOutput = &transformFunctionQuery{Input: argQuery, Func: reverseFunc}
default: default:
return nil, fmt.Errorf("not yet support this function %s()", root.FuncName) return nil, fmt.Errorf("not yet support this function %s()", root.FuncName)
} }
@ -396,13 +450,15 @@ func (b *builder) processOperatorNode(root *operatorNode) (query, error) {
} }
var qyOutput query var qyOutput query
switch root.Op { switch root.Op {
case "+", "-", "div", "mod": // Numeric operator case "+", "-", "*", "div", "mod": // Numeric operator
var exprFunc func(interface{}, interface{}) interface{} var exprFunc func(interface{}, interface{}) interface{}
switch root.Op { switch root.Op {
case "+": case "+":
exprFunc = plusFunc exprFunc = plusFunc
case "-": case "-":
exprFunc = minusFunc exprFunc = minusFunc
case "*":
exprFunc = mulFunc
case "div": case "div":
exprFunc = divFunc exprFunc = divFunc
case "mod": case "mod":

80
vendor/github.com/antchfx/xpath/cache.go generated vendored Normal file
View File

@ -0,0 +1,80 @@
package xpath
import (
"regexp"
"sync"
)
type loadFunc func(key interface{}) (interface{}, error)
const (
defaultCap = 65536
)
// The reason we're building a simple capacity-resetting loading cache (when capacity reached) instead of using
// something like github.com/hashicorp/golang-lru is primarily due to (not wanting to create) external dependency.
// Currently this library has 0 external dep (other than go sdk), and supports go 1.6, 1.9, and 1.10 (and later).
// Creating external lib dependencies (plus their transitive dependencies) would make things hard if not impossible.
// We expect under most circumstances, the defaultCap is big enough for any long running services that use this
// library if their xpath regexp cardinality is low. However, in extreme cases when the capacity is reached, we
// simply reset the cache, taking a small subsequent perf hit (next to nothing considering amortization) in trade
// of more complex and less performant LRU type of construct.
type loadingCache struct {
sync.RWMutex
cap int
load loadFunc
m map[interface{}]interface{}
reset int
}
// NewLoadingCache creates a new instance of a loading cache with capacity. Capacity must be >= 0, or
// it will panic. Capacity == 0 means the cache growth is unbounded.
func NewLoadingCache(load loadFunc, capacity int) *loadingCache {
if capacity < 0 {
panic("capacity must be >= 0")
}
return &loadingCache{cap: capacity, load: load, m: make(map[interface{}]interface{})}
}
func (c *loadingCache) get(key interface{}) (interface{}, error) {
c.RLock()
v, found := c.m[key]
c.RUnlock()
if found {
return v, nil
}
v, err := c.load(key)
if err != nil {
return nil, err
}
c.Lock()
if c.cap > 0 && len(c.m) >= c.cap {
c.m = map[interface{}]interface{}{key: v}
c.reset++
} else {
c.m[key] = v
}
c.Unlock()
return v, nil
}
var (
// RegexpCache is a loading cache for string -> *regexp.Regexp mapping. It is exported so that in rare cases
// client can customize load func and/or capacity.
RegexpCache = defaultRegexpCache()
)
func defaultRegexpCache() *loadingCache {
return NewLoadingCache(
func(key interface{}) (interface{}, error) {
return regexp.Compile(key.(string))
}, defaultCap)
}
func getRegexp(pattern string) (*regexp.Regexp, error) {
exp, err := RegexpCache.get(pattern)
if err != nil {
return nil, err
}
return exp.(*regexp.Regexp), nil
}

View File

@ -4,11 +4,26 @@ import (
"errors" "errors"
"fmt" "fmt"
"math" "math"
"regexp"
"strconv" "strconv"
"strings" "strings"
"sync"
"unicode"
) )
// Defined an interface of stringBuilder that compatible with
// strings.Builder(go 1.10) and bytes.Buffer(< go 1.10)
type stringBuilder interface {
WriteRune(r rune) (n int, err error)
WriteString(s string) (int, error)
Reset()
Grow(n int)
String() string
}
var builderPool = sync.Pool{New: func() interface{} {
return newStringBuilder()
}}
// The XPath function list. // The XPath function list.
func predicate(q query) func(NodeNavigator) bool { func predicate(q query) func(NodeNavigator) bool {
@ -25,7 +40,7 @@ func predicate(q query) func(NodeNavigator) bool {
func positionFunc(q query, t iterator) interface{} { func positionFunc(q query, t iterator) interface{} {
var ( var (
count = 1 count = 1
node = t.Current() node = t.Current().Copy()
) )
test := predicate(q) test := predicate(q)
for node.MoveToPrevious() { for node.MoveToPrevious() {
@ -40,7 +55,7 @@ func positionFunc(q query, t iterator) interface{} {
func lastFunc(q query, t iterator) interface{} { func lastFunc(q query, t iterator) interface{} {
var ( var (
count = 0 count = 0
node = t.Current() node = t.Current().Copy()
) )
node.MoveToFirst() node.MoveToFirst()
test := predicate(q) test := predicate(q)
@ -58,6 +73,7 @@ func lastFunc(q query, t iterator) interface{} {
// countFunc is a XPath Node Set functions count(node-set). // countFunc is a XPath Node Set functions count(node-set).
func countFunc(q query, t iterator) interface{} { func countFunc(q query, t iterator) interface{} {
var count = 0 var count = 0
q = functionArgs(q)
test := predicate(q) test := predicate(q)
switch typ := q.Evaluate(t).(type) { switch typ := q.Evaluate(t).(type) {
case query: case query:
@ -73,7 +89,7 @@ func countFunc(q query, t iterator) interface{} {
// sumFunc is a XPath Node Set functions sum(node-set). // sumFunc is a XPath Node Set functions sum(node-set).
func sumFunc(q query, t iterator) interface{} { func sumFunc(q query, t iterator) interface{} {
var sum float64 var sum float64
switch typ := q.Evaluate(t).(type) { switch typ := functionArgs(q).Evaluate(t).(type) {
case query: case query:
for node := typ.Select(t); node != nil; node = typ.Select(t) { for node := typ.Select(t); node != nil; node = typ.Select(t) {
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
@ -116,52 +132,82 @@ func asNumber(t iterator, o interface{}) float64 {
// ceilingFunc is a XPath Node Set functions ceiling(node-set). // ceilingFunc is a XPath Node Set functions ceiling(node-set).
func ceilingFunc(q query, t iterator) interface{} { func ceilingFunc(q query, t iterator) interface{} {
val := asNumber(t, q.Evaluate(t)) val := asNumber(t, functionArgs(q).Evaluate(t))
return math.Ceil(val) return math.Ceil(val)
} }
// floorFunc is a XPath Node Set functions floor(node-set). // floorFunc is a XPath Node Set functions floor(node-set).
func floorFunc(q query, t iterator) interface{} { func floorFunc(q query, t iterator) interface{} {
val := asNumber(t, q.Evaluate(t)) val := asNumber(t, functionArgs(q).Evaluate(t))
return math.Floor(val) return math.Floor(val)
} }
// roundFunc is a XPath Node Set functions round(node-set). // roundFunc is a XPath Node Set functions round(node-set).
func roundFunc(q query, t iterator) interface{} { func roundFunc(q query, t iterator) interface{} {
val := asNumber(t, q.Evaluate(t)) val := asNumber(t, functionArgs(q).Evaluate(t))
//return math.Round(val) //return math.Round(val)
return round(val) return round(val)
} }
// nameFunc is a XPath functions name([node-set]). // nameFunc is a XPath functions name([node-set]).
func nameFunc(q query, t iterator) interface{} { func nameFunc(arg query) func(query, iterator) interface{} {
v := q.Select(t) return func(q query, t iterator) interface{} {
var v NodeNavigator
if arg == nil {
v = t.Current()
} else {
v = arg.Clone().Select(t)
if v == nil { if v == nil {
return "" return ""
} }
}
ns := v.Prefix() ns := v.Prefix()
if ns == "" { if ns == "" {
return v.LocalName() return v.LocalName()
} }
return ns + ":" + v.LocalName() return ns + ":" + v.LocalName()
}
} }
// localNameFunc is a XPath functions local-name([node-set]). // localNameFunc is a XPath functions local-name([node-set]).
func localNameFunc(q query, t iterator) interface{} { func localNameFunc(arg query) func(query, iterator) interface{} {
v := q.Select(t) return func(q query, t iterator) interface{} {
var v NodeNavigator
if arg == nil {
v = t.Current()
} else {
v = arg.Clone().Select(t)
if v == nil { if v == nil {
return "" return ""
} }
}
return v.LocalName() return v.LocalName()
}
} }
// namespaceFunc is a XPath functions namespace-uri([node-set]). // namespaceFunc is a XPath functions namespace-uri([node-set]).
func namespaceFunc(q query, t iterator) interface{} { func namespaceFunc(arg query) func(query, iterator) interface{} {
v := q.Select(t) return func(q query, t iterator) interface{} {
var v NodeNavigator
if arg == nil {
v = t.Current()
} else {
// Get the first node in the node-set if specified.
v = arg.Clone().Select(t)
if v == nil { if v == nil {
return "" return ""
} }
}
// fix about namespace-uri() bug: https://github.com/antchfx/xmlquery/issues/22
// TODO: In the next version, add NamespaceURL() to the NodeNavigator interface.
type namespaceURL interface {
NamespaceURL() string
}
if f, ok := v.(namespaceURL); ok {
return f.NamespaceURL()
}
return v.Prefix() return v.Prefix()
}
} }
func asBool(t iterator, v interface{}) bool { func asBool(t iterator, v interface{}) bool {
@ -171,7 +217,7 @@ func asBool(t iterator, v interface{}) bool {
case *NodeIterator: case *NodeIterator:
return v.MoveNext() return v.MoveNext()
case bool: case bool:
return bool(v) return v
case float64: case float64:
return v != 0 return v != 0
case string: case string:
@ -209,19 +255,19 @@ func asString(t iterator, v interface{}) string {
// booleanFunc is a XPath functions boolean([node-set]). // booleanFunc is a XPath functions boolean([node-set]).
func booleanFunc(q query, t iterator) interface{} { func booleanFunc(q query, t iterator) interface{} {
v := q.Evaluate(t) v := functionArgs(q).Evaluate(t)
return asBool(t, v) return asBool(t, v)
} }
// numberFunc is a XPath functions number([node-set]). // numberFunc is a XPath functions number([node-set]).
func numberFunc(q query, t iterator) interface{} { func numberFunc(q query, t iterator) interface{} {
v := q.Evaluate(t) v := functionArgs(q).Evaluate(t)
return asNumber(t, v) return asNumber(t, v)
} }
// stringFunc is a XPath functions string([node-set]). // stringFunc is a XPath functions string([node-set]).
func stringFunc(q query, t iterator) interface{} { func stringFunc(q query, t iterator) interface{} {
v := q.Evaluate(t) v := functionArgs(q).Evaluate(t)
return asString(t, v) return asString(t, v)
} }
@ -232,7 +278,7 @@ func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
m, n string m, n string
ok bool ok bool
) )
switch typ := arg1.Evaluate(t).(type) { switch typ := functionArgs(arg1).Evaluate(t).(type) {
case string: case string:
m = typ m = typ
case query: case query:
@ -244,7 +290,7 @@ func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
default: default:
panic(errors.New("starts-with() function argument type must be string")) panic(errors.New("starts-with() function argument type must be string"))
} }
n, ok = arg2.Evaluate(t).(string) n, ok = functionArgs(arg2).Evaluate(t).(string)
if !ok { if !ok {
panic(errors.New("starts-with() function argument type must be string")) panic(errors.New("starts-with() function argument type must be string"))
} }
@ -259,7 +305,7 @@ func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
m, n string m, n string
ok bool ok bool
) )
switch typ := arg1.Evaluate(t).(type) { switch typ := functionArgs(arg1).Evaluate(t).(type) {
case string: case string:
m = typ m = typ
case query: case query:
@ -271,7 +317,7 @@ func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
default: default:
panic(errors.New("ends-with() function argument type must be string")) panic(errors.New("ends-with() function argument type must be string"))
} }
n, ok = arg2.Evaluate(t).(string) n, ok = functionArgs(arg2).Evaluate(t).(string)
if !ok { if !ok {
panic(errors.New("ends-with() function argument type must be string")) panic(errors.New("ends-with() function argument type must be string"))
} }
@ -286,8 +332,7 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
m, n string m, n string
ok bool ok bool
) )
switch typ := functionArgs(arg1).Evaluate(t).(type) {
switch typ := arg1.Evaluate(t).(type) {
case string: case string:
m = typ m = typ
case query: case query:
@ -300,7 +345,7 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
panic(errors.New("contains() function argument type must be string")) panic(errors.New("contains() function argument type must be string"))
} }
n, ok = arg2.Evaluate(t).(string) n, ok = functionArgs(arg2).Evaluate(t).(string)
if !ok { if !ok {
panic(errors.New("contains() function argument type must be string")) panic(errors.New("contains() function argument type must be string"))
} }
@ -309,15 +354,39 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
} }
} }
var ( // matchesFunc is an XPath function that tests a given string against a regexp pattern.
regnewline = regexp.MustCompile(`[\r\n\t]`) // Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if
regseqspace = regexp.MustCompile(`\s{2,}`) // needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag.
) func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} {
var s string
switch typ := functionArgs(arg1).Evaluate(t).(type) {
case string:
s = typ
case query:
node := typ.Select(t)
if node == nil {
return ""
}
s = node.Value()
}
var pattern string
var ok bool
if pattern, ok = functionArgs(arg2).Evaluate(t).(string); !ok {
panic(errors.New("matches() function second argument type must be string"))
}
re, err := getRegexp(pattern)
if err != nil {
panic(fmt.Errorf("matches() function second argument is not a valid regexp pattern, err: %s", err.Error()))
}
return re.MatchString(s)
}
}
// normalizespaceFunc is XPath functions normalize-space(string?) // normalizespaceFunc is XPath functions normalize-space(string?)
func normalizespaceFunc(q query, t iterator) interface{} { func normalizespaceFunc(q query, t iterator) interface{} {
var m string var m string
switch typ := q.Evaluate(t).(type) { switch typ := functionArgs(q).Evaluate(t).(type) {
case string: case string:
m = typ m = typ
case query: case query:
@ -327,17 +396,33 @@ func normalizespaceFunc(q query, t iterator) interface{} {
} }
m = node.Value() m = node.Value()
} }
m = strings.TrimSpace(m) var b = builderPool.Get().(stringBuilder)
m = regnewline.ReplaceAllString(m, " ") b.Grow(len(m))
m = regseqspace.ReplaceAllString(m, " ")
return m runeStr := []rune(strings.TrimSpace(m))
l := len(runeStr)
for i := range runeStr {
r := runeStr[i]
isSpace := unicode.IsSpace(r)
if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) {
if isSpace {
r = ' '
}
b.WriteRune(r)
}
}
result := b.String()
b.Reset()
builderPool.Put(b)
return result
} }
// substringFunc is XPath functions substring function returns a part of a given string. // substringFunc is XPath functions substring function returns a part of a given string.
func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} { return func(q query, t iterator) interface{} {
var m string var m string
switch typ := arg1.Evaluate(t).(type) { switch typ := functionArgs(arg1).Evaluate(t).(type) {
case string: case string:
m = typ m = typ
case query: case query:
@ -351,14 +436,14 @@ func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
var start, length float64 var start, length float64
var ok bool var ok bool
if start, ok = arg2.Evaluate(t).(float64); !ok { if start, ok = functionArgs(arg2).Evaluate(t).(float64); !ok {
panic(errors.New("substring() function first argument type must be int")) panic(errors.New("substring() function first argument type must be int"))
} else if start < 1 { } else if start < 1 {
panic(errors.New("substring() function first argument type must be >= 1")) panic(errors.New("substring() function first argument type must be >= 1"))
} }
start-- start--
if arg3 != nil { if arg3 != nil {
if length, ok = arg3.Evaluate(t).(float64); !ok { if length, ok = functionArgs(arg3).Evaluate(t).(float64); !ok {
panic(errors.New("substring() function second argument type must be int")) panic(errors.New("substring() function second argument type must be int"))
} }
} }
@ -376,7 +461,7 @@ func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} { func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} { return func(q query, t iterator) interface{} {
var str string var str string
switch v := arg1.Evaluate(t).(type) { switch v := functionArgs(arg1).Evaluate(t).(type) {
case string: case string:
str = v str = v
case query: case query:
@ -387,7 +472,7 @@ func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interf
str = node.Value() str = node.Value()
} }
var word string var word string
switch v := arg2.Evaluate(t).(type) { switch v := functionArgs(arg2).Evaluate(t).(type) {
case string: case string:
word = v word = v
case query: case query:
@ -416,7 +501,7 @@ func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interf
// equal to the number of characters in a given string. // equal to the number of characters in a given string.
func stringLengthFunc(arg1 query) func(query, iterator) interface{} { func stringLengthFunc(arg1 query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} { return func(q query, t iterator) interface{} {
switch v := arg1.Evaluate(t).(type) { switch v := functionArgs(arg1).Evaluate(t).(type) {
case string: case string:
return float64(len(v)) return float64(len(v))
case query: case query:
@ -433,11 +518,11 @@ func stringLengthFunc(arg1 query) func(query, iterator) interface{} {
// translateFunc is XPath functions translate() function returns a replaced string. // translateFunc is XPath functions translate() function returns a replaced string.
func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} { return func(q query, t iterator) interface{} {
str := asString(t, arg1.Evaluate(t)) str := asString(t, functionArgs(arg1).Evaluate(t))
src := asString(t, arg2.Evaluate(t)) src := asString(t, functionArgs(arg2).Evaluate(t))
dst := asString(t, arg3.Evaluate(t)) dst := asString(t, functionArgs(arg3).Evaluate(t))
var replace []string replace := make([]string, 0, len(src))
for i, s := range src { for i, s := range src {
d := "" d := ""
if i < len(dst) { if i < len(dst) {
@ -449,9 +534,20 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
} }
} }
// replaceFunc is XPath functions replace() function returns a replaced string.
func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} {
str := asString(t, functionArgs(arg1).Evaluate(t))
src := asString(t, functionArgs(arg2).Evaluate(t))
dst := asString(t, functionArgs(arg3).Evaluate(t))
return strings.Replace(str, src, dst, -1)
}
}
// notFunc is XPATH functions not(expression) function operation. // notFunc is XPATH functions not(expression) function operation.
func notFunc(q query, t iterator) interface{} { func notFunc(q query, t iterator) interface{} {
switch v := q.Evaluate(t).(type) { switch v := functionArgs(q).Evaluate(t).(type) {
case bool: case bool:
return !v return !v
case query: case query:
@ -467,18 +563,52 @@ func notFunc(q query, t iterator) interface{} {
// concat( string1 , string2 [, stringn]* ) // concat( string1 , string2 [, stringn]* )
func concatFunc(args ...query) func(query, iterator) interface{} { func concatFunc(args ...query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} { return func(q query, t iterator) interface{} {
var a []string b := builderPool.Get().(stringBuilder)
for _, v := range args { for _, v := range args {
v = functionArgs(v)
switch v := v.Evaluate(t).(type) { switch v := v.Evaluate(t).(type) {
case string: case string:
a = append(a, v) b.WriteString(v)
case query: case query:
node := v.Select(t) node := v.Select(t)
if node != nil { if node != nil {
a = append(a, node.Value()) b.WriteString(node.Value())
} }
} }
} }
return strings.Join(a, "") result := b.String()
b.Reset()
builderPool.Put(b)
return result
}
}
// https://github.com/antchfx/xpath/issues/43
func functionArgs(q query) query {
if _, ok := q.(*functionQuery); ok {
return q
}
return q.Clone()
}
func reverseFunc(q query, t iterator) func() NodeNavigator {
var list []NodeNavigator
for {
node := q.Select(t)
if node == nil {
break
}
list = append(list, node.Copy())
}
i := len(list)
return func() NodeNavigator {
if i <= 0 {
return nil
}
i--
node := list[i]
return node
} }
} }

View File

@ -2,8 +2,15 @@
package xpath package xpath
import "math" import (
"math"
"strings"
)
func round(f float64) int { func round(f float64) int {
return int(math.Round(f)) return int(math.Round(f))
} }
func newStringBuilder() stringBuilder{
return &strings.Builder{}
}

View File

@ -2,7 +2,10 @@
package xpath package xpath
import "math" import (
"bytes"
"math"
)
// math.Round() is supported by Go 1.10+, // math.Round() is supported by Go 1.10+,
// This method just compatible for version <1.10. // This method just compatible for version <1.10.
@ -13,3 +16,7 @@ func round(f float64) int {
} }
return int(f + math.Copysign(0.5, f)) return int(f + math.Copysign(0.5, f))
} }
func newStringBuilder() stringBuilder {
return &bytes.Buffer{}
}

3
vendor/github.com/antchfx/xpath/go.mod generated vendored Normal file
View File

@ -0,0 +1,3 @@
module github.com/antchfx/xpath
go 1.14

View File

@ -163,7 +163,17 @@ func cmpNodeSetString(t iterator, op string, m, n interface{}) bool {
} }
func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool { func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool {
a := m.(query)
b := n.(query)
x := a.Select(t)
if x == nil {
return false return false
}
y := b.Select(t)
if y == nil {
return false
}
return cmpStringStringF(op, x.Value(), y.Value())
} }
func cmpStringNumeric(t iterator, op string, m, n interface{}) bool { func cmpStringNumeric(t iterator, op string, m, n interface{}) bool {

View File

@ -22,6 +22,17 @@ type query interface {
Clone() query Clone() query
} }
// nopQuery is an empty query that always return nil for any query.
type nopQuery struct {
query
}
func (nopQuery) Select(iterator) NodeNavigator { return nil }
func (nopQuery) Evaluate(iterator) interface{} { return nil }
func (nopQuery) Clone() query { return nopQuery{} }
// contextQuery is returns current node on the iterator object query. // contextQuery is returns current node on the iterator object query.
type contextQuery struct { type contextQuery struct {
count int count int
@ -65,6 +76,7 @@ func (a *ancestorQuery) Select(t iterator) NodeNavigator {
return nil return nil
} }
first := true first := true
node = node.Copy()
a.iterator = func() NodeNavigator { a.iterator = func() NodeNavigator {
if first && a.Self { if first && a.Self {
first = false first = false
@ -216,6 +228,7 @@ func (c *childQuery) position() int {
type descendantQuery struct { type descendantQuery struct {
iterator func() NodeNavigator iterator func() NodeNavigator
posit int posit int
level int
Self bool Self bool
Input query Input query
@ -231,32 +244,38 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator {
return nil return nil
} }
node = node.Copy() node = node.Copy()
level := 0 d.level = 0
positmap := make(map[int]int)
first := true first := true
d.iterator = func() NodeNavigator { d.iterator = func() NodeNavigator {
if first && d.Self { if first && d.Self {
first = false first = false
if d.Predicate(node) { if d.Predicate(node) {
d.posit = 1
positmap[d.level] = 1
return node return node
} }
} }
for { for {
if node.MoveToChild() { if node.MoveToChild() {
level++ d.level = d.level + 1
positmap[d.level] = 0
} else { } else {
for { for {
if level == 0 { if d.level == 0 {
return nil return nil
} }
if node.MoveToNext() { if node.MoveToNext() {
break break
} }
node.MoveToParent() node.MoveToParent()
level-- d.level = d.level - 1
} }
} }
if d.Predicate(node) { if d.Predicate(node) {
positmap[d.level]++
d.posit = positmap[d.level]
return node return node
} }
} }
@ -264,7 +283,6 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator {
} }
if node := d.iterator(); node != nil { if node := d.iterator(); node != nil {
d.posit++
return node return node
} }
d.iterator = nil d.iterator = nil
@ -286,12 +304,17 @@ func (d *descendantQuery) position() int {
return d.posit return d.posit
} }
func (d *descendantQuery) depth() int {
return d.level
}
func (d *descendantQuery) Clone() query { func (d *descendantQuery) Clone() query {
return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate}
} }
// followingQuery is an XPath following node query.(following::*|following-sibling::*) // followingQuery is an XPath following node query.(following::*|following-sibling::*)
type followingQuery struct { type followingQuery struct {
posit int
iterator func() NodeNavigator iterator func() NodeNavigator
Input query Input query
@ -302,6 +325,7 @@ type followingQuery struct {
func (f *followingQuery) Select(t iterator) NodeNavigator { func (f *followingQuery) Select(t iterator) NodeNavigator {
for { for {
if f.iterator == nil { if f.iterator == nil {
f.posit = 0
node := f.Input.Select(t) node := f.Input.Select(t)
if node == nil { if node == nil {
return nil return nil
@ -314,12 +338,13 @@ func (f *followingQuery) Select(t iterator) NodeNavigator {
return nil return nil
} }
if f.Predicate(node) { if f.Predicate(node) {
f.posit++
return node return node
} }
} }
} }
} else { } else {
var q query // descendant query var q *descendantQuery // descendant query
f.iterator = func() NodeNavigator { f.iterator = func() NodeNavigator {
for { for {
if q == nil { if q == nil {
@ -336,6 +361,7 @@ func (f *followingQuery) Select(t iterator) NodeNavigator {
t.Current().MoveTo(node) t.Current().MoveTo(node)
} }
if node := q.Select(t); node != nil { if node := q.Select(t); node != nil {
f.posit = q.posit
return node return node
} }
q = nil q = nil
@ -364,9 +390,14 @@ func (f *followingQuery) Clone() query {
return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate} return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate}
} }
func (f *followingQuery) position() int {
return f.posit
}
// precedingQuery is an XPath preceding node query.(preceding::*) // precedingQuery is an XPath preceding node query.(preceding::*)
type precedingQuery struct { type precedingQuery struct {
iterator func() NodeNavigator iterator func() NodeNavigator
posit int
Input query Input query
Sibling bool // The matching sibling node of current node. Sibling bool // The matching sibling node of current node.
Predicate func(NodeNavigator) bool Predicate func(NodeNavigator) bool
@ -375,6 +406,7 @@ type precedingQuery struct {
func (p *precedingQuery) Select(t iterator) NodeNavigator { func (p *precedingQuery) Select(t iterator) NodeNavigator {
for { for {
if p.iterator == nil { if p.iterator == nil {
p.posit = 0
node := p.Input.Select(t) node := p.Input.Select(t)
if node == nil { if node == nil {
return nil return nil
@ -387,6 +419,7 @@ func (p *precedingQuery) Select(t iterator) NodeNavigator {
return nil return nil
} }
if p.Predicate(node) { if p.Predicate(node) {
p.posit++
return node return node
} }
} }
@ -400,6 +433,7 @@ func (p *precedingQuery) Select(t iterator) NodeNavigator {
if !node.MoveToParent() { if !node.MoveToParent() {
return nil return nil
} }
p.posit = 0
} }
q = &descendantQuery{ q = &descendantQuery{
Self: true, Self: true,
@ -409,6 +443,7 @@ func (p *precedingQuery) Select(t iterator) NodeNavigator {
t.Current().MoveTo(node) t.Current().MoveTo(node)
} }
if node := q.Select(t); node != nil { if node := q.Select(t); node != nil {
p.posit++
return node return node
} }
q = nil q = nil
@ -436,6 +471,10 @@ func (p *precedingQuery) Clone() query {
return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate} return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate}
} }
func (p *precedingQuery) position() int {
return p.posit
}
// parentQuery is an XPath parent node query.(parent::*) // parentQuery is an XPath parent node query.(parent::*)
type parentQuery struct { type parentQuery struct {
Input query Input query
@ -504,6 +543,8 @@ func (s *selfQuery) Clone() query {
type filterQuery struct { type filterQuery struct {
Input query Input query
Predicate query Predicate query
posit int
positmap map[int]int
} }
func (f *filterQuery) do(t iterator) bool { func (f *filterQuery) do(t iterator) bool {
@ -514,8 +555,8 @@ func (f *filterQuery) do(t iterator) bool {
case reflect.String: case reflect.String:
return len(val.String()) > 0 return len(val.String()) > 0
case reflect.Float64: case reflect.Float64:
pt := float64(getNodePosition(f.Input)) pt := getNodePosition(f.Input)
return int(val.Float()) == int(pt) return int(val.Float()) == pt
default: default:
if q, ok := f.Predicate.(query); ok { if q, ok := f.Predicate.(query); ok {
return q.Select(t) != nil return q.Select(t) != nil
@ -524,17 +565,29 @@ func (f *filterQuery) do(t iterator) bool {
return false return false
} }
func (f *filterQuery) position() int {
return f.posit
}
func (f *filterQuery) Select(t iterator) NodeNavigator { func (f *filterQuery) Select(t iterator) NodeNavigator {
if f.positmap == nil {
f.positmap = make(map[int]int)
}
for { for {
node := f.Input.Select(t) node := f.Input.Select(t)
if node == nil { if node == nil {
return node return node
} }
node = node.Copy() node = node.Copy()
//fmt.Println(node.LocalName())
t.Current().MoveTo(node) t.Current().MoveTo(node)
if f.do(t) { if f.do(t) {
// fix https://github.com/antchfx/htmlquery/issues/26
// Calculate and keep the each of matching node's position in the same depth.
level := getNodeDepth(f.Input)
f.positmap[level]++
f.posit = f.positmap[level]
return node return node
} }
} }
@ -549,8 +602,9 @@ func (f *filterQuery) Clone() query {
return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()} return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()}
} }
// functionQuery is an XPath function that call a function to returns // functionQuery is an XPath function that returns a computed value for
// value of current NodeNavigator node. // the Evaluate call of the current NodeNavigator node. Select call isn't
// applicable for functionQuery.
type functionQuery struct { type functionQuery struct {
Input query // Node Set Input query // Node Set
Func func(query, iterator) interface{} // The xpath function. Func func(query, iterator) interface{} // The xpath function.
@ -570,6 +624,34 @@ func (f *functionQuery) Clone() query {
return &functionQuery{Input: f.Input.Clone(), Func: f.Func} return &functionQuery{Input: f.Input.Clone(), Func: f.Func}
} }
// transformFunctionQuery diffs from functionQuery where the latter computes a scalar
// value (number,string,boolean) for the current NodeNavigator node while the former
// (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator
// and returns a new NodeNavigator. It is used for non-scalar XPath functions such as
// reverse(), remove(), subsequence(), unordered(), etc.
type transformFunctionQuery struct {
Input query
Func func(query, iterator) func() NodeNavigator
iterator func() NodeNavigator
}
func (f *transformFunctionQuery) Select(t iterator) NodeNavigator {
if f.iterator == nil {
f.iterator = f.Func(f.Input, t)
}
return f.iterator()
}
func (f *transformFunctionQuery) Evaluate(t iterator) interface{} {
f.Input.Evaluate(t)
f.iterator = nil
return f
}
func (f *transformFunctionQuery) Clone() query {
return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func}
}
// constantQuery is an XPath constant operand. // constantQuery is an XPath constant operand.
type constantQuery struct { type constantQuery struct {
Val interface{} Val interface{}
@ -731,7 +813,8 @@ type unionQuery struct {
func (u *unionQuery) Select(t iterator) NodeNavigator { func (u *unionQuery) Select(t iterator) NodeNavigator {
if u.iterator == nil { if u.iterator == nil {
var m = make(map[uint64]NodeNavigator) var list []NodeNavigator
var m = make(map[uint64]bool)
root := t.Current().Copy() root := t.Current().Copy()
for { for {
node := u.Left.Select(t) node := u.Left.Select(t)
@ -740,7 +823,8 @@ func (u *unionQuery) Select(t iterator) NodeNavigator {
} }
code := getHashCode(node.Copy()) code := getHashCode(node.Copy())
if _, ok := m[code]; !ok { if _, ok := m[code]; !ok {
m[code] = node.Copy() m[code] = true
list = append(list, node.Copy())
} }
} }
t.Current().MoveTo(root) t.Current().MoveTo(root)
@ -751,16 +835,11 @@ func (u *unionQuery) Select(t iterator) NodeNavigator {
} }
code := getHashCode(node.Copy()) code := getHashCode(node.Copy())
if _, ok := m[code]; !ok { if _, ok := m[code]; !ok {
m[code] = node.Copy() m[code] = true
list = append(list, node.Copy())
} }
} }
list := make([]NodeNavigator, len(m))
var i int var i int
for _, v := range m {
list[i] = v
i++
}
i = 0
u.iterator = func() NodeNavigator { u.iterator = func() NodeNavigator {
if i >= len(list) { if i >= len(list) {
return nil return nil
@ -789,8 +868,18 @@ func getHashCode(n NodeNavigator) uint64 {
switch n.NodeType() { switch n.NodeType() {
case AttributeNode, TextNode, CommentNode: case AttributeNode, TextNode, CommentNode:
sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value())) sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value()))
if n.MoveToParent() { // https://github.com/antchfx/htmlquery/issues/25
sb.WriteString(n.LocalName()) d := 1
for n.MoveToPrevious() {
d++
}
sb.WriteString(fmt.Sprintf("-%d", d))
for n.MoveToParent() {
d = 1
for n.MoveToPrevious() {
d++
}
sb.WriteString(fmt.Sprintf("-%d", d))
} }
case ElementNode: case ElementNode:
sb.WriteString(n.Prefix() + n.LocalName()) sb.WriteString(n.Prefix() + n.LocalName())
@ -822,3 +911,13 @@ func getNodePosition(q query) int {
} }
return 1 return 1
} }
func getNodeDepth(q query) int {
type Depth interface {
depth() int
}
if count, ok := q.(Depth); ok {
return count.depth()
}
return 0
}

View File

@ -2,6 +2,7 @@ package xpath
import ( import (
"errors" "errors"
"fmt"
) )
// NodeType represents a type of XPath node. // NodeType represents a type of XPath node.
@ -144,6 +145,9 @@ func Compile(expr string) (*Expr, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
if qy == nil {
return nil, fmt.Errorf(fmt.Sprintf("undeclared variable in XPath expression: %s", expr))
}
return &Expr{s: expr, q: qy}, nil return &Expr{s: expr, q: qy}, nil
} }
@ -151,7 +155,7 @@ func Compile(expr string) (*Expr, error) {
func MustCompile(expr string) *Expr { func MustCompile(expr string) *Expr {
exp, err := Compile(expr) exp, err := Compile(expr)
if err != nil { if err != nil {
return nil return &Expr{s: expr, q: nopQuery{}}
} }
return exp return exp
} }

191
vendor/github.com/golang/groupcache/LICENSE generated vendored Normal file
View File

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright
owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities
that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or
indirect, to cause the direction or management of such entity, whether by
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including
but not limited to software source code, documentation source, and configuration
files.
"Object" form shall mean any form resulting from mechanical transformation or
translation of a Source form, including but not limited to compiled object code,
generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made
available under the License, as indicated by a copyright notice that is included
in or attached to the work (an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that
is based on (or derived from) the Work and for which the editorial revisions,
annotations, elaborations, or other modifications represent, as a whole, an
original work of authorship. For the purposes of this License, Derivative Works
shall not include works that remain separable from, or merely link (or bind by
name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version
of the Work and any modifications or additions to that Work or Derivative Works
thereof, that is intentionally submitted to Licensor for inclusion in the Work
by the copyright owner or by an individual or Legal Entity authorized to submit
on behalf of the copyright owner. For the purposes of this definition,
"submitted" means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems, and
issue tracking systems that are managed by, or on behalf of, the Licensor for
the purpose of discussing and improving the Work, but excluding communication
that is conspicuously marked or otherwise designated in writing by the copyright
owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
of whom a Contribution has been received by Licensor and subsequently
incorporated within the Work.
2. Grant of Copyright License.
Subject to the terms and conditions of this License, each Contributor hereby
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the Work and such
Derivative Works in Source or Object form.
3. Grant of Patent License.
Subject to the terms and conditions of this License, each Contributor hereby
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to make, have
made, use, offer to sell, sell, import, and otherwise transfer the Work, where
such license applies only to those patent claims licensable by such Contributor
that are necessarily infringed by their Contribution(s) alone or by combination
of their Contribution(s) with the Work to which such Contribution(s) was
submitted. If You institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work or a
Contribution incorporated within the Work constitutes direct or contributory
patent infringement, then any patent licenses granted to You under this License
for that Work shall terminate as of the date such litigation is filed.
4. Redistribution.
You may reproduce and distribute copies of the Work or Derivative Works thereof
in any medium, with or without modifications, and in Source or Object form,
provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy of
this License; and
You must cause any modified files to carry prominent notices stating that You
changed the files; and
You must retain, in the Source form of any Derivative Works that You distribute,
all copyright, patent, trademark, and attribution notices from the Source form
of the Work, excluding those notices that do not pertain to any part of the
Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then any
Derivative Works that You distribute must include a readable copy of the
attribution notices contained within such NOTICE file, excluding those notices
that do not pertain to any part of the Derivative Works, in at least one of the
following places: within a NOTICE text file distributed as part of the
Derivative Works; within the Source form or documentation, if provided along
with the Derivative Works; or, within a display generated by the Derivative
Works, if and wherever such third-party notices normally appear. The contents of
the NOTICE file are for informational purposes only and do not modify the
License. You may add Your own attribution notices within Derivative Works that
You distribute, alongside or as an addendum to the NOTICE text from the Work,
provided that such additional attribution notices cannot be construed as
modifying the License.
You may add Your own copyright statement to Your modifications and may provide
additional or different license terms and conditions for use, reproduction, or
distribution of Your modifications, or for any such Derivative Works as a whole,
provided Your use, reproduction, and distribution of the Work otherwise complies
with the conditions stated in this License.
5. Submission of Contributions.
Unless You explicitly state otherwise, any Contribution intentionally submitted
for inclusion in the Work by You to the Licensor shall be under the terms and
conditions of this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify the terms of
any separate license agreement you may have executed with Licensor regarding
such Contributions.
6. Trademarks.
This License does not grant permission to use the trade names, trademarks,
service marks, or product names of the Licensor, except as required for
reasonable and customary use in describing the origin of the Work and
reproducing the content of the NOTICE file.
7. Disclaimer of Warranty.
Unless required by applicable law or agreed to in writing, Licensor provides the
Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
including, without limitation, any warranties or conditions of TITLE,
NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
solely responsible for determining the appropriateness of using or
redistributing the Work and assume any risks associated with Your exercise of
permissions under this License.
8. Limitation of Liability.
In no event and under no legal theory, whether in tort (including negligence),
contract, or otherwise, unless required by applicable law (such as deliberate
and grossly negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special, incidental,
or consequential damages of any character arising as a result of this License or
out of the use or inability to use the Work (including but not limited to
damages for loss of goodwill, work stoppage, computer failure or malfunction, or
any and all other commercial damages or losses), even if such Contributor has
been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability.
While redistributing the Work or Derivative Works thereof, You may choose to
offer, and charge a fee for, acceptance of support, warranty, indemnity, or
other liability obligations and/or rights consistent with this License. However,
in accepting such obligations, You may act only on Your own behalf and on Your
sole responsibility, not on behalf of any other Contributor, and only if You
agree to indemnify, defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason of your
accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work
To apply the Apache License to your work, attach the following boilerplate
notice, with the fields enclosed by brackets "[]" replaced with your own
identifying information. (Don't include the brackets!) The text should be
enclosed in the appropriate comment syntax for the file format. We also
recommend that a file or class name and description of purpose be included on
the same "printed page" as the copyright notice for easier identification within
third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

133
vendor/github.com/golang/groupcache/lru/lru.go generated vendored Normal file
View File

@ -0,0 +1,133 @@
/*
Copyright 2013 Google Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package lru implements an LRU cache.
package lru
import "container/list"
// Cache is an LRU cache. It is not safe for concurrent access.
type Cache struct {
// MaxEntries is the maximum number of cache entries before
// an item is evicted. Zero means no limit.
MaxEntries int
// OnEvicted optionally specifies a callback function to be
// executed when an entry is purged from the cache.
OnEvicted func(key Key, value interface{})
ll *list.List
cache map[interface{}]*list.Element
}
// A Key may be any value that is comparable. See http://golang.org/ref/spec#Comparison_operators
type Key interface{}
type entry struct {
key Key
value interface{}
}
// New creates a new Cache.
// If maxEntries is zero, the cache has no limit and it's assumed
// that eviction is done by the caller.
func New(maxEntries int) *Cache {
return &Cache{
MaxEntries: maxEntries,
ll: list.New(),
cache: make(map[interface{}]*list.Element),
}
}
// Add adds a value to the cache.
func (c *Cache) Add(key Key, value interface{}) {
if c.cache == nil {
c.cache = make(map[interface{}]*list.Element)
c.ll = list.New()
}
if ee, ok := c.cache[key]; ok {
c.ll.MoveToFront(ee)
ee.Value.(*entry).value = value
return
}
ele := c.ll.PushFront(&entry{key, value})
c.cache[key] = ele
if c.MaxEntries != 0 && c.ll.Len() > c.MaxEntries {
c.RemoveOldest()
}
}
// Get looks up a key's value from the cache.
func (c *Cache) Get(key Key) (value interface{}, ok bool) {
if c.cache == nil {
return
}
if ele, hit := c.cache[key]; hit {
c.ll.MoveToFront(ele)
return ele.Value.(*entry).value, true
}
return
}
// Remove removes the provided key from the cache.
func (c *Cache) Remove(key Key) {
if c.cache == nil {
return
}
if ele, hit := c.cache[key]; hit {
c.removeElement(ele)
}
}
// RemoveOldest removes the oldest item from the cache.
func (c *Cache) RemoveOldest() {
if c.cache == nil {
return
}
ele := c.ll.Back()
if ele != nil {
c.removeElement(ele)
}
}
func (c *Cache) removeElement(e *list.Element) {
c.ll.Remove(e)
kv := e.Value.(*entry)
delete(c.cache, kv.key)
if c.OnEvicted != nil {
c.OnEvicted(kv.key, kv.value)
}
}
// Len returns the number of items in the cache.
func (c *Cache) Len() int {
if c.cache == nil {
return 0
}
return c.ll.Len()
}
// Clear purges all stored items from the cache.
func (c *Cache) Clear() {
if c.OnEvicted != nil {
for _, e := range c.cache {
kv := e.Value.(*entry)
c.OnEvicted(kv.key, kv.value)
}
}
c.ll = nil
c.cache = nil
}

View File

@ -3,6 +3,7 @@ package client // import "github.com/influxdata/influxdb1-client/v2"
import ( import (
"bytes" "bytes"
"compress/gzip"
"crypto/tls" "crypto/tls"
"encoding/json" "encoding/json"
"errors" "errors"
@ -20,6 +21,13 @@ import (
"github.com/influxdata/influxdb1-client/models" "github.com/influxdata/influxdb1-client/models"
) )
type ContentEncoding string
const (
DefaultEncoding ContentEncoding = ""
GzipEncoding ContentEncoding = "gzip"
)
// HTTPConfig is the config data needed to create an HTTP Client. // HTTPConfig is the config data needed to create an HTTP Client.
type HTTPConfig struct { type HTTPConfig struct {
// Addr should be of the form "http://host:port" // Addr should be of the form "http://host:port"
@ -48,6 +56,9 @@ type HTTPConfig struct {
// Proxy configures the Proxy function on the HTTP client. // Proxy configures the Proxy function on the HTTP client.
Proxy func(req *http.Request) (*url.URL, error) Proxy func(req *http.Request) (*url.URL, error)
// WriteEncoding specifies the encoding of write request
WriteEncoding ContentEncoding
} }
// BatchPointsConfig is the config data needed to create an instance of the BatchPoints struct. // BatchPointsConfig is the config data needed to create an instance of the BatchPoints struct.
@ -102,6 +113,12 @@ func NewHTTPClient(conf HTTPConfig) (Client, error) {
return nil, errors.New(m) return nil, errors.New(m)
} }
switch conf.WriteEncoding {
case DefaultEncoding, GzipEncoding:
default:
return nil, fmt.Errorf("unsupported encoding %s", conf.WriteEncoding)
}
tr := &http.Transport{ tr := &http.Transport{
TLSClientConfig: &tls.Config{ TLSClientConfig: &tls.Config{
InsecureSkipVerify: conf.InsecureSkipVerify, InsecureSkipVerify: conf.InsecureSkipVerify,
@ -121,6 +138,7 @@ func NewHTTPClient(conf HTTPConfig) (Client, error) {
Transport: tr, Transport: tr,
}, },
transport: tr, transport: tr,
encoding: conf.WriteEncoding,
}, nil }, nil
} }
@ -186,6 +204,7 @@ type client struct {
useragent string useragent string
httpClient *http.Client httpClient *http.Client
transport *http.Transport transport *http.Transport
encoding ContentEncoding
} }
// BatchPoints is an interface into a batched grouping of points to write into // BatchPoints is an interface into a batched grouping of points to write into
@ -366,15 +385,29 @@ func NewPointFrom(pt models.Point) *Point {
func (c *client) Write(bp BatchPoints) error { func (c *client) Write(bp BatchPoints) error {
var b bytes.Buffer var b bytes.Buffer
var w io.Writer
if c.encoding == GzipEncoding {
w = gzip.NewWriter(&b)
} else {
w = &b
}
for _, p := range bp.Points() { for _, p := range bp.Points() {
if p == nil { if p == nil {
continue continue
} }
if _, err := b.WriteString(p.pt.PrecisionString(bp.Precision())); err != nil { if _, err := io.WriteString(w, p.pt.PrecisionString(bp.Precision())); err != nil {
return err return err
} }
if err := b.WriteByte('\n'); err != nil { if _, err := w.Write([]byte{'\n'}); err != nil {
return err
}
}
// gzip writer should be closed to flush data into underlying buffer
if c, ok := w.(io.Closer); ok {
if err := c.Close(); err != nil {
return err return err
} }
} }
@ -386,6 +419,9 @@ func (c *client) Write(bp BatchPoints) error {
if err != nil { if err != nil {
return err return err
} }
if c.encoding != DefaultEncoding {
req.Header.Set("Content-Encoding", string(c.encoding))
}
req.Header.Set("Content-Type", "") req.Header.Set("Content-Type", "")
req.Header.Set("User-Agent", c.useragent) req.Header.Set("User-Agent", c.useragent)
if c.username != "" { if c.username != "" {
@ -429,6 +465,9 @@ type Query struct {
Parameters map[string]interface{} Parameters map[string]interface{}
} }
// Params is a type alias to the query parameters.
type Params map[string]interface{}
// NewQuery returns a query object. // NewQuery returns a query object.
// The database and precision arguments can be empty strings if they are not needed for the query. // The database and precision arguments can be empty strings if they are not needed for the query.
func NewQuery(command, database, precision string) Query { func NewQuery(command, database, precision string) Query {
@ -493,6 +532,7 @@ type Message struct {
// Result represents a resultset returned from a single statement. // Result represents a resultset returned from a single statement.
type Result struct { type Result struct {
StatementId int `json:"statement_id"`
Series []models.Row Series []models.Row
Messages []*Message Messages []*Message
Err string `json:"error,omitempty"` Err string `json:"error,omitempty"`

View File

@ -0,0 +1,73 @@
package client
import (
"encoding/json"
"time"
)
type (
// Identifier is an identifier value.
Identifier string
// StringValue is a string literal.
StringValue string
// RegexValue is a regexp literal.
RegexValue string
// NumberValue is a number literal.
NumberValue float64
// IntegerValue is an integer literal.
IntegerValue int64
// BooleanValue is a boolean literal.
BooleanValue bool
// TimeValue is a time literal.
TimeValue time.Time
// DurationValue is a duration literal.
DurationValue time.Duration
)
func (v Identifier) MarshalJSON() ([]byte, error) {
m := map[string]string{"identifier": string(v)}
return json.Marshal(m)
}
func (v StringValue) MarshalJSON() ([]byte, error) {
m := map[string]string{"string": string(v)}
return json.Marshal(m)
}
func (v RegexValue) MarshalJSON() ([]byte, error) {
m := map[string]string{"regex": string(v)}
return json.Marshal(m)
}
func (v NumberValue) MarshalJSON() ([]byte, error) {
m := map[string]float64{"number": float64(v)}
return json.Marshal(m)
}
func (v IntegerValue) MarshalJSON() ([]byte, error) {
m := map[string]int64{"integer": int64(v)}
return json.Marshal(m)
}
func (v BooleanValue) MarshalJSON() ([]byte, error) {
m := map[string]bool{"boolean": bool(v)}
return json.Marshal(m)
}
func (v TimeValue) MarshalJSON() ([]byte, error) {
t := time.Time(v)
m := map[string]string{"string": t.Format(time.RFC3339Nano)}
return json.Marshal(m)
}
func (v DurationValue) MarshalJSON() ([]byte, error) {
m := map[string]int64{"duration": int64(v)}
return json.Marshal(m)
}

View File

@ -52,8 +52,7 @@ var isSpecialElementMap = map[string]bool{
"iframe": true, "iframe": true,
"img": true, "img": true,
"input": true, "input": true,
"isindex": true, // The 'isindex' element has been removed, but keep it for backwards compatibility. "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
"keygen": true,
"li": true, "li": true,
"link": true, "link": true,
"listing": true, "listing": true,

View File

@ -167,12 +167,8 @@ var svgAttributeAdjustments = map[string]string{
"baseprofile": "baseProfile", "baseprofile": "baseProfile",
"calcmode": "calcMode", "calcmode": "calcMode",
"clippathunits": "clipPathUnits", "clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant", "diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode", "edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits", "filterunits": "filterUnits",
"glyphref": "glyphRef", "glyphref": "glyphRef",
"gradienttransform": "gradientTransform", "gradienttransform": "gradientTransform",

View File

@ -18,6 +18,11 @@ const (
ElementNode ElementNode
CommentNode CommentNode
DoctypeNode DoctypeNode
// RawNode nodes are not returned by the parser, but can be part of the
// Node tree passed to func Render to insert raw HTML (without escaping).
// If so, this package makes no guarantee that the rendered HTML is secure
// (from e.g. Cross Site Scripting attacks) or well-formed.
RawNode
scopeMarkerNode scopeMarkerNode
) )

259
vendor/golang.org/x/net/html/parse.go generated vendored
View File

@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) {
} }
} }
// parseGenericRawTextElements implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
// officially, need to make tokenizer consider both states.
func (p *parser) parseGenericRawTextElement() {
p.addElement()
p.originalIM = p.im
p.im = textIM
}
// generateImpliedEndTags pops nodes off the stack of open elements as long as // generateImpliedEndTags pops nodes off the stack of open elements as long as
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
// If exceptions are specified, nodes with that name will not be popped off. // If exceptions are specified, nodes with that name will not be popped off.
@ -192,7 +203,9 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) {
loop: loop:
for i = len(p.oe) - 1; i >= 0; i-- { for i = len(p.oe) - 1; i >= 0; i-- {
n := p.oe[i] n := p.oe[i]
if n.Type == ElementNode { if n.Type != ElementNode {
break
}
switch n.DataAtom { switch n.DataAtom {
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
for _, except := range exceptions { for _, except := range exceptions {
@ -202,7 +215,6 @@ loop:
} }
continue continue
} }
}
break break
} }
@ -369,8 +381,7 @@ findIdenticalElements:
// Section 12.2.4.3. // Section 12.2.4.3.
func (p *parser) clearActiveFormattingElements() { func (p *parser) clearActiveFormattingElements() {
for { for {
n := p.afe.pop() if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
if len(p.afe) == 0 || n.Type == scopeMarkerNode {
return return
} }
} }
@ -625,25 +636,29 @@ func inHeadIM(p *parser) bool {
switch p.tok.DataAtom { switch p.tok.DataAtom {
case a.Html: case a.Html:
return inBodyIM(p) return inBodyIM(p)
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
p.addElement() p.addElement()
p.oe.pop() p.oe.pop()
p.acknowledgeSelfClosingTag() p.acknowledgeSelfClosingTag()
return true return true
case a.Noscript: case a.Noscript:
p.addElement()
if p.scripting { if p.scripting {
p.setOriginalIM() p.parseGenericRawTextElement()
p.im = textIM
} else {
p.im = inHeadNoscriptIM
}
return true return true
case a.Script, a.Title, a.Noframes, a.Style: }
p.addElement()
p.im = inHeadNoscriptIM
// Don't let the tokenizer go into raw text mode when scripting is disabled.
p.tokenizer.NextIsNotRawText()
return true
case a.Script, a.Title:
p.addElement() p.addElement()
p.setOriginalIM() p.setOriginalIM()
p.im = textIM p.im = textIM
return true return true
case a.Noframes, a.Style:
p.parseGenericRawTextElement()
return true
case a.Head: case a.Head:
// Ignore the token. // Ignore the token.
return true return true
@ -713,7 +728,13 @@ func inHeadNoscriptIM(p *parser) bool {
return inBodyIM(p) return inBodyIM(p)
case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style: case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
return inHeadIM(p) return inHeadIM(p)
case a.Head, a.Noscript: case a.Head:
// Ignore the token.
return true
case a.Noscript:
// Don't let the tokenizer go into raw text mode even when a <noscript>
// tag is in "in head noscript" insertion mode.
p.tokenizer.NextIsNotRawText()
// Ignore the token. // Ignore the token.
return true return true
} }
@ -855,7 +876,7 @@ func inBodyIM(p *parser) bool {
return true return true
} }
copyAttributes(p.oe[0], p.tok) copyAttributes(p.oe[0], p.tok)
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
return inHeadIM(p) return inHeadIM(p)
case a.Body: case a.Body:
if p.oe.contains(a.Template) { if p.oe.contains(a.Template) {
@ -881,7 +902,7 @@ func inBodyIM(p *parser) bool {
p.addElement() p.addElement()
p.im = inFramesetIM p.im = inFramesetIM
return true return true
case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
p.popUntil(buttonScope, a.P) p.popUntil(buttonScope, a.P)
p.addElement() p.addElement()
case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
@ -1014,53 +1035,6 @@ func inBodyIM(p *parser) bool {
p.tok.DataAtom = a.Img p.tok.DataAtom = a.Img
p.tok.Data = a.Img.String() p.tok.Data = a.Img.String()
return false return false
case a.Isindex:
if p.form != nil {
// Ignore the token.
return true
}
action := ""
prompt := "This is a searchable index. Enter search keywords: "
attr := []Attribute{{Key: "name", Val: "isindex"}}
for _, t := range p.tok.Attr {
switch t.Key {
case "action":
action = t.Val
case "name":
// Ignore the attribute.
case "prompt":
prompt = t.Val
default:
attr = append(attr, t)
}
}
p.acknowledgeSelfClosingTag()
p.popUntil(buttonScope, a.P)
p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
if p.form == nil {
// NOTE: The 'isindex' element has been removed,
// and the 'template' element has not been designed to be
// collaborative with the index element.
//
// Ignore the token.
return true
}
if action != "" {
p.form.Attr = []Attribute{{Key: "action", Val: action}}
}
p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
p.addText(prompt)
p.addChild(&Node{
Type: ElementNode,
DataAtom: a.Input,
Data: a.Input.String(),
Attr: attr,
})
p.oe.pop()
p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
case a.Textarea: case a.Textarea:
p.addElement() p.addElement()
p.setOriginalIM() p.setOriginalIM()
@ -1070,18 +1044,21 @@ func inBodyIM(p *parser) bool {
p.popUntil(buttonScope, a.P) p.popUntil(buttonScope, a.P)
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.framesetOK = false p.framesetOK = false
p.addElement() p.parseGenericRawTextElement()
p.setOriginalIM()
p.im = textIM
case a.Iframe: case a.Iframe:
p.framesetOK = false p.framesetOK = false
p.parseGenericRawTextElement()
case a.Noembed:
p.parseGenericRawTextElement()
case a.Noscript:
if p.scripting {
p.parseGenericRawTextElement()
return true
}
p.reconstructActiveFormattingElements()
p.addElement() p.addElement()
p.setOriginalIM() // Don't let the tokenizer go into raw text mode when scripting is disabled.
p.im = textIM p.tokenizer.NextIsNotRawText()
case a.Noembed, a.Noscript:
p.addElement()
p.setOriginalIM()
p.im = textIM
case a.Select: case a.Select:
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.addElement() p.addElement()
@ -1137,7 +1114,7 @@ func inBodyIM(p *parser) bool {
return false return false
} }
return true return true
case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
p.popUntil(defaultScope, p.tok.DataAtom) p.popUntil(defaultScope, p.tok.DataAtom)
case a.Form: case a.Form:
if p.oe.contains(a.Template) { if p.oe.contains(a.Template) {
@ -1198,7 +1175,7 @@ func inBodyIM(p *parser) bool {
if len(p.templateStack) > 0 { if len(p.templateStack) > 0 {
p.im = inTemplateIM p.im = inTemplateIM
return false return false
} else { }
for _, e := range p.oe { for _, e := range p.oe {
switch e.DataAtom { switch e.DataAtom {
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
@ -1208,7 +1185,6 @@ func inBodyIM(p *parser) bool {
} }
} }
} }
}
return true return true
} }
@ -1221,9 +1197,15 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
// Once the code successfully parses the comprehensive test suite, we should // Once the code successfully parses the comprehensive test suite, we should
// refactor this code to be more idiomatic. // refactor this code to be more idiomatic.
// Steps 1-4. The outer loop. // Steps 1-2
if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
p.oe.pop()
return
}
// Steps 3-5. The outer loop.
for i := 0; i < 8; i++ { for i := 0; i < 8; i++ {
// Step 5. Find the formatting element. // Step 6. Find the formatting element.
var formattingElement *Node var formattingElement *Node
for j := len(p.afe) - 1; j >= 0; j-- { for j := len(p.afe) - 1; j >= 0; j-- {
if p.afe[j].Type == scopeMarkerNode { if p.afe[j].Type == scopeMarkerNode {
@ -1238,17 +1220,22 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
p.inBodyEndTagOther(tagAtom, tagName) p.inBodyEndTagOther(tagAtom, tagName)
return return
} }
// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
feIndex := p.oe.index(formattingElement) feIndex := p.oe.index(formattingElement)
if feIndex == -1 { if feIndex == -1 {
p.afe.remove(formattingElement) p.afe.remove(formattingElement)
return return
} }
// Step 8. Ignore the tag if formatting element is not in the scope.
if !p.elementInScope(defaultScope, tagAtom) { if !p.elementInScope(defaultScope, tagAtom) {
// Ignore the tag. // Ignore the tag.
return return
} }
// Steps 9-10. Find the furthest block. // Step 9. This step is omitted because it's just a parse error but no need to return.
// Steps 10-11. Find the furthest block.
var furthestBlock *Node var furthestBlock *Node
for _, e := range p.oe[feIndex:] { for _, e := range p.oe[feIndex:] {
if isSpecialElement(e) { if isSpecialElement(e) {
@ -1265,47 +1252,65 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
return return
} }
// Steps 11-12. Find the common ancestor and bookmark node. // Steps 12-13. Find the common ancestor and bookmark node.
commonAncestor := p.oe[feIndex-1] commonAncestor := p.oe[feIndex-1]
bookmark := p.afe.index(formattingElement) bookmark := p.afe.index(formattingElement)
// Step 13. The inner loop. Find the lastNode to reparent. // Step 14. The inner loop. Find the lastNode to reparent.
lastNode := furthestBlock lastNode := furthestBlock
node := furthestBlock node := furthestBlock
x := p.oe.index(node) x := p.oe.index(node)
// Steps 13.1-13.2 // Step 14.1.
for j := 0; j < 3; j++ { j := 0
// Step 13.3. for {
// Step 14.2.
j++
// Step. 14.3.
x-- x--
node = p.oe[x] node = p.oe[x]
// Step 13.4 - 13.5. // Step 14.4. Go to the next step if node is formatting element.
if node == formattingElement {
break
}
// Step 14.5. Remove node from the list of active formatting elements if
// inner loop counter is greater than three and node is in the list of
// active formatting elements.
if ni := p.afe.index(node); j > 3 && ni > -1 {
p.afe.remove(node)
// If any element of the list of active formatting elements is removed,
// we need to take care whether bookmark should be decremented or not.
// This is because the value of bookmark may exceed the size of the
// list by removing elements from the list.
if ni <= bookmark {
bookmark--
}
continue
}
// Step 14.6. Continue the next inner loop if node is not in the list of
// active formatting elements.
if p.afe.index(node) == -1 { if p.afe.index(node) == -1 {
p.oe.remove(node) p.oe.remove(node)
continue continue
} }
// Step 13.6. // Step 14.7.
if node == formattingElement {
break
}
// Step 13.7.
clone := node.clone() clone := node.clone()
p.afe[p.afe.index(node)] = clone p.afe[p.afe.index(node)] = clone
p.oe[p.oe.index(node)] = clone p.oe[p.oe.index(node)] = clone
node = clone node = clone
// Step 13.8. // Step 14.8.
if lastNode == furthestBlock { if lastNode == furthestBlock {
bookmark = p.afe.index(node) + 1 bookmark = p.afe.index(node) + 1
} }
// Step 13.9. // Step 14.9.
if lastNode.Parent != nil { if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode) lastNode.Parent.RemoveChild(lastNode)
} }
node.AppendChild(lastNode) node.AppendChild(lastNode)
// Step 13.10. // Step 14.10.
lastNode = node lastNode = node
} }
// Step 14. Reparent lastNode to the common ancestor, // Step 15. Reparent lastNode to the common ancestor,
// or for misnested table nodes, to the foster parent. // or for misnested table nodes, to the foster parent.
if lastNode.Parent != nil { if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode) lastNode.Parent.RemoveChild(lastNode)
@ -1317,13 +1322,13 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
commonAncestor.AppendChild(lastNode) commonAncestor.AppendChild(lastNode)
} }
// Steps 15-17. Reparent nodes from the furthest block's children // Steps 16-18. Reparent nodes from the furthest block's children
// to a clone of the formatting element. // to a clone of the formatting element.
clone := formattingElement.clone() clone := formattingElement.clone()
reparentChildren(clone, furthestBlock) reparentChildren(clone, furthestBlock)
furthestBlock.AppendChild(clone) furthestBlock.AppendChild(clone)
// Step 18. Fix up the list of active formatting elements. // Step 19. Fix up the list of active formatting elements.
if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
// Move the bookmark with the rest of the list. // Move the bookmark with the rest of the list.
bookmark-- bookmark--
@ -1331,7 +1336,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
p.afe.remove(formattingElement) p.afe.remove(formattingElement)
p.afe.insert(bookmark, clone) p.afe.insert(bookmark, clone)
// Step 19. Fix up the stack of open elements. // Step 20. Fix up the stack of open elements.
p.oe.remove(formattingElement) p.oe.remove(formattingElement)
p.oe.insert(p.oe.index(furthestBlock)+1, clone) p.oe.insert(p.oe.index(furthestBlock)+1, clone)
} }
@ -1502,14 +1507,13 @@ func inCaptionIM(p *parser) bool {
case StartTagToken: case StartTagToken:
switch p.tok.DataAtom { switch p.tok.DataAtom {
case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr: case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
if p.popUntil(tableScope, a.Caption) { if !p.popUntil(tableScope, a.Caption) {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.clearActiveFormattingElements()
p.im = inTableIM
return false
case a.Select: case a.Select:
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.addElement() p.addElement()
@ -1526,14 +1530,13 @@ func inCaptionIM(p *parser) bool {
} }
return true return true
case a.Table: case a.Table:
if p.popUntil(tableScope, a.Caption) { if !p.popUntil(tableScope, a.Caption) {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.clearActiveFormattingElements()
p.im = inTableIM
return false
case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
// Ignore the token. // Ignore the token.
return true return true
@ -1777,12 +1780,11 @@ func inSelectIM(p *parser) bool {
} }
p.addElement() p.addElement()
case a.Select: case a.Select:
if p.popUntil(selectScope, a.Select) { if !p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.resetInsertionMode()
case a.Input, a.Keygen, a.Textarea: case a.Input, a.Keygen, a.Textarea:
if p.elementInScope(selectScope, a.Select) { if p.elementInScope(selectScope, a.Select) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@ -1794,6 +1796,13 @@ func inSelectIM(p *parser) bool {
return true return true
case a.Script, a.Template: case a.Script, a.Template:
return inHeadIM(p) return inHeadIM(p)
case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
// Don't let the tokenizer go into raw text mode when there are raw tags
// to be ignored. These tags should be ignored from the tokenizer
// properly.
p.tokenizer.NextIsNotRawText()
// Ignore the token.
return true
} }
case EndTagToken: case EndTagToken:
switch p.tok.DataAtom { switch p.tok.DataAtom {
@ -1810,12 +1819,11 @@ func inSelectIM(p *parser) bool {
p.oe = p.oe[:i] p.oe = p.oe[:i]
} }
case a.Select: case a.Select:
if p.popUntil(selectScope, a.Select) { if !p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.resetInsertionMode()
case a.Template: case a.Template:
return inHeadIM(p) return inHeadIM(p)
} }
@ -2136,6 +2144,7 @@ func parseForeignContent(p *parser) bool {
Data: p.tok.Data, Data: p.tok.Data,
}) })
case StartTagToken: case StartTagToken:
if !p.fragment {
b := breakout[p.tok.Data] b := breakout[p.tok.Data]
if p.tok.DataAtom == a.Font { if p.tok.DataAtom == a.Font {
loop: loop:
@ -2157,7 +2166,9 @@ func parseForeignContent(p *parser) bool {
} }
return false return false
} }
switch p.top().Namespace { }
current := p.adjustedCurrentNode()
switch current.Namespace {
case "math": case "math":
adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
case "svg": case "svg":
@ -2172,7 +2183,7 @@ func parseForeignContent(p *parser) bool {
panic("html: bad parser state: unexpected namespace") panic("html: bad parser state: unexpected namespace")
} }
adjustForeignAttributes(p.tok.Attr) adjustForeignAttributes(p.tok.Attr)
namespace := p.top().Namespace namespace := current.Namespace
p.addElement() p.addElement()
p.top().Namespace = namespace p.top().Namespace = namespace
if namespace != "" { if namespace != "" {
@ -2201,12 +2212,20 @@ func parseForeignContent(p *parser) bool {
return true return true
} }
// Section 12.2.4.2.
func (p *parser) adjustedCurrentNode() *Node {
if len(p.oe) == 1 && p.fragment && p.context != nil {
return p.context
}
return p.oe.top()
}
// Section 12.2.6. // Section 12.2.6.
func (p *parser) inForeignContent() bool { func (p *parser) inForeignContent() bool {
if len(p.oe) == 0 { if len(p.oe) == 0 {
return false return false
} }
n := p.oe[len(p.oe)-1] n := p.adjustedCurrentNode()
if n.Namespace == "" { if n.Namespace == "" {
return false return false
} }
@ -2341,8 +2360,7 @@ func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
f(p) f(p)
} }
err := p.parse() if err := p.parse(); err != nil {
if err != nil {
return nil, err return nil, err
} }
return p.doc, nil return p.doc, nil
@ -2364,7 +2382,6 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
contextTag = context.DataAtom.String() contextTag = context.DataAtom.String()
} }
p := &parser{ p := &parser{
tokenizer: NewTokenizerFragment(r, contextTag),
doc: &Node{ doc: &Node{
Type: DocumentNode, Type: DocumentNode,
}, },
@ -2372,6 +2389,11 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
fragment: true, fragment: true,
context: context, context: context,
} }
if context != nil && context.Namespace != "" {
p.tokenizer = NewTokenizer(r)
} else {
p.tokenizer = NewTokenizerFragment(r, contextTag)
}
for _, f := range opts { for _, f := range opts {
f(p) f(p)
@ -2396,8 +2418,7 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
} }
} }
err := p.parse() if err := p.parse(); err != nil {
if err != nil {
return nil, err return nil, err
} }

View File

@ -134,6 +134,9 @@ func render1(w writer, n *Node) error {
} }
} }
return w.WriteByte('>') return w.WriteByte('>')
case RawNode:
_, err := w.WriteString(n.Data)
return err
default: default:
return errors.New("html: unknown node type") return errors.New("html: unknown node type")
} }
@ -256,12 +259,11 @@ var voidElements = map[string]bool{
"base": true, "base": true,
"br": true, "br": true,
"col": true, "col": true,
"command": true,
"embed": true, "embed": true,
"hr": true, "hr": true,
"img": true, "img": true,
"input": true, "input": true,
"keygen": true, "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
"link": true, "link": true,
"meta": true, "meta": true,
"param": true, "param": true,

View File

@ -296,8 +296,7 @@ func (z *Tokenizer) Buffered() []byte {
// too many times in succession. // too many times in succession.
func readAtLeastOneByte(r io.Reader, b []byte) (int, error) { func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
for i := 0; i < 100; i++ { for i := 0; i < 100; i++ {
n, err := r.Read(b) if n, err := r.Read(b); n != 0 || err != nil {
if n != 0 || err != nil {
return n, err return n, err
} }
} }
@ -347,6 +346,7 @@ loop:
break loop break loop
} }
if c != '/' { if c != '/' {
z.raw.end--
continue loop continue loop
} }
if z.readRawEndTag() || z.err != nil { if z.readRawEndTag() || z.err != nil {
@ -1067,6 +1067,11 @@ loop:
// Raw returns the unmodified text of the current token. Calling Next, Token, // Raw returns the unmodified text of the current token. Calling Next, Token,
// Text, TagName or TagAttr may change the contents of the returned slice. // Text, TagName or TagAttr may change the contents of the returned slice.
//
// The token stream's raw bytes partition the byte stream (up until an
// ErrorToken). There are no overlaps or gaps between two consecutive token's
// raw bytes. One implication is that the byte offset of the current token is
// the sum of the lengths of all previous tokens' raw bytes.
func (z *Tokenizer) Raw() []byte { func (z *Tokenizer) Raw() []byte {
return z.buf[z.raw.start:z.raw.end] return z.buf[z.raw.start:z.raw.end]
} }

View File

@ -124,7 +124,7 @@ func (e *Encoder) Writer(w io.Writer) io.Writer {
} }
// ASCIISub is the ASCII substitute character, as recommended by // ASCIISub is the ASCII substitute character, as recommended by
// http://unicode.org/reports/tr36/#Text_Comparison // https://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a' const ASCIISub = '\x1a'
// Nop is the nop encoding. Its transformed bytes are the same as the source // Nop is the nop encoding. Its transformed bytes are the same as the source

View File

@ -306,6 +306,7 @@ var nameMap = map[string]htmlEncoding{
"iso-2022-cn": replacement, "iso-2022-cn": replacement,
"iso-2022-cn-ext": replacement, "iso-2022-cn-ext": replacement,
"iso-2022-kr": replacement, "iso-2022-kr": replacement,
"replacement": replacement,
"utf-16be": utf16be, "utf-16be": utf16be,
"utf-16": utf16le, "utf-16": utf16le,
"utf-16le": utf16le, "utf-16le": utf16le,

View File

@ -34,7 +34,7 @@ package identifier
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml // - http://www.iana.org/assignments/character-sets/character-sets.xhtml
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib // - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
// - http://www.ietf.org/rfc/rfc2978.txt // - http://www.ietf.org/rfc/rfc2978.txt
// - http://www.unicode.org/reports/tr22/ // - https://www.unicode.org/reports/tr22/
// - http://www.w3.org/TR/encoding/ // - http://www.w3.org/TR/encoding/
// - https://encoding.spec.whatwg.org/ // - https://encoding.spec.whatwg.org/
// - https://encoding.spec.whatwg.org/encodings.json // - https://encoding.spec.whatwg.org/encodings.json

View File

@ -538,8 +538,6 @@ const (
// ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic. // ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic.
// //
// ISO registry // ISO registry
// (formerly ECMA
// registry )
ISO111ECMACyrillic MIB = 77 ISO111ECMACyrillic MIB = 77
// ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1. // ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1.
@ -732,18 +730,18 @@ const (
// ISO885913 is the MIB identifier with IANA name ISO-8859-13. // ISO885913 is the MIB identifier with IANA name ISO-8859-13.
// //
// ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-13 http://www.iana.org/assignments/charset-reg/ISO-8859-13 // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-13 https://www.iana.org/assignments/charset-reg/ISO-8859-13
ISO885913 MIB = 109 ISO885913 MIB = 109
// ISO885914 is the MIB identifier with IANA name ISO-8859-14. // ISO885914 is the MIB identifier with IANA name ISO-8859-14.
// //
// ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-14 // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-14
ISO885914 MIB = 110 ISO885914 MIB = 110
// ISO885915 is the MIB identifier with IANA name ISO-8859-15. // ISO885915 is the MIB identifier with IANA name ISO-8859-15.
// //
// ISO // ISO
// Please see: http://www.iana.org/assignments/charset-reg/ISO-8859-15 // Please see: https://www.iana.org/assignments/charset-reg/ISO-8859-15
ISO885915 MIB = 111 ISO885915 MIB = 111
// ISO885916 is the MIB identifier with IANA name ISO-8859-16. // ISO885916 is the MIB identifier with IANA name ISO-8859-16.
@ -754,41 +752,41 @@ const (
// GBK is the MIB identifier with IANA name GBK. // GBK is the MIB identifier with IANA name GBK.
// //
// Chinese IT Standardization Technical Committee // Chinese IT Standardization Technical Committee
// Please see: http://www.iana.org/assignments/charset-reg/GBK // Please see: https://www.iana.org/assignments/charset-reg/GBK
GBK MIB = 113 GBK MIB = 113
// GB18030 is the MIB identifier with IANA name GB18030. // GB18030 is the MIB identifier with IANA name GB18030.
// //
// Chinese IT Standardization Technical Committee // Chinese IT Standardization Technical Committee
// Please see: http://www.iana.org/assignments/charset-reg/GB18030 // Please see: https://www.iana.org/assignments/charset-reg/GB18030
GB18030 MIB = 114 GB18030 MIB = 114
// OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15. // OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15.
// //
// Fujitsu-Siemens standard mainframe EBCDIC encoding // Fujitsu-Siemens standard mainframe EBCDIC encoding
// Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15 // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15
OSDEBCDICDF0415 MIB = 115 OSDEBCDICDF0415 MIB = 115
// OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV. // OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV.
// //
// Fujitsu-Siemens standard mainframe EBCDIC encoding // Fujitsu-Siemens standard mainframe EBCDIC encoding
// Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV
OSDEBCDICDF03IRV MIB = 116 OSDEBCDICDF03IRV MIB = 116
// OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1. // OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1.
// //
// Fujitsu-Siemens standard mainframe EBCDIC encoding // Fujitsu-Siemens standard mainframe EBCDIC encoding
// Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1 // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1
OSDEBCDICDF041 MIB = 117 OSDEBCDICDF041 MIB = 117
// ISO115481 is the MIB identifier with IANA name ISO-11548-1. // ISO115481 is the MIB identifier with IANA name ISO-11548-1.
// //
// See http://www.iana.org/assignments/charset-reg/ISO-11548-1 // See https://www.iana.org/assignments/charset-reg/ISO-11548-1
ISO115481 MIB = 118 ISO115481 MIB = 118
// KZ1048 is the MIB identifier with IANA name KZ-1048. // KZ1048 is the MIB identifier with IANA name KZ-1048.
// //
// See http://www.iana.org/assignments/charset-reg/KZ-1048 // See https://www.iana.org/assignments/charset-reg/KZ-1048
KZ1048 MIB = 119 KZ1048 MIB = 119
// Unicode is the MIB identifier with IANA name ISO-10646-UCS-2. // Unicode is the MIB identifier with IANA name ISO-10646-UCS-2.
@ -855,7 +853,7 @@ const (
// SCSU is the MIB identifier with IANA name SCSU. // SCSU is the MIB identifier with IANA name SCSU.
// //
// SCSU See http://www.iana.org/assignments/charset-reg/SCSU // SCSU See https://www.iana.org/assignments/charset-reg/SCSU
SCSU MIB = 1011 SCSU MIB = 1011
// UTF7 is the MIB identifier with IANA name UTF-7. // UTF7 is the MIB identifier with IANA name UTF-7.
@ -884,27 +882,27 @@ const (
// CESU8 is the MIB identifier with IANA name CESU-8. // CESU8 is the MIB identifier with IANA name CESU-8.
// //
// http://www.unicode.org/unicode/reports/tr26 // https://www.unicode.org/reports/tr26
CESU8 MIB = 1016 CESU8 MIB = 1016
// UTF32 is the MIB identifier with IANA name UTF-32. // UTF32 is the MIB identifier with IANA name UTF-32.
// //
// http://www.unicode.org/unicode/reports/tr19/ // https://www.unicode.org/reports/tr19/
UTF32 MIB = 1017 UTF32 MIB = 1017
// UTF32BE is the MIB identifier with IANA name UTF-32BE. // UTF32BE is the MIB identifier with IANA name UTF-32BE.
// //
// http://www.unicode.org/unicode/reports/tr19/ // https://www.unicode.org/reports/tr19/
UTF32BE MIB = 1018 UTF32BE MIB = 1018
// UTF32LE is the MIB identifier with IANA name UTF-32LE. // UTF32LE is the MIB identifier with IANA name UTF-32LE.
// //
// http://www.unicode.org/unicode/reports/tr19/ // https://www.unicode.org/reports/tr19/
UTF32LE MIB = 1019 UTF32LE MIB = 1019
// BOCU1 is the MIB identifier with IANA name BOCU-1. // BOCU1 is the MIB identifier with IANA name BOCU-1.
// //
// http://www.unicode.org/notes/tn6/ // https://www.unicode.org/notes/tn6/
BOCU1 MIB = 1020 BOCU1 MIB = 1020
// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. // Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.
@ -1461,152 +1459,152 @@ const (
// IBM00858 is the MIB identifier with IANA name IBM00858. // IBM00858 is the MIB identifier with IANA name IBM00858.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM00858 // IBM See https://www.iana.org/assignments/charset-reg/IBM00858
IBM00858 MIB = 2089 IBM00858 MIB = 2089
// IBM00924 is the MIB identifier with IANA name IBM00924. // IBM00924 is the MIB identifier with IANA name IBM00924.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM00924 // IBM See https://www.iana.org/assignments/charset-reg/IBM00924
IBM00924 MIB = 2090 IBM00924 MIB = 2090
// IBM01140 is the MIB identifier with IANA name IBM01140. // IBM01140 is the MIB identifier with IANA name IBM01140.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01140 // IBM See https://www.iana.org/assignments/charset-reg/IBM01140
IBM01140 MIB = 2091 IBM01140 MIB = 2091
// IBM01141 is the MIB identifier with IANA name IBM01141. // IBM01141 is the MIB identifier with IANA name IBM01141.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01141 // IBM See https://www.iana.org/assignments/charset-reg/IBM01141
IBM01141 MIB = 2092 IBM01141 MIB = 2092
// IBM01142 is the MIB identifier with IANA name IBM01142. // IBM01142 is the MIB identifier with IANA name IBM01142.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01142 // IBM See https://www.iana.org/assignments/charset-reg/IBM01142
IBM01142 MIB = 2093 IBM01142 MIB = 2093
// IBM01143 is the MIB identifier with IANA name IBM01143. // IBM01143 is the MIB identifier with IANA name IBM01143.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01143 // IBM See https://www.iana.org/assignments/charset-reg/IBM01143
IBM01143 MIB = 2094 IBM01143 MIB = 2094
// IBM01144 is the MIB identifier with IANA name IBM01144. // IBM01144 is the MIB identifier with IANA name IBM01144.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01144 // IBM See https://www.iana.org/assignments/charset-reg/IBM01144
IBM01144 MIB = 2095 IBM01144 MIB = 2095
// IBM01145 is the MIB identifier with IANA name IBM01145. // IBM01145 is the MIB identifier with IANA name IBM01145.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01145 // IBM See https://www.iana.org/assignments/charset-reg/IBM01145
IBM01145 MIB = 2096 IBM01145 MIB = 2096
// IBM01146 is the MIB identifier with IANA name IBM01146. // IBM01146 is the MIB identifier with IANA name IBM01146.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01146 // IBM See https://www.iana.org/assignments/charset-reg/IBM01146
IBM01146 MIB = 2097 IBM01146 MIB = 2097
// IBM01147 is the MIB identifier with IANA name IBM01147. // IBM01147 is the MIB identifier with IANA name IBM01147.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01147 // IBM See https://www.iana.org/assignments/charset-reg/IBM01147
IBM01147 MIB = 2098 IBM01147 MIB = 2098
// IBM01148 is the MIB identifier with IANA name IBM01148. // IBM01148 is the MIB identifier with IANA name IBM01148.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01148 // IBM See https://www.iana.org/assignments/charset-reg/IBM01148
IBM01148 MIB = 2099 IBM01148 MIB = 2099
// IBM01149 is the MIB identifier with IANA name IBM01149. // IBM01149 is the MIB identifier with IANA name IBM01149.
// //
// IBM See http://www.iana.org/assignments/charset-reg/IBM01149 // IBM See https://www.iana.org/assignments/charset-reg/IBM01149
IBM01149 MIB = 2100 IBM01149 MIB = 2100
// Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS. // Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS.
// //
// See http://www.iana.org/assignments/charset-reg/Big5-HKSCS // See https://www.iana.org/assignments/charset-reg/Big5-HKSCS
Big5HKSCS MIB = 2101 Big5HKSCS MIB = 2101
// IBM1047 is the MIB identifier with IANA name IBM1047. // IBM1047 is the MIB identifier with IANA name IBM1047.
// //
// IBM1047 (EBCDIC Latin 1/Open Systems) http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf // IBM1047 (EBCDIC Latin 1/Open Systems) https://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
IBM1047 MIB = 2102 IBM1047 MIB = 2102
// PTCP154 is the MIB identifier with IANA name PTCP154. // PTCP154 is the MIB identifier with IANA name PTCP154.
// //
// See http://www.iana.org/assignments/charset-reg/PTCP154 // See https://www.iana.org/assignments/charset-reg/PTCP154
PTCP154 MIB = 2103 PTCP154 MIB = 2103
// Amiga1251 is the MIB identifier with IANA name Amiga-1251. // Amiga1251 is the MIB identifier with IANA name Amiga-1251.
// //
// See http://www.amiga.ultranet.ru/Amiga-1251.html // See https://www.amiga.ultranet.ru/Amiga-1251.html
Amiga1251 MIB = 2104 Amiga1251 MIB = 2104
// KOI7switched is the MIB identifier with IANA name KOI7-switched. // KOI7switched is the MIB identifier with IANA name KOI7-switched.
// //
// See http://www.iana.org/assignments/charset-reg/KOI7-switched // See https://www.iana.org/assignments/charset-reg/KOI7-switched
KOI7switched MIB = 2105 KOI7switched MIB = 2105
// BRF is the MIB identifier with IANA name BRF. // BRF is the MIB identifier with IANA name BRF.
// //
// See http://www.iana.org/assignments/charset-reg/BRF // See https://www.iana.org/assignments/charset-reg/BRF
BRF MIB = 2106 BRF MIB = 2106
// TSCII is the MIB identifier with IANA name TSCII. // TSCII is the MIB identifier with IANA name TSCII.
// //
// See http://www.iana.org/assignments/charset-reg/TSCII // See https://www.iana.org/assignments/charset-reg/TSCII
TSCII MIB = 2107 TSCII MIB = 2107
// CP51932 is the MIB identifier with IANA name CP51932. // CP51932 is the MIB identifier with IANA name CP51932.
// //
// See http://www.iana.org/assignments/charset-reg/CP51932 // See https://www.iana.org/assignments/charset-reg/CP51932
CP51932 MIB = 2108 CP51932 MIB = 2108
// Windows874 is the MIB identifier with IANA name windows-874. // Windows874 is the MIB identifier with IANA name windows-874.
// //
// See http://www.iana.org/assignments/charset-reg/windows-874 // See https://www.iana.org/assignments/charset-reg/windows-874
Windows874 MIB = 2109 Windows874 MIB = 2109
// Windows1250 is the MIB identifier with IANA name windows-1250. // Windows1250 is the MIB identifier with IANA name windows-1250.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1250 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1250
Windows1250 MIB = 2250 Windows1250 MIB = 2250
// Windows1251 is the MIB identifier with IANA name windows-1251. // Windows1251 is the MIB identifier with IANA name windows-1251.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1251 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1251
Windows1251 MIB = 2251 Windows1251 MIB = 2251
// Windows1252 is the MIB identifier with IANA name windows-1252. // Windows1252 is the MIB identifier with IANA name windows-1252.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1252 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1252
Windows1252 MIB = 2252 Windows1252 MIB = 2252
// Windows1253 is the MIB identifier with IANA name windows-1253. // Windows1253 is the MIB identifier with IANA name windows-1253.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1253 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1253
Windows1253 MIB = 2253 Windows1253 MIB = 2253
// Windows1254 is the MIB identifier with IANA name windows-1254. // Windows1254 is the MIB identifier with IANA name windows-1254.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1254 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1254
Windows1254 MIB = 2254 Windows1254 MIB = 2254
// Windows1255 is the MIB identifier with IANA name windows-1255. // Windows1255 is the MIB identifier with IANA name windows-1255.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1255 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1255
Windows1255 MIB = 2255 Windows1255 MIB = 2255
// Windows1256 is the MIB identifier with IANA name windows-1256. // Windows1256 is the MIB identifier with IANA name windows-1256.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1256 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1256
Windows1256 MIB = 2256 Windows1256 MIB = 2256
// Windows1257 is the MIB identifier with IANA name windows-1257. // Windows1257 is the MIB identifier with IANA name windows-1257.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1257 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1257
Windows1257 MIB = 2257 Windows1257 MIB = 2257
// Windows1258 is the MIB identifier with IANA name windows-1258. // Windows1258 is the MIB identifier with IANA name windows-1258.
// //
// Microsoft http://www.iana.org/assignments/charset-reg/windows-1258 // Microsoft https://www.iana.org/assignments/charset-reg/windows-1258
Windows1258 MIB = 2258 Windows1258 MIB = 2258
// TIS620 is the MIB identifier with IANA name TIS-620. // TIS620 is the MIB identifier with IANA name TIS-620.
@ -1616,6 +1614,6 @@ const (
// CP50220 is the MIB identifier with IANA name CP50220. // CP50220 is the MIB identifier with IANA name CP50220.
// //
// See http://www.iana.org/assignments/charset-reg/CP50220 // See https://www.iana.org/assignments/charset-reg/CP50220
CP50220 MIB = 2260 CP50220 MIB = 2260
) )

View File

@ -6,6 +6,7 @@
package unicode // import "golang.org/x/text/encoding/unicode" package unicode // import "golang.org/x/text/encoding/unicode"
import ( import (
"bytes"
"errors" "errors"
"unicode/utf16" "unicode/utf16"
"unicode/utf8" "unicode/utf8"
@ -25,15 +26,95 @@ import (
// the introduction of some kind of error type for conveying the erroneous code // the introduction of some kind of error type for conveying the erroneous code
// point. // point.
// UTF8 is the UTF-8 encoding. // UTF8 is the UTF-8 encoding. It neither removes nor adds byte order marks.
var UTF8 encoding.Encoding = utf8enc var UTF8 encoding.Encoding = utf8enc
// UTF8BOM is an UTF-8 encoding where the decoder strips a leading byte order
// mark while the encoder adds one.
//
// Some editors add a byte order mark as a signature to UTF-8 files. Although
// the byte order mark is not useful for detecting byte order in UTF-8, it is
// sometimes used as a convention to mark UTF-8-encoded files. This relies on
// the observation that the UTF-8 byte order mark is either an illegal or at
// least very unlikely sequence in any other character encoding.
var UTF8BOM encoding.Encoding = utf8bomEncoding{}
type utf8bomEncoding struct{}
func (utf8bomEncoding) String() string {
return "UTF-8-BOM"
}
func (utf8bomEncoding) ID() (identifier.MIB, string) {
return identifier.Unofficial, "x-utf8bom"
}
func (utf8bomEncoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{
Transformer: &utf8bomEncoder{t: runes.ReplaceIllFormed()},
}
}
func (utf8bomEncoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: &utf8bomDecoder{}}
}
var utf8enc = &internal.Encoding{ var utf8enc = &internal.Encoding{
&internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()}, &internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()},
"UTF-8", "UTF-8",
identifier.UTF8, identifier.UTF8,
} }
type utf8bomDecoder struct {
checked bool
}
func (t *utf8bomDecoder) Reset() {
t.checked = false
}
func (t *utf8bomDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !t.checked {
if !atEOF && len(src) < len(utf8BOM) {
if len(src) == 0 {
return 0, 0, nil
}
return 0, 0, transform.ErrShortSrc
}
if bytes.HasPrefix(src, []byte(utf8BOM)) {
nSrc += len(utf8BOM)
src = src[len(utf8BOM):]
}
t.checked = true
}
nDst, n, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF)
nSrc += n
return nDst, nSrc, err
}
type utf8bomEncoder struct {
written bool
t transform.Transformer
}
func (t *utf8bomEncoder) Reset() {
t.written = false
t.t.Reset()
}
func (t *utf8bomEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !t.written {
if len(dst) < len(utf8BOM) {
return nDst, 0, transform.ErrShortDst
}
nDst = copy(dst, utf8BOM)
t.written = true
}
n, nSrc, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF)
nDst += n
return nDst, nSrc, err
}
type utf8Decoder struct{ transform.NopResetter } type utf8Decoder struct{ transform.NopResetter }
func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -145,7 +226,7 @@ func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
// and consumed in a greater context that implies a certain endianness, use // and consumed in a greater context that implies a certain endianness, use
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM. // IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
// //
// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM // In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
// corresponds to "Where the precise type of the data stream is known... the // corresponds to "Where the precise type of the data stream is known... the
// BOM should not be used" and ExpectBOM corresponds to "A particular // BOM should not be used" and ExpectBOM corresponds to "A particular
// protocol... may require use of the BOM". // protocol... may require use of the BOM".
@ -287,16 +368,13 @@ func (u *utf16Decoder) Reset() {
} }
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(src) == 0 { if len(src) < 2 && atEOF && u.current.bomPolicy&requireBOM != 0 {
if atEOF && u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM return 0, 0, ErrMissingBOM
} }
if len(src) == 0 {
return 0, 0, nil return 0, 0, nil
} }
if u.current.bomPolicy&acceptBOM != 0 { if len(src) >= 2 && u.current.bomPolicy&acceptBOM != 0 {
if len(src) < 2 {
return 0, 0, transform.ErrShortSrc
}
switch { switch {
case src[0] == 0xfe && src[1] == 0xff: case src[0] == 0xfe && src[1] == 0xff:
u.current.endianness = BigEndian u.current.endianness = BigEndian

View File

@ -4,13 +4,13 @@ package language
// This file contains code common to the maketables.go and the package code. // This file contains code common to the maketables.go and the package code.
// langAliasType is the type of an alias in langAliasMap. // AliasType is the type of an alias in AliasMap.
type langAliasType int8 type AliasType int8
const ( const (
langDeprecated langAliasType = iota Deprecated AliasType = iota
langMacro Macro
langLegacy Legacy
langAliasTypeUnknown langAliasType = -1 AliasTypeUnknown AliasType = -1
) )

29
vendor/golang.org/x/text/internal/language/compact.go generated vendored Normal file
View File

@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// CompactCoreInfo is a compact integer with the three core tags encoded.
type CompactCoreInfo uint32
// GetCompactCore generates a uint32 value that is guaranteed to be unique for
// different language, region, and script values.
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
if t.LangID > langNoIndexOffset {
return 0, false
}
cci |= CompactCoreInfo(t.LangID) << (8 + 12)
cci |= CompactCoreInfo(t.ScriptID) << 12
cci |= CompactCoreInfo(t.RegionID)
return cci, true
}
// Tag generates a tag from c.
func (c CompactCoreInfo) Tag() Tag {
return Tag{
LangID: Language(c >> 20),
RegionID: Region(c & 0x3ff),
ScriptID: Script(c>>12) & 0xff,
}
}

View File

@ -0,0 +1,61 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package compact defines a compact representation of language tags.
//
// Common language tags (at least all for which locale information is defined
// in CLDR) are assigned a unique index. Each Tag is associated with such an
// ID for selecting language-related resources (such as translations) as well
// as one for selecting regional defaults (currency, number formatting, etc.)
//
// It may want to export this functionality at some point, but at this point
// this is only available for use within x/text.
package compact // import "golang.org/x/text/internal/language/compact"
import (
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// ID is an integer identifying a single tag.
type ID uint16
func getCoreIndex(t language.Tag) (id ID, ok bool) {
cci, ok := language.GetCompactCore(t)
if !ok {
return 0, false
}
i := sort.Search(len(coreTags), func(i int) bool {
return cci <= coreTags[i]
})
if i == len(coreTags) || coreTags[i] != cci {
return 0, false
}
return ID(i), true
}
// Parent returns the ID of the parent or the root ID if id is already the root.
func (id ID) Parent() ID {
return parents[id]
}
// Tag converts id to an internal language Tag.
func (id ID) Tag() language.Tag {
if int(id) >= len(coreTags) {
return specialTags[int(id)-len(coreTags)]
}
return coreTags[id].Tag()
}
var specialTags []language.Tag
func init() {
tags := strings.Split(specialTagsStr, " ")
specialTags = make([]language.Tag, len(tags))
for i, t := range tags {
specialTags[i] = language.MustParse(t)
}
}

View File

@ -0,0 +1,260 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_index.go -output tables.go
//go:generate go run gen_parents.go
package compact
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"strings"
"golang.org/x/text/internal/language"
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
// NOTE: exported tags will become part of the public API.
language ID
locale ID
full fullTag // always a language.Tag for now.
}
const _und = 0
type fullTag interface {
IsRoot() bool
Parent() language.Tag
}
// Make a compact Tag from a fully specified internal language Tag.
func Make(t language.Tag) (tag Tag) {
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
if r, err := language.ParseRegion(region[:2]); err == nil {
tFull := t
t, _ = t.SetTypeForKey("rg", "")
// TODO: should we not consider "va" for the language tag?
var exact1, exact2 bool
tag.language, exact1 = FromTag(t)
t.RegionID = r
tag.locale, exact2 = FromTag(t)
if !exact1 || !exact2 {
tag.full = tFull
}
return tag
}
}
lang, ok := FromTag(t)
tag.language = lang
tag.locale = lang
if !ok {
tag.full = t
}
return tag
}
// Tag returns an internal language Tag version of this tag.
func (t Tag) Tag() language.Tag {
if t.full != nil {
return t.full.(language.Tag)
}
tag := t.language.Tag()
if t.language != t.locale {
loc := t.locale.Tag()
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
}
return tag
}
// IsCompact reports whether this tag is fully defined in terms of ID.
func (t *Tag) IsCompact() bool {
return t.full == nil
}
// MayHaveVariants reports whether a tag may have variants. If it returns false
// it is guaranteed the tag does not have variants.
func (t Tag) MayHaveVariants() bool {
return t.full != nil || int(t.language) >= len(coreTags)
}
// MayHaveExtensions reports whether a tag may have extensions. If it returns
// false it is guaranteed the tag does not have them.
func (t Tag) MayHaveExtensions() bool {
return t.full != nil ||
int(t.language) >= len(coreTags) ||
t.language != t.locale
}
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
if t.full != nil {
return t.full.IsRoot()
}
return t.language == _und
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
if t.full != nil {
return Make(t.full.Parent())
}
if t.language != t.locale {
// Simulate stripping -u-rg-xxxxxx
return Tag{language: t.language, locale: t.language}
}
// TODO: use parent lookup table once cycle from internal package is
// removed. Probably by internalizing the table and declaring this fast
// enough.
// lang := compactID(internal.Parent(uint16(t.language)))
lang, _ := FromTag(t.language.Tag().Parent())
return Tag{language: lang, locale: lang}
}
// returns token t and the rest of the string.
func nextToken(s string) (t, tail string) {
p := strings.Index(s[1:], "-")
if p == -1 {
return s[1:], ""
}
p++
return s[1:p], s[p:]
}
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository.The index will change over time
// and should not be stored in persistent storage. If t does not match a compact
// index, exact will be false and the compact index will be returned for the
// first match after repeatedly taking the Parent of t.
func LanguageID(t Tag) (id ID, exact bool) {
return t.language, t.full == nil
}
// RegionalID returns the ID for the regional variant of this tag. This index is
// used to indicate region-specific overrides, such as default currency, default
// calendar and week data, default time cycle, and default measurement system
// and unit preferences.
//
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
// settings for currency, number formatting, etc. The CompactIndex for this tag
// will be that for en-GB, while the RegionalID will be the one corresponding to
// en-US.
func RegionalID(t Tag) (id ID, exact bool) {
return t.locale, t.full == nil
}
// LanguageTag returns t stripped of regional variant indicators.
//
// At the moment this means it is stripped of a regional and variant subtag "rg"
// and "va" in the "u" extension.
func (t Tag) LanguageTag() Tag {
if t.full == nil {
return Tag{language: t.language, locale: t.language}
}
tt := t.Tag()
tt.SetTypeForKey("rg", "")
tt.SetTypeForKey("va", "")
return Make(tt)
}
// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
func (t Tag) RegionalTag() Tag {
rt := Tag{language: t.locale, locale: t.locale}
if t.full == nil {
return rt
}
b := language.Builder{}
tag := t.Tag()
// tag, _ = tag.SetTypeForKey("rg", "")
b.SetTag(t.locale.Tag())
if v := tag.Variants(); v != "" {
for _, v := range strings.Split(v, "-") {
b.AddVariant(v)
}
}
for _, e := range tag.Extensions() {
b.AddExt(e)
}
return t
}
// FromTag reports closest matching ID for an internal language Tag.
func FromTag(t language.Tag) (id ID, exact bool) {
// TODO: perhaps give more frequent tags a lower index.
// TODO: we could make the indexes stable. This will excluded some
// possibilities for optimization, so don't do this quite yet.
exact = true
b, s, r := t.Raw()
if t.HasString() {
if t.IsPrivateUse() {
// We have no entries for user-defined tags.
return 0, false
}
hasExtra := false
if t.HasVariants() {
if t.HasExtensions() {
build := language.Builder{}
build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
build.AddVariant(t.Variants())
exact = false
t = build.Make()
}
hasExtra = true
} else if _, ok := t.Extension('u'); ok {
// TODO: va may mean something else. Consider not considering it.
// Strip all but the 'va' entry.
old := t
variant := t.TypeForKey("va")
t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
if variant != "" {
t, _ = t.SetTypeForKey("va", variant)
hasExtra = true
}
exact = old == t
} else {
exact = false
}
if hasExtra {
// We have some variants.
for i, s := range specialTags {
if s == t {
return ID(i + len(coreTags)), exact
}
}
exact = false
}
}
if x, ok := getCoreIndex(t); ok {
return x, exact
}
exact = false
if r != 0 && s == 0 {
// Deal with cases where an extra script is inserted for the region.
t, _ := t.Maximize()
if x, ok := getCoreIndex(t); ok {
return x, exact
}
}
for t = t.Parent(); t != root; t = t.Parent() {
// No variants specified: just compare core components.
// The key has the form lllssrrr, where l, s, and r are nibbles for
// respectively the langID, scriptID, and regionID.
if x, ok := getCoreIndex(t); ok {
return x, exact
}
}
return 0, exact
}
var root = language.Tag{}

View File

@ -0,0 +1,120 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package compact
// parents maps a compact index of a tag to the compact index of the parent of
// this tag.
var parents = []ID{ // 775 elements
// Entry 0 - 3F
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
// Entry 40 - 7F
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
// Entry 80 - BF
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
// Entry C0 - FF
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
// Entry 100 - 13F
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
// Entry 140 - 17F
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
// Entry 200 - 23F
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
// Entry 240 - 27F
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
// Entry 280 - 2BF
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
// Entry 2C0 - 2FF
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
// Entry 300 - 33F
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
} // Size: 1574 bytes
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,91 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
var (
und = Tag{}
Und Tag = Tag{}
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
Amharic Tag = Tag{language: amIndex, locale: amIndex}
Arabic Tag = Tag{language: arIndex, locale: arIndex}
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
Catalan Tag = Tag{language: caIndex, locale: caIndex}
Czech Tag = Tag{language: csIndex, locale: csIndex}
Danish Tag = Tag{language: daIndex, locale: daIndex}
German Tag = Tag{language: deIndex, locale: deIndex}
Greek Tag = Tag{language: elIndex, locale: elIndex}
English Tag = Tag{language: enIndex, locale: enIndex}
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
Spanish Tag = Tag{language: esIndex, locale: esIndex}
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
Estonian Tag = Tag{language: etIndex, locale: etIndex}
Persian Tag = Tag{language: faIndex, locale: faIndex}
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
Filipino Tag = Tag{language: filIndex, locale: filIndex}
French Tag = Tag{language: frIndex, locale: frIndex}
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
Italian Tag = Tag{language: itIndex, locale: itIndex}
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
Kannada Tag = Tag{language: knIndex, locale: knIndex}
Korean Tag = Tag{language: koIndex, locale: koIndex}
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
Lao Tag = Tag{language: loIndex, locale: loIndex}
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
Malay Tag = Tag{language: msIndex, locale: msIndex}
Burmese Tag = Tag{language: myIndex, locale: myIndex}
Nepali Tag = Tag{language: neIndex, locale: neIndex}
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
Polish Tag = Tag{language: plIndex, locale: plIndex}
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
Romanian Tag = Tag{language: roIndex, locale: roIndex}
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
Slovak Tag = Tag{language: skIndex, locale: skIndex}
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
Serbian Tag = Tag{language: srIndex, locale: srIndex}
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
Swedish Tag = Tag{language: svIndex, locale: svIndex}
Swahili Tag = Tag{language: swIndex, locale: swIndex}
Tamil Tag = Tag{language: taIndex, locale: taIndex}
Telugu Tag = Tag{language: teIndex, locale: teIndex}
Thai Tag = Tag{language: thIndex, locale: thIndex}
Turkish Tag = Tag{language: trIndex, locale: trIndex}
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
Urdu Tag = Tag{language: urIndex, locale: urIndex}
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
)

167
vendor/golang.org/x/text/internal/language/compose.go generated vendored Normal file
View File

@ -0,0 +1,167 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"sort"
"strings"
)
// A Builder allows constructing a Tag from individual components.
// Its main user is Compose in the top-level language package.
type Builder struct {
Tag Tag
private string // the x extension
variants []string
extensions []string
}
// Make returns a new Tag from the current settings.
func (b *Builder) Make() Tag {
t := b.Tag
if len(b.extensions) > 0 || len(b.variants) > 0 {
sort.Sort(sortVariants(b.variants))
sort.Strings(b.extensions)
if b.private != "" {
b.extensions = append(b.extensions, b.private)
}
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
buf := make([]byte, n)
p := t.genCoreBytes(buf)
t.pVariant = byte(p)
p += appendTokens(buf[p:], b.variants...)
t.pExt = uint16(p)
p += appendTokens(buf[p:], b.extensions...)
t.str = string(buf[:p])
// We may not always need to remake the string, but when or when not
// to do so is rather tricky.
scan := makeScanner(buf[:p])
t, _ = parse(&scan, "")
return t
} else if b.private != "" {
t.str = b.private
t.RemakeString()
}
return t
}
// SetTag copies all the settings from a given Tag. Any previously set values
// are discarded.
func (b *Builder) SetTag(t Tag) {
b.Tag.LangID = t.LangID
b.Tag.RegionID = t.RegionID
b.Tag.ScriptID = t.ScriptID
// TODO: optimize
b.variants = b.variants[:0]
if variants := t.Variants(); variants != "" {
for _, vr := range strings.Split(variants[1:], "-") {
b.variants = append(b.variants, vr)
}
}
b.extensions, b.private = b.extensions[:0], ""
for _, e := range t.Extensions() {
b.AddExt(e)
}
}
// AddExt adds extension e to the tag. e must be a valid extension as returned
// by Tag.Extension. If the extension already exists, it will be discarded,
// except for a -u extension, where non-existing key-type pairs will added.
func (b *Builder) AddExt(e string) {
if e[0] == 'x' {
if b.private == "" {
b.private = e
}
return
}
for i, s := range b.extensions {
if s[0] == e[0] {
if e[0] == 'u' {
b.extensions[i] += e[1:]
}
return
}
}
b.extensions = append(b.extensions, e)
}
// SetExt sets the extension e to the tag. e must be a valid extension as
// returned by Tag.Extension. If the extension already exists, it will be
// overwritten, except for a -u extension, where the individual key-type pairs
// will be set.
func (b *Builder) SetExt(e string) {
if e[0] == 'x' {
b.private = e
return
}
for i, s := range b.extensions {
if s[0] == e[0] {
if e[0] == 'u' {
b.extensions[i] = e + s[1:]
} else {
b.extensions[i] = e
}
return
}
}
b.extensions = append(b.extensions, e)
}
// AddVariant adds any number of variants.
func (b *Builder) AddVariant(v ...string) {
for _, v := range v {
if v != "" {
b.variants = append(b.variants, v)
}
}
}
// ClearVariants removes any variants previously added, including those
// copied from a Tag in SetTag.
func (b *Builder) ClearVariants() {
b.variants = b.variants[:0]
}
// ClearExtensions removes any extensions previously added, including those
// copied from a Tag in SetTag.
func (b *Builder) ClearExtensions() {
b.private = ""
b.extensions = b.extensions[:0]
}
func tokenLen(token ...string) (n int) {
for _, t := range token {
n += len(t) + 1
}
return
}
func appendTokens(b []byte, token ...string) int {
p := 0
for _, t := range token {
b[p] = '-'
copy(b[p+1:], t)
p += 1 + len(t)
}
return p
}
type sortVariants []string
func (s sortVariants) Len() int {
return len(s)
}
func (s sortVariants) Swap(i, j int) {
s[j], s[i] = s[i], s[j]
}
func (s sortVariants) Less(i, j int) bool {
return variantIndex[s[i]] < variantIndex[s[j]]
}

28
vendor/golang.org/x/text/internal/language/coverage.go generated vendored Normal file
View File

@ -0,0 +1,28 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures.
func BaseLanguages() []Language {
base := make([]Language, 0, NumLanguages)
for i := 0; i < langNoIndexOffset; i++ {
// We included "und" already for the value 0.
if i != nonCanonicalUnd {
base = append(base, Language(i))
}
}
i := langNoIndexOffset
for _, v := range langNoIndex {
for k := 0; k < 8; k++ {
if v&1 == 1 {
base = append(base, Language(i))
}
v >>= 1
i++
}
}
return base
}

596
vendor/golang.org/x/text/internal/language/language.go generated vendored Normal file
View File

@ -0,0 +1,596 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go
package language // import "golang.org/x/text/internal/language"
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"errors"
"fmt"
"strings"
)
const (
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
maxCoreSize = 12
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
max99thPercentileSize = 32
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
maxSimpleUExtensionSize = 14
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed. The zero value of Tag is Und.
type Tag struct {
// TODO: the following fields have the form TagTypeID. This name is chosen
// to allow refactoring the public package without conflicting with its
// Base, Script, and Region methods. Once the transition is fully completed
// the ID can be stripped from the name.
LangID Language
RegionID Region
// TODO: we will soon run out of positions for ScriptID. Idea: instead of
// storing lang, region, and ScriptID codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
ScriptID Script
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
// str is the string representation of the Tag. It will only be used if the
// tag has variants or extensions.
str string
}
// Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned.
func Make(s string) Tag {
t, _ := Parse(s)
return t
}
// Raw returns the raw base language, script and region, without making an
// attempt to infer their values.
// TODO: consider removing
func (t Tag) Raw() (b Language, s Script, r Region) {
return t.LangID, t.ScriptID, t.RegionID
}
// equalTags compares language, script and region subtags only.
func (t Tag) equalTags(a Tag) bool {
return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
}
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
if int(t.pVariant) < len(t.str) {
return false
}
return t.equalTags(Und)
}
// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
// tag.
func (t Tag) IsPrivateUse() bool {
return t.str != "" && t.pVariant == 0
}
// RemakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
func (t *Tag) RemakeString() {
if t.str == "" {
return
}
extra := t.str[t.pVariant:]
if t.pVariant > 0 {
extra = extra[1:]
}
if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
t.str = extra
t.pVariant = 0
t.pExt = 0
return
}
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
b := buf[:t.genCoreBytes(buf[:])]
if extra != "" {
diff := len(b) - int(t.pVariant)
b = append(b, '-')
b = append(b, extra...)
t.pVariant = uint8(int(t.pVariant) + diff)
t.pExt = uint16(int(t.pExt) + diff)
} else {
t.pVariant = uint8(len(b))
t.pExt = uint16(len(b))
}
t.str = string(b)
}
// genCoreBytes writes a string for the base languages, script and region tags
// to the given buffer and returns the number of bytes written. It will never
// write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
n := t.LangID.StringToBuf(buf[:])
if t.ScriptID != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.ScriptID.String())
}
if t.RegionID != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.RegionID.String())
}
return n
}
// String returns the canonical string representation of the language tag.
func (t Tag) String() string {
if t.str != "" {
return t.str
}
if t.ScriptID == 0 && t.RegionID == 0 {
return t.LangID.String()
}
buf := [maxCoreSize]byte{}
return string(buf[:t.genCoreBytes(buf[:])])
}
// MarshalText implements encoding.TextMarshaler.
func (t Tag) MarshalText() (text []byte, err error) {
if t.str != "" {
text = append(text, t.str...)
} else if t.ScriptID == 0 && t.RegionID == 0 {
text = append(text, t.LangID.String()...)
} else {
buf := [maxCoreSize]byte{}
text = buf[:t.genCoreBytes(buf[:])]
}
return text, nil
}
// UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error {
tag, err := Parse(string(text))
*t = tag
return err
}
// Variants returns the part of the tag holding all variants or the empty string
// if there are no variants defined.
func (t Tag) Variants() string {
if t.pVariant == 0 {
return ""
}
return t.str[t.pVariant:t.pExt]
}
// VariantOrPrivateUseTags returns variants or private use tags.
func (t Tag) VariantOrPrivateUseTags() string {
if t.pExt > 0 {
return t.str[t.pVariant:t.pExt]
}
return t.str[t.pVariant:]
}
// HasString reports whether this tag defines more than just the raw
// components.
func (t Tag) HasString() bool {
return t.str != ""
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
if t.str != "" {
// Strip the variants and extensions.
b, s, r := t.Raw()
t = Tag{LangID: b, ScriptID: s, RegionID: r}
if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID == t.ScriptID {
return Tag{LangID: t.LangID}
}
}
return t
}
if t.LangID != 0 {
if t.RegionID != 0 {
maxScript := t.ScriptID
if maxScript == 0 {
max, _ := addTags(t)
maxScript = max.ScriptID
}
for i := range parents {
if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
if Region(r) == t.RegionID {
return Tag{
LangID: t.LangID,
ScriptID: Script(parents[i].script),
RegionID: Region(parents[i].toRegion),
}
}
}
}
}
// Strip the script if it is the default one.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID != maxScript {
return Tag{LangID: t.LangID, ScriptID: maxScript}
}
return Tag{LangID: t.LangID}
} else if t.ScriptID != 0 {
// The parent for an base-script pair with a non-default script is
// "und" instead of the base language.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID != t.ScriptID {
return Und
}
return Tag{LangID: t.LangID}
}
}
return Und
}
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
return "", ErrSyntax
}
scan.toLower(0, len(scan.b))
end = parseExtension(&scan)
if end != len(s) {
return "", ErrSyntax
}
return string(scan.b), nil
}
// HasVariants reports whether t has variants.
func (t Tag) HasVariants() bool {
return uint16(t.pVariant) < t.pExt
}
// HasExtensions reports whether t has extensions.
func (t Tag) HasExtensions() bool {
return int(t.pExt) < len(t.str)
}
// Extension returns the extension of type x for tag t. It will return
// false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case.
func (t Tag) Extension(x byte) (ext string, ok bool) {
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
if ext[0] == x {
return ext, true
}
}
return "", false
}
// Extensions returns all extensions of t.
func (t Tag) Extensions() []string {
e := []string{}
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
e = append(e, ext)
}
return e
}
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
func (t Tag) TypeForKey(key string) string {
if start, end, _ := t.findTypeForKey(key); end != start {
return t.str[start:end]
}
return ""
}
var (
errPrivateUse = errors.New("cannot set a key on a private use tag")
errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
if t.IsPrivateUse() {
return t, errPrivateUse
}
if len(key) != 2 {
return t, errInvalidArguments
}
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
start -= 2
}
if start == int(t.pVariant) && end == len(t.str) {
t.str = ""
t.pVariant, t.pExt = 0, 0
} else {
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
}
}
return t, nil
}
if len(value) < 3 || len(value) > 8 {
return t, errInvalidArguments
}
var (
buf [maxCoreSize + maxSimpleUExtensionSize]byte
uStart int // start of the -u extension.
)
// Generate the tag string if needed.
if t.str == "" {
uStart = t.genCoreBytes(buf[:])
buf[uStart] = '-'
uStart++
}
// Create new key-type pair and parse it to verify.
b := buf[uStart:]
copy(b, "u-")
copy(b[2:], key)
b[4] = '-'
b = b[:5+copy(b[5:], value)]
scan := makeScanner(b)
if parseExtensions(&scan); scan.err != nil {
return t, scan.err
}
// Assemble the replacement string.
if t.str == "" {
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
} else {
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
}
}
return t, nil
}
// findKeyAndType returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
p := int(t.pExt)
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
}
s := t.str
// Find the correct extension.
for p++; s[p] != 'u'; p++ {
if s[p] > 'u' {
p--
return p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
p++
// curKey is the key currently being processed.
curKey := ""
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
if curKey > key {
return p, p, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
}
}
}
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
var buf [3]byte
return getLangID(buf[:copy(buf[:], s)])
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
if len(s) != 4 {
return 0, ErrSyntax
}
var buf [4]byte
return getScriptID(script, buf[:copy(buf[:], s)])
}
// EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code.
func EncodeM49(r int) (Region, error) {
return getRegionM49(r)
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
var buf [3]byte
return getRegionID(buf[:copy(buf[:], s)])
}
// IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR.
func (r Region) IsCountry() bool {
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
return false
}
return true
}
// IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR.
func (r Region) IsGroup() bool {
if r == 0 {
return false
}
return int(regionInclusion[r]) < len(regionContainment)
}
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r.
func (r Region) Contains(c Region) bool {
if r == c {
return true
}
g := regionInclusion[r]
if g >= nRegionGroups {
return false
}
m := regionContainment[g]
d := regionInclusion[c]
b := regionInclusionBits[d]
// A contained country may belong to multiple disjoint groups. Matching any
// of these indicates containment. If the contained region is a group, it
// must strictly be a subset.
if d >= nRegionGroups {
return b&m != 0
}
return b&^m == 0
}
var errNoTLD = errors.New("language: region is not a valid ccTLD")
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
// difference between ISO 3166-1 and IANA ccTLD.
if r == _GB {
r = _UK
}
if (r.typ() & ccTLD) == 0 {
return 0, errNoTLD
}
return r, nil
}
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
if cr := normRegion(r); cr != 0 {
return cr
}
return r
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
ID uint8
str string
}
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
s = strings.ToLower(s)
if id, ok := variantIndex[s]; ok {
return Variant{id, s}, nil
}
return Variant{}, NewValueError([]byte(s))
}
// String returns the string representation of the variant.
func (v Variant) String() string {
return v.str
}

View File

@ -17,11 +17,11 @@ import (
// if it could not be found. // if it could not be found.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) { func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
if !tag.FixCase(form, key) { if !tag.FixCase(form, key) {
return 0, errSyntax return 0, ErrSyntax
} }
i := idx.Index(key) i := idx.Index(key)
if i == -1 { if i == -1 {
return 0, mkErrInvalid(key) return 0, NewValueError(key)
} }
return i, nil return i, nil
} }
@ -32,38 +32,45 @@ func searchUint(imap []uint16, key uint16) int {
}) })
} }
type langID uint16 type Language uint16
// getLangID returns the langID of s if s is a canonical subtag // getLangID returns the langID of s if s is a canonical subtag
// or langUnknown if s is not a canonical subtag. // or langUnknown if s is not a canonical subtag.
func getLangID(s []byte) (langID, error) { func getLangID(s []byte) (Language, error) {
if len(s) == 2 { if len(s) == 2 {
return getLangISO2(s) return getLangISO2(s)
} }
return getLangISO3(s) return getLangISO3(s)
} }
// TODO language normalization as well as the AliasMaps could be moved to the
// higher level package, but it is a bit tricky to separate the generation.
func (id Language) Canonicalize() (Language, AliasType) {
return normLang(id)
}
// mapLang returns the mapped langID of id according to mapping m. // mapLang returns the mapped langID of id according to mapping m.
func normLang(id langID) (langID, langAliasType) { func normLang(id Language) (Language, AliasType) {
k := sort.Search(len(langAliasMap), func(i int) bool { k := sort.Search(len(AliasMap), func(i int) bool {
return langAliasMap[i].from >= uint16(id) return AliasMap[i].From >= uint16(id)
}) })
if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) { if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
return langID(langAliasMap[k].to), langAliasTypes[k] return Language(AliasMap[k].To), AliasTypes[k]
} }
return id, langAliasTypeUnknown return id, AliasTypeUnknown
} }
// getLangISO2 returns the langID for the given 2-letter ISO language code // getLangISO2 returns the langID for the given 2-letter ISO language code
// or unknownLang if this does not exist. // or unknownLang if this does not exist.
func getLangISO2(s []byte) (langID, error) { func getLangISO2(s []byte) (Language, error) {
if !tag.FixCase("zz", s) { if !tag.FixCase("zz", s) {
return 0, errSyntax return 0, ErrSyntax
} }
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 { if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
return langID(i), nil return Language(i), nil
} }
return 0, mkErrInvalid(s) return 0, NewValueError(s)
} }
const base = 'z' - 'a' + 1 const base = 'z' - 'a' + 1
@ -88,7 +95,7 @@ func intToStr(v uint, s []byte) {
// getLangISO3 returns the langID for the given 3-letter ISO language code // getLangISO3 returns the langID for the given 3-letter ISO language code
// or unknownLang if this does not exist. // or unknownLang if this does not exist.
func getLangISO3(s []byte) (langID, error) { func getLangISO3(s []byte) (Language, error) {
if tag.FixCase("und", s) { if tag.FixCase("und", s) {
// first try to match canonical 3-letter entries // first try to match canonical 3-letter entries
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) { for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
@ -96,7 +103,7 @@ func getLangISO3(s []byte) (langID, error) {
// We treat "und" as special and always translate it to "unspecified". // We treat "und" as special and always translate it to "unspecified".
// Note that ZZ and Zzzz are private use and are not treated as // Note that ZZ and Zzzz are private use and are not treated as
// unspecified by default. // unspecified by default.
id := langID(i) id := Language(i)
if id == nonCanonicalUnd { if id == nonCanonicalUnd {
return 0, nil return 0, nil
} }
@ -104,26 +111,26 @@ func getLangISO3(s []byte) (langID, error) {
} }
} }
if i := altLangISO3.Index(s); i != -1 { if i := altLangISO3.Index(s); i != -1 {
return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
} }
n := strToInt(s) n := strToInt(s)
if langNoIndex[n/8]&(1<<(n%8)) != 0 { if langNoIndex[n/8]&(1<<(n%8)) != 0 {
return langID(n) + langNoIndexOffset, nil return Language(n) + langNoIndexOffset, nil
} }
// Check for non-canonical uses of ISO3. // Check for non-canonical uses of ISO3.
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) { for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] { if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
return langID(i), nil return Language(i), nil
} }
} }
return 0, mkErrInvalid(s) return 0, NewValueError(s)
} }
return 0, errSyntax return 0, ErrSyntax
} }
// stringToBuf writes the string to b and returns the number of bytes // StringToBuf writes the string to b and returns the number of bytes
// written. cap(b) must be >= 3. // written. cap(b) must be >= 3.
func (id langID) stringToBuf(b []byte) int { func (id Language) StringToBuf(b []byte) int {
if id >= langNoIndexOffset { if id >= langNoIndexOffset {
intToStr(uint(id)-langNoIndexOffset, b[:3]) intToStr(uint(id)-langNoIndexOffset, b[:3])
return 3 return 3
@ -140,7 +147,7 @@ func (id langID) stringToBuf(b []byte) int {
// String returns the BCP 47 representation of the langID. // String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable // Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded. // used is consistent with that of Base in which this type is embedded.
func (b langID) String() string { func (b Language) String() string {
if b == 0 { if b == 0 {
return "und" return "und"
} else if b >= langNoIndexOffset { } else if b >= langNoIndexOffset {
@ -157,7 +164,7 @@ func (b langID) String() string {
} }
// ISO3 returns the ISO 639-3 language code. // ISO3 returns the ISO 639-3 language code.
func (b langID) ISO3() string { func (b Language) ISO3() string {
if b == 0 || b >= langNoIndexOffset { if b == 0 || b >= langNoIndexOffset {
return b.String() return b.String()
} }
@ -173,15 +180,24 @@ func (b langID) ISO3() string {
} }
// IsPrivateUse reports whether this language code is reserved for private use. // IsPrivateUse reports whether this language code is reserved for private use.
func (b langID) IsPrivateUse() bool { func (b Language) IsPrivateUse() bool {
return langPrivateStart <= b && b <= langPrivateEnd return langPrivateStart <= b && b <= langPrivateEnd
} }
type regionID uint16 // SuppressScript returns the script marked as SuppressScript in the IANA
// language tag repository, or 0 if there is no such script.
func (b Language) SuppressScript() Script {
if b < langNoIndexOffset {
return Script(suppressScript[b])
}
return 0
}
type Region uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code // getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion. // or unknownRegion.
func getRegionID(s []byte) (regionID, error) { func getRegionID(s []byte) (Region, error) {
if len(s) == 3 { if len(s) == 3 {
if isAlpha(s[0]) { if isAlpha(s[0]) {
return getRegionISO3(s) return getRegionISO3(s)
@ -195,34 +211,34 @@ func getRegionID(s []byte) (regionID, error) {
// getRegionISO2 returns the regionID for the given 2-letter ISO country code // getRegionISO2 returns the regionID for the given 2-letter ISO country code
// or unknownRegion if this does not exist. // or unknownRegion if this does not exist.
func getRegionISO2(s []byte) (regionID, error) { func getRegionISO2(s []byte) (Region, error) {
i, err := findIndex(regionISO, s, "ZZ") i, err := findIndex(regionISO, s, "ZZ")
if err != nil { if err != nil {
return 0, err return 0, err
} }
return regionID(i) + isoRegionOffset, nil return Region(i) + isoRegionOffset, nil
} }
// getRegionISO3 returns the regionID for the given 3-letter ISO country code // getRegionISO3 returns the regionID for the given 3-letter ISO country code
// or unknownRegion if this does not exist. // or unknownRegion if this does not exist.
func getRegionISO3(s []byte) (regionID, error) { func getRegionISO3(s []byte) (Region, error) {
if tag.FixCase("ZZZ", s) { if tag.FixCase("ZZZ", s) {
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) { for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] { if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
return regionID(i) + isoRegionOffset, nil return Region(i) + isoRegionOffset, nil
} }
} }
for i := 0; i < len(altRegionISO3); i += 3 { for i := 0; i < len(altRegionISO3); i += 3 {
if tag.Compare(altRegionISO3[i:i+3], s) == 0 { if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
return regionID(altRegionIDs[i/3]), nil return Region(altRegionIDs[i/3]), nil
} }
} }
return 0, mkErrInvalid(s) return 0, NewValueError(s)
} }
return 0, errSyntax return 0, ErrSyntax
} }
func getRegionM49(n int) (regionID, error) { func getRegionM49(n int) (Region, error) {
if 0 < n && n <= 999 { if 0 < n && n <= 999 {
const ( const (
searchBits = 7 searchBits = 7
@ -236,7 +252,7 @@ func getRegionM49(n int) (regionID, error) {
return buf[i] >= val return buf[i] >= val
}) })
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val { if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
return regionID(r & regionMask), nil return Region(r & regionMask), nil
} }
} }
var e ValueError var e ValueError
@ -247,13 +263,13 @@ func getRegionM49(n int) (regionID, error) {
// normRegion returns a region if r is deprecated or 0 otherwise. // normRegion returns a region if r is deprecated or 0 otherwise.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ). // TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR). // TODO: consider mapping split up regions to new most populous one (like CLDR).
func normRegion(r regionID) regionID { func normRegion(r Region) Region {
m := regionOldMap m := regionOldMap
k := sort.Search(len(m), func(i int) bool { k := sort.Search(len(m), func(i int) bool {
return m[i].from >= uint16(r) return m[i].From >= uint16(r)
}) })
if k < len(m) && m[k].from == uint16(r) { if k < len(m) && m[k].From == uint16(r) {
return regionID(m[k].to) return Region(m[k].To)
} }
return 0 return 0
} }
@ -264,13 +280,13 @@ const (
bcp47Region bcp47Region
) )
func (r regionID) typ() byte { func (r Region) typ() byte {
return regionTypes[r] return regionTypes[r]
} }
// String returns the BCP 47 representation for the region. // String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region. // It returns "ZZ" for an unspecified region.
func (r regionID) String() string { func (r Region) String() string {
if r < isoRegionOffset { if r < isoRegionOffset {
if r == 0 { if r == 0 {
return "ZZ" return "ZZ"
@ -284,7 +300,7 @@ func (r regionID) String() string {
// ISO3 returns the 3-letter ISO code of r. // ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code. // Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ". // In such cases this method returns "ZZZ".
func (r regionID) ISO3() string { func (r Region) ISO3() string {
if r < isoRegionOffset { if r < isoRegionOffset {
return "ZZZ" return "ZZZ"
} }
@ -301,29 +317,29 @@ func (r regionID) ISO3() string {
// M49 returns the UN M.49 encoding of r, or 0 if this encoding // M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r. // is not defined for r.
func (r regionID) M49() int { func (r Region) M49() int {
return int(m49[r]) return int(m49[r])
} }
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This // IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this // may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true. // implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r regionID) IsPrivateUse() bool { func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0 return r.typ()&iso3166UserAssigned != 0
} }
type scriptID uint8 type Script uint8
// getScriptID returns the script id for string s. It assumes that s // getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}. // is of the format [A-Z][a-z]{3}.
func getScriptID(idx tag.Index, s []byte) (scriptID, error) { func getScriptID(idx tag.Index, s []byte) (Script, error) {
i, err := findIndex(idx, s, "Zzzz") i, err := findIndex(idx, s, "Zzzz")
return scriptID(i), err return Script(i), err
} }
// String returns the script code in title case. // String returns the script code in title case.
// It returns "Zzzz" for an unspecified script. // It returns "Zzzz" for an unspecified script.
func (s scriptID) String() string { func (s Script) String() string {
if s == 0 { if s == 0 {
return "Zzzz" return "Zzzz"
} }
@ -331,7 +347,7 @@ func (s scriptID) String() string {
} }
// IsPrivateUse reports whether this script code is reserved for private use. // IsPrivateUse reports whether this script code is reserved for private use.
func (s scriptID) IsPrivateUse() bool { func (s Script) IsPrivateUse() bool {
return _Qaaa <= s && s <= _Qabx return _Qaaa <= s && s <= _Qabx
} }
@ -389,7 +405,7 @@ func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
if v < 0 { if v < 0 {
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
} }
t.lang = langID(v) t.LangID = Language(v)
return t, true return t, true
} }
return t, false return t, false

226
vendor/golang.org/x/text/internal/language/match.go generated vendored Normal file
View File

@ -0,0 +1,226 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import "errors"
type scriptRegionFlags uint8
const (
isList = 1 << iota
scriptInFrom
regionInFrom
)
func (t *Tag) setUndefinedLang(id Language) {
if t.LangID == 0 {
t.LangID = id
}
}
func (t *Tag) setUndefinedScript(id Script) {
if t.ScriptID == 0 {
t.ScriptID = id
}
}
func (t *Tag) setUndefinedRegion(id Region) {
if t.RegionID == 0 || t.RegionID.Contains(id) {
t.RegionID = id
}
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags sets subtags to their most likely value, given the locale.
// In most cases this means setting fields for unknown values, but in some
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
// if the given locale cannot be expanded.
func (t Tag) addLikelySubtags() (Tag, error) {
id, err := addTags(t)
if err != nil {
return t, err
} else if id.equalTags(t) {
return t, nil
}
id.RemakeString()
return id, nil
}
// specializeRegion attempts to specialize a group region.
func specializeRegion(t *Tag) bool {
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
x := likelyRegionGroup[i]
if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
t.RegionID = Region(x.region)
}
return true
}
return false
}
// Maximize returns a new tag with missing tags filled in.
func (t Tag) Maximize() (Tag, error) {
return addTags(t)
}
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
if t.IsPrivateUse() {
return t, nil
}
if t.ScriptID != 0 && t.RegionID != 0 {
if t.LangID != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
list := likelyRegion[t.RegionID : t.RegionID+1]
if x := list[0]; x.flags&isList != 0 {
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
if Script(x.script) == t.ScriptID {
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
}
if t.LangID != 0 {
// Search matches for lang-script and lang-region, where lang != und.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
list := likelyLangList[x.region : x.region+uint16(x.script)]
if t.ScriptID != 0 {
for _, x := range list {
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
t.setUndefinedRegion(Region(x.region))
return t, nil
}
}
} else if t.RegionID != 0 {
count := 0
goodScript := true
tt := t
for _, x := range list {
// We visit all entries for which the script was not
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
tt.RegionID = Region(x.region)
tt.setUndefinedScript(Script(x.script))
goodScript = goodScript && tt.ScriptID == Script(x.script)
count++
}
}
if count == 1 {
return tt, nil
}
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
t.ScriptID = tt.ScriptID
}
}
}
}
} else {
// Search matches for und-script.
if t.ScriptID != 0 {
x := likelyScript[t.ScriptID]
if x.region != 0 {
t.setUndefinedRegion(Region(x.region))
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
if t.RegionID != 0 {
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
x := likelyRegionGroup[i]
if x.region != 0 {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
t.RegionID = Region(x.region)
}
} else {
x := likelyRegion[t.RegionID]
if x.flags&isList != 0 {
x = likelyRegionList[x.lang]
}
if x.script != 0 && x.flags != scriptInFrom {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
return t, nil
}
}
}
}
// Search matches for lang.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
x = likelyLangList[x.region]
}
if x.region != 0 {
t.setUndefinedScript(Script(x.script))
t.setUndefinedRegion(Region(x.region))
}
specializeRegion(&t)
if t.LangID == 0 {
t.LangID = _en // default language
}
return t, nil
}
return t, ErrMissingLikelyTagsData
}
func (t *Tag) setTagsFrom(id Tag) {
t.LangID = id.LangID
t.ScriptID = id.ScriptID
t.RegionID = id.RegionID
}
// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
func (t Tag) minimize() (Tag, error) {
t, err := minimizeTags(t)
if err != nil {
return t, err
}
t.RemakeString()
return t, nil
}
// minimizeTags mimics the behavior of the ICU 51 C implementation.
func minimizeTags(t Tag) (Tag, error) {
if t.equalTags(Und) {
return t, nil
}
max, err := addTags(t)
if err != nil {
return t, err
}
for _, id := range [...]Tag{
{LangID: t.LangID},
{LangID: t.LangID, RegionID: t.RegionID},
{LangID: t.LangID, ScriptID: t.ScriptID},
} {
if x, err := addTags(id); err == nil && max.equalTags(x) {
t.setTagsFrom(id)
break
}
}
return t, nil
}

594
vendor/golang.org/x/text/internal/language/parse.go generated vendored Normal file
View File

@ -0,0 +1,594 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"errors"
"fmt"
"sort"
"golang.org/x/text/internal/tag"
)
// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit.
func isAlpha(b byte) bool {
return b > '9'
}
// isAlphaNum returns true if the string contains only ASCII letters or digits.
func isAlphaNum(s []byte) bool {
for _, c := range s {
if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
return false
}
}
return true
}
// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
var ErrSyntax = errors.New("language: tag is not well-formed")
// ErrDuplicateKey is returned when a tag contains the same key twice with
// different values in the -u section.
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
v [8]byte
}
// NewValueError creates a new ValueError.
func NewValueError(tag []byte) ValueError {
var e ValueError
copy(e.v[:], tag)
return e
}
func (e ValueError) tag() []byte {
n := bytes.IndexByte(e.v[:], 0)
if n == -1 {
n = 8
}
return e.v[:n]
}
// Error implements the error interface.
func (e ValueError) Error() string {
return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}
// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
return string(e.tag())
}
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
b []byte
bytes [max99thPercentileSize]byte
token []byte
start int // start position of the current token
end int // end position of the current token
next int // next point for scan
err error
done bool
}
func makeScannerString(s string) scanner {
scan := scanner{}
if len(s) <= len(scan.bytes) {
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
} else {
scan.b = []byte(s)
}
scan.init()
return scan
}
// makeScanner returns a scanner using b as the input buffer.
// b is not copied and may be modified by the scanner routines.
func makeScanner(b []byte) scanner {
scan := scanner{b: b}
scan.init()
return scan
}
func (s *scanner) init() {
for i, c := range s.b {
if c == '_' {
s.b[i] = '-'
}
}
s.scan()
}
// restToLower converts the string between start and end to lower case.
func (s *scanner) toLower(start, end int) {
for i := start; i < end; i++ {
c := s.b[i]
if 'A' <= c && c <= 'Z' {
s.b[i] += 'a' - 'A'
}
}
}
func (s *scanner) setError(e error) {
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
s.err = e
}
}
// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
s.start = oldStart
if end := oldStart + newSize; end != oldEnd {
diff := end - oldEnd
if end < cap(s.b) {
b := make([]byte, len(s.b)+diff)
copy(b, s.b[:oldStart])
copy(b[end:], s.b[oldEnd:])
s.b = b
} else {
s.b = append(s.b[end:], s.b[oldEnd:]...)
}
s.next = end + (s.next - s.end)
s.end = end
}
}
// replace replaces the current token with repl.
func (s *scanner) replace(repl string) {
s.resizeRange(s.start, s.end, len(repl))
copy(s.b[s.start:], repl)
}
// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func (s *scanner) gobble(e error) {
s.setError(e)
if s.start == 0 {
s.b = s.b[:+copy(s.b, s.b[s.next:])]
s.end = 0
} else {
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
s.end = s.start - 1
}
s.next = s.start
}
// deleteRange removes the given range from s.b before the current token.
func (s *scanner) deleteRange(start, end int) {
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
diff := end - start
s.next -= diff
s.start -= diff
s.end -= diff
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
func (s *scanner) scan() (end int) {
end = s.end
s.token = nil
for s.start = s.next; s.next < len(s.b); {
i := bytes.IndexByte(s.b[s.next:], '-')
if i == -1 {
s.end = len(s.b)
s.next = len(s.b)
i = s.end - s.start
} else {
s.end = s.next + i
s.next = s.end + 1
}
token := s.b[s.start:s.end]
if i < 1 || i > 8 || !isAlphaNum(token) {
s.gobble(ErrSyntax)
continue
}
s.token = token
return end
}
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
s.setError(ErrSyntax)
s.b = s.b[:len(s.b)-1]
}
s.done = true
return end
}
// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed.
func (s *scanner) acceptMinSize(min int) (end int) {
end = s.end
s.scan()
for ; len(s.token) >= min; s.scan() {
end = s.end
}
return end
}
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
func Parse(s string) (t Tag, err error) {
// TODO: consider supporting old-style locale key-value pairs.
if s == "" {
return Und, ErrSyntax
}
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {
// Generating invalid UTF-8 is okay as it won't match.
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
} else if c == '_' {
c = '-'
}
b[i] = byte(c)
}
if t, ok := grandfathered(b); ok {
return t, nil
}
}
scan := makeScannerString(s)
return parse(&scan, s)
}
func parse(scan *scanner, s string) (t Tag, err error) {
t = Und
var end int
if n := len(scan.token); n <= 1 {
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
return t, ErrSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
return Und, ErrSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
scan.setError(ErrSyntax)
scan.b = scan.b[:end]
}
}
if int(t.pVariant) < len(scan.b) {
if end < len(s) {
s = s[:end]
}
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
t.str = s
} else {
t.str = string(scan.b)
}
} else {
t.pVariant, t.pExt = 0, 0
}
return t, scan.err
}
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
t.LangID, e = getLangID(scan.token)
scan.setError(e)
scan.replace(t.LangID.String())
langStart := scan.start
end = scan.scan()
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
t.LangID = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble(e)
end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
t.ScriptID, e = getScriptID(script, scan.token)
if t.ScriptID == 0 {
scan.gobble(e)
}
end = scan.scan()
}
if n := len(scan.token); n >= 2 && n <= 3 {
t.RegionID, e = getRegionID(scan.token)
if t.RegionID == 0 {
scan.gobble(e)
} else {
scan.replace(t.RegionID.String())
}
end = scan.scan()
}
scan.toLower(scan.start, len(scan.b))
t.pVariant = byte(end)
end = parseVariants(scan, end, t)
t.pExt = uint16(end)
return t, end
}
var separator = []byte{'-'}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
func parseVariants(scan *scanner, end int, t Tag) int {
start := scan.start
varIDBuf := [4]uint8{}
variantBuf := [4][]byte{}
varID := varIDBuf[:0]
variant := variantBuf[:0]
last := -1
needSort := false
for ; len(scan.token) >= 4; scan.scan() {
// TODO: measure the impact of needing this conversion and redesign
// the data structure if there is an issue.
v, ok := variantIndex[string(scan.token)]
if !ok {
// unknown variant
// TODO: allow user-defined variants?
scan.gobble(NewValueError(scan.token))
continue
}
varID = append(varID, v)
variant = append(variant, scan.token)
if !needSort {
if last < int(v) {
last = int(v)
} else {
needSort = true
// There is no legal combinations of more than 7 variants
// (and this is by no means a useful sequence).
const maxVariants = 8
if len(varID) > maxVariants {
break
}
}
}
end = scan.end
}
if needSort {
sort.Sort(variantsSort{varID, variant})
k, l := 0, -1
for i, v := range varID {
w := int(v)
if l == w {
// Remove duplicates.
continue
}
varID[k] = varID[i]
variant[k] = variant[i]
k++
l = w
}
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
end = start - 1
} else {
scan.resizeRange(start, end, len(str))
copy(scan.b[scan.start:], str)
end = scan.end
}
}
return end
}
type variantsSort struct {
i []uint8
v [][]byte
}
func (s variantsSort) Len() int {
return len(s.i)
}
func (s variantsSort) Swap(i, j int) {
s.i[i], s.i[j] = s.i[j], s.i[i]
s.v[i], s.v[j] = s.v[j], s.v[i]
}
func (s variantsSort) Less(i, j int) bool {
return s.i[i] < s.i[j]
}
type bytesSort struct {
b [][]byte
n int // first n bytes to compare
}
func (b bytesSort) Len() int {
return len(b.b)
}
func (b bytesSort) Swap(i, j int) {
b.b[i], b.b[j] = b.b[j], b.b[i]
}
func (b bytesSort) Less(i, j int) bool {
for k := 0; k < b.n; k++ {
if b.b[i][k] == b.b[j][k] {
continue
}
return b.b[i][k] < b.b[j][k]
}
return false
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
for len(scan.token) == 1 {
extStart := scan.start
ext := scan.token[0]
end = parseExtension(scan)
extension := scan.b[extStart:end]
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
scan.setError(ErrSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
scan.b = scan.b[:end]
return end
} else if ext == 'x' {
private = extension
break
}
exts = append(exts, extension)
}
sort.Sort(bytesSort{exts, 1})
if len(private) > 0 {
exts = append(exts, private)
}
scan.b = scan.b[:start]
if len(exts) > 0 {
scan.b = append(scan.b, bytes.Join(exts, separator)...)
} else if start > 0 {
// Strip trailing '-'.
scan.b = scan.b[:start-1]
}
return end
}
// parseExtension parses a single extension and returns the position of
// the extension end.
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
if bytes.Compare(scan.token, last) != -1 {
// Attributes are unsorted. Start over from scratch.
p := attrStart + 1
scan.next = p
attrs := [][]byte{}
for scan.scan(); len(scan.token) > 2; scan.scan() {
attrs = append(attrs, scan.token)
end = scan.end
}
sort.Sort(bytesSort{attrs, 3})
copy(scan.b[p:], bytes.Join(attrs, separator))
break
}
last = scan.token
end = scan.end
}
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
scan.setError(ErrSyntax)
end = keyStart
}
}
sort.Stable(bytesSort{keys, 2})
if n := len(keys); n > 0 {
k := 0
for i := 1; i < n; i++ {
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
k++
keys[k] = keys[i]
} else if !bytes.Equal(keys[k], keys[i]) {
scan.setError(ErrDuplicateKey)
}
}
keys = keys[:k+1]
}
reordered := bytes.Join(keys, separator)
if e := p + len(reordered); e < end {
scan.deleteRange(e, end)
end = e
}
copy(scan.b[p:], reordered)
break
}
}
case 't':
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
end = scan.acceptMinSize(1)
default:
end = scan.acceptMinSize(2)
}
return end
}
// getExtension returns the name, body and end position of the extension.
func getExtension(s string, p int) (end int, ext string) {
if s[p] == '-' {
p++
}
if s[p] == 'x' {
return len(s), s[p:]
}
end = nextExtension(s, p)
return end, s[p:end]
}
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p.
// In the fast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
for n := len(s) - 3; p < n; {
if s[p] == '-' {
if s[p+2] == '-' {
return p
}
p += 3
} else {
p++
}
}
return len(s)
}

3464
vendor/golang.org/x/text/internal/language/tables.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

48
vendor/golang.org/x/text/internal/language/tags.go generated vendored Normal file
View File

@ -0,0 +1,48 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
// It simplifies safe initialization of Tag values.
func MustParse(s string) Tag {
t, err := Parse(s)
if err != nil {
panic(err)
}
return t
}
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
// It simplifies safe initialization of Base values.
func MustParseBase(s string) Language {
b, err := ParseBase(s)
if err != nil {
panic(err)
}
return b
}
// MustParseScript is like ParseScript, but panics if the given script cannot be
// parsed. It simplifies safe initialization of Script values.
func MustParseScript(s string) Script {
scr, err := ParseScript(s)
if err != nil {
panic(err)
}
return scr
}
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
// parsed. It simplifies safe initialization of Region values.
func MustParseRegion(s string) Region {
r, err := ParseRegion(s)
if err != nil {
panic(err)
}
return r
}
// Und is the root language.
var Und Tag

View File

@ -1,16 +0,0 @@
# Copyright 2013 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
CLEANFILES+=maketables
maketables: maketables.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w -s tables.go
# Build (but do not run) maketables during testing,
# just to make sure it still compiles.
testshort: maketables

View File

@ -7,6 +7,8 @@ package language
import ( import (
"fmt" "fmt"
"sort" "sort"
"golang.org/x/text/internal/language"
) )
// The Coverage interface is used to define the level of coverage of an // The Coverage interface is used to define the level of coverage of an
@ -44,9 +46,9 @@ type allSubtags struct{}
// consecutive range, it simply returns a slice of numbers in increasing order. // consecutive range, it simply returns a slice of numbers in increasing order.
// The "undefined" region is not returned. // The "undefined" region is not returned.
func (s allSubtags) Regions() []Region { func (s allSubtags) Regions() []Region {
reg := make([]Region, numRegions) reg := make([]Region, language.NumRegions)
for i := range reg { for i := range reg {
reg[i] = Region{regionID(i + 1)} reg[i] = Region{language.Region(i + 1)}
} }
return reg return reg
} }
@ -55,9 +57,9 @@ func (s allSubtags) Regions() []Region {
// consecutive range, it simply returns a slice of numbers in increasing order. // consecutive range, it simply returns a slice of numbers in increasing order.
// The "undefined" script is not returned. // The "undefined" script is not returned.
func (s allSubtags) Scripts() []Script { func (s allSubtags) Scripts() []Script {
scr := make([]Script, numScripts) scr := make([]Script, language.NumScripts)
for i := range scr { for i := range scr {
scr[i] = Script{scriptID(i + 1)} scr[i] = Script{language.Script(i + 1)}
} }
return scr return scr
} }
@ -65,22 +67,10 @@ func (s allSubtags) Scripts() []Script {
// BaseLanguages returns the list of all supported base languages. It generates // BaseLanguages returns the list of all supported base languages. It generates
// the list by traversing the internal structures. // the list by traversing the internal structures.
func (s allSubtags) BaseLanguages() []Base { func (s allSubtags) BaseLanguages() []Base {
base := make([]Base, 0, numLanguages) bs := language.BaseLanguages()
for i := 0; i < langNoIndexOffset; i++ { base := make([]Base, len(bs))
// We included "und" already for the value 0. for i, b := range bs {
if i != nonCanonicalUnd { base[i] = Base{b}
base = append(base, Base{langID(i)})
}
}
i := langNoIndexOffset
for _, v := range langNoIndex {
for k := 0; k < 8; k++ {
if v&1 == 1 {
base = append(base, Base{langID(i)})
}
v >>= 1
i++
}
} }
return base return base
} }
@ -90,7 +80,7 @@ func (s allSubtags) Tags() []Tag {
return nil return nil
} }
// coverage is used used by NewCoverage which is used as a convenient way for // coverage is used by NewCoverage which is used as a convenient way for
// creating Coverage implementations for partially defined data. Very often a // creating Coverage implementations for partially defined data. Very often a
// package will only need to define a subset of slices. coverage provides a // package will only need to define a subset of slices. coverage provides a
// convenient way to do this. Moreover, packages using NewCoverage, instead of // convenient way to do this. Moreover, packages using NewCoverage, instead of
@ -134,7 +124,7 @@ func (s *coverage) BaseLanguages() []Base {
} }
a := make([]Base, len(tags)) a := make([]Base, len(tags))
for i, t := range tags { for i, t := range tags {
a[i] = Base{langID(t.lang)} a[i] = Base{language.Language(t.lang())}
} }
sort.Sort(bases(a)) sort.Sort(bases(a))
k := 0 k := 0

View File

@ -1,783 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package language
// NumCompactTags is the number of common tags. The maximum tag is
// NumCompactTags-1.
const NumCompactTags = 768
var specialTags = []Tag{ // 2 elements
0: {lang: 0xd7, region: 0x6e, script: 0x0, pVariant: 0x5, pExt: 0xe, str: "ca-ES-valencia"},
1: {lang: 0x139, region: 0x135, script: 0x0, pVariant: 0x5, pExt: 0x5, str: "en-US-u-va-posix"},
} // Size: 72 bytes
var coreTags = map[uint32]uint16{
0x0: 0, // und
0x01600000: 3, // af
0x016000d2: 4, // af-NA
0x01600161: 5, // af-ZA
0x01c00000: 6, // agq
0x01c00052: 7, // agq-CM
0x02100000: 8, // ak
0x02100080: 9, // ak-GH
0x02700000: 10, // am
0x0270006f: 11, // am-ET
0x03a00000: 12, // ar
0x03a00001: 13, // ar-001
0x03a00023: 14, // ar-AE
0x03a00039: 15, // ar-BH
0x03a00062: 16, // ar-DJ
0x03a00067: 17, // ar-DZ
0x03a0006b: 18, // ar-EG
0x03a0006c: 19, // ar-EH
0x03a0006d: 20, // ar-ER
0x03a00097: 21, // ar-IL
0x03a0009b: 22, // ar-IQ
0x03a000a1: 23, // ar-JO
0x03a000a8: 24, // ar-KM
0x03a000ac: 25, // ar-KW
0x03a000b0: 26, // ar-LB
0x03a000b9: 27, // ar-LY
0x03a000ba: 28, // ar-MA
0x03a000c9: 29, // ar-MR
0x03a000e1: 30, // ar-OM
0x03a000ed: 31, // ar-PS
0x03a000f3: 32, // ar-QA
0x03a00108: 33, // ar-SA
0x03a0010b: 34, // ar-SD
0x03a00115: 35, // ar-SO
0x03a00117: 36, // ar-SS
0x03a0011c: 37, // ar-SY
0x03a00120: 38, // ar-TD
0x03a00128: 39, // ar-TN
0x03a0015e: 40, // ar-YE
0x04000000: 41, // ars
0x04300000: 42, // as
0x04300099: 43, // as-IN
0x04400000: 44, // asa
0x0440012f: 45, // asa-TZ
0x04800000: 46, // ast
0x0480006e: 47, // ast-ES
0x05800000: 48, // az
0x0581f000: 49, // az-Cyrl
0x0581f032: 50, // az-Cyrl-AZ
0x05857000: 51, // az-Latn
0x05857032: 52, // az-Latn-AZ
0x05e00000: 53, // bas
0x05e00052: 54, // bas-CM
0x07100000: 55, // be
0x07100047: 56, // be-BY
0x07500000: 57, // bem
0x07500162: 58, // bem-ZM
0x07900000: 59, // bez
0x0790012f: 60, // bez-TZ
0x07e00000: 61, // bg
0x07e00038: 62, // bg-BG
0x08200000: 63, // bh
0x0a000000: 64, // bm
0x0a0000c3: 65, // bm-ML
0x0a500000: 66, // bn
0x0a500035: 67, // bn-BD
0x0a500099: 68, // bn-IN
0x0a900000: 69, // bo
0x0a900053: 70, // bo-CN
0x0a900099: 71, // bo-IN
0x0b200000: 72, // br
0x0b200078: 73, // br-FR
0x0b500000: 74, // brx
0x0b500099: 75, // brx-IN
0x0b700000: 76, // bs
0x0b71f000: 77, // bs-Cyrl
0x0b71f033: 78, // bs-Cyrl-BA
0x0b757000: 79, // bs-Latn
0x0b757033: 80, // bs-Latn-BA
0x0d700000: 81, // ca
0x0d700022: 82, // ca-AD
0x0d70006e: 83, // ca-ES
0x0d700078: 84, // ca-FR
0x0d70009e: 85, // ca-IT
0x0db00000: 86, // ccp
0x0db00035: 87, // ccp-BD
0x0db00099: 88, // ccp-IN
0x0dc00000: 89, // ce
0x0dc00106: 90, // ce-RU
0x0df00000: 91, // cgg
0x0df00131: 92, // cgg-UG
0x0e500000: 93, // chr
0x0e500135: 94, // chr-US
0x0e900000: 95, // ckb
0x0e90009b: 96, // ckb-IQ
0x0e90009c: 97, // ckb-IR
0x0fa00000: 98, // cs
0x0fa0005e: 99, // cs-CZ
0x0fe00000: 100, // cu
0x0fe00106: 101, // cu-RU
0x10000000: 102, // cy
0x1000007b: 103, // cy-GB
0x10100000: 104, // da
0x10100063: 105, // da-DK
0x10100082: 106, // da-GL
0x10800000: 107, // dav
0x108000a4: 108, // dav-KE
0x10d00000: 109, // de
0x10d0002e: 110, // de-AT
0x10d00036: 111, // de-BE
0x10d0004e: 112, // de-CH
0x10d00060: 113, // de-DE
0x10d0009e: 114, // de-IT
0x10d000b2: 115, // de-LI
0x10d000b7: 116, // de-LU
0x11700000: 117, // dje
0x117000d4: 118, // dje-NE
0x11f00000: 119, // dsb
0x11f00060: 120, // dsb-DE
0x12400000: 121, // dua
0x12400052: 122, // dua-CM
0x12800000: 123, // dv
0x12b00000: 124, // dyo
0x12b00114: 125, // dyo-SN
0x12d00000: 126, // dz
0x12d00043: 127, // dz-BT
0x12f00000: 128, // ebu
0x12f000a4: 129, // ebu-KE
0x13000000: 130, // ee
0x13000080: 131, // ee-GH
0x13000122: 132, // ee-TG
0x13600000: 133, // el
0x1360005d: 134, // el-CY
0x13600087: 135, // el-GR
0x13900000: 136, // en
0x13900001: 137, // en-001
0x1390001a: 138, // en-150
0x13900025: 139, // en-AG
0x13900026: 140, // en-AI
0x1390002d: 141, // en-AS
0x1390002e: 142, // en-AT
0x1390002f: 143, // en-AU
0x13900034: 144, // en-BB
0x13900036: 145, // en-BE
0x1390003a: 146, // en-BI
0x1390003d: 147, // en-BM
0x13900042: 148, // en-BS
0x13900046: 149, // en-BW
0x13900048: 150, // en-BZ
0x13900049: 151, // en-CA
0x1390004a: 152, // en-CC
0x1390004e: 153, // en-CH
0x13900050: 154, // en-CK
0x13900052: 155, // en-CM
0x1390005c: 156, // en-CX
0x1390005d: 157, // en-CY
0x13900060: 158, // en-DE
0x13900061: 159, // en-DG
0x13900063: 160, // en-DK
0x13900064: 161, // en-DM
0x1390006d: 162, // en-ER
0x13900072: 163, // en-FI
0x13900073: 164, // en-FJ
0x13900074: 165, // en-FK
0x13900075: 166, // en-FM
0x1390007b: 167, // en-GB
0x1390007c: 168, // en-GD
0x1390007f: 169, // en-GG
0x13900080: 170, // en-GH
0x13900081: 171, // en-GI
0x13900083: 172, // en-GM
0x1390008a: 173, // en-GU
0x1390008c: 174, // en-GY
0x1390008d: 175, // en-HK
0x13900096: 176, // en-IE
0x13900097: 177, // en-IL
0x13900098: 178, // en-IM
0x13900099: 179, // en-IN
0x1390009a: 180, // en-IO
0x1390009f: 181, // en-JE
0x139000a0: 182, // en-JM
0x139000a4: 183, // en-KE
0x139000a7: 184, // en-KI
0x139000a9: 185, // en-KN
0x139000ad: 186, // en-KY
0x139000b1: 187, // en-LC
0x139000b4: 188, // en-LR
0x139000b5: 189, // en-LS
0x139000bf: 190, // en-MG
0x139000c0: 191, // en-MH
0x139000c6: 192, // en-MO
0x139000c7: 193, // en-MP
0x139000ca: 194, // en-MS
0x139000cb: 195, // en-MT
0x139000cc: 196, // en-MU
0x139000ce: 197, // en-MW
0x139000d0: 198, // en-MY
0x139000d2: 199, // en-NA
0x139000d5: 200, // en-NF
0x139000d6: 201, // en-NG
0x139000d9: 202, // en-NL
0x139000dd: 203, // en-NR
0x139000df: 204, // en-NU
0x139000e0: 205, // en-NZ
0x139000e6: 206, // en-PG
0x139000e7: 207, // en-PH
0x139000e8: 208, // en-PK
0x139000eb: 209, // en-PN
0x139000ec: 210, // en-PR
0x139000f0: 211, // en-PW
0x13900107: 212, // en-RW
0x13900109: 213, // en-SB
0x1390010a: 214, // en-SC
0x1390010b: 215, // en-SD
0x1390010c: 216, // en-SE
0x1390010d: 217, // en-SG
0x1390010e: 218, // en-SH
0x1390010f: 219, // en-SI
0x13900112: 220, // en-SL
0x13900117: 221, // en-SS
0x1390011b: 222, // en-SX
0x1390011d: 223, // en-SZ
0x1390011f: 224, // en-TC
0x13900125: 225, // en-TK
0x13900129: 226, // en-TO
0x1390012c: 227, // en-TT
0x1390012d: 228, // en-TV
0x1390012f: 229, // en-TZ
0x13900131: 230, // en-UG
0x13900133: 231, // en-UM
0x13900135: 232, // en-US
0x13900139: 233, // en-VC
0x1390013c: 234, // en-VG
0x1390013d: 235, // en-VI
0x1390013f: 236, // en-VU
0x13900142: 237, // en-WS
0x13900161: 238, // en-ZA
0x13900162: 239, // en-ZM
0x13900164: 240, // en-ZW
0x13c00000: 241, // eo
0x13c00001: 242, // eo-001
0x13e00000: 243, // es
0x13e0001f: 244, // es-419
0x13e0002c: 245, // es-AR
0x13e0003f: 246, // es-BO
0x13e00041: 247, // es-BR
0x13e00048: 248, // es-BZ
0x13e00051: 249, // es-CL
0x13e00054: 250, // es-CO
0x13e00056: 251, // es-CR
0x13e00059: 252, // es-CU
0x13e00065: 253, // es-DO
0x13e00068: 254, // es-EA
0x13e00069: 255, // es-EC
0x13e0006e: 256, // es-ES
0x13e00086: 257, // es-GQ
0x13e00089: 258, // es-GT
0x13e0008f: 259, // es-HN
0x13e00094: 260, // es-IC
0x13e000cf: 261, // es-MX
0x13e000d8: 262, // es-NI
0x13e000e2: 263, // es-PA
0x13e000e4: 264, // es-PE
0x13e000e7: 265, // es-PH
0x13e000ec: 266, // es-PR
0x13e000f1: 267, // es-PY
0x13e0011a: 268, // es-SV
0x13e00135: 269, // es-US
0x13e00136: 270, // es-UY
0x13e0013b: 271, // es-VE
0x14000000: 272, // et
0x1400006a: 273, // et-EE
0x14500000: 274, // eu
0x1450006e: 275, // eu-ES
0x14600000: 276, // ewo
0x14600052: 277, // ewo-CM
0x14800000: 278, // fa
0x14800024: 279, // fa-AF
0x1480009c: 280, // fa-IR
0x14e00000: 281, // ff
0x14e00052: 282, // ff-CM
0x14e00084: 283, // ff-GN
0x14e000c9: 284, // ff-MR
0x14e00114: 285, // ff-SN
0x15100000: 286, // fi
0x15100072: 287, // fi-FI
0x15300000: 288, // fil
0x153000e7: 289, // fil-PH
0x15800000: 290, // fo
0x15800063: 291, // fo-DK
0x15800076: 292, // fo-FO
0x15e00000: 293, // fr
0x15e00036: 294, // fr-BE
0x15e00037: 295, // fr-BF
0x15e0003a: 296, // fr-BI
0x15e0003b: 297, // fr-BJ
0x15e0003c: 298, // fr-BL
0x15e00049: 299, // fr-CA
0x15e0004b: 300, // fr-CD
0x15e0004c: 301, // fr-CF
0x15e0004d: 302, // fr-CG
0x15e0004e: 303, // fr-CH
0x15e0004f: 304, // fr-CI
0x15e00052: 305, // fr-CM
0x15e00062: 306, // fr-DJ
0x15e00067: 307, // fr-DZ
0x15e00078: 308, // fr-FR
0x15e0007a: 309, // fr-GA
0x15e0007e: 310, // fr-GF
0x15e00084: 311, // fr-GN
0x15e00085: 312, // fr-GP
0x15e00086: 313, // fr-GQ
0x15e00091: 314, // fr-HT
0x15e000a8: 315, // fr-KM
0x15e000b7: 316, // fr-LU
0x15e000ba: 317, // fr-MA
0x15e000bb: 318, // fr-MC
0x15e000be: 319, // fr-MF
0x15e000bf: 320, // fr-MG
0x15e000c3: 321, // fr-ML
0x15e000c8: 322, // fr-MQ
0x15e000c9: 323, // fr-MR
0x15e000cc: 324, // fr-MU
0x15e000d3: 325, // fr-NC
0x15e000d4: 326, // fr-NE
0x15e000e5: 327, // fr-PF
0x15e000ea: 328, // fr-PM
0x15e00102: 329, // fr-RE
0x15e00107: 330, // fr-RW
0x15e0010a: 331, // fr-SC
0x15e00114: 332, // fr-SN
0x15e0011c: 333, // fr-SY
0x15e00120: 334, // fr-TD
0x15e00122: 335, // fr-TG
0x15e00128: 336, // fr-TN
0x15e0013f: 337, // fr-VU
0x15e00140: 338, // fr-WF
0x15e0015f: 339, // fr-YT
0x16900000: 340, // fur
0x1690009e: 341, // fur-IT
0x16d00000: 342, // fy
0x16d000d9: 343, // fy-NL
0x16e00000: 344, // ga
0x16e00096: 345, // ga-IE
0x17e00000: 346, // gd
0x17e0007b: 347, // gd-GB
0x19000000: 348, // gl
0x1900006e: 349, // gl-ES
0x1a300000: 350, // gsw
0x1a30004e: 351, // gsw-CH
0x1a300078: 352, // gsw-FR
0x1a3000b2: 353, // gsw-LI
0x1a400000: 354, // gu
0x1a400099: 355, // gu-IN
0x1a900000: 356, // guw
0x1ab00000: 357, // guz
0x1ab000a4: 358, // guz-KE
0x1ac00000: 359, // gv
0x1ac00098: 360, // gv-IM
0x1b400000: 361, // ha
0x1b400080: 362, // ha-GH
0x1b4000d4: 363, // ha-NE
0x1b4000d6: 364, // ha-NG
0x1b800000: 365, // haw
0x1b800135: 366, // haw-US
0x1bc00000: 367, // he
0x1bc00097: 368, // he-IL
0x1be00000: 369, // hi
0x1be00099: 370, // hi-IN
0x1d100000: 371, // hr
0x1d100033: 372, // hr-BA
0x1d100090: 373, // hr-HR
0x1d200000: 374, // hsb
0x1d200060: 375, // hsb-DE
0x1d500000: 376, // hu
0x1d500092: 377, // hu-HU
0x1d700000: 378, // hy
0x1d700028: 379, // hy-AM
0x1e100000: 380, // id
0x1e100095: 381, // id-ID
0x1e700000: 382, // ig
0x1e7000d6: 383, // ig-NG
0x1ea00000: 384, // ii
0x1ea00053: 385, // ii-CN
0x1f500000: 386, // io
0x1f800000: 387, // is
0x1f80009d: 388, // is-IS
0x1f900000: 389, // it
0x1f90004e: 390, // it-CH
0x1f90009e: 391, // it-IT
0x1f900113: 392, // it-SM
0x1f900138: 393, // it-VA
0x1fa00000: 394, // iu
0x20000000: 395, // ja
0x200000a2: 396, // ja-JP
0x20300000: 397, // jbo
0x20700000: 398, // jgo
0x20700052: 399, // jgo-CM
0x20a00000: 400, // jmc
0x20a0012f: 401, // jmc-TZ
0x20e00000: 402, // jv
0x21000000: 403, // ka
0x2100007d: 404, // ka-GE
0x21200000: 405, // kab
0x21200067: 406, // kab-DZ
0x21600000: 407, // kaj
0x21700000: 408, // kam
0x217000a4: 409, // kam-KE
0x21f00000: 410, // kcg
0x22300000: 411, // kde
0x2230012f: 412, // kde-TZ
0x22700000: 413, // kea
0x2270005a: 414, // kea-CV
0x23400000: 415, // khq
0x234000c3: 416, // khq-ML
0x23900000: 417, // ki
0x239000a4: 418, // ki-KE
0x24200000: 419, // kk
0x242000ae: 420, // kk-KZ
0x24400000: 421, // kkj
0x24400052: 422, // kkj-CM
0x24500000: 423, // kl
0x24500082: 424, // kl-GL
0x24600000: 425, // kln
0x246000a4: 426, // kln-KE
0x24a00000: 427, // km
0x24a000a6: 428, // km-KH
0x25100000: 429, // kn
0x25100099: 430, // kn-IN
0x25400000: 431, // ko
0x254000aa: 432, // ko-KP
0x254000ab: 433, // ko-KR
0x25600000: 434, // kok
0x25600099: 435, // kok-IN
0x26a00000: 436, // ks
0x26a00099: 437, // ks-IN
0x26b00000: 438, // ksb
0x26b0012f: 439, // ksb-TZ
0x26d00000: 440, // ksf
0x26d00052: 441, // ksf-CM
0x26e00000: 442, // ksh
0x26e00060: 443, // ksh-DE
0x27400000: 444, // ku
0x28100000: 445, // kw
0x2810007b: 446, // kw-GB
0x28a00000: 447, // ky
0x28a000a5: 448, // ky-KG
0x29100000: 449, // lag
0x2910012f: 450, // lag-TZ
0x29500000: 451, // lb
0x295000b7: 452, // lb-LU
0x2a300000: 453, // lg
0x2a300131: 454, // lg-UG
0x2af00000: 455, // lkt
0x2af00135: 456, // lkt-US
0x2b500000: 457, // ln
0x2b50002a: 458, // ln-AO
0x2b50004b: 459, // ln-CD
0x2b50004c: 460, // ln-CF
0x2b50004d: 461, // ln-CG
0x2b800000: 462, // lo
0x2b8000af: 463, // lo-LA
0x2bf00000: 464, // lrc
0x2bf0009b: 465, // lrc-IQ
0x2bf0009c: 466, // lrc-IR
0x2c000000: 467, // lt
0x2c0000b6: 468, // lt-LT
0x2c200000: 469, // lu
0x2c20004b: 470, // lu-CD
0x2c400000: 471, // luo
0x2c4000a4: 472, // luo-KE
0x2c500000: 473, // luy
0x2c5000a4: 474, // luy-KE
0x2c700000: 475, // lv
0x2c7000b8: 476, // lv-LV
0x2d100000: 477, // mas
0x2d1000a4: 478, // mas-KE
0x2d10012f: 479, // mas-TZ
0x2e900000: 480, // mer
0x2e9000a4: 481, // mer-KE
0x2ed00000: 482, // mfe
0x2ed000cc: 483, // mfe-MU
0x2f100000: 484, // mg
0x2f1000bf: 485, // mg-MG
0x2f200000: 486, // mgh
0x2f2000d1: 487, // mgh-MZ
0x2f400000: 488, // mgo
0x2f400052: 489, // mgo-CM
0x2ff00000: 490, // mk
0x2ff000c2: 491, // mk-MK
0x30400000: 492, // ml
0x30400099: 493, // ml-IN
0x30b00000: 494, // mn
0x30b000c5: 495, // mn-MN
0x31b00000: 496, // mr
0x31b00099: 497, // mr-IN
0x31f00000: 498, // ms
0x31f0003e: 499, // ms-BN
0x31f000d0: 500, // ms-MY
0x31f0010d: 501, // ms-SG
0x32000000: 502, // mt
0x320000cb: 503, // mt-MT
0x32500000: 504, // mua
0x32500052: 505, // mua-CM
0x33100000: 506, // my
0x331000c4: 507, // my-MM
0x33a00000: 508, // mzn
0x33a0009c: 509, // mzn-IR
0x34100000: 510, // nah
0x34500000: 511, // naq
0x345000d2: 512, // naq-NA
0x34700000: 513, // nb
0x347000da: 514, // nb-NO
0x34700110: 515, // nb-SJ
0x34e00000: 516, // nd
0x34e00164: 517, // nd-ZW
0x35000000: 518, // nds
0x35000060: 519, // nds-DE
0x350000d9: 520, // nds-NL
0x35100000: 521, // ne
0x35100099: 522, // ne-IN
0x351000db: 523, // ne-NP
0x36700000: 524, // nl
0x36700030: 525, // nl-AW
0x36700036: 526, // nl-BE
0x36700040: 527, // nl-BQ
0x3670005b: 528, // nl-CW
0x367000d9: 529, // nl-NL
0x36700116: 530, // nl-SR
0x3670011b: 531, // nl-SX
0x36800000: 532, // nmg
0x36800052: 533, // nmg-CM
0x36a00000: 534, // nn
0x36a000da: 535, // nn-NO
0x36c00000: 536, // nnh
0x36c00052: 537, // nnh-CM
0x36f00000: 538, // no
0x37500000: 539, // nqo
0x37600000: 540, // nr
0x37a00000: 541, // nso
0x38000000: 542, // nus
0x38000117: 543, // nus-SS
0x38700000: 544, // ny
0x38900000: 545, // nyn
0x38900131: 546, // nyn-UG
0x39000000: 547, // om
0x3900006f: 548, // om-ET
0x390000a4: 549, // om-KE
0x39500000: 550, // or
0x39500099: 551, // or-IN
0x39800000: 552, // os
0x3980007d: 553, // os-GE
0x39800106: 554, // os-RU
0x39d00000: 555, // pa
0x39d05000: 556, // pa-Arab
0x39d050e8: 557, // pa-Arab-PK
0x39d33000: 558, // pa-Guru
0x39d33099: 559, // pa-Guru-IN
0x3a100000: 560, // pap
0x3b300000: 561, // pl
0x3b3000e9: 562, // pl-PL
0x3bd00000: 563, // prg
0x3bd00001: 564, // prg-001
0x3be00000: 565, // ps
0x3be00024: 566, // ps-AF
0x3c000000: 567, // pt
0x3c00002a: 568, // pt-AO
0x3c000041: 569, // pt-BR
0x3c00004e: 570, // pt-CH
0x3c00005a: 571, // pt-CV
0x3c000086: 572, // pt-GQ
0x3c00008b: 573, // pt-GW
0x3c0000b7: 574, // pt-LU
0x3c0000c6: 575, // pt-MO
0x3c0000d1: 576, // pt-MZ
0x3c0000ee: 577, // pt-PT
0x3c000118: 578, // pt-ST
0x3c000126: 579, // pt-TL
0x3c400000: 580, // qu
0x3c40003f: 581, // qu-BO
0x3c400069: 582, // qu-EC
0x3c4000e4: 583, // qu-PE
0x3d400000: 584, // rm
0x3d40004e: 585, // rm-CH
0x3d900000: 586, // rn
0x3d90003a: 587, // rn-BI
0x3dc00000: 588, // ro
0x3dc000bc: 589, // ro-MD
0x3dc00104: 590, // ro-RO
0x3de00000: 591, // rof
0x3de0012f: 592, // rof-TZ
0x3e200000: 593, // ru
0x3e200047: 594, // ru-BY
0x3e2000a5: 595, // ru-KG
0x3e2000ae: 596, // ru-KZ
0x3e2000bc: 597, // ru-MD
0x3e200106: 598, // ru-RU
0x3e200130: 599, // ru-UA
0x3e500000: 600, // rw
0x3e500107: 601, // rw-RW
0x3e600000: 602, // rwk
0x3e60012f: 603, // rwk-TZ
0x3eb00000: 604, // sah
0x3eb00106: 605, // sah-RU
0x3ec00000: 606, // saq
0x3ec000a4: 607, // saq-KE
0x3f300000: 608, // sbp
0x3f30012f: 609, // sbp-TZ
0x3fa00000: 610, // sd
0x3fa000e8: 611, // sd-PK
0x3fc00000: 612, // sdh
0x3fd00000: 613, // se
0x3fd00072: 614, // se-FI
0x3fd000da: 615, // se-NO
0x3fd0010c: 616, // se-SE
0x3ff00000: 617, // seh
0x3ff000d1: 618, // seh-MZ
0x40100000: 619, // ses
0x401000c3: 620, // ses-ML
0x40200000: 621, // sg
0x4020004c: 622, // sg-CF
0x40800000: 623, // shi
0x40857000: 624, // shi-Latn
0x408570ba: 625, // shi-Latn-MA
0x408dc000: 626, // shi-Tfng
0x408dc0ba: 627, // shi-Tfng-MA
0x40c00000: 628, // si
0x40c000b3: 629, // si-LK
0x41200000: 630, // sk
0x41200111: 631, // sk-SK
0x41600000: 632, // sl
0x4160010f: 633, // sl-SI
0x41c00000: 634, // sma
0x41d00000: 635, // smi
0x41e00000: 636, // smj
0x41f00000: 637, // smn
0x41f00072: 638, // smn-FI
0x42200000: 639, // sms
0x42300000: 640, // sn
0x42300164: 641, // sn-ZW
0x42900000: 642, // so
0x42900062: 643, // so-DJ
0x4290006f: 644, // so-ET
0x429000a4: 645, // so-KE
0x42900115: 646, // so-SO
0x43100000: 647, // sq
0x43100027: 648, // sq-AL
0x431000c2: 649, // sq-MK
0x4310014d: 650, // sq-XK
0x43200000: 651, // sr
0x4321f000: 652, // sr-Cyrl
0x4321f033: 653, // sr-Cyrl-BA
0x4321f0bd: 654, // sr-Cyrl-ME
0x4321f105: 655, // sr-Cyrl-RS
0x4321f14d: 656, // sr-Cyrl-XK
0x43257000: 657, // sr-Latn
0x43257033: 658, // sr-Latn-BA
0x432570bd: 659, // sr-Latn-ME
0x43257105: 660, // sr-Latn-RS
0x4325714d: 661, // sr-Latn-XK
0x43700000: 662, // ss
0x43a00000: 663, // ssy
0x43b00000: 664, // st
0x44400000: 665, // sv
0x44400031: 666, // sv-AX
0x44400072: 667, // sv-FI
0x4440010c: 668, // sv-SE
0x44500000: 669, // sw
0x4450004b: 670, // sw-CD
0x445000a4: 671, // sw-KE
0x4450012f: 672, // sw-TZ
0x44500131: 673, // sw-UG
0x44e00000: 674, // syr
0x45000000: 675, // ta
0x45000099: 676, // ta-IN
0x450000b3: 677, // ta-LK
0x450000d0: 678, // ta-MY
0x4500010d: 679, // ta-SG
0x46100000: 680, // te
0x46100099: 681, // te-IN
0x46400000: 682, // teo
0x464000a4: 683, // teo-KE
0x46400131: 684, // teo-UG
0x46700000: 685, // tg
0x46700124: 686, // tg-TJ
0x46b00000: 687, // th
0x46b00123: 688, // th-TH
0x46f00000: 689, // ti
0x46f0006d: 690, // ti-ER
0x46f0006f: 691, // ti-ET
0x47100000: 692, // tig
0x47600000: 693, // tk
0x47600127: 694, // tk-TM
0x48000000: 695, // tn
0x48200000: 696, // to
0x48200129: 697, // to-TO
0x48a00000: 698, // tr
0x48a0005d: 699, // tr-CY
0x48a0012b: 700, // tr-TR
0x48e00000: 701, // ts
0x49400000: 702, // tt
0x49400106: 703, // tt-RU
0x4a400000: 704, // twq
0x4a4000d4: 705, // twq-NE
0x4a900000: 706, // tzm
0x4a9000ba: 707, // tzm-MA
0x4ac00000: 708, // ug
0x4ac00053: 709, // ug-CN
0x4ae00000: 710, // uk
0x4ae00130: 711, // uk-UA
0x4b400000: 712, // ur
0x4b400099: 713, // ur-IN
0x4b4000e8: 714, // ur-PK
0x4bc00000: 715, // uz
0x4bc05000: 716, // uz-Arab
0x4bc05024: 717, // uz-Arab-AF
0x4bc1f000: 718, // uz-Cyrl
0x4bc1f137: 719, // uz-Cyrl-UZ
0x4bc57000: 720, // uz-Latn
0x4bc57137: 721, // uz-Latn-UZ
0x4be00000: 722, // vai
0x4be57000: 723, // vai-Latn
0x4be570b4: 724, // vai-Latn-LR
0x4bee3000: 725, // vai-Vaii
0x4bee30b4: 726, // vai-Vaii-LR
0x4c000000: 727, // ve
0x4c300000: 728, // vi
0x4c30013e: 729, // vi-VN
0x4c900000: 730, // vo
0x4c900001: 731, // vo-001
0x4cc00000: 732, // vun
0x4cc0012f: 733, // vun-TZ
0x4ce00000: 734, // wa
0x4cf00000: 735, // wae
0x4cf0004e: 736, // wae-CH
0x4e500000: 737, // wo
0x4e500114: 738, // wo-SN
0x4f200000: 739, // xh
0x4fb00000: 740, // xog
0x4fb00131: 741, // xog-UG
0x50900000: 742, // yav
0x50900052: 743, // yav-CM
0x51200000: 744, // yi
0x51200001: 745, // yi-001
0x51800000: 746, // yo
0x5180003b: 747, // yo-BJ
0x518000d6: 748, // yo-NG
0x51f00000: 749, // yue
0x51f38000: 750, // yue-Hans
0x51f38053: 751, // yue-Hans-CN
0x51f39000: 752, // yue-Hant
0x51f3908d: 753, // yue-Hant-HK
0x52800000: 754, // zgh
0x528000ba: 755, // zgh-MA
0x52900000: 756, // zh
0x52938000: 757, // zh-Hans
0x52938053: 758, // zh-Hans-CN
0x5293808d: 759, // zh-Hans-HK
0x529380c6: 760, // zh-Hans-MO
0x5293810d: 761, // zh-Hans-SG
0x52939000: 762, // zh-Hant
0x5293908d: 763, // zh-Hant-HK
0x529390c6: 764, // zh-Hant-MO
0x5293912e: 765, // zh-Hant-TW
0x52f00000: 766, // zu
0x52f00161: 767, // zu-ZA
}
// Total table size 4676 bytes (4KiB); checksum: 17BE3673

View File

@ -2,8 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go //go:generate go run gen.go -output tables.go
//go:generate go run gen_index.go
package language package language
@ -11,47 +10,34 @@ package language
// - verifying that tables are dropped correctly (most notably matcher tables). // - verifying that tables are dropped correctly (most notably matcher tables).
import ( import (
"errors"
"fmt"
"strings" "strings"
)
const ( "golang.org/x/text/internal/language"
// maxCoreSize is the maximum size of a BCP 47 tag without variants and "golang.org/x/text/internal/language/compact"
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
maxCoreSize = 12
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
max99thPercentileSize = 32
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
maxSimpleUExtensionSize = 14
) )
// Tag represents a BCP 47 language tag. It is used to specify an instance of a // Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be // specific language or locale. All language tag values are guaranteed to be
// well-formed. // well-formed.
type Tag struct { type Tag compact.Tag
lang langID
region regionID
// TODO: we will soon run out of positions for script. Idea: instead of
// storing lang, region, and script codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
script scriptID
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
// str is the string representation of the Tag. It will only be used if the func makeTag(t language.Tag) (tag Tag) {
// tag has variants or extensions. return Tag(compact.Make(t))
str string
} }
func (t *Tag) tag() language.Tag {
return (*compact.Tag)(t).Tag()
}
func (t *Tag) isCompact() bool {
return (*compact.Tag)(t).IsCompact()
}
// TODO: improve performance.
func (t *Tag) lang() language.Language { return t.tag().LangID }
func (t *Tag) region() language.Region { return t.tag().RegionID }
func (t *Tag) script() language.Script { return t.tag().ScriptID }
// Make is a convenience wrapper for Parse that omits the error. // Make is a convenience wrapper for Parse that omits the error.
// In case of an error, a sensible default is returned. // In case of an error, a sensible default is returned.
func Make(s string) Tag { func Make(s string) Tag {
@ -68,25 +54,13 @@ func (c CanonType) Make(s string) Tag {
// Raw returns the raw base language, script and region, without making an // Raw returns the raw base language, script and region, without making an
// attempt to infer their values. // attempt to infer their values.
func (t Tag) Raw() (b Base, s Script, r Region) { func (t Tag) Raw() (b Base, s Script, r Region) {
return Base{t.lang}, Script{t.script}, Region{t.region} tt := t.tag()
} return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
// equalTags compares language, script and region subtags only.
func (t Tag) equalTags(a Tag) bool {
return t.lang == a.lang && t.script == a.script && t.region == a.region
} }
// IsRoot returns true if t is equal to language "und". // IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool { func (t Tag) IsRoot() bool {
if int(t.pVariant) < len(t.str) { return compact.Tag(t).IsRoot()
return false
}
return t.equalTags(und)
}
// private reports whether the Tag consists solely of a private use tag.
func (t Tag) private() bool {
return t.str != "" && t.pVariant == 0
} }
// CanonType can be used to enable or disable various types of canonicalization. // CanonType can be used to enable or disable various types of canonicalization.
@ -138,73 +112,73 @@ const (
// canonicalize returns the canonicalized equivalent of the tag and // canonicalize returns the canonicalized equivalent of the tag and
// whether there was any change. // whether there was any change.
func (t Tag) canonicalize(c CanonType) (Tag, bool) { func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
if c == Raw { if c == Raw {
return t, false return t, false
} }
changed := false changed := false
if c&SuppressScript != 0 { if c&SuppressScript != 0 {
if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] { if t.LangID.SuppressScript() == t.ScriptID {
t.script = 0 t.ScriptID = 0
changed = true changed = true
} }
} }
if c&canonLang != 0 { if c&canonLang != 0 {
for { for {
if l, aliasType := normLang(t.lang); l != t.lang { if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
switch aliasType { switch aliasType {
case langLegacy: case language.Legacy:
if c&Legacy != 0 { if c&Legacy != 0 {
if t.lang == _sh && t.script == 0 { if t.LangID == _sh && t.ScriptID == 0 {
t.script = _Latn t.ScriptID = _Latn
} }
t.lang = l t.LangID = l
changed = true changed = true
} }
case langMacro: case language.Macro:
if c&Macro != 0 { if c&Macro != 0 {
// We deviate here from CLDR. The mapping "nb" -> "no" // We deviate here from CLDR. The mapping "nb" -> "no"
// qualifies as a typical Macro language mapping. However, // qualifies as a typical Macro language mapping. However,
// for legacy reasons, CLDR maps "no", the macro language // for legacy reasons, CLDR maps "no", the macro language
// code for Norwegian, to the dominant variant "nb". This // code for Norwegian, to the dominant variant "nb". This
// change is currently under consideration for CLDR as well. // change is currently under consideration for CLDR as well.
// See http://unicode.org/cldr/trac/ticket/2698 and also // See https://unicode.org/cldr/trac/ticket/2698 and also
// http://unicode.org/cldr/trac/ticket/1790 for some of the // https://unicode.org/cldr/trac/ticket/1790 for some of the
// practical implications. TODO: this check could be removed // practical implications. TODO: this check could be removed
// if CLDR adopts this change. // if CLDR adopts this change.
if c&CLDR == 0 || t.lang != _nb { if c&CLDR == 0 || t.LangID != _nb {
changed = true changed = true
t.lang = l t.LangID = l
} }
} }
case langDeprecated: case language.Deprecated:
if c&DeprecatedBase != 0 { if c&DeprecatedBase != 0 {
if t.lang == _mo && t.region == 0 { if t.LangID == _mo && t.RegionID == 0 {
t.region = _MD t.RegionID = _MD
} }
t.lang = l t.LangID = l
changed = true changed = true
// Other canonicalization types may still apply. // Other canonicalization types may still apply.
continue continue
} }
} }
} else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 { } else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
t.lang = _nb t.LangID = _nb
changed = true changed = true
} }
break break
} }
} }
if c&DeprecatedScript != 0 { if c&DeprecatedScript != 0 {
if t.script == _Qaai { if t.ScriptID == _Qaai {
changed = true changed = true
t.script = _Zinh t.ScriptID = _Zinh
} }
} }
if c&DeprecatedRegion != 0 { if c&DeprecatedRegion != 0 {
if r := normRegion(t.region); r != 0 { if r := t.RegionID.Canonicalize(); r != t.RegionID {
changed = true changed = true
t.region = r t.RegionID = r
} }
} }
return t, changed return t, changed
@ -212,11 +186,20 @@ func (t Tag) canonicalize(c CanonType) (Tag, bool) {
// Canonicalize returns the canonicalized equivalent of the tag. // Canonicalize returns the canonicalized equivalent of the tag.
func (c CanonType) Canonicalize(t Tag) (Tag, error) { func (c CanonType) Canonicalize(t Tag) (Tag, error) {
t, changed := t.canonicalize(c) // First try fast path.
if changed { if t.isCompact() {
t.remakeString() if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
return t, nil
}
}
// It is unlikely that one will canonicalize a tag after matching. So do
// a slow but simple approach here.
if tag, changed := canonicalize(c, t.tag()); changed {
tag.RemakeString()
return makeTag(tag), nil
} }
return t, nil return t, nil
} }
// Confidence indicates the level of certainty for a given return value. // Confidence indicates the level of certainty for a given return value.
@ -239,83 +222,21 @@ func (c Confidence) String() string {
return confName[c] return confName[c]
} }
// remakeString is used to update t.str in case lang, script or region changed.
// It is assumed that pExt and pVariant still point to the start of the
// respective parts.
func (t *Tag) remakeString() {
if t.str == "" {
return
}
extra := t.str[t.pVariant:]
if t.pVariant > 0 {
extra = extra[1:]
}
if t.equalTags(und) && strings.HasPrefix(extra, "x-") {
t.str = extra
t.pVariant = 0
t.pExt = 0
return
}
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
b := buf[:t.genCoreBytes(buf[:])]
if extra != "" {
diff := len(b) - int(t.pVariant)
b = append(b, '-')
b = append(b, extra...)
t.pVariant = uint8(int(t.pVariant) + diff)
t.pExt = uint16(int(t.pExt) + diff)
} else {
t.pVariant = uint8(len(b))
t.pExt = uint16(len(b))
}
t.str = string(b)
}
// genCoreBytes writes a string for the base languages, script and region tags
// to the given buffer and returns the number of bytes written. It will never
// write more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
n := t.lang.stringToBuf(buf[:])
if t.script != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.script.String())
}
if t.region != 0 {
n += copy(buf[n:], "-")
n += copy(buf[n:], t.region.String())
}
return n
}
// String returns the canonical string representation of the language tag. // String returns the canonical string representation of the language tag.
func (t Tag) String() string { func (t Tag) String() string {
if t.str != "" { return t.tag().String()
return t.str
}
if t.script == 0 && t.region == 0 {
return t.lang.String()
}
buf := [maxCoreSize]byte{}
return string(buf[:t.genCoreBytes(buf[:])])
} }
// MarshalText implements encoding.TextMarshaler. // MarshalText implements encoding.TextMarshaler.
func (t Tag) MarshalText() (text []byte, err error) { func (t Tag) MarshalText() (text []byte, err error) {
if t.str != "" { return t.tag().MarshalText()
text = append(text, t.str...)
} else if t.script == 0 && t.region == 0 {
text = append(text, t.lang.String()...)
} else {
buf := [maxCoreSize]byte{}
text = buf[:t.genCoreBytes(buf[:])]
}
return text, nil
} }
// UnmarshalText implements encoding.TextUnmarshaler. // UnmarshalText implements encoding.TextUnmarshaler.
func (t *Tag) UnmarshalText(text []byte) error { func (t *Tag) UnmarshalText(text []byte) error {
tag, err := Raw.Parse(string(text)) var tag language.Tag
*t = tag err := tag.UnmarshalText(text)
*t = makeTag(tag)
return err return err
} }
@ -323,15 +244,16 @@ func (t *Tag) UnmarshalText(text []byte) error {
// unspecified, an attempt will be made to infer it from the context. // unspecified, an attempt will be made to infer it from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change. // It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Base() (Base, Confidence) { func (t Tag) Base() (Base, Confidence) {
if t.lang != 0 { if b := t.lang(); b != 0 {
return Base{t.lang}, Exact return Base{b}, Exact
} }
tt := t.tag()
c := High c := High
if t.script == 0 && !(Region{t.region}).IsCountry() { if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
c = Low c = Low
} }
if tag, err := addTags(t); err == nil && tag.lang != 0 { if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
return Base{tag.lang}, c return Base{tag.LangID}, c
} }
return Base{0}, No return Base{0}, No
} }
@ -344,35 +266,34 @@ func (t Tag) Base() (Base, Confidence) {
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined) // If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks // as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts. // common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for // See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified. // unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
// Note that an inferred script is never guaranteed to be the correct one. Latin is // Note that an inferred script is never guaranteed to be the correct one. Latin is
// almost exclusively used for Afrikaans, but Arabic has been used for some texts // almost exclusively used for Afrikaans, but Arabic has been used for some texts
// in the past. Also, the script that is commonly used may change over time. // in the past. Also, the script that is commonly used may change over time.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change. // It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Script() (Script, Confidence) { func (t Tag) Script() (Script, Confidence) {
if t.script != 0 { if scr := t.script(); scr != 0 {
return Script{t.script}, Exact return Script{scr}, Exact
} }
sc, c := scriptID(_Zzzz), No tt := t.tag()
if t.lang < langNoIndexOffset { sc, c := language.Script(_Zzzz), No
if scr := scriptID(suppressScript[t.lang]); scr != 0 { if scr := tt.LangID.SuppressScript(); scr != 0 {
// Note: it is not always the case that a language with a suppress // Note: it is not always the case that a language with a suppress
// script value is only written in one script (e.g. kk, ms, pa). // script value is only written in one script (e.g. kk, ms, pa).
if t.region == 0 { if tt.RegionID == 0 {
return Script{scriptID(scr)}, High return Script{scr}, High
} }
sc, c = scr, High sc, c = scr, High
} }
} if tag, err := tt.Maximize(); err == nil {
if tag, err := addTags(t); err == nil { if tag.ScriptID != sc {
if tag.script != sc { sc, c = tag.ScriptID, Low
sc, c = tag.script, Low
} }
} else { } else {
t, _ = (Deprecated | Macro).Canonicalize(t) tt, _ = canonicalize(Deprecated|Macro, tt)
if tag, err := addTags(t); err == nil && tag.script != sc { if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
sc, c = tag.script, Low sc, c = tag.ScriptID, Low
} }
} }
return Script{sc}, c return Script{sc}, c
@ -382,86 +303,45 @@ func (t Tag) Script() (Script, Confidence) {
// infer a most likely candidate from the context. // infer a most likely candidate from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change. // It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
func (t Tag) Region() (Region, Confidence) { func (t Tag) Region() (Region, Confidence) {
if t.region != 0 { if r := t.region(); r != 0 {
return Region{t.region}, Exact return Region{r}, Exact
} }
if t, err := addTags(t); err == nil { tt := t.tag()
return Region{t.region}, Low // TODO: differentiate between high and low. if tt, err := tt.Maximize(); err == nil {
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
} }
t, _ = (Deprecated | Macro).Canonicalize(t) tt, _ = canonicalize(Deprecated|Macro, tt)
if tag, err := addTags(t); err == nil { if tag, err := tt.Maximize(); err == nil {
return Region{tag.region}, Low return Region{tag.RegionID}, Low
} }
return Region{_ZZ}, No // TODO: return world instead of undetermined? return Region{_ZZ}, No // TODO: return world instead of undetermined?
} }
// Variant returns the variants specified explicitly for this language tag. // Variants returns the variants specified explicitly for this language tag.
// or nil if no variant was specified. // or nil if no variant was specified.
func (t Tag) Variants() []Variant { func (t Tag) Variants() []Variant {
if !compact.Tag(t).MayHaveVariants() {
return nil
}
v := []Variant{} v := []Variant{}
if int(t.pVariant) < int(t.pExt) { x, str := "", t.tag().Variants()
for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; { for str != "" {
x, str = nextToken(str) x, str = nextToken(str)
v = append(v, Variant{x}) v = append(v, Variant{x})
} }
}
return v return v
} }
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language. // specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR. // The parent for a language may change for newer versions of CLDR.
//
// Parent returns a tag for a less specific language that is mutually
// intelligible or Und if there is no such language. This may not be the same as
// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
// is "zh-Hant", and the parent of "zh-Hant" is "und".
func (t Tag) Parent() Tag { func (t Tag) Parent() Tag {
if t.str != "" { return Tag(compact.Tag(t).Parent())
// Strip the variants and extensions.
t, _ = Raw.Compose(t.Raw())
if t.region == 0 && t.script != 0 && t.lang != 0 {
base, _ := addTags(Tag{lang: t.lang})
if base.script == t.script {
return Tag{lang: t.lang}
}
}
return t
}
if t.lang != 0 {
if t.region != 0 {
maxScript := t.script
if maxScript == 0 {
max, _ := addTags(t)
maxScript = max.script
}
for i := range parents {
if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
if regionID(r) == t.region {
return Tag{
lang: t.lang,
script: scriptID(parents[i].script),
region: regionID(parents[i].toRegion),
}
}
}
}
}
// Strip the script if it is the default one.
base, _ := addTags(Tag{lang: t.lang})
if base.script != maxScript {
return Tag{lang: t.lang, script: maxScript}
}
return Tag{lang: t.lang}
} else if t.script != 0 {
// The parent for an base-script pair with a non-default script is
// "und" instead of the base language.
base, _ := addTags(Tag{lang: t.lang})
if base.script != t.script {
return und
}
return Tag{lang: t.lang}
}
}
return und
} }
// returns token t and the rest of the string. // returns token t and the rest of the string.
@ -487,17 +367,8 @@ func (e Extension) String() string {
// ParseExtension parses s as an extension and returns it on success. // ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (e Extension, err error) { func ParseExtension(s string) (e Extension, err error) {
scan := makeScannerString(s) ext, err := language.ParseExtension(s)
var end int return Extension{ext}, err
if n := len(scan.token); n != 1 {
return Extension{}, errSyntax
}
scan.toLower(0, len(scan.b))
end = parseExtension(&scan)
if end != len(s) {
return Extension{}, errSyntax
}
return Extension{string(scan.b)}, nil
} }
// Type returns the one-byte extension type of e. It returns 0 for the zero // Type returns the one-byte extension type of e. It returns 0 for the zero
@ -518,22 +389,20 @@ func (e Extension) Tokens() []string {
// false for ok if t does not have the requested extension. The returned // false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case. // extension will be invalid in this case.
func (t Tag) Extension(x byte) (ext Extension, ok bool) { func (t Tag) Extension(x byte) (ext Extension, ok bool) {
for i := int(t.pExt); i < len(t.str)-1; { if !compact.Tag(t).MayHaveExtensions() {
var ext string
i, ext = getExtension(t.str, i)
if ext[0] == x {
return Extension{ext}, true
}
}
return Extension{}, false return Extension{}, false
}
e, ok := t.tag().Extension(x)
return Extension{e}, ok
} }
// Extensions returns all extensions of t. // Extensions returns all extensions of t.
func (t Tag) Extensions() []Extension { func (t Tag) Extensions() []Extension {
if !compact.Tag(t).MayHaveExtensions() {
return nil
}
e := []Extension{} e := []Extension{}
for i := int(t.pExt); i < len(t.str)-1; { for _, ext := range t.tag().Extensions() {
var ext string
i, ext = getExtension(t.str, i)
e = append(e, Extension{ext}) e = append(e, Extension{ext})
} }
return e return e
@ -541,259 +410,105 @@ func (t Tag) Extensions() []Extension {
// TypeForKey returns the type associated with the given key, where key and type // TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in // are of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value. // TypeForKey will traverse the inheritance chain to get the correct value.
func (t Tag) TypeForKey(key string) string { func (t Tag) TypeForKey(key string) string {
if start, end, _ := t.findTypeForKey(key); end != start { if !compact.Tag(t).MayHaveExtensions() {
return t.str[start:end] if key != "rg" && key != "va" {
}
return "" return ""
}
}
return t.tag().TypeForKey(key)
} }
var (
errPrivateUse = errors.New("cannot set a key on a private use tag")
errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type // SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in // are of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key. // An empty value removes an existing pair with the same key.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) { func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
if t.private() { tt, err := t.tag().SetTypeForKey(key, value)
return t, errPrivateUse return makeTag(tt), err
}
if len(key) != 2 {
return t, errInvalidArguments
}
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
start -= 2
}
if start == int(t.pVariant) && end == len(t.str) {
t.str = ""
t.pVariant, t.pExt = 0, 0
} else {
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
}
}
return t, nil
}
if len(value) < 3 || len(value) > 8 {
return t, errInvalidArguments
}
var (
buf [maxCoreSize + maxSimpleUExtensionSize]byte
uStart int // start of the -u extension.
)
// Generate the tag string if needed.
if t.str == "" {
uStart = t.genCoreBytes(buf[:])
buf[uStart] = '-'
uStart++
}
// Create new key-type pair and parse it to verify.
b := buf[uStart:]
copy(b, "u-")
copy(b[2:], key)
b[4] = '-'
b = b[:5+copy(b[5:], value)]
scan := makeScanner(b)
if parseExtensions(&scan); scan.err != nil {
return t, scan.err
}
// Assemble the replacement string.
if t.str == "" {
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
} else {
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
}
}
return t, nil
} }
// findKeyAndType returns the start and end position for the type corresponding // NumCompactTags is the number of compact tags. The maximum tag is
// to key or the point at which to insert the key-value pair if the type // NumCompactTags-1.
// wasn't found. The hasExt return value reports whether an -u extension was present. const NumCompactTags = compact.NumCompactTags
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
p := int(t.pExt)
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
}
s := t.str
// Find the correct extension.
for p++; s[p] != 'u'; p++ {
if s[p] > 'u' {
p--
return p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
p++
// curKey is the key currently being processed.
curKey := ""
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
if curKey > key {
return p, p, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
}
}
}
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags // CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository. The index will change over time // for which data exists in the text repository.The index will change over time
// and should not be stored in persistent storage. Extensions, except for the // and should not be stored in persistent storage. If t does not match a compact
// 'va' type of the 'u' extension, are ignored. It will return 0, false if no // index, exact will be false and the compact index will be returned for the
// compact tag exists, where 0 is the index for the root language (Und). // first match after repeatedly taking the Parent of t.
func CompactIndex(t Tag) (index int, ok bool) { func CompactIndex(t Tag) (index int, exact bool) {
// TODO: perhaps give more frequent tags a lower index. id, exact := compact.LanguageID(compact.Tag(t))
// TODO: we could make the indexes stable. This will excluded some return int(id), exact
// possibilities for optimization, so don't do this quite yet.
b, s, r := t.Raw()
if len(t.str) > 0 {
if strings.HasPrefix(t.str, "x-") {
// We have no entries for user-defined tags.
return 0, false
}
if uint16(t.pVariant) != t.pExt {
// There are no tags with variants and an u-va type.
if t.TypeForKey("va") != "" {
return 0, false
}
t, _ = Raw.Compose(b, s, r, t.Variants())
} else if _, ok := t.Extension('u'); ok {
// Strip all but the 'va' entry.
variant := t.TypeForKey("va")
t, _ = Raw.Compose(b, s, r)
t, _ = t.SetTypeForKey("va", variant)
}
if len(t.str) > 0 {
// We have some variants.
for i, s := range specialTags {
if s == t {
return i + 1, true
}
}
return 0, false
}
}
// No variants specified: just compare core components.
// The key has the form lllssrrr, where l, s, and r are nibbles for
// respectively the langID, scriptID, and regionID.
key := uint32(b.langID) << (8 + 12)
key |= uint32(s.scriptID) << 12
key |= uint32(r.regionID)
x, ok := coreTags[key]
return int(x), ok
} }
var root = language.Tag{}
// Base is an ISO 639 language code, used for encoding the base language // Base is an ISO 639 language code, used for encoding the base language
// of a language tag. // of a language tag.
type Base struct { type Base struct {
langID langID language.Language
} }
// ParseBase parses a 2- or 3-letter ISO 639 code. // ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier // It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred. // or another error if another error occurred.
func ParseBase(s string) (Base, error) { func ParseBase(s string) (Base, error) {
if n := len(s); n < 2 || 3 < n { l, err := language.ParseBase(s)
return Base{}, errSyntax
}
var buf [3]byte
l, err := getLangID(buf[:copy(buf[:], s)])
return Base{l}, err return Base{l}, err
} }
// String returns the BCP 47 representation of the base language.
func (b Base) String() string {
return b.langID.String()
}
// ISO3 returns the ISO 639-3 language code.
func (b Base) ISO3() string {
return b.langID.ISO3()
}
// IsPrivateUse reports whether this language code is reserved for private use.
func (b Base) IsPrivateUse() bool {
return b.langID.IsPrivateUse()
}
// Script is a 4-letter ISO 15924 code for representing scripts. // Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case. // It is idiomatically represented in title case.
type Script struct { type Script struct {
scriptID scriptID language.Script
} }
// ParseScript parses a 4-letter ISO 15924 code. // ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier // It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred. // or another error if another error occurred.
func ParseScript(s string) (Script, error) { func ParseScript(s string) (Script, error) {
if len(s) != 4 { sc, err := language.ParseScript(s)
return Script{}, errSyntax
}
var buf [4]byte
sc, err := getScriptID(script, buf[:copy(buf[:], s)])
return Script{sc}, err return Script{sc}, err
} }
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s Script) String() string {
return s.scriptID.String()
}
// IsPrivateUse reports whether this script code is reserved for private use.
func (s Script) IsPrivateUse() bool {
return s.scriptID.IsPrivateUse()
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions. // Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct { type Region struct {
regionID regionID language.Region
} }
// EncodeM49 returns the Region for the given UN M.49 code. // EncodeM49 returns the Region for the given UN M.49 code.
// It returns an error if r is not a valid code. // It returns an error if r is not a valid code.
func EncodeM49(r int) (Region, error) { func EncodeM49(r int) (Region, error) {
rid, err := getRegionM49(r) rid, err := language.EncodeM49(r)
return Region{rid}, err return Region{rid}, err
} }
@ -801,62 +516,54 @@ func EncodeM49(r int) (Region, error) {
// It returns a ValueError if s is a well-formed but unknown region identifier // It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred. // or another error if another error occurred.
func ParseRegion(s string) (Region, error) { func ParseRegion(s string) (Region, error) {
if n := len(s); n < 2 || 3 < n { r, err := language.ParseRegion(s)
return Region{}, errSyntax
}
var buf [3]byte
r, err := getRegionID(buf[:copy(buf[:], s)])
return Region{r}, err return Region{r}, err
} }
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region.
func (r Region) String() string {
return r.regionID.String()
}
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r Region) ISO3() string {
return r.regionID.ISO3()
}
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r.
func (r Region) M49() int {
return r.regionID.M49()
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
func (r Region) IsPrivateUse() bool {
return r.regionID.IsPrivateUse()
}
// IsCountry returns whether this region is a country or autonomous area. This // IsCountry returns whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR. // includes non-standard definitions from CLDR.
func (r Region) IsCountry() bool { func (r Region) IsCountry() bool {
if r.regionID == 0 || r.IsGroup() || r.IsPrivateUse() && r.regionID != _XK { return r.regionID.IsCountry()
return false
}
return true
} }
// IsGroup returns whether this region defines a collection of regions. This // IsGroup returns whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR. // includes non-standard definitions from CLDR.
func (r Region) IsGroup() bool { func (r Region) IsGroup() bool {
if r.regionID == 0 { return r.regionID.IsGroup()
return false
}
return int(regionInclusion[r.regionID]) < len(regionContainment)
} }
// Contains returns whether Region c is contained by Region r. It returns true // Contains returns whether Region c is contained by Region r. It returns true
// if c == r. // if c == r.
func (r Region) Contains(c Region) bool { func (r Region) Contains(c Region) bool {
return r.regionID.contains(c.regionID) return r.regionID.Contains(c.regionID)
} }
func (r regionID) contains(c regionID) bool {
if r == c {
return true
}
g := regionInclusion[r]
if g >= nRegionGroups {
return false
}
m := regionContainment[g]
d := regionInclusion[c]
b := regionInclusionBits[d]
// A contained country may belong to multiple disjoint groups. Matching any
// of these indicates containment. If the contained region is a group, it
// must strictly be a subset.
if d >= nRegionGroups {
return b&m != 0
}
return b&^m == 0
}
var errNoTLD = errors.New("language: region is not a valid ccTLD")
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB. // TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error. // In all other cases it returns either the region itself or an error.
// //
@ -865,25 +572,15 @@ var errNoTLD = errors.New("language: region is not a valid ccTLD")
// region will already be canonicalized it was obtained from a Tag that was // region will already be canonicalized it was obtained from a Tag that was
// obtained using any of the default methods. // obtained using any of the default methods.
func (r Region) TLD() (Region, error) { func (r Region) TLD() (Region, error) {
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the tld, err := r.regionID.TLD()
// difference between ISO 3166-1 and IANA ccTLD. return Region{tld}, err
if r.regionID == _GB {
r = Region{_UK}
}
if (r.typ() & ccTLD) == 0 {
return Region{}, errNoTLD
}
return r, nil
} }
// Canonicalize returns the region or a possible replacement if the region is // Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that // deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions. // are split into multiple regions.
func (r Region) Canonicalize() Region { func (r Region) Canonicalize() Region {
if cr := normRegion(r.regionID); cr != 0 { return Region{r.regionID.Canonicalize()}
return Region{cr}
}
return r
} }
// Variant represents a registered variant of a language as defined by BCP 47. // Variant represents a registered variant of a language as defined by BCP 47.
@ -894,11 +591,8 @@ type Variant struct {
// ParseVariant parses and returns a Variant. An error is returned if s is not // ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant. // a valid variant.
func ParseVariant(s string) (Variant, error) { func ParseVariant(s string) (Variant, error) {
s = strings.ToLower(s) v, err := language.ParseVariant(s)
if _, ok := variantIndex[s]; ok { return Variant{v.String()}, err
return Variant{s}, nil
}
return Variant{}, mkErrInvalid([]byte(s))
} }
// String returns the string representation of the variant. // String returns the string representation of the variant.

View File

@ -4,7 +4,12 @@
package language package language
import "errors" import (
"errors"
"strings"
"golang.org/x/text/internal/language"
)
// A MatchOption configures a Matcher. // A MatchOption configures a Matcher.
type MatchOption func(*matcher) type MatchOption func(*matcher)
@ -74,12 +79,13 @@ func NewMatcher(t []Tag, options ...MatchOption) Matcher {
} }
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) { func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
var tt language.Tag
match, w, c := m.getBest(want...) match, w, c := m.getBest(want...)
if match != nil { if match != nil {
t, index = match.tag, match.index tt, index = match.tag, match.index
} else { } else {
// TODO: this should be an option // TODO: this should be an option
t = m.default_.tag tt = m.default_.tag
if m.preferSameScript { if m.preferSameScript {
outer: outer:
for _, w := range want { for _, w := range want {
@ -91,7 +97,7 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
} }
for i, h := range m.supported { for i, h := range m.supported {
if script.scriptID == h.maxScript { if script.scriptID == h.maxScript {
t, index = h.tag, i tt, index = h.tag, i
break outer break outer
} }
} }
@ -99,238 +105,45 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
} }
// TODO: select first language tag based on script. // TODO: select first language tag based on script.
} }
if w.region != 0 && t.region != 0 && t.region.contains(w.region) { if w.RegionID != tt.RegionID && w.RegionID != 0 {
t, _ = Raw.Compose(t, Region{w.region}) if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
tt.RegionID = w.RegionID
tt.RemakeString()
} else if r := w.RegionID.String(); len(r) == 2 {
// TODO: also filter macro and deprecated.
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
}
} }
// Copy options from the user-provided tag into the result tag. This is hard // Copy options from the user-provided tag into the result tag. This is hard
// to do after the fact, so we do it here. // to do after the fact, so we do it here.
// TODO: add in alternative variants to -u-va-. // TODO: add in alternative variants to -u-va-.
// TODO: add preferred region to -u-rg-. // TODO: add preferred region to -u-rg-.
if e := w.Extensions(); len(e) > 0 { if e := w.Extensions(); len(e) > 0 {
t, _ = Raw.Compose(t, e) b := language.Builder{}
b.SetTag(tt)
for _, e := range e {
b.AddExt(e)
} }
return t, index, c tt = b.Make()
}
type scriptRegionFlags uint8
const (
isList = 1 << iota
scriptInFrom
regionInFrom
)
func (t *Tag) setUndefinedLang(id langID) {
if t.lang == 0 {
t.lang = id
}
}
func (t *Tag) setUndefinedScript(id scriptID) {
if t.script == 0 {
t.script = id
}
}
func (t *Tag) setUndefinedRegion(id regionID) {
if t.region == 0 || t.region.contains(id) {
t.region = id
} }
return makeTag(tt), index, c
} }
// ErrMissingLikelyTagsData indicates no information was available // ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. // to compute likely values of missing tags.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data") var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags sets subtags to their most likely value, given the locale. // func (t *Tag) setTagsFrom(id Tag) {
// In most cases this means setting fields for unknown values, but in some // t.LangID = id.LangID
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error // t.ScriptID = id.ScriptID
// if the given locale cannot be expanded. // t.RegionID = id.RegionID
func (t Tag) addLikelySubtags() (Tag, error) { // }
id, err := addTags(t)
if err != nil {
return t, err
} else if id.equalTags(t) {
return t, nil
}
id.remakeString()
return id, nil
}
// specializeRegion attempts to specialize a group region.
func specializeRegion(t *Tag) bool {
if i := regionInclusion[t.region]; i < nRegionGroups {
x := likelyRegionGroup[i]
if langID(x.lang) == t.lang && scriptID(x.script) == t.script {
t.region = regionID(x.region)
}
return true
}
return false
}
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
if t.private() {
return t, nil
}
if t.script != 0 && t.region != 0 {
if t.lang != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
list := likelyRegion[t.region : t.region+1]
if x := list[0]; x.flags&isList != 0 {
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
if scriptID(x.script) == t.script {
t.setUndefinedLang(langID(x.lang))
return t, nil
}
}
}
if t.lang != 0 {
// Search matches for lang-script and lang-region, where lang != und.
if t.lang < langNoIndexOffset {
x := likelyLang[t.lang]
if x.flags&isList != 0 {
list := likelyLangList[x.region : x.region+uint16(x.script)]
if t.script != 0 {
for _, x := range list {
if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
t.setUndefinedRegion(regionID(x.region))
return t, nil
}
}
} else if t.region != 0 {
count := 0
goodScript := true
tt := t
for _, x := range list {
// We visit all entries for which the script was not
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
if x.flags&scriptInFrom == 0 && t.region.contains(regionID(x.region)) {
tt.region = regionID(x.region)
tt.setUndefinedScript(scriptID(x.script))
goodScript = goodScript && tt.script == scriptID(x.script)
count++
}
}
if count == 1 {
return tt, nil
}
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
t.script = tt.script
}
}
}
}
} else {
// Search matches for und-script.
if t.script != 0 {
x := likelyScript[t.script]
if x.region != 0 {
t.setUndefinedRegion(regionID(x.region))
t.setUndefinedLang(langID(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
if t.region != 0 {
if i := regionInclusion[t.region]; i < nRegionGroups {
x := likelyRegionGroup[i]
if x.region != 0 {
t.setUndefinedLang(langID(x.lang))
t.setUndefinedScript(scriptID(x.script))
t.region = regionID(x.region)
}
} else {
x := likelyRegion[t.region]
if x.flags&isList != 0 {
x = likelyRegionList[x.lang]
}
if x.script != 0 && x.flags != scriptInFrom {
t.setUndefinedLang(langID(x.lang))
t.setUndefinedScript(scriptID(x.script))
return t, nil
}
}
}
}
// Search matches for lang.
if t.lang < langNoIndexOffset {
x := likelyLang[t.lang]
if x.flags&isList != 0 {
x = likelyLangList[x.region]
}
if x.region != 0 {
t.setUndefinedScript(scriptID(x.script))
t.setUndefinedRegion(regionID(x.region))
}
specializeRegion(&t)
if t.lang == 0 {
t.lang = _en // default language
}
return t, nil
}
return t, ErrMissingLikelyTagsData
}
func (t *Tag) setTagsFrom(id Tag) {
t.lang = id.lang
t.script = id.script
t.region = id.region
}
// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
func (t Tag) minimize() (Tag, error) {
t, err := minimizeTags(t)
if err != nil {
return t, err
}
t.remakeString()
return t, nil
}
// minimizeTags mimics the behavior of the ICU 51 C implementation.
func minimizeTags(t Tag) (Tag, error) {
if t.equalTags(und) {
return t, nil
}
max, err := addTags(t)
if err != nil {
return t, err
}
for _, id := range [...]Tag{
{lang: t.lang},
{lang: t.lang, region: t.region},
{lang: t.lang, script: t.script},
} {
if x, err := addTags(id); err == nil && max.equalTags(x) {
t.setTagsFrom(id)
break
}
}
return t, nil
}
// Tag Matching // Tag Matching
// CLDR defines an algorithm for finding the best match between two sets of language // CLDR defines an algorithm for finding the best match between two sets of language
// tags. The basic algorithm defines how to score a possible match and then find // tags. The basic algorithm defines how to score a possible match and then find
// the match with the best score // the match with the best score
// (see http://www.unicode.org/reports/tr35/#LanguageMatching). // (see https://www.unicode.org/reports/tr35/#LanguageMatching).
// Using scoring has several disadvantages. The scoring obfuscates the importance of // Using scoring has several disadvantages. The scoring obfuscates the importance of
// the various factors considered, making the algorithm harder to understand. Using // the various factors considered, making the algorithm harder to understand. Using
// scoring also requires the full score to be computed for each pair of tags. // scoring also requires the full score to be computed for each pair of tags.
@ -441,7 +254,7 @@ func minimizeTags(t Tag) (Tag, error) {
type matcher struct { type matcher struct {
default_ *haveTag default_ *haveTag
supported []*haveTag supported []*haveTag
index map[langID]*matchHeader index map[language.Language]*matchHeader
passSettings bool passSettings bool
preferSameScript bool preferSameScript bool
} }
@ -456,7 +269,7 @@ type matchHeader struct {
// haveTag holds a supported Tag and its maximized script and region. The maximized // haveTag holds a supported Tag and its maximized script and region. The maximized
// or canonicalized language is not stored as it is not needed during matching. // or canonicalized language is not stored as it is not needed during matching.
type haveTag struct { type haveTag struct {
tag Tag tag language.Tag
// index of this tag in the original list of supported tags. // index of this tag in the original list of supported tags.
index int index int
@ -466,37 +279,37 @@ type haveTag struct {
conf Confidence conf Confidence
// Maximized region and script. // Maximized region and script.
maxRegion regionID maxRegion language.Region
maxScript scriptID maxScript language.Script
// altScript may be checked as an alternative match to maxScript. If altScript // altScript may be checked as an alternative match to maxScript. If altScript
// matches, the confidence level for this match is Low. Theoretically there // matches, the confidence level for this match is Low. Theoretically there
// could be multiple alternative scripts. This does not occur in practice. // could be multiple alternative scripts. This does not occur in practice.
altScript scriptID altScript language.Script
// nextMax is the index of the next haveTag with the same maximized tags. // nextMax is the index of the next haveTag with the same maximized tags.
nextMax uint16 nextMax uint16
} }
func makeHaveTag(tag Tag, index int) (haveTag, langID) { func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
max := tag max := tag
if tag.lang != 0 || tag.region != 0 || tag.script != 0 { if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
max, _ = max.canonicalize(All) max, _ = canonicalize(All, max)
max, _ = addTags(max) max, _ = max.Maximize()
max.remakeString() max.RemakeString()
} }
return haveTag{tag, index, Exact, max.region, max.script, altScript(max.lang, max.script), 0}, max.lang return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
} }
// altScript returns an alternative script that may match the given script with // altScript returns an alternative script that may match the given script with
// a low confidence. At the moment, the langMatch data allows for at most one // a low confidence. At the moment, the langMatch data allows for at most one
// script to map to another and we rely on this to keep the code simple. // script to map to another and we rely on this to keep the code simple.
func altScript(l langID, s scriptID) scriptID { func altScript(l language.Language, s language.Script) language.Script {
for _, alt := range matchScript { for _, alt := range matchScript {
// TODO: also match cases where language is not the same. // TODO: also match cases where language is not the same.
if (langID(alt.wantLang) == l || langID(alt.haveLang) == l) && if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
scriptID(alt.haveScript) == s { language.Script(alt.haveScript) == s {
return scriptID(alt.wantScript) return language.Script(alt.wantScript)
} }
} }
return 0 return 0
@ -508,7 +321,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
h.original = h.original || exact h.original = h.original || exact
// Don't add new exact matches. // Don't add new exact matches.
for _, v := range h.haveTags { for _, v := range h.haveTags {
if v.tag.equalsRest(n.tag) { if equalsRest(v.tag, n.tag) {
return return
} }
} }
@ -517,7 +330,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
for i, v := range h.haveTags { for i, v := range h.haveTags {
if v.maxScript == n.maxScript && if v.maxScript == n.maxScript &&
v.maxRegion == n.maxRegion && v.maxRegion == n.maxRegion &&
v.tag.variantOrPrivateTagStr() == n.tag.variantOrPrivateTagStr() { v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
for h.haveTags[i].nextMax != 0 { for h.haveTags[i].nextMax != 0 {
i = int(h.haveTags[i].nextMax) i = int(h.haveTags[i].nextMax)
} }
@ -530,7 +343,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
// header returns the matchHeader for the given language. It creates one if // header returns the matchHeader for the given language. It creates one if
// it doesn't already exist. // it doesn't already exist.
func (m *matcher) header(l langID) *matchHeader { func (m *matcher) header(l language.Language) *matchHeader {
if h := m.index[l]; h != nil { if h := m.index[l]; h != nil {
return h return h
} }
@ -554,7 +367,7 @@ func toConf(d uint8) Confidence {
// for a given tag. // for a given tag.
func newMatcher(supported []Tag, options []MatchOption) *matcher { func newMatcher(supported []Tag, options []MatchOption) *matcher {
m := &matcher{ m := &matcher{
index: make(map[langID]*matchHeader), index: make(map[language.Language]*matchHeader),
preferSameScript: true, preferSameScript: true,
} }
for _, o := range options { for _, o := range options {
@ -567,16 +380,18 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
// Add supported languages to the index. Add exact matches first to give // Add supported languages to the index. Add exact matches first to give
// them precedence. // them precedence.
for i, tag := range supported { for i, tag := range supported {
pair, _ := makeHaveTag(tag, i) tt := tag.tag()
m.header(tag.lang).addIfNew(pair, true) pair, _ := makeHaveTag(tt, i)
m.header(tt.LangID).addIfNew(pair, true)
m.supported = append(m.supported, &pair) m.supported = append(m.supported, &pair)
} }
m.default_ = m.header(supported[0].lang).haveTags[0] m.default_ = m.header(supported[0].lang()).haveTags[0]
// Keep these in two different loops to support the case that two equivalent // Keep these in two different loops to support the case that two equivalent
// languages are distinguished, such as iw and he. // languages are distinguished, such as iw and he.
for i, tag := range supported { for i, tag := range supported {
pair, max := makeHaveTag(tag, i) tt := tag.tag()
if max != tag.lang { pair, max := makeHaveTag(tt, i)
if max != tt.LangID {
m.header(max).addIfNew(pair, true) m.header(max).addIfNew(pair, true)
} }
} }
@ -585,11 +400,11 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
// update will only add entries to original indexes, thus not computing any // update will only add entries to original indexes, thus not computing any
// transitive relations. // transitive relations.
update := func(want, have uint16, conf Confidence) { update := func(want, have uint16, conf Confidence) {
if hh := m.index[langID(have)]; hh != nil { if hh := m.index[language.Language(have)]; hh != nil {
if !hh.original { if !hh.original {
return return
} }
hw := m.header(langID(want)) hw := m.header(language.Language(want))
for _, ht := range hh.haveTags { for _, ht := range hh.haveTags {
v := *ht v := *ht
if conf < v.conf { if conf < v.conf {
@ -597,7 +412,7 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
} }
v.nextMax = 0 // this value needs to be recomputed v.nextMax = 0 // this value needs to be recomputed
if v.altScript != 0 { if v.altScript != 0 {
v.altScript = altScript(langID(want), v.maxScript) v.altScript = altScript(language.Language(want), v.maxScript)
} }
hw.addIfNew(v, conf == Exact && hh.original) hw.addIfNew(v, conf == Exact && hh.original)
} }
@ -618,66 +433,67 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
// First we match deprecated equivalents. If they are perfect equivalents // First we match deprecated equivalents. If they are perfect equivalents
// (their canonicalization simply substitutes a different language code, but // (their canonicalization simply substitutes a different language code, but
// nothing else), the match confidence is Exact, otherwise it is High. // nothing else), the match confidence is Exact, otherwise it is High.
for i, lm := range langAliasMap { for i, lm := range language.AliasMap {
// If deprecated codes match and there is no fiddling with the script or // If deprecated codes match and there is no fiddling with the script or
// or region, we consider it an exact match. // or region, we consider it an exact match.
conf := Exact conf := Exact
if langAliasTypes[i] != langMacro { if language.AliasTypes[i] != language.Macro {
if !isExactEquivalent(langID(lm.from)) { if !isExactEquivalent(language.Language(lm.From)) {
conf = High conf = High
} }
update(lm.to, lm.from, conf) update(lm.To, lm.From, conf)
} }
update(lm.from, lm.to, conf) update(lm.From, lm.To, conf)
} }
return m return m
} }
// getBest gets the best matching tag in m for any of the given tags, taking into // getBest gets the best matching tag in m for any of the given tags, taking into
// account the order of preference of the given tags. // account the order of preference of the given tags.
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) { func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
best := bestMatch{} best := bestMatch{}
for i, w := range want { for i, ww := range want {
var max Tag w := ww.tag()
var max language.Tag
// Check for exact match first. // Check for exact match first.
h := m.index[w.lang] h := m.index[w.LangID]
if w.lang != 0 { if w.LangID != 0 {
if h == nil { if h == nil {
continue continue
} }
// Base language is defined. // Base language is defined.
max, _ = w.canonicalize(Legacy | Deprecated | Macro) max, _ = canonicalize(Legacy|Deprecated|Macro, w)
// A region that is added through canonicalization is stronger than // A region that is added through canonicalization is stronger than
// a maximized region: set it in the original (e.g. mo -> ro-MD). // a maximized region: set it in the original (e.g. mo -> ro-MD).
if w.region != max.region { if w.RegionID != max.RegionID {
w.region = max.region w.RegionID = max.RegionID
} }
// TODO: should we do the same for scripts? // TODO: should we do the same for scripts?
// See test case: en, sr, nl ; sh ; sr // See test case: en, sr, nl ; sh ; sr
max, _ = addTags(max) max, _ = max.Maximize()
} else { } else {
// Base language is not defined. // Base language is not defined.
if h != nil { if h != nil {
for i := range h.haveTags { for i := range h.haveTags {
have := h.haveTags[i] have := h.haveTags[i]
if have.tag.equalsRest(w) { if equalsRest(have.tag, w) {
return have, w, Exact return have, w, Exact
} }
} }
} }
if w.script == 0 && w.region == 0 { if w.ScriptID == 0 && w.RegionID == 0 {
// We skip all tags matching und for approximate matching, including // We skip all tags matching und for approximate matching, including
// private tags. // private tags.
continue continue
} }
max, _ = addTags(w) max, _ = w.Maximize()
if h = m.index[max.lang]; h == nil { if h = m.index[max.LangID]; h == nil {
continue continue
} }
} }
pin := true pin := true
for _, t := range want[i+1:] { for _, t := range want[i+1:] {
if w.lang == t.lang { if w.LangID == t.lang() {
pin = false pin = false
break break
} }
@ -685,11 +501,11 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
// Check for match based on maximized tag. // Check for match based on maximized tag.
for i := range h.haveTags { for i := range h.haveTags {
have := h.haveTags[i] have := h.haveTags[i]
best.update(have, w, max.script, max.region, pin) best.update(have, w, max.ScriptID, max.RegionID, pin)
if best.conf == Exact { if best.conf == Exact {
for have.nextMax != 0 { for have.nextMax != 0 {
have = h.haveTags[have.nextMax] have = h.haveTags[have.nextMax]
best.update(have, w, max.script, max.region, pin) best.update(have, w, max.ScriptID, max.RegionID, pin)
} }
return best.have, best.want, best.conf return best.have, best.want, best.conf
} }
@ -697,9 +513,9 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
} }
if best.conf <= No { if best.conf <= No {
if len(want) != 0 { if len(want) != 0 {
return nil, want[0], No return nil, want[0].tag(), No
} }
return nil, Tag{}, No return nil, language.Tag{}, No
} }
return best.have, best.want, best.conf return best.have, best.want, best.conf
} }
@ -707,9 +523,9 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
// bestMatch accumulates the best match so far. // bestMatch accumulates the best match so far.
type bestMatch struct { type bestMatch struct {
have *haveTag have *haveTag
want Tag want language.Tag
conf Confidence conf Confidence
pinnedRegion regionID pinnedRegion language.Region
pinLanguage bool pinLanguage bool
sameRegionGroup bool sameRegionGroup bool
// Cached results from applying tie-breaking rules. // Cached results from applying tie-breaking rules.
@ -734,19 +550,19 @@ type bestMatch struct {
// still prefer a second language over a dialect of the preferred language by // still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should // explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
// be false. // be false.
func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion regionID, pin bool) { func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
// Bail if the maximum attainable confidence is below that of the current best match. // Bail if the maximum attainable confidence is below that of the current best match.
c := have.conf c := have.conf
if c < m.conf { if c < m.conf {
return return
} }
// Don't change the language once we already have found an exact match. // Don't change the language once we already have found an exact match.
if m.pinLanguage && tag.lang != m.want.lang { if m.pinLanguage && tag.LangID != m.want.LangID {
return return
} }
// Pin the region group if we are comparing tags for the same language. // Pin the region group if we are comparing tags for the same language.
if tag.lang == m.want.lang && m.sameRegionGroup { if tag.LangID == m.want.LangID && m.sameRegionGroup {
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.lang) _, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
if !sameGroup { if !sameGroup {
return return
} }
@ -756,7 +572,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
// don't pin anything, otherwise pin the language. // don't pin anything, otherwise pin the language.
m.pinLanguage = pin m.pinLanguage = pin
} }
if have.tag.equalsRest(tag) { if equalsRest(have.tag, tag) {
} else if have.maxScript != maxScript { } else if have.maxScript != maxScript {
// There is usually very little comprehension between different scripts. // There is usually very little comprehension between different scripts.
// In a few cases there may still be Low comprehension. This possibility // In a few cases there may still be Low comprehension. This possibility
@ -786,7 +602,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
// Tie-breaker rules: // Tie-breaker rules:
// We prefer if the pre-maximized language was specified and identical. // We prefer if the pre-maximized language was specified and identical.
origLang := have.tag.lang == tag.lang && tag.lang != 0 origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
if !beaten && m.origLang != origLang { if !beaten && m.origLang != origLang {
if m.origLang { if m.origLang {
return return
@ -795,7 +611,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
} }
// We prefer if the pre-maximized region was specified and identical. // We prefer if the pre-maximized region was specified and identical.
origReg := have.tag.region == tag.region && tag.region != 0 origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
if !beaten && m.origReg != origReg { if !beaten && m.origReg != origReg {
if m.origReg { if m.origReg {
return return
@ -803,7 +619,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
beaten = true beaten = true
} }
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.lang) regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
if !beaten && m.regGroupDist != regGroupDist { if !beaten && m.regGroupDist != regGroupDist {
if regGroupDist > m.regGroupDist { if regGroupDist > m.regGroupDist {
return return
@ -811,7 +627,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
beaten = true beaten = true
} }
paradigmReg := isParadigmLocale(tag.lang, have.maxRegion) paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
if !beaten && m.paradigmReg != paradigmReg { if !beaten && m.paradigmReg != paradigmReg {
if !paradigmReg { if !paradigmReg {
return return
@ -820,7 +636,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
} }
// Next we prefer if the pre-maximized script was specified and identical. // Next we prefer if the pre-maximized script was specified and identical.
origScript := have.tag.script == tag.script && tag.script != 0 origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
if !beaten && m.origScript != origScript { if !beaten && m.origScript != origScript {
if m.origScript { if m.origScript {
return return
@ -843,9 +659,9 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
} }
} }
func isParadigmLocale(lang langID, r regionID) bool { func isParadigmLocale(lang language.Language, r language.Region) bool {
for _, e := range paradigmLocales { for _, e := range paradigmLocales {
if langID(e[0]) == lang && (r == regionID(e[1]) || r == regionID(e[2])) { if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
return true return true
} }
} }
@ -854,13 +670,13 @@ func isParadigmLocale(lang langID, r regionID) bool {
// regionGroupDist computes the distance between two regions based on their // regionGroupDist computes the distance between two regions based on their
// CLDR grouping. // CLDR grouping.
func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, same bool) { func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
const defaultDistance = 4 const defaultDistance = 4
aGroup := uint(regionToGroups[a]) << 1 aGroup := uint(regionToGroups[a]) << 1
bGroup := uint(regionToGroups[b]) << 1 bGroup := uint(regionToGroups[b]) << 1
for _, ri := range matchRegion { for _, ri := range matchRegion {
if langID(ri.lang) == lang && (ri.script == 0 || scriptID(ri.script) == script) { if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
group := uint(1 << (ri.group &^ 0x80)) group := uint(1 << (ri.group &^ 0x80))
if 0x80&ri.group == 0 { if 0x80&ri.group == 0 {
if aGroup&bGroup&group != 0 { // Both regions are in the group. if aGroup&bGroup&group != 0 { // Both regions are in the group.
@ -876,31 +692,16 @@ func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, s
return defaultDistance, true return defaultDistance, true
} }
func (t Tag) variants() string {
if t.pVariant == 0 {
return ""
}
return t.str[t.pVariant:t.pExt]
}
// variantOrPrivateTagStr returns variants or private use tags.
func (t Tag) variantOrPrivateTagStr() string {
if t.pExt > 0 {
return t.str[t.pVariant:t.pExt]
}
return t.str[t.pVariant:]
}
// equalsRest compares everything except the language. // equalsRest compares everything except the language.
func (a Tag) equalsRest(b Tag) bool { func equalsRest(a, b language.Tag) bool {
// TODO: don't include extensions in this comparison. To do this efficiently, // TODO: don't include extensions in this comparison. To do this efficiently,
// though, we should handle private tags separately. // though, we should handle private tags separately.
return a.script == b.script && a.region == b.region && a.variantOrPrivateTagStr() == b.variantOrPrivateTagStr() return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
} }
// isExactEquivalent returns true if canonicalizing the language will not alter // isExactEquivalent returns true if canonicalizing the language will not alter
// the script or region of a tag. // the script or region of a tag.
func isExactEquivalent(l langID) bool { func isExactEquivalent(l language.Language) bool {
for _, o := range notEquivalent { for _, o := range notEquivalent {
if o == l { if o == l {
return false return false
@ -909,25 +710,26 @@ func isExactEquivalent(l langID) bool {
return true return true
} }
var notEquivalent []langID var notEquivalent []language.Language
func init() { func init() {
// Create a list of all languages for which canonicalization may alter the // Create a list of all languages for which canonicalization may alter the
// script or region. // script or region.
for _, lm := range langAliasMap { for _, lm := range language.AliasMap {
tag := Tag{lang: langID(lm.from)} tag := language.Tag{LangID: language.Language(lm.From)}
if tag, _ = tag.canonicalize(All); tag.script != 0 || tag.region != 0 { if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
notEquivalent = append(notEquivalent, langID(lm.from)) notEquivalent = append(notEquivalent, language.Language(lm.From))
} }
} }
// Maximize undefined regions of paradigm locales. // Maximize undefined regions of paradigm locales.
for i, v := range paradigmLocales { for i, v := range paradigmLocales {
max, _ := addTags(Tag{lang: langID(v[0])}) t := language.Tag{LangID: language.Language(v[0])}
max, _ := t.Maximize()
if v[1] == 0 { if v[1] == 0 {
paradigmLocales[i][1] = uint16(max.region) paradigmLocales[i][1] = uint16(max.RegionID)
} }
if v[2] == 0 { if v[2] == 0 {
paradigmLocales[i][2] = uint16(max.region) paradigmLocales[i][2] = uint16(max.RegionID)
} }
} }
} }

View File

@ -5,216 +5,21 @@
package language package language
import ( import (
"bytes"
"errors" "errors"
"fmt"
"sort"
"strconv" "strconv"
"strings" "strings"
"golang.org/x/text/internal/tag" "golang.org/x/text/internal/language"
) )
// isAlpha returns true if the byte is not a digit.
// b must be an ASCII letter or digit.
func isAlpha(b byte) bool {
return b > '9'
}
// isAlphaNum returns true if the string contains only ASCII letters or digits.
func isAlphaNum(s []byte) bool {
for _, c := range s {
if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
return false
}
}
return true
}
// errSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
var errSyntax = errors.New("language: tag is not well-formed")
// ValueError is returned by any of the parsing functions when the // ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized // input is well-formed but the respective subtag is not recognized
// as a valid value. // as a valid value.
type ValueError struct { type ValueError interface {
v [8]byte error
}
func mkErrInvalid(s []byte) error { // Subtag returns the subtag for which the error occurred.
var e ValueError Subtag() string
copy(e.v[:], s)
return e
}
func (e ValueError) tag() []byte {
n := bytes.IndexByte(e.v[:], 0)
if n == -1 {
n = 8
}
return e.v[:n]
}
// Error implements the error interface.
func (e ValueError) Error() string {
return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}
// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
return string(e.tag())
}
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
type scanner struct {
b []byte
bytes [max99thPercentileSize]byte
token []byte
start int // start position of the current token
end int // end position of the current token
next int // next point for scan
err error
done bool
}
func makeScannerString(s string) scanner {
scan := scanner{}
if len(s) <= len(scan.bytes) {
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
} else {
scan.b = []byte(s)
}
scan.init()
return scan
}
// makeScanner returns a scanner using b as the input buffer.
// b is not copied and may be modified by the scanner routines.
func makeScanner(b []byte) scanner {
scan := scanner{b: b}
scan.init()
return scan
}
func (s *scanner) init() {
for i, c := range s.b {
if c == '_' {
s.b[i] = '-'
}
}
s.scan()
}
// restToLower converts the string between start and end to lower case.
func (s *scanner) toLower(start, end int) {
for i := start; i < end; i++ {
c := s.b[i]
if 'A' <= c && c <= 'Z' {
s.b[i] += 'a' - 'A'
}
}
}
func (s *scanner) setError(e error) {
if s.err == nil || (e == errSyntax && s.err != errSyntax) {
s.err = e
}
}
// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
s.start = oldStart
if end := oldStart + newSize; end != oldEnd {
diff := end - oldEnd
if end < cap(s.b) {
b := make([]byte, len(s.b)+diff)
copy(b, s.b[:oldStart])
copy(b[end:], s.b[oldEnd:])
s.b = b
} else {
s.b = append(s.b[end:], s.b[oldEnd:]...)
}
s.next = end + (s.next - s.end)
s.end = end
}
}
// replace replaces the current token with repl.
func (s *scanner) replace(repl string) {
s.resizeRange(s.start, s.end, len(repl))
copy(s.b[s.start:], repl)
}
// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func (s *scanner) gobble(e error) {
s.setError(e)
if s.start == 0 {
s.b = s.b[:+copy(s.b, s.b[s.next:])]
s.end = 0
} else {
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
s.end = s.start - 1
}
s.next = s.start
}
// deleteRange removes the given range from s.b before the current token.
func (s *scanner) deleteRange(start, end int) {
s.setError(errSyntax)
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
diff := end - start
s.next -= diff
s.start -= diff
s.end -= diff
}
// scan parses the next token of a BCP 47 string. Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
func (s *scanner) scan() (end int) {
end = s.end
s.token = nil
for s.start = s.next; s.next < len(s.b); {
i := bytes.IndexByte(s.b[s.next:], '-')
if i == -1 {
s.end = len(s.b)
s.next = len(s.b)
i = s.end - s.start
} else {
s.end = s.next + i
s.next = s.end + 1
}
token := s.b[s.start:s.end]
if i < 1 || i > 8 || !isAlphaNum(token) {
s.gobble(errSyntax)
continue
}
s.token = token
return end
}
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
s.setError(errSyntax)
s.b = s.b[:len(s.b)-1]
}
s.done = true
return end
}
// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed.
func (s *scanner) acceptMinSize(min int) (end int) {
end = s.end
s.scan()
for ; len(s.token) >= min; s.scan() {
end = s.end
}
return end
} }
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
@ -223,7 +28,7 @@ func (s *scanner) acceptMinSize(min int) (end int) {
// ValueError. The Tag returned in this case is just stripped of the unknown // ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format // value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in // and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the default canonicalization type. // The resulting tag is canonicalized using the default canonicalization type.
func Parse(s string) (t Tag, err error) { func Parse(s string) (t Tag, err error) {
return Default.Parse(s) return Default.Parse(s)
@ -235,327 +40,18 @@ func Parse(s string) (t Tag, err error) {
// ValueError. The Tag returned in this case is just stripped of the unknown // ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format // value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in // and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the the canonicalization type c. // The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) { func (c CanonType) Parse(s string) (t Tag, err error) {
// TODO: consider supporting old-style locale key-value pairs. tt, err := language.Parse(s)
if s == "" { if err != nil {
return und, errSyntax return makeTag(tt), err
} }
if len(s) <= maxAltTaglen { tt, changed := canonicalize(c, tt)
b := [maxAltTaglen]byte{}
for i, c := range s {
// Generating invalid UTF-8 is okay as it won't match.
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
} else if c == '_' {
c = '-'
}
b[i] = byte(c)
}
if t, ok := grandfathered(b); ok {
return t, nil
}
}
scan := makeScannerString(s)
t, err = parse(&scan, s)
t, changed := t.canonicalize(c)
if changed { if changed {
t.remakeString() tt.RemakeString()
} }
return t, err return makeTag(tt), err
}
func parse(scan *scanner, s string) (t Tag, err error) {
t = und
var end int
if n := len(scan.token); n <= 1 {
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
return t, errSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
return und, errSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
scan.setError(errSyntax)
scan.b = scan.b[:end]
}
}
if int(t.pVariant) < len(scan.b) {
if end < len(s) {
s = s[:end]
}
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
t.str = s
} else {
t.str = string(scan.b)
}
} else {
t.pVariant, t.pExt = 0, 0
}
return t, scan.err
}
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
func parseTag(scan *scanner) (t Tag, end int) {
var e error
// TODO: set an error if an unknown lang, script or region is encountered.
t.lang, e = getLangID(scan.token)
scan.setError(e)
scan.replace(t.lang.String())
langStart := scan.start
end = scan.scan()
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>.
lang, e := getLangID(scan.token)
if lang != 0 {
t.lang = lang
copy(scan.b[langStart:], lang.String())
scan.b[langStart+3] = '-'
scan.start = langStart + 4
}
scan.gobble(e)
end = scan.scan()
}
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
t.script, e = getScriptID(script, scan.token)
if t.script == 0 {
scan.gobble(e)
}
end = scan.scan()
}
if n := len(scan.token); n >= 2 && n <= 3 {
t.region, e = getRegionID(scan.token)
if t.region == 0 {
scan.gobble(e)
} else {
scan.replace(t.region.String())
}
end = scan.scan()
}
scan.toLower(scan.start, len(scan.b))
t.pVariant = byte(end)
end = parseVariants(scan, end, t)
t.pExt = uint16(end)
return t, end
}
var separator = []byte{'-'}
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
func parseVariants(scan *scanner, end int, t Tag) int {
start := scan.start
varIDBuf := [4]uint8{}
variantBuf := [4][]byte{}
varID := varIDBuf[:0]
variant := variantBuf[:0]
last := -1
needSort := false
for ; len(scan.token) >= 4; scan.scan() {
// TODO: measure the impact of needing this conversion and redesign
// the data structure if there is an issue.
v, ok := variantIndex[string(scan.token)]
if !ok {
// unknown variant
// TODO: allow user-defined variants?
scan.gobble(mkErrInvalid(scan.token))
continue
}
varID = append(varID, v)
variant = append(variant, scan.token)
if !needSort {
if last < int(v) {
last = int(v)
} else {
needSort = true
// There is no legal combinations of more than 7 variants
// (and this is by no means a useful sequence).
const maxVariants = 8
if len(varID) > maxVariants {
break
}
}
}
end = scan.end
}
if needSort {
sort.Sort(variantsSort{varID, variant})
k, l := 0, -1
for i, v := range varID {
w := int(v)
if l == w {
// Remove duplicates.
continue
}
varID[k] = varID[i]
variant[k] = variant[i]
k++
l = w
}
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
end = start - 1
} else {
scan.resizeRange(start, end, len(str))
copy(scan.b[scan.start:], str)
end = scan.end
}
}
return end
}
type variantsSort struct {
i []uint8
v [][]byte
}
func (s variantsSort) Len() int {
return len(s.i)
}
func (s variantsSort) Swap(i, j int) {
s.i[i], s.i[j] = s.i[j], s.i[i]
s.v[i], s.v[j] = s.v[j], s.v[i]
}
func (s variantsSort) Less(i, j int) bool {
return s.i[i] < s.i[j]
}
type bytesSort [][]byte
func (b bytesSort) Len() int {
return len(b)
}
func (b bytesSort) Swap(i, j int) {
b[i], b[j] = b[j], b[i]
}
func (b bytesSort) Less(i, j int) bool {
return bytes.Compare(b[i], b[j]) == -1
}
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
func parseExtensions(scan *scanner) int {
start := scan.start
exts := [][]byte{}
private := []byte{}
end := scan.end
for len(scan.token) == 1 {
extStart := scan.start
ext := scan.token[0]
end = parseExtension(scan)
extension := scan.b[extStart:end]
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
scan.setError(errSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
scan.b = scan.b[:end]
return end
} else if ext == 'x' {
private = extension
break
}
exts = append(exts, extension)
}
sort.Sort(bytesSort(exts))
if len(private) > 0 {
exts = append(exts, private)
}
scan.b = scan.b[:start]
if len(exts) > 0 {
scan.b = append(scan.b, bytes.Join(exts, separator)...)
} else if start > 0 {
// Strip trailing '-'.
scan.b = scan.b[:start-1]
}
return end
}
// parseExtension parses a single extension and returns the position of
// the extension end.
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
if bytes.Compare(scan.token, last) != -1 {
// Attributes are unsorted. Start over from scratch.
p := attrStart + 1
scan.next = p
attrs := [][]byte{}
for scan.scan(); len(scan.token) > 2; scan.scan() {
attrs = append(attrs, scan.token)
end = scan.end
}
sort.Sort(bytesSort(attrs))
copy(scan.b[p:], bytes.Join(attrs, separator))
break
}
last = scan.token
end = scan.end
}
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
scan.setError(errSyntax)
end = keyStart
}
}
sort.Sort(bytesSort(keys))
reordered := bytes.Join(keys, separator)
if e := p + len(reordered); e < end {
scan.deleteRange(e, end)
end = e
}
copy(scan.b[p:], bytes.Join(keys, separator))
break
}
}
case 't':
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)
scan.toLower(start, end)
}
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
end = scan.acceptMinSize(3)
}
case 'x':
end = scan.acceptMinSize(1)
default:
end = scan.acceptMinSize(2)
}
return end
} }
// Compose creates a Tag from individual parts, which may be of type Tag, Base, // Compose creates a Tag from individual parts, which may be of type Tag, Base,
@ -563,10 +59,11 @@ func parseExtension(scan *scanner) int {
// Base, Script or Region or slice of type Variant or Extension is passed more // Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are // than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter // accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically // will replace the former. For -u extensions, though, the key-type pairs are
// only makes sense as the first argument. The resulting tag is returned after // added, where later values overwrite older ones. A Tag overwrites all former
// canonicalizing using the Default CanonType. If one or more errors are // values and typically only makes sense as the first argument. The resulting
// encountered, one of the errors is returned. // tag is returned after canonicalizing using the Default CanonType. If one or
// more errors are encountered, one of the errors is returned.
func Compose(part ...interface{}) (t Tag, err error) { func Compose(part ...interface{}) (t Tag, err error) {
return Default.Compose(part...) return Default.Compose(part...)
} }
@ -576,193 +73,65 @@ func Compose(part ...interface{}) (t Tag, err error) {
// Base, Script or Region or slice of type Variant or Extension is passed more // Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are // than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter // accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically // will replace the former. For -u extensions, though, the key-type pairs are
// only makes sense as the first argument. The resulting tag is returned after // added, where later values overwrite older ones. A Tag overwrites all former
// canonicalizing using CanonType c. If one or more errors are encountered, // values and typically only makes sense as the first argument. The resulting
// one of the errors is returned. // tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
var b builder var b language.Builder
if err = b.update(part...); err != nil { if err = update(&b, part...); err != nil {
return und, err return und, err
} }
t, _ = b.tag.canonicalize(c) b.Tag, _ = canonicalize(c, b.Tag)
return makeTag(b.Make()), err
if len(b.ext) > 0 || len(b.variant) > 0 {
sort.Sort(sortVariant(b.variant))
sort.Strings(b.ext)
if b.private != "" {
b.ext = append(b.ext, b.private)
}
n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
buf := make([]byte, n)
p := t.genCoreBytes(buf)
t.pVariant = byte(p)
p += appendTokens(buf[p:], b.variant...)
t.pExt = uint16(p)
p += appendTokens(buf[p:], b.ext...)
t.str = string(buf[:p])
} else if b.private != "" {
t.str = b.private
t.remakeString()
}
return
}
type builder struct {
tag Tag
private string // the x extension
ext []string
variant []string
err error
}
func (b *builder) addExt(e string) {
if e == "" {
} else if e[0] == 'x' {
b.private = e
} else {
b.ext = append(b.ext, e)
}
} }
var errInvalidArgument = errors.New("invalid Extension or Variant") var errInvalidArgument = errors.New("invalid Extension or Variant")
func (b *builder) update(part ...interface{}) (err error) { func update(b *language.Builder, part ...interface{}) (err error) {
replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
if s == "" {
b.err = errInvalidArgument
return true
}
for i, v := range *l {
if eq(v, s) {
(*l)[i] = s
return true
}
}
return false
}
for _, x := range part { for _, x := range part {
switch v := x.(type) { switch v := x.(type) {
case Tag: case Tag:
b.tag.lang = v.lang b.SetTag(v.tag())
b.tag.region = v.region
b.tag.script = v.script
if v.str != "" {
b.variant = nil
for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
x, s = nextToken(s)
b.variant = append(b.variant, x)
}
b.ext, b.private = nil, ""
for i, e := int(v.pExt), ""; i < len(v.str); {
i, e = getExtension(v.str, i)
b.addExt(e)
}
}
case Base: case Base:
b.tag.lang = v.langID b.Tag.LangID = v.langID
case Script: case Script:
b.tag.script = v.scriptID b.Tag.ScriptID = v.scriptID
case Region: case Region:
b.tag.region = v.regionID b.Tag.RegionID = v.regionID
case Variant: case Variant:
if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) { if v.variant == "" {
b.variant = append(b.variant, v.variant) err = errInvalidArgument
break
} }
b.AddVariant(v.variant)
case Extension: case Extension:
if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) { if v.s == "" {
b.addExt(v.s) err = errInvalidArgument
break
} }
b.SetExt(v.s)
case []Variant: case []Variant:
b.variant = nil b.ClearVariants()
for _, x := range v { for _, v := range v {
b.update(x) b.AddVariant(v.variant)
} }
case []Extension: case []Extension:
b.ext, b.private = nil, "" b.ClearExtensions()
for _, e := range v { for _, e := range v {
b.update(e) b.SetExt(e.s)
} }
// TODO: support parsing of raw strings based on morphology or just extensions? // TODO: support parsing of raw strings based on morphology or just extensions?
case error: case error:
if v != nil {
err = v err = v
} }
} }
return
}
func tokenLen(token ...string) (n int) {
for _, t := range token {
n += len(t) + 1
} }
return return
} }
func appendTokens(b []byte, token ...string) int {
p := 0
for _, t := range token {
b[p] = '-'
copy(b[p+1:], t)
p += 1 + len(t)
}
return p
}
type sortVariant []string
func (s sortVariant) Len() int {
return len(s)
}
func (s sortVariant) Swap(i, j int) {
s[j], s[i] = s[i], s[j]
}
func (s sortVariant) Less(i, j int) bool {
return variantIndex[s[i]] < variantIndex[s[j]]
}
func findExt(list []string, x byte) int {
for i, e := range list {
if e[0] == x {
return i
}
}
return -1
}
// getExtension returns the name, body and end position of the extension.
func getExtension(s string, p int) (end int, ext string) {
if s[p] == '-' {
p++
}
if s[p] == 'x' {
return len(s), s[p:]
}
end = nextExtension(s, p)
return end, s[p:end]
}
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p.
// In the fast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
for n := len(s) - 3; p < n; {
if s[p] == '-' {
if s[p+2] == '-' {
return p
}
p += 3
} else {
p++
}
}
return len(s)
}
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
// ParseAcceptLanguage parses the contents of an Accept-Language header as // ParseAcceptLanguage parses the contents of an Accept-Language header as
@ -788,7 +157,7 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
if !ok { if !ok {
return nil, nil, err return nil, nil, err
} }
t = Tag{lang: id} t = makeTag(language.Tag{LangID: id})
} }
// Scan the optional weight. // Scan the optional weight.
@ -830,9 +199,9 @@ func split(s string, c byte) (head, tail string) {
return strings.TrimSpace(s), "" return strings.TrimSpace(s), ""
} }
// Add hack mapping to deal with a small number of cases that that occur // Add hack mapping to deal with a small number of cases that occur
// in Accept-Language (with reasonable frequency). // in Accept-Language (with reasonable frequency).
var acceptFallback = map[string]langID{ var acceptFallback = map[string]language.Language{
"english": _en, "english": _en,
"deutsch": _de, "deutsch": _de,
"italian": _it, "italian": _it,

File diff suppressed because it is too large Load Diff

View File

@ -4,6 +4,8 @@
package language package language
import "golang.org/x/text/internal/language/compact"
// TODO: Various sets of commonly use tags and regions. // TODO: Various sets of commonly use tags and regions.
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed. // MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
@ -61,83 +63,83 @@ var (
Und Tag = Tag{} Und Tag = Tag{}
Afrikaans Tag = Tag{lang: _af} // af Afrikaans Tag = Tag(compact.Afrikaans)
Amharic Tag = Tag{lang: _am} // am Amharic Tag = Tag(compact.Amharic)
Arabic Tag = Tag{lang: _ar} // ar Arabic Tag = Tag(compact.Arabic)
ModernStandardArabic Tag = Tag{lang: _ar, region: _001} // ar-001 ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
Azerbaijani Tag = Tag{lang: _az} // az Azerbaijani Tag = Tag(compact.Azerbaijani)
Bulgarian Tag = Tag{lang: _bg} // bg Bulgarian Tag = Tag(compact.Bulgarian)
Bengali Tag = Tag{lang: _bn} // bn Bengali Tag = Tag(compact.Bengali)
Catalan Tag = Tag{lang: _ca} // ca Catalan Tag = Tag(compact.Catalan)
Czech Tag = Tag{lang: _cs} // cs Czech Tag = Tag(compact.Czech)
Danish Tag = Tag{lang: _da} // da Danish Tag = Tag(compact.Danish)
German Tag = Tag{lang: _de} // de German Tag = Tag(compact.German)
Greek Tag = Tag{lang: _el} // el Greek Tag = Tag(compact.Greek)
English Tag = Tag{lang: _en} // en English Tag = Tag(compact.English)
AmericanEnglish Tag = Tag{lang: _en, region: _US} // en-US AmericanEnglish Tag = Tag(compact.AmericanEnglish)
BritishEnglish Tag = Tag{lang: _en, region: _GB} // en-GB BritishEnglish Tag = Tag(compact.BritishEnglish)
Spanish Tag = Tag{lang: _es} // es Spanish Tag = Tag(compact.Spanish)
EuropeanSpanish Tag = Tag{lang: _es, region: _ES} // es-ES EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
LatinAmericanSpanish Tag = Tag{lang: _es, region: _419} // es-419 LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
Estonian Tag = Tag{lang: _et} // et Estonian Tag = Tag(compact.Estonian)
Persian Tag = Tag{lang: _fa} // fa Persian Tag = Tag(compact.Persian)
Finnish Tag = Tag{lang: _fi} // fi Finnish Tag = Tag(compact.Finnish)
Filipino Tag = Tag{lang: _fil} // fil Filipino Tag = Tag(compact.Filipino)
French Tag = Tag{lang: _fr} // fr French Tag = Tag(compact.French)
CanadianFrench Tag = Tag{lang: _fr, region: _CA} // fr-CA CanadianFrench Tag = Tag(compact.CanadianFrench)
Gujarati Tag = Tag{lang: _gu} // gu Gujarati Tag = Tag(compact.Gujarati)
Hebrew Tag = Tag{lang: _he} // he Hebrew Tag = Tag(compact.Hebrew)
Hindi Tag = Tag{lang: _hi} // hi Hindi Tag = Tag(compact.Hindi)
Croatian Tag = Tag{lang: _hr} // hr Croatian Tag = Tag(compact.Croatian)
Hungarian Tag = Tag{lang: _hu} // hu Hungarian Tag = Tag(compact.Hungarian)
Armenian Tag = Tag{lang: _hy} // hy Armenian Tag = Tag(compact.Armenian)
Indonesian Tag = Tag{lang: _id} // id Indonesian Tag = Tag(compact.Indonesian)
Icelandic Tag = Tag{lang: _is} // is Icelandic Tag = Tag(compact.Icelandic)
Italian Tag = Tag{lang: _it} // it Italian Tag = Tag(compact.Italian)
Japanese Tag = Tag{lang: _ja} // ja Japanese Tag = Tag(compact.Japanese)
Georgian Tag = Tag{lang: _ka} // ka Georgian Tag = Tag(compact.Georgian)
Kazakh Tag = Tag{lang: _kk} // kk Kazakh Tag = Tag(compact.Kazakh)
Khmer Tag = Tag{lang: _km} // km Khmer Tag = Tag(compact.Khmer)
Kannada Tag = Tag{lang: _kn} // kn Kannada Tag = Tag(compact.Kannada)
Korean Tag = Tag{lang: _ko} // ko Korean Tag = Tag(compact.Korean)
Kirghiz Tag = Tag{lang: _ky} // ky Kirghiz Tag = Tag(compact.Kirghiz)
Lao Tag = Tag{lang: _lo} // lo Lao Tag = Tag(compact.Lao)
Lithuanian Tag = Tag{lang: _lt} // lt Lithuanian Tag = Tag(compact.Lithuanian)
Latvian Tag = Tag{lang: _lv} // lv Latvian Tag = Tag(compact.Latvian)
Macedonian Tag = Tag{lang: _mk} // mk Macedonian Tag = Tag(compact.Macedonian)
Malayalam Tag = Tag{lang: _ml} // ml Malayalam Tag = Tag(compact.Malayalam)
Mongolian Tag = Tag{lang: _mn} // mn Mongolian Tag = Tag(compact.Mongolian)
Marathi Tag = Tag{lang: _mr} // mr Marathi Tag = Tag(compact.Marathi)
Malay Tag = Tag{lang: _ms} // ms Malay Tag = Tag(compact.Malay)
Burmese Tag = Tag{lang: _my} // my Burmese Tag = Tag(compact.Burmese)
Nepali Tag = Tag{lang: _ne} // ne Nepali Tag = Tag(compact.Nepali)
Dutch Tag = Tag{lang: _nl} // nl Dutch Tag = Tag(compact.Dutch)
Norwegian Tag = Tag{lang: _no} // no Norwegian Tag = Tag(compact.Norwegian)
Punjabi Tag = Tag{lang: _pa} // pa Punjabi Tag = Tag(compact.Punjabi)
Polish Tag = Tag{lang: _pl} // pl Polish Tag = Tag(compact.Polish)
Portuguese Tag = Tag{lang: _pt} // pt Portuguese Tag = Tag(compact.Portuguese)
BrazilianPortuguese Tag = Tag{lang: _pt, region: _BR} // pt-BR BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
EuropeanPortuguese Tag = Tag{lang: _pt, region: _PT} // pt-PT EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
Romanian Tag = Tag{lang: _ro} // ro Romanian Tag = Tag(compact.Romanian)
Russian Tag = Tag{lang: _ru} // ru Russian Tag = Tag(compact.Russian)
Sinhala Tag = Tag{lang: _si} // si Sinhala Tag = Tag(compact.Sinhala)
Slovak Tag = Tag{lang: _sk} // sk Slovak Tag = Tag(compact.Slovak)
Slovenian Tag = Tag{lang: _sl} // sl Slovenian Tag = Tag(compact.Slovenian)
Albanian Tag = Tag{lang: _sq} // sq Albanian Tag = Tag(compact.Albanian)
Serbian Tag = Tag{lang: _sr} // sr Serbian Tag = Tag(compact.Serbian)
SerbianLatin Tag = Tag{lang: _sr, script: _Latn} // sr-Latn SerbianLatin Tag = Tag(compact.SerbianLatin)
Swedish Tag = Tag{lang: _sv} // sv Swedish Tag = Tag(compact.Swedish)
Swahili Tag = Tag{lang: _sw} // sw Swahili Tag = Tag(compact.Swahili)
Tamil Tag = Tag{lang: _ta} // ta Tamil Tag = Tag(compact.Tamil)
Telugu Tag = Tag{lang: _te} // te Telugu Tag = Tag(compact.Telugu)
Thai Tag = Tag{lang: _th} // th Thai Tag = Tag(compact.Thai)
Turkish Tag = Tag{lang: _tr} // tr Turkish Tag = Tag(compact.Turkish)
Ukrainian Tag = Tag{lang: _uk} // uk Ukrainian Tag = Tag(compact.Ukrainian)
Urdu Tag = Tag{lang: _ur} // ur Urdu Tag = Tag(compact.Urdu)
Uzbek Tag = Tag{lang: _uz} // uz Uzbek Tag = Tag(compact.Uzbek)
Vietnamese Tag = Tag{lang: _vi} // vi Vietnamese Tag = Tag(compact.Vietnamese)
Chinese Tag = Tag{lang: _zh} // zh Chinese Tag = Tag(compact.Chinese)
SimplifiedChinese Tag = Tag{lang: _zh, script: _Hans} // zh-Hans SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
TraditionalChinese Tag = Tag{lang: _zh, script: _Hant} // zh-Hant TraditionalChinese Tag = Tag(compact.TraditionalChinese)
Zulu Tag = Tag{lang: _zu} // zu Zulu Tag = Tag(compact.Zulu)
) )

View File

@ -78,8 +78,8 @@ type SpanningTransformer interface {
// considering the error err. // considering the error err.
// //
// A nil error means that all input bytes are known to be identical to the // A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be be returned // output produced by the Transformer. A nil error can be returned
// regardless of whether atEOF is true. If err is nil, then then n must // regardless of whether atEOF is true. If err is nil, then n must
// equal len(src); the converse is not necessarily true. // equal len(src); the converse is not necessarily true.
// //
// ErrEndOfSpan means that the Transformer output may differ from the // ErrEndOfSpan means that the Transformer output may differ from the
@ -493,7 +493,7 @@ func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err erro
return dstL.n, srcL.p, err return dstL.n, srcL.p, err
} }
// Deprecated: use runes.Remove instead. // Deprecated: Use runes.Remove instead.
func RemoveFunc(f func(r rune) bool) Transformer { func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f) return removeF(f)
} }
@ -648,7 +648,8 @@ func String(t Transformer, s string) (result string, n int, err error) {
// Transform the remaining input, growing dst and src buffers as necessary. // Transform the remaining input, growing dst and src buffers as necessary.
for { for {
n := copy(src, s[pSrc:]) n := copy(src, s[pSrc:])
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s)) atEOF := pSrc+n == len(s)
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], atEOF)
pDst += nDst pDst += nDst
pSrc += nSrc pSrc += nSrc
@ -659,6 +660,9 @@ func String(t Transformer, s string) (result string, n int, err error) {
dst = grow(dst, pDst) dst = grow(dst, pDst)
} }
} else if err == ErrShortSrc { } else if err == ErrShortSrc {
if atEOF {
return string(dst[:pDst]), pSrc, err
}
if nSrc == 0 { if nSrc == 0 {
src = grow(src, 0) src = grow(src, 0)
} }

17
vendor/gopkg.in/ini.v1/.travis.yml generated vendored
View File

@ -1,17 +0,0 @@
sudo: false
language: go
go:
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
script:
- go get golang.org/x/tools/cmd/cover
- go get github.com/smartystreets/goconvey
- mkdir -p $HOME/gopath/src/gopkg.in
- ln -s $HOME/gopath/src/github.com/go-ini/ini $HOME/gopath/src/gopkg.in/ini.v1
- cd $HOME/gopath/src/gopkg.in/ini.v1
- go test -v -cover -race

2
vendor/gopkg.in/ini.v1/Makefile generated vendored
View File

@ -6,7 +6,7 @@ test:
go test -v -cover -race go test -v -cover -race
bench: bench:
go test -v -cover -race -test.bench=. -test.benchmem go test -v -cover -test.bench=. -test.benchmem
vet: vet:
go vet go vet

19
vendor/gopkg.in/ini.v1/README.md generated vendored
View File

@ -1,5 +1,9 @@
INI [![Build Status](https://travis-ci.org/go-ini/ini.svg?branch=master)](https://travis-ci.org/go-ini/ini) [![Sourcegraph](https://img.shields.io/badge/view%20on-Sourcegraph-brightgreen.svg)](https://sourcegraph.com/github.com/go-ini/ini) # INI
===
[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/go-ini/ini/Go?logo=github&style=for-the-badge)](https://github.com/go-ini/ini/actions?query=workflow%3AGo)
[![codecov](https://img.shields.io/codecov/c/github/go-ini/ini/master?logo=codecov&style=for-the-badge)](https://codecov.io/gh/go-ini/ini)
[![GoDoc](https://img.shields.io/badge/GoDoc-Reference-blue?style=for-the-badge&logo=go)](https://pkg.go.dev/github.com/go-ini/ini?tab=doc)
[![Sourcegraph](https://img.shields.io/badge/view%20on-Sourcegraph-brightgreen.svg?style=for-the-badge&logo=sourcegraph)](https://sourcegraph.com/github.com/go-ini/ini)
![](https://avatars0.githubusercontent.com/u/10216035?v=3&s=200) ![](https://avatars0.githubusercontent.com/u/10216035?v=3&s=200)
@ -7,7 +11,7 @@ Package ini provides INI file read and write functionality in Go.
## Features ## Features
- Load from multiple data sources(`[]byte`, file and `io.ReadCloser`) with overwrites. - Load from multiple data sources(file, `[]byte`, `io.Reader` and `io.ReadCloser`) with overwrites.
- Read with recursion values. - Read with recursion values.
- Read with parent-child sections. - Read with parent-child sections.
- Read with auto-increment key names. - Read with auto-increment key names.
@ -22,24 +26,17 @@ Package ini provides INI file read and write functionality in Go.
The minimum requirement of Go is **1.6**. The minimum requirement of Go is **1.6**.
To use a tagged revision:
```sh ```sh
$ go get gopkg.in/ini.v1 $ go get gopkg.in/ini.v1
``` ```
To use with latest changes:
```sh
$ go get github.com/go-ini/ini
```
Please add `-u` flag to update in the future. Please add `-u` flag to update in the future.
## Getting Help ## Getting Help
- [Getting Started](https://ini.unknwon.io/docs/intro/getting_started) - [Getting Started](https://ini.unknwon.io/docs/intro/getting_started)
- [API Documentation](https://gowalker.org/gopkg.in/ini.v1) - [API Documentation](https://gowalker.org/gopkg.in/ini.v1)
- 中国大陆镜像https://ini.unknwon.cn
## License ## License

9
vendor/gopkg.in/ini.v1/codecov.yml generated vendored Normal file
View File

@ -0,0 +1,9 @@
coverage:
range: "60...95"
status:
project:
default:
threshold: 1%
comment:
layout: 'diff, files'

76
vendor/gopkg.in/ini.v1/data_source.go generated vendored Normal file
View File

@ -0,0 +1,76 @@
// Copyright 2019 Unknwon
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
package ini
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
)
var (
_ dataSource = (*sourceFile)(nil)
_ dataSource = (*sourceData)(nil)
_ dataSource = (*sourceReadCloser)(nil)
)
// dataSource is an interface that returns object which can be read and closed.
type dataSource interface {
ReadCloser() (io.ReadCloser, error)
}
// sourceFile represents an object that contains content on the local file system.
type sourceFile struct {
name string
}
func (s sourceFile) ReadCloser() (_ io.ReadCloser, err error) {
return os.Open(s.name)
}
// sourceData represents an object that contains content in memory.
type sourceData struct {
data []byte
}
func (s *sourceData) ReadCloser() (io.ReadCloser, error) {
return ioutil.NopCloser(bytes.NewReader(s.data)), nil
}
// sourceReadCloser represents an input stream with Close method.
type sourceReadCloser struct {
reader io.ReadCloser
}
func (s *sourceReadCloser) ReadCloser() (io.ReadCloser, error) {
return s.reader, nil
}
func parseDataSource(source interface{}) (dataSource, error) {
switch s := source.(type) {
case string:
return sourceFile{s}, nil
case []byte:
return &sourceData{s}, nil
case io.ReadCloser:
return &sourceReadCloser{s}, nil
case io.Reader:
return &sourceReadCloser{ioutil.NopCloser(s)}, nil
default:
return nil, fmt.Errorf("error parsing data source: unknown type %q", s)
}
}

25
vendor/gopkg.in/ini.v1/deprecated.go generated vendored Normal file
View File

@ -0,0 +1,25 @@
// Copyright 2019 Unknwon
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
package ini
const (
// Deprecated: Use "DefaultSection" instead.
DEFAULT_SECTION = DefaultSection
)
var (
// Deprecated: AllCapsUnderscore converts to format ALL_CAPS_UNDERSCORE.
AllCapsUnderscore = SnackCase
)

2
vendor/gopkg.in/ini.v1/error.go generated vendored
View File

@ -18,10 +18,12 @@ import (
"fmt" "fmt"
) )
// ErrDelimiterNotFound indicates the error type of no delimiter is found which there should be one.
type ErrDelimiterNotFound struct { type ErrDelimiterNotFound struct {
Line string Line string
} }
// IsErrDelimiterNotFound returns true if the given error is an instance of ErrDelimiterNotFound.
func IsErrDelimiterNotFound(err error) bool { func IsErrDelimiterNotFound(err error) bool {
_, ok := err.(ErrDelimiterNotFound) _, ok := err.(ErrDelimiterNotFound)
return ok return ok

179
vendor/gopkg.in/ini.v1/file.go generated vendored
View File

@ -25,7 +25,7 @@ import (
"sync" "sync"
) )
// File represents a combination of a or more INI file(s) in memory. // File represents a combination of one or more INI files in memory.
type File struct { type File struct {
options LoadOptions options LoadOptions
dataSources []dataSource dataSources []dataSource
@ -36,8 +36,12 @@ type File struct {
// To keep data in order. // To keep data in order.
sectionList []string sectionList []string
// To keep track of the index of a section with same name.
// This meta list is only used with non-unique section names are allowed.
sectionIndexes []int
// Actual data is stored here. // Actual data is stored here.
sections map[string]*Section sections map[string][]*Section
NameMapper NameMapper
ValueMapper ValueMapper
@ -48,27 +52,40 @@ func newFile(dataSources []dataSource, opts LoadOptions) *File {
if len(opts.KeyValueDelimiters) == 0 { if len(opts.KeyValueDelimiters) == 0 {
opts.KeyValueDelimiters = "=:" opts.KeyValueDelimiters = "=:"
} }
if len(opts.KeyValueDelimiterOnWrite) == 0 {
opts.KeyValueDelimiterOnWrite = "="
}
if len(opts.ChildSectionDelimiter) == 0 {
opts.ChildSectionDelimiter = "."
}
return &File{ return &File{
BlockMode: true, BlockMode: true,
dataSources: dataSources, dataSources: dataSources,
sections: make(map[string]*Section), sections: make(map[string][]*Section),
sectionList: make([]string, 0, 10),
options: opts, options: opts,
} }
} }
// Empty returns an empty file object. // Empty returns an empty file object.
func Empty() *File { func Empty(opts ...LoadOptions) *File {
// Ignore error here, we sure our data is good. var opt LoadOptions
f, _ := Load([]byte("")) if len(opts) > 0 {
opt = opts[0]
}
// Ignore error here, we are sure our data is good.
f, _ := LoadSources(opt, []byte(""))
return f return f
} }
// NewSection creates a new section. // NewSection creates a new section.
func (f *File) NewSection(name string) (*Section, error) { func (f *File) NewSection(name string) (*Section, error) {
if len(name) == 0 { if len(name) == 0 {
return nil, errors.New("error creating new section: empty section name") return nil, errors.New("empty section name")
} else if f.options.Insensitive && name != DEFAULT_SECTION { }
if (f.options.Insensitive || f.options.InsensitiveSections) && name != DefaultSection {
name = strings.ToLower(name) name = strings.ToLower(name)
} }
@ -77,13 +94,20 @@ func (f *File) NewSection(name string) (*Section, error) {
defer f.lock.Unlock() defer f.lock.Unlock()
} }
if inSlice(name, f.sectionList) { if !f.options.AllowNonUniqueSections && inSlice(name, f.sectionList) {
return f.sections[name], nil return f.sections[name][0], nil
} }
f.sectionList = append(f.sectionList, name) f.sectionList = append(f.sectionList, name)
f.sections[name] = newSection(f, name)
return f.sections[name], nil // NOTE: Append to indexes must happen before appending to sections,
// otherwise index will have off-by-one problem.
f.sectionIndexes = append(f.sectionIndexes, len(f.sections[name]))
sec := newSection(f, name)
f.sections[name] = append(f.sections[name], sec)
return sec, nil
} }
// NewRawSection creates a new section with an unparseable body. // NewRawSection creates a new section with an unparseable body.
@ -110,10 +134,20 @@ func (f *File) NewSections(names ...string) (err error) {
// GetSection returns section by given name. // GetSection returns section by given name.
func (f *File) GetSection(name string) (*Section, error) { func (f *File) GetSection(name string) (*Section, error) {
if len(name) == 0 { secs, err := f.SectionsByName(name)
name = DEFAULT_SECTION if err != nil {
return nil, err
} }
if f.options.Insensitive {
return secs[0], err
}
// SectionsByName returns all sections with given name.
func (f *File) SectionsByName(name string) ([]*Section, error) {
if len(name) == 0 {
name = DefaultSection
}
if f.options.Insensitive || f.options.InsensitiveSections {
name = strings.ToLower(name) name = strings.ToLower(name)
} }
@ -122,11 +156,12 @@ func (f *File) GetSection(name string) (*Section, error) {
defer f.lock.RUnlock() defer f.lock.RUnlock()
} }
sec := f.sections[name] secs := f.sections[name]
if sec == nil { if len(secs) == 0 {
return nil, fmt.Errorf("section '%s' does not exist", name) return nil, fmt.Errorf("section %q does not exist", name)
} }
return sec, nil
return secs, nil
} }
// Section assumes named section exists and returns a zero-value when not. // Section assumes named section exists and returns a zero-value when not.
@ -141,7 +176,20 @@ func (f *File) Section(name string) *Section {
return sec return sec
} }
// Section returns list of Section. // SectionWithIndex assumes named section exists and returns a new section when not.
func (f *File) SectionWithIndex(name string, index int) *Section {
secs, err := f.SectionsByName(name)
if err != nil || len(secs) <= index {
// NOTE: It's OK here because the only possible error is empty section name,
// but if it's empty, this piece of code won't be executed.
newSec, _ := f.NewSection(name)
return newSec
}
return secs[index]
}
// Sections returns a list of Section stored in the current instance.
func (f *File) Sections() []*Section { func (f *File) Sections() []*Section {
if f.BlockMode { if f.BlockMode {
f.lock.RLock() f.lock.RLock()
@ -150,7 +198,7 @@ func (f *File) Sections() []*Section {
sections := make([]*Section, len(f.sectionList)) sections := make([]*Section, len(f.sectionList))
for i, name := range f.sectionList { for i, name := range f.sectionList {
sections[i] = f.sections[name] sections[i] = f.sections[name][f.sectionIndexes[i]]
} }
return sections return sections
} }
@ -167,24 +215,70 @@ func (f *File) SectionStrings() []string {
return list return list
} }
// DeleteSection deletes a section. // DeleteSection deletes a section or all sections with given name.
func (f *File) DeleteSection(name string) { func (f *File) DeleteSection(name string) {
secs, err := f.SectionsByName(name)
if err != nil {
return
}
for i := 0; i < len(secs); i++ {
// For non-unique sections, it is always needed to remove the first one so
// in the next iteration, the subsequent section continue having index 0.
// Ignoring the error as index 0 never returns an error.
_ = f.DeleteSectionWithIndex(name, 0)
}
}
// DeleteSectionWithIndex deletes a section with given name and index.
func (f *File) DeleteSectionWithIndex(name string, index int) error {
if !f.options.AllowNonUniqueSections && index != 0 {
return fmt.Errorf("delete section with non-zero index is only allowed when non-unique sections is enabled")
}
if len(name) == 0 {
name = DefaultSection
}
if f.options.Insensitive || f.options.InsensitiveSections {
name = strings.ToLower(name)
}
if f.BlockMode { if f.BlockMode {
f.lock.Lock() f.lock.Lock()
defer f.lock.Unlock() defer f.lock.Unlock()
} }
if len(name) == 0 { // Count occurrences of the sections
name = DEFAULT_SECTION occurrences := 0
sectionListCopy := make([]string, len(f.sectionList))
copy(sectionListCopy, f.sectionList)
for i, s := range sectionListCopy {
if s != name {
continue
} }
for i, s := range f.sectionList { if occurrences == index {
if s == name { if len(f.sections[name]) <= 1 {
delete(f.sections, name) // The last one in the map
} else {
f.sections[name] = append(f.sections[name][:index], f.sections[name][index+1:]...)
}
// Fix section lists
f.sectionList = append(f.sectionList[:i], f.sectionList[i+1:]...) f.sectionList = append(f.sectionList[:i], f.sectionList[i+1:]...)
delete(f.sections, name) f.sectionIndexes = append(f.sectionIndexes[:i], f.sectionIndexes[i+1:]...)
return
} else if occurrences > index {
// Fix the indices of all following sections with this name.
f.sectionIndexes[i-1]--
} }
occurrences++
} }
return nil
} }
func (f *File) reload(s dataSource) error { func (f *File) reload(s dataSource) error {
@ -203,11 +297,14 @@ func (f *File) Reload() (err error) {
if err = f.reload(s); err != nil { if err = f.reload(s); err != nil {
// In loose mode, we create an empty default section for nonexistent files. // In loose mode, we create an empty default section for nonexistent files.
if os.IsNotExist(err) && f.options.Loose { if os.IsNotExist(err) && f.options.Loose {
f.parse(bytes.NewBuffer(nil)) _ = f.parse(bytes.NewBuffer(nil))
continue continue
} }
return err return err
} }
if f.options.ShortCircuit {
return nil
}
} }
return nil return nil
} }
@ -230,16 +327,16 @@ func (f *File) Append(source interface{}, others ...interface{}) error {
} }
func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) { func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
equalSign := DefaultFormatLeft + "=" + DefaultFormatRight equalSign := DefaultFormatLeft + f.options.KeyValueDelimiterOnWrite + DefaultFormatRight
if PrettyFormat || PrettyEqual { if PrettyFormat || PrettyEqual {
equalSign = " = " equalSign = fmt.Sprintf(" %s ", f.options.KeyValueDelimiterOnWrite)
} }
// Use buffer to make sure target is safe until finish encoding. // Use buffer to make sure target is safe until finish encoding.
buf := bytes.NewBuffer(nil) buf := bytes.NewBuffer(nil)
for i, sname := range f.sectionList { for i, sname := range f.sectionList {
sec := f.Section(sname) sec := f.SectionWithIndex(sname, f.sectionIndexes[i])
if len(sec.Comment) > 0 { if len(sec.Comment) > 0 {
// Support multiline comments // Support multiline comments
lines := strings.Split(sec.Comment, LineBreak) lines := strings.Split(sec.Comment, LineBreak)
@ -256,7 +353,7 @@ func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
} }
} }
if i > 0 || DefaultHeader { if i > 0 || DefaultHeader || (i == 0 && strings.ToUpper(sec.name) != DefaultSection) {
if _, err := buf.WriteString("[" + sname + "]" + LineBreak); err != nil { if _, err := buf.WriteString("[" + sname + "]" + LineBreak); err != nil {
return nil, err return nil, err
} }
@ -282,7 +379,7 @@ func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
} }
// Count and generate alignment length and buffer spaces using the // Count and generate alignment length and buffer spaces using the
// longest key. Keys may be modifed if they contain certain characters so // longest key. Keys may be modified if they contain certain characters so
// we need to take that into account in our calculation. // we need to take that into account in our calculation.
alignLength := 0 alignLength := 0
if PrettyFormat { if PrettyFormat {
@ -302,11 +399,11 @@ func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
} }
alignSpaces := bytes.Repeat([]byte(" "), alignLength) alignSpaces := bytes.Repeat([]byte(" "), alignLength)
KEY_LIST: KeyList:
for _, kname := range sec.keyList { for _, kname := range sec.keyList {
key := sec.Key(kname) key := sec.Key(kname)
if len(key.Comment) > 0 { if len(key.Comment) > 0 {
if len(indent) > 0 && sname != DEFAULT_SECTION { if len(indent) > 0 && sname != DefaultSection {
buf.WriteString(indent) buf.WriteString(indent)
} }
@ -325,7 +422,7 @@ func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
} }
} }
if len(indent) > 0 && sname != DEFAULT_SECTION { if len(indent) > 0 && sname != DefaultSection {
buf.WriteString(indent) buf.WriteString(indent)
} }
@ -347,7 +444,7 @@ func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
if kname != sec.keyList[len(sec.keyList)-1] { if kname != sec.keyList[len(sec.keyList)-1] {
buf.WriteString(LineBreak) buf.WriteString(LineBreak)
} }
continue KEY_LIST continue KeyList
} }
// Write out alignment spaces before "=" sign // Write out alignment spaces before "=" sign
@ -360,6 +457,8 @@ func (f *File) writeToBuffer(indent string) (*bytes.Buffer, error) {
val = `"""` + val + `"""` val = `"""` + val + `"""`
} else if !f.options.IgnoreInlineComment && strings.ContainsAny(val, "#;") { } else if !f.options.IgnoreInlineComment && strings.ContainsAny(val, "#;") {
val = "`" + val + "`" val = "`" + val + "`"
} else if len(strings.TrimSpace(val)) != len(val) {
val = `"` + val + `"`
} }
if _, err := buf.WriteString(equalSign + val + LineBreak); err != nil { if _, err := buf.WriteString(equalSign + val + LineBreak); err != nil {
return nil, err return nil, err
@ -403,7 +502,7 @@ func (f *File) WriteTo(w io.Writer) (int64, error) {
// SaveToIndent writes content to file system with given value indention. // SaveToIndent writes content to file system with given value indention.
func (f *File) SaveToIndent(filename, indent string) error { func (f *File) SaveToIndent(filename, indent string) error {
// Note: Because we are truncating with os.Create, // Note: Because we are truncating with os.Create,
// so it's safer to save to a temporary file location and rename afte done. // so it's safer to save to a temporary file location and rename after done.
buf, err := f.writeToBuffer(indent) buf, err := f.writeToBuffer(indent)
if err != nil { if err != nil {
return err return err

24
vendor/gopkg.in/ini.v1/helper.go generated vendored Normal file
View File

@ -0,0 +1,24 @@
// Copyright 2019 Unknwon
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
package ini
func inSlice(str string, s []string) bool {
for _, v := range s {
if str == v {
return true
}
}
return false
}

127
vendor/gopkg.in/ini.v1/ini.go generated vendored
View File

@ -18,128 +18,71 @@
package ini package ini
import ( import (
"bytes"
"fmt"
"io"
"io/ioutil"
"os" "os"
"regexp" "regexp"
"runtime" "runtime"
"strings"
) )
const ( const (
// Name for default section. You can use this constant or the string literal. // DefaultSection is the name of default section. You can use this constant or the string literal.
// In most of cases, an empty string is all you need to access the section. // In most of cases, an empty string is all you need to access the section.
DEFAULT_SECTION = "DEFAULT" DefaultSection = "DEFAULT"
// Maximum allowed depth when recursively substituing variable names. // Maximum allowed depth when recursively substituing variable names.
_DEPTH_VALUES = 99 depthValues = 99
_VERSION = "1.42.0"
) )
// Version returns current package version literal.
func Version() string {
return _VERSION
}
var ( var (
// Delimiter to determine or compose a new line. // LineBreak is the delimiter to determine or compose a new line.
// This variable will be changed to "\r\n" automatically on Windows // This variable will be changed to "\r\n" automatically on Windows at package init time.
// at package init time.
LineBreak = "\n" LineBreak = "\n"
// Place custom spaces when PrettyFormat and PrettyEqual are both disabled
DefaultFormatLeft = ""
DefaultFormatRight = ""
// Variable regexp pattern: %(variable)s // Variable regexp pattern: %(variable)s
varPattern = regexp.MustCompile(`%\(([^\)]+)\)s`) varPattern = regexp.MustCompile(`%\(([^)]+)\)s`)
// Indicate whether to align "=" sign with spaces to produce pretty output // DefaultHeader explicitly writes default section header.
// or reduce all possible spaces for compact format.
PrettyFormat = true
// Place spaces around "=" sign even when PrettyFormat is false
PrettyEqual = false
// Explicitly write DEFAULT section header
DefaultHeader = false DefaultHeader = false
// Indicate whether to put a line between sections // PrettySection indicates whether to put a line between sections.
PrettySection = true PrettySection = true
// PrettyFormat indicates whether to align "=" sign with spaces to produce pretty output
// or reduce all possible spaces for compact format.
PrettyFormat = true
// PrettyEqual places spaces around "=" sign even when PrettyFormat is false.
PrettyEqual = false
// DefaultFormatLeft places custom spaces on the left when PrettyFormat and PrettyEqual are both disabled.
DefaultFormatLeft = ""
// DefaultFormatRight places custom spaces on the right when PrettyFormat and PrettyEqual are both disabled.
DefaultFormatRight = ""
) )
var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")
func init() { func init() {
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" && !inTest {
LineBreak = "\r\n" LineBreak = "\r\n"
} }
} }
func inSlice(str string, s []string) bool { // LoadOptions contains all customized options used for load data source(s).
for _, v := range s {
if str == v {
return true
}
}
return false
}
// dataSource is an interface that returns object which can be read and closed.
type dataSource interface {
ReadCloser() (io.ReadCloser, error)
}
// sourceFile represents an object that contains content on the local file system.
type sourceFile struct {
name string
}
func (s sourceFile) ReadCloser() (_ io.ReadCloser, err error) {
return os.Open(s.name)
}
// sourceData represents an object that contains content in memory.
type sourceData struct {
data []byte
}
func (s *sourceData) ReadCloser() (io.ReadCloser, error) {
return ioutil.NopCloser(bytes.NewReader(s.data)), nil
}
// sourceReadCloser represents an input stream with Close method.
type sourceReadCloser struct {
reader io.ReadCloser
}
func (s *sourceReadCloser) ReadCloser() (io.ReadCloser, error) {
return s.reader, nil
}
func parseDataSource(source interface{}) (dataSource, error) {
switch s := source.(type) {
case string:
return sourceFile{s}, nil
case []byte:
return &sourceData{s}, nil
case io.ReadCloser:
return &sourceReadCloser{s}, nil
default:
return nil, fmt.Errorf("error parsing data source: unknown type '%s'", s)
}
}
type LoadOptions struct { type LoadOptions struct {
// Loose indicates whether the parser should ignore nonexistent files or return error. // Loose indicates whether the parser should ignore nonexistent files or return error.
Loose bool Loose bool
// Insensitive indicates whether the parser forces all section and key names to lowercase. // Insensitive indicates whether the parser forces all section and key names to lowercase.
Insensitive bool Insensitive bool
// InsensitiveSections indicates whether the parser forces all section to lowercase.
InsensitiveSections bool
// InsensitiveKeys indicates whether the parser forces all key names to lowercase.
InsensitiveKeys bool
// IgnoreContinuation indicates whether to ignore continuation lines while parsing. // IgnoreContinuation indicates whether to ignore continuation lines while parsing.
IgnoreContinuation bool IgnoreContinuation bool
// IgnoreInlineComment indicates whether to ignore comments at the end of value and treat it as part of value. // IgnoreInlineComment indicates whether to ignore comments at the end of value and treat it as part of value.
IgnoreInlineComment bool IgnoreInlineComment bool
// SkipUnrecognizableLines indicates whether to skip unrecognizable lines that do not conform to key/value pairs. // SkipUnrecognizableLines indicates whether to skip unrecognizable lines that do not conform to key/value pairs.
SkipUnrecognizableLines bool SkipUnrecognizableLines bool
// ShortCircuit indicates whether to ignore other configuration sources after loaded the first available configuration source.
ShortCircuit bool
// AllowBooleanKeys indicates whether to allow boolean type keys or treat as value is missing. // AllowBooleanKeys indicates whether to allow boolean type keys or treat as value is missing.
// This type of keys are mostly used in my.cnf. // This type of keys are mostly used in my.cnf.
AllowBooleanKeys bool AllowBooleanKeys bool
@ -170,10 +113,24 @@ type LoadOptions struct {
UnparseableSections []string UnparseableSections []string
// KeyValueDelimiters is the sequence of delimiters that are used to separate key and value. By default, it is "=:". // KeyValueDelimiters is the sequence of delimiters that are used to separate key and value. By default, it is "=:".
KeyValueDelimiters string KeyValueDelimiters string
// KeyValueDelimiterOnWrite is the delimiter that are used to separate key and value output. By default, it is "=".
KeyValueDelimiterOnWrite string
// ChildSectionDelimiter is the delimiter that is used to separate child sections. By default, it is ".".
ChildSectionDelimiter string
// PreserveSurroundedQuote indicates whether to preserve surrounded quote (single and double quotes). // PreserveSurroundedQuote indicates whether to preserve surrounded quote (single and double quotes).
PreserveSurroundedQuote bool PreserveSurroundedQuote bool
// DebugFunc is called to collect debug information (currently only useful to debug parsing Python-style multiline values).
DebugFunc DebugFunc
// ReaderBufferSize is the buffer size of the reader in bytes.
ReaderBufferSize int
// AllowNonUniqueSections indicates whether to allow sections with the same name multiple times.
AllowNonUniqueSections bool
} }
// DebugFunc is the type of function called to log parse events.
type DebugFunc func(message string)
// LoadSources allows caller to apply customized options for loading from data source(s).
func LoadSources(opts LoadOptions, source interface{}, others ...interface{}) (_ *File, err error) { func LoadSources(opts LoadOptions, source interface{}, others ...interface{}) (_ *File, err error) {
sources := make([]dataSource, len(others)+1) sources := make([]dataSource, len(others)+1)
sources[0], err = parseDataSource(source) sources[0], err = parseDataSource(source)

151
vendor/gopkg.in/ini.v1/key.go generated vendored
View File

@ -54,6 +54,16 @@ func (k *Key) addShadow(val string) error {
return errors.New("cannot add shadow to auto-increment or boolean key") return errors.New("cannot add shadow to auto-increment or boolean key")
} }
// Deduplicate shadows based on their values.
if k.value == val {
return nil
}
for i := range k.shadows {
if k.shadows[i].value == val {
return nil
}
}
shadow := newKey(k.s, k.name, val) shadow := newKey(k.s, k.name, val)
shadow.isShadow = true shadow.isShadow = true
k.shadows = append(k.shadows, shadow) k.shadows = append(k.shadows, shadow)
@ -77,6 +87,7 @@ func (k *Key) addNestedValue(val string) error {
return nil return nil
} }
// AddNestedValue adds a nested value to the key.
func (k *Key) AddNestedValue(val string) error { func (k *Key) AddNestedValue(val string) error {
if !k.s.f.options.AllowNestedValues { if !k.s.f.options.AllowNestedValues {
return errors.New("nested value is not allowed") return errors.New("nested value is not allowed")
@ -126,7 +137,7 @@ func (k *Key) transformValue(val string) string {
if !strings.Contains(val, "%") { if !strings.Contains(val, "%") {
return val return val
} }
for i := 0; i < _DEPTH_VALUES; i++ { for i := 0; i < depthValues; i++ {
vr := varPattern.FindString(val) vr := varPattern.FindString(val)
if len(vr) == 0 { if len(vr) == 0 {
break break
@ -136,10 +147,15 @@ func (k *Key) transformValue(val string) string {
noption := vr[2 : len(vr)-2] noption := vr[2 : len(vr)-2]
// Search in the same section. // Search in the same section.
// If not found or found the key itself, then search again in default section.
nk, err := k.s.GetKey(noption) nk, err := k.s.GetKey(noption)
if err != nil || k == nk { if err != nil || k == nk {
// Search again in default section.
nk, _ = k.s.f.Section("").GetKey(noption) nk, _ = k.s.f.Section("").GetKey(noption)
if nk == nil {
// Stop when no results found in the default section,
// and returns the value as-is.
break
}
} }
// Substitute by new value and take off leading '%(' and trailing ')s'. // Substitute by new value and take off leading '%(' and trailing ')s'.
@ -491,7 +507,7 @@ func (k *Key) Strings(delim string) []string {
buf.WriteRune(runes[idx]) buf.WriteRune(runes[idx])
} }
} }
idx += 1 idx++
if idx == len(runes) { if idx == len(runes) {
break break
} }
@ -553,6 +569,12 @@ func (k *Key) Uint64s(delim string) []uint64 {
return vals return vals
} }
// Bools returns list of bool divided by given delimiter. Any invalid input will be treated as zero value.
func (k *Key) Bools(delim string) []bool {
vals, _ := k.parseBools(k.Strings(delim), true, false)
return vals
}
// TimesFormat parses with given format and returns list of time.Time divided by given delimiter. // TimesFormat parses with given format and returns list of time.Time divided by given delimiter.
// Any invalid input will be treated as zero value (0001-01-01 00:00:00 +0000 UTC). // Any invalid input will be treated as zero value (0001-01-01 00:00:00 +0000 UTC).
func (k *Key) TimesFormat(format, delim string) []time.Time { func (k *Key) TimesFormat(format, delim string) []time.Time {
@ -601,6 +623,13 @@ func (k *Key) ValidUint64s(delim string) []uint64 {
return vals return vals
} }
// ValidBools returns list of bool divided by given delimiter. If some value is not 64-bit unsigned
// integer, then it will not be included to result list.
func (k *Key) ValidBools(delim string) []bool {
vals, _ := k.parseBools(k.Strings(delim), false, false)
return vals
}
// ValidTimesFormat parses with given format and returns list of time.Time divided by given delimiter. // ValidTimesFormat parses with given format and returns list of time.Time divided by given delimiter.
func (k *Key) ValidTimesFormat(format, delim string) []time.Time { func (k *Key) ValidTimesFormat(format, delim string) []time.Time {
vals, _ := k.parseTimesFormat(format, k.Strings(delim), false, false) vals, _ := k.parseTimesFormat(format, k.Strings(delim), false, false)
@ -637,6 +666,11 @@ func (k *Key) StrictUint64s(delim string) ([]uint64, error) {
return k.parseUint64s(k.Strings(delim), false, true) return k.parseUint64s(k.Strings(delim), false, true)
} }
// StrictBools returns list of bool divided by given delimiter or error on first invalid input.
func (k *Key) StrictBools(delim string) ([]bool, error) {
return k.parseBools(k.Strings(delim), false, true)
}
// StrictTimesFormat parses with given format and returns list of time.Time divided by given delimiter // StrictTimesFormat parses with given format and returns list of time.Time divided by given delimiter
// or error on first invalid input. // or error on first invalid input.
func (k *Key) StrictTimesFormat(format, delim string) ([]time.Time, error) { func (k *Key) StrictTimesFormat(format, delim string) ([]time.Time, error) {
@ -649,87 +683,130 @@ func (k *Key) StrictTimes(delim string) ([]time.Time, error) {
return k.StrictTimesFormat(time.RFC3339, delim) return k.StrictTimesFormat(time.RFC3339, delim)
} }
// parseBools transforms strings to bools.
func (k *Key) parseBools(strs []string, addInvalid, returnOnInvalid bool) ([]bool, error) {
vals := make([]bool, 0, len(strs))
parser := func(str string) (interface{}, error) {
val, err := parseBool(str)
return val, err
}
rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
if err == nil {
for _, val := range rawVals {
vals = append(vals, val.(bool))
}
}
return vals, err
}
// parseFloat64s transforms strings to float64s. // parseFloat64s transforms strings to float64s.
func (k *Key) parseFloat64s(strs []string, addInvalid, returnOnInvalid bool) ([]float64, error) { func (k *Key) parseFloat64s(strs []string, addInvalid, returnOnInvalid bool) ([]float64, error) {
vals := make([]float64, 0, len(strs)) vals := make([]float64, 0, len(strs))
for _, str := range strs { parser := func(str string) (interface{}, error) {
val, err := strconv.ParseFloat(str, 64) val, err := strconv.ParseFloat(str, 64)
if err != nil && returnOnInvalid { return val, err
return nil, err
} }
if err == nil || addInvalid { rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
vals = append(vals, val) if err == nil {
for _, val := range rawVals {
vals = append(vals, val.(float64))
} }
} }
return vals, nil return vals, err
} }
// parseInts transforms strings to ints. // parseInts transforms strings to ints.
func (k *Key) parseInts(strs []string, addInvalid, returnOnInvalid bool) ([]int, error) { func (k *Key) parseInts(strs []string, addInvalid, returnOnInvalid bool) ([]int, error) {
vals := make([]int, 0, len(strs)) vals := make([]int, 0, len(strs))
for _, str := range strs { parser := func(str string) (interface{}, error) {
valInt64, err := strconv.ParseInt(str, 0, 64) val, err := strconv.ParseInt(str, 0, 64)
val := int(valInt64) return val, err
if err != nil && returnOnInvalid {
return nil, err
} }
if err == nil || addInvalid { rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
vals = append(vals, val) if err == nil {
for _, val := range rawVals {
vals = append(vals, int(val.(int64)))
} }
} }
return vals, nil return vals, err
} }
// parseInt64s transforms strings to int64s. // parseInt64s transforms strings to int64s.
func (k *Key) parseInt64s(strs []string, addInvalid, returnOnInvalid bool) ([]int64, error) { func (k *Key) parseInt64s(strs []string, addInvalid, returnOnInvalid bool) ([]int64, error) {
vals := make([]int64, 0, len(strs)) vals := make([]int64, 0, len(strs))
for _, str := range strs { parser := func(str string) (interface{}, error) {
val, err := strconv.ParseInt(str, 0, 64) val, err := strconv.ParseInt(str, 0, 64)
if err != nil && returnOnInvalid { return val, err
return nil, err
} }
if err == nil || addInvalid {
vals = append(vals, val) rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
if err == nil {
for _, val := range rawVals {
vals = append(vals, val.(int64))
} }
} }
return vals, nil return vals, err
} }
// parseUints transforms strings to uints. // parseUints transforms strings to uints.
func (k *Key) parseUints(strs []string, addInvalid, returnOnInvalid bool) ([]uint, error) { func (k *Key) parseUints(strs []string, addInvalid, returnOnInvalid bool) ([]uint, error) {
vals := make([]uint, 0, len(strs)) vals := make([]uint, 0, len(strs))
for _, str := range strs { parser := func(str string) (interface{}, error) {
val, err := strconv.ParseUint(str, 0, 0) val, err := strconv.ParseUint(str, 0, 64)
if err != nil && returnOnInvalid { return val, err
return nil, err
} }
if err == nil || addInvalid {
vals = append(vals, uint(val)) rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
if err == nil {
for _, val := range rawVals {
vals = append(vals, uint(val.(uint64)))
} }
} }
return vals, nil return vals, err
} }
// parseUint64s transforms strings to uint64s. // parseUint64s transforms strings to uint64s.
func (k *Key) parseUint64s(strs []string, addInvalid, returnOnInvalid bool) ([]uint64, error) { func (k *Key) parseUint64s(strs []string, addInvalid, returnOnInvalid bool) ([]uint64, error) {
vals := make([]uint64, 0, len(strs)) vals := make([]uint64, 0, len(strs))
for _, str := range strs { parser := func(str string) (interface{}, error) {
val, err := strconv.ParseUint(str, 0, 64) val, err := strconv.ParseUint(str, 0, 64)
if err != nil && returnOnInvalid { return val, err
return nil, err
} }
if err == nil || addInvalid { rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
vals = append(vals, val) if err == nil {
for _, val := range rawVals {
vals = append(vals, val.(uint64))
} }
} }
return vals, nil return vals, err
} }
type Parser func(str string) (interface{}, error)
// parseTimesFormat transforms strings to times in given format. // parseTimesFormat transforms strings to times in given format.
func (k *Key) parseTimesFormat(format string, strs []string, addInvalid, returnOnInvalid bool) ([]time.Time, error) { func (k *Key) parseTimesFormat(format string, strs []string, addInvalid, returnOnInvalid bool) ([]time.Time, error) {
vals := make([]time.Time, 0, len(strs)) vals := make([]time.Time, 0, len(strs))
for _, str := range strs { parser := func(str string) (interface{}, error) {
val, err := time.Parse(format, str) val, err := time.Parse(format, str)
return val, err
}
rawVals, err := k.doParse(strs, addInvalid, returnOnInvalid, parser)
if err == nil {
for _, val := range rawVals {
vals = append(vals, val.(time.Time))
}
}
return vals, err
}
// doParse transforms strings to different types
func (k *Key) doParse(strs []string, addInvalid, returnOnInvalid bool, parser Parser) ([]interface{}, error) {
vals := make([]interface{}, 0, len(strs))
for _, str := range strs {
val, err := parser(str)
if err != nil && returnOnInvalid { if err != nil && returnOnInvalid {
return nil, err return nil, err
} }

217
vendor/gopkg.in/ini.v1/parser.go generated vendored
View File

@ -25,27 +25,46 @@ import (
"unicode" "unicode"
) )
var pythonMultiline = regexp.MustCompile("^(\\s+)([^\n]+)") const minReaderBufferSize = 4096
type tokenType int var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
const ( type parserOptions struct {
_TOKEN_INVALID tokenType = iota IgnoreContinuation bool
_TOKEN_COMMENT IgnoreInlineComment bool
_TOKEN_SECTION AllowPythonMultilineValues bool
_TOKEN_KEY SpaceBeforeInlineComment bool
) UnescapeValueDoubleQuotes bool
UnescapeValueCommentSymbols bool
PreserveSurroundedQuote bool
DebugFunc DebugFunc
ReaderBufferSize int
}
type parser struct { type parser struct {
buf *bufio.Reader buf *bufio.Reader
options parserOptions
isEOF bool isEOF bool
count int count int
comment *bytes.Buffer comment *bytes.Buffer
} }
func newParser(r io.Reader) *parser { func (p *parser) debug(format string, args ...interface{}) {
if p.options.DebugFunc != nil {
p.options.DebugFunc(fmt.Sprintf(format, args...))
}
}
func newParser(r io.Reader, opts parserOptions) *parser {
size := opts.ReaderBufferSize
if size < minReaderBufferSize {
size = minReaderBufferSize
}
return &parser{ return &parser{
buf: bufio.NewReader(r), buf: bufio.NewReaderSize(r, size),
options: opts,
count: 1, count: 1,
comment: &bytes.Buffer{}, comment: &bytes.Buffer{},
} }
@ -65,7 +84,10 @@ func (p *parser) BOM() error {
case mask[0] == 254 && mask[1] == 255: case mask[0] == 254 && mask[1] == 255:
fallthrough fallthrough
case mask[0] == 255 && mask[1] == 254: case mask[0] == 255 && mask[1] == 254:
p.buf.Read(mask) _, err = p.buf.Read(mask)
if err != nil {
return err
}
case mask[0] == 239 && mask[1] == 187: case mask[0] == 239 && mask[1] == 187:
mask, err := p.buf.Peek(3) mask, err := p.buf.Peek(3)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
@ -74,7 +96,10 @@ func (p *parser) BOM() error {
return nil return nil
} }
if mask[2] == 191 { if mask[2] == 191 {
p.buf.Read(mask) _, err = p.buf.Read(mask)
if err != nil {
return err
}
} }
} }
return nil return nil
@ -116,7 +141,7 @@ func readKeyName(delimiters string, in []byte) (string, int, error) {
} }
// Get out key name // Get out key name
endIdx := -1 var endIdx int
if len(keyQuote) > 0 { if len(keyQuote) > 0 {
startIdx := len(keyQuote) startIdx := len(keyQuote)
// FIXME: fail case -> """"""name"""=value // FIXME: fail case -> """"""name"""=value
@ -162,7 +187,7 @@ func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
} }
val += next val += next
if p.isEOF { if p.isEOF {
return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next) return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
} }
} }
return val, nil return val, nil
@ -196,12 +221,13 @@ func hasSurroundedQuote(in string, quote byte) bool {
strings.IndexByte(in[1:], quote) == len(in)-2 strings.IndexByte(in[1:], quote) == len(in)-2
} }
func (p *parser) readValue(in []byte, func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
parserBufferSize int,
ignoreContinuation, ignoreInlineComment, unescapeValueDoubleQuotes, unescapeValueCommentSymbols, allowPythonMultilines, spaceBeforeInlineComment, preserveSurroundedQuote bool) (string, error) {
line := strings.TrimLeftFunc(string(in), unicode.IsSpace) line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
if len(line) == 0 { if len(line) == 0 {
if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
return p.readPythonMultilines(line, bufferSize)
}
return "", nil return "", nil
} }
@ -210,7 +236,7 @@ func (p *parser) readValue(in []byte,
valQuote = `"""` valQuote = `"""`
} else if line[0] == '`' { } else if line[0] == '`' {
valQuote = "`" valQuote = "`"
} else if unescapeValueDoubleQuotes && line[0] == '"' { } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
valQuote = `"` valQuote = `"`
} }
@ -222,7 +248,7 @@ func (p *parser) readValue(in []byte,
return p.readMultilines(line, line[startIdx:], valQuote) return p.readMultilines(line, line[startIdx:], valQuote)
} }
if unescapeValueDoubleQuotes && valQuote == `"` { if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
} }
return line[startIdx : pos+startIdx], nil return line[startIdx : pos+startIdx], nil
@ -234,14 +260,14 @@ func (p *parser) readValue(in []byte,
trimmedLastChar := line[len(line)-1] trimmedLastChar := line[len(line)-1]
// Check continuation lines when desired // Check continuation lines when desired
if !ignoreContinuation && trimmedLastChar == '\\' { if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
return p.readContinuationLines(line[:len(line)-1]) return p.readContinuationLines(line[:len(line)-1])
} }
// Check if ignore inline comment // Check if ignore inline comment
if !ignoreInlineComment { if !p.options.IgnoreInlineComment {
var i int var i int
if spaceBeforeInlineComment { if p.options.SpaceBeforeInlineComment {
i = strings.Index(line, " #") i = strings.Index(line, " #")
if i == -1 { if i == -1 {
i = strings.Index(line, " ;") i = strings.Index(line, " ;")
@ -260,65 +286,99 @@ func (p *parser) readValue(in []byte,
// Trim single and double quotes // Trim single and double quotes
if (hasSurroundedQuote(line, '\'') || if (hasSurroundedQuote(line, '\'') ||
hasSurroundedQuote(line, '"')) && !preserveSurroundedQuote { hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
line = line[1 : len(line)-1] line = line[1 : len(line)-1]
} else if len(valQuote) == 0 && unescapeValueCommentSymbols { } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
if strings.Contains(line, `\;`) { if strings.Contains(line, `\;`) {
line = strings.Replace(line, `\;`, ";", -1) line = strings.Replace(line, `\;`, ";", -1)
} }
if strings.Contains(line, `\#`) { if strings.Contains(line, `\#`) {
line = strings.Replace(line, `\#`, "#", -1) line = strings.Replace(line, `\#`, "#", -1)
} }
} else if allowPythonMultilines && lastChar == '\n' { } else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
parserBufferPeekResult, _ := p.buf.Peek(parserBufferSize) return p.readPythonMultilines(line, bufferSize)
peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
val := line
for {
peekData, peekErr := peekBuffer.ReadBytes('\n')
if peekErr != nil {
if peekErr == io.EOF {
return val, nil
}
return "", peekErr
}
peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
if len(peekMatches) != 3 {
return val, nil
}
// NOTE: Return if not a python-ini multi-line value.
currentIdentSize := len(peekMatches[1])
if currentIdentSize <= 0 {
return val, nil
}
// NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer.
_, err := p.readUntil('\n')
if err != nil {
return "", err
}
val += fmt.Sprintf("\n%s", peekMatches[2])
}
} }
return line, nil return line, nil
} }
func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
indentSize := 0
for {
peekData, peekErr := peekBuffer.ReadBytes('\n')
if peekErr != nil {
if peekErr == io.EOF {
p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line)
return line, nil
}
p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
return "", peekErr
}
p.debug("readPythonMultilines: parsing %q", string(peekData))
peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
for n, v := range peekMatches {
p.debug(" %d: %q", n, v)
}
// Return if not a Python multiline value.
if len(peekMatches) != 3 {
p.debug("readPythonMultilines: end of value, got: %q", line)
return line, nil
}
// Determine indent size and line prefix.
currentIndentSize := len(peekMatches[1])
if indentSize < 1 {
indentSize = currentIndentSize
p.debug("readPythonMultilines: indent size is %d", indentSize)
}
// Make sure each line is indented at least as far as first line.
if currentIndentSize < indentSize {
p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line)
return line, nil
}
// Advance the parser reader (buffer) in-sync with the peek buffer.
_, err := p.buf.Discard(len(peekData))
if err != nil {
p.debug("readPythonMultilines: failed to skip to the end, returning error")
return "", err
}
// Handle indented empty line.
line += "\n" + peekMatches[1][indentSize:] + peekMatches[2]
}
}
// parse parses data through an io.Reader. // parse parses data through an io.Reader.
func (f *File) parse(reader io.Reader) (err error) { func (f *File) parse(reader io.Reader) (err error) {
p := newParser(reader) p := newParser(reader, parserOptions{
IgnoreContinuation: f.options.IgnoreContinuation,
IgnoreInlineComment: f.options.IgnoreInlineComment,
AllowPythonMultilineValues: f.options.AllowPythonMultilineValues,
SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment,
UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes,
UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
PreserveSurroundedQuote: f.options.PreserveSurroundedQuote,
DebugFunc: f.options.DebugFunc,
ReaderBufferSize: f.options.ReaderBufferSize,
})
if err = p.BOM(); err != nil { if err = p.BOM(); err != nil {
return fmt.Errorf("BOM: %v", err) return fmt.Errorf("BOM: %v", err)
} }
// Ignore error because default section name is never empty string. // Ignore error because default section name is never empty string.
name := DEFAULT_SECTION name := DefaultSection
if f.options.Insensitive { if f.options.Insensitive || f.options.InsensitiveSections {
name = strings.ToLower(DEFAULT_SECTION) name = strings.ToLower(DefaultSection)
} }
section, _ := f.NewSection(name) section, _ := f.NewSection(name)
@ -333,8 +393,8 @@ func (f *File) parse(reader io.Reader) (err error) {
// the size of the parser buffer is found. // the size of the parser buffer is found.
// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
parserBufferSize := 0 parserBufferSize := 0
// NOTE: Peek 1kb at a time. // NOTE: Peek 4kb at a time.
currentPeekSize := 1024 currentPeekSize := minReaderBufferSize
if f.options.AllowPythonMultilineValues { if f.options.AllowPythonMultilineValues {
for { for {
@ -359,7 +419,10 @@ func (f *File) parse(reader io.Reader) (err error) {
if f.options.AllowNestedValues && if f.options.AllowNestedValues &&
isLastValueEmpty && len(line) > 0 { isLastValueEmpty && len(line) > 0 {
if line[0] == ' ' || line[0] == '\t' { if line[0] == ' ' || line[0] == '\t' {
lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
if err != nil {
return err
}
continue continue
} }
} }
@ -399,14 +462,14 @@ func (f *File) parse(reader io.Reader) (err error) {
section.Comment = strings.TrimSpace(p.comment.String()) section.Comment = strings.TrimSpace(p.comment.String())
// Reset aotu-counter and comments // Reset auto-counter and comments
p.comment.Reset() p.comment.Reset()
p.count = 1 p.count = 1
inUnparseableSection = false inUnparseableSection = false
for i := range f.options.UnparseableSections { for i := range f.options.UnparseableSections {
if f.options.UnparseableSections[i] == name || if f.options.UnparseableSections[i] == name ||
(f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) { ((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
inUnparseableSection = true inUnparseableSection = true
continue continue
} }
@ -426,15 +489,7 @@ func (f *File) parse(reader io.Reader) (err error) {
if IsErrDelimiterNotFound(err) { if IsErrDelimiterNotFound(err) {
switch { switch {
case f.options.AllowBooleanKeys: case f.options.AllowBooleanKeys:
kname, err := p.readValue(line, kname, err := p.readValue(line, parserBufferSize)
parserBufferSize,
f.options.IgnoreContinuation,
f.options.IgnoreInlineComment,
f.options.UnescapeValueDoubleQuotes,
f.options.UnescapeValueCommentSymbols,
f.options.AllowPythonMultilineValues,
f.options.SpaceBeforeInlineComment,
f.options.PreserveSurroundedQuote)
if err != nil { if err != nil {
return err return err
} }
@ -461,15 +516,7 @@ func (f *File) parse(reader io.Reader) (err error) {
p.count++ p.count++
} }
value, err := p.readValue(line[offset:], value, err := p.readValue(line[offset:], parserBufferSize)
parserBufferSize,
f.options.IgnoreContinuation,
f.options.IgnoreInlineComment,
f.options.UnescapeValueDoubleQuotes,
f.options.UnescapeValueCommentSymbols,
f.options.AllowPythonMultilineValues,
f.options.SpaceBeforeInlineComment,
f.options.PreserveSurroundedQuote)
if err != nil { if err != nil {
return err return err
} }

21
vendor/gopkg.in/ini.v1/section.go generated vendored
View File

@ -66,7 +66,7 @@ func (s *Section) SetBody(body string) {
func (s *Section) NewKey(name, val string) (*Key, error) { func (s *Section) NewKey(name, val string) (*Key, error) {
if len(name) == 0 { if len(name) == 0 {
return nil, errors.New("error creating new key: empty key name") return nil, errors.New("error creating new key: empty key name")
} else if s.f.options.Insensitive { } else if s.f.options.Insensitive || s.f.options.InsensitiveKeys {
name = strings.ToLower(name) name = strings.ToLower(name)
} }
@ -106,11 +106,10 @@ func (s *Section) NewBooleanKey(name string) (*Key, error) {
// GetKey returns key in section by given name. // GetKey returns key in section by given name.
func (s *Section) GetKey(name string) (*Key, error) { func (s *Section) GetKey(name string) (*Key, error) {
// FIXME: change to section level lock?
if s.f.BlockMode { if s.f.BlockMode {
s.f.lock.RLock() s.f.lock.RLock()
} }
if s.f.options.Insensitive { if s.f.options.Insensitive || s.f.options.InsensitiveKeys {
name = strings.ToLower(name) name = strings.ToLower(name)
} }
key := s.keys[name] key := s.keys[name]
@ -122,18 +121,17 @@ func (s *Section) GetKey(name string) (*Key, error) {
// Check if it is a child-section. // Check if it is a child-section.
sname := s.name sname := s.name
for { for {
if i := strings.LastIndex(sname, "."); i > -1 { if i := strings.LastIndex(sname, s.f.options.ChildSectionDelimiter); i > -1 {
sname = sname[:i] sname = sname[:i]
sec, err := s.f.GetSection(sname) sec, err := s.f.GetSection(sname)
if err != nil { if err != nil {
continue continue
} }
return sec.GetKey(name) return sec.GetKey(name)
} else { }
break break
} }
} return nil, fmt.Errorf("error when getting key of section %q: key %q not exists", s.name, name)
return nil, fmt.Errorf("error when getting key of section '%s': key '%s' not exists", s.name, name)
} }
return key, nil return key, nil
} }
@ -144,8 +142,7 @@ func (s *Section) HasKey(name string) bool {
return key != nil return key != nil
} }
// Haskey is a backwards-compatible name for HasKey. // Deprecated: Use "HasKey" instead.
// TODO: delete me in v2
func (s *Section) Haskey(name string) bool { func (s *Section) Haskey(name string) bool {
return s.HasKey(name) return s.HasKey(name)
} }
@ -191,7 +188,7 @@ func (s *Section) ParentKeys() []*Key {
var parentKeys []*Key var parentKeys []*Key
sname := s.name sname := s.name
for { for {
if i := strings.LastIndex(sname, "."); i > -1 { if i := strings.LastIndex(sname, s.f.options.ChildSectionDelimiter); i > -1 {
sname = sname[:i] sname = sname[:i]
sec, err := s.f.GetSection(sname) sec, err := s.f.GetSection(sname)
if err != nil { if err != nil {
@ -248,11 +245,11 @@ func (s *Section) DeleteKey(name string) {
// For example, "[parent.child1]" and "[parent.child12]" are child sections // For example, "[parent.child1]" and "[parent.child12]" are child sections
// of section "[parent]". // of section "[parent]".
func (s *Section) ChildSections() []*Section { func (s *Section) ChildSections() []*Section {
prefix := s.name + "." prefix := s.name + s.f.options.ChildSectionDelimiter
children := make([]*Section, 0, 3) children := make([]*Section, 0, 3)
for _, name := range s.f.sectionList { for _, name := range s.f.sectionList {
if strings.HasPrefix(name, prefix) { if strings.HasPrefix(name, prefix) {
children = append(children, s.f.sections[name]) children = append(children, s.f.sections[name]...)
} }
} }
return children return children

361
vendor/gopkg.in/ini.v1/struct.go generated vendored
View File

@ -29,8 +29,8 @@ type NameMapper func(string) string
// Built-in name getters. // Built-in name getters.
var ( var (
// AllCapsUnderscore converts to format ALL_CAPS_UNDERSCORE. // SnackCase converts to format SNACK_CASE.
AllCapsUnderscore NameMapper = func(raw string) string { SnackCase NameMapper = func(raw string) string {
newstr := make([]rune, 0, len(raw)) newstr := make([]rune, 0, len(raw))
for i, chr := range raw { for i, chr := range raw {
if isUpper := 'A' <= chr && chr <= 'Z'; isUpper { if isUpper := 'A' <= chr && chr <= 'Z'; isUpper {
@ -50,7 +50,7 @@ var (
if i > 0 { if i > 0 {
newstr = append(newstr, '_') newstr = append(newstr, '_')
} }
chr -= ('A' - 'a') chr -= 'A' - 'a'
} }
newstr = append(newstr, chr) newstr = append(newstr, chr)
} }
@ -108,6 +108,8 @@ func setSliceWithProperType(key *Key, field reflect.Value, delim string, allowSh
vals, err = key.parseUint64s(strs, true, false) vals, err = key.parseUint64s(strs, true, false)
case reflect.Float64: case reflect.Float64:
vals, err = key.parseFloat64s(strs, true, false) vals, err = key.parseFloat64s(strs, true, false)
case reflect.Bool:
vals, err = key.parseBools(strs, true, false)
case reflectTime: case reflectTime:
vals, err = key.parseTimesFormat(time.RFC3339, strs, true, false) vals, err = key.parseTimesFormat(time.RFC3339, strs, true, false)
default: default:
@ -132,6 +134,8 @@ func setSliceWithProperType(key *Key, field reflect.Value, delim string, allowSh
slice.Index(i).Set(reflect.ValueOf(vals.([]uint64)[i])) slice.Index(i).Set(reflect.ValueOf(vals.([]uint64)[i]))
case reflect.Float64: case reflect.Float64:
slice.Index(i).Set(reflect.ValueOf(vals.([]float64)[i])) slice.Index(i).Set(reflect.ValueOf(vals.([]float64)[i]))
case reflect.Bool:
slice.Index(i).Set(reflect.ValueOf(vals.([]bool)[i]))
case reflectTime: case reflectTime:
slice.Index(i).Set(reflect.ValueOf(vals.([]time.Time)[i])) slice.Index(i).Set(reflect.ValueOf(vals.([]time.Time)[i]))
} }
@ -149,25 +153,47 @@ func wrapStrictError(err error, isStrict bool) error {
// setWithProperType sets proper value to field based on its type, // setWithProperType sets proper value to field based on its type,
// but it does not return error for failing parsing, // but it does not return error for failing parsing,
// because we want to use default value that is already assigned to strcut. // because we want to use default value that is already assigned to struct.
func setWithProperType(t reflect.Type, key *Key, field reflect.Value, delim string, allowShadow, isStrict bool) error { func setWithProperType(t reflect.Type, key *Key, field reflect.Value, delim string, allowShadow, isStrict bool) error {
switch t.Kind() { vt := t
case reflect.String: isPtr := t.Kind() == reflect.Ptr
if len(key.String()) == 0 { if isPtr {
return nil vt = t.Elem()
} }
switch vt.Kind() {
case reflect.String:
stringVal := key.String()
if isPtr {
field.Set(reflect.ValueOf(&stringVal))
} else if len(stringVal) > 0 {
field.SetString(key.String()) field.SetString(key.String())
}
case reflect.Bool: case reflect.Bool:
boolVal, err := key.Bool() boolVal, err := key.Bool()
if err != nil { if err != nil {
return wrapStrictError(err, isStrict) return wrapStrictError(err, isStrict)
} }
if isPtr {
field.Set(reflect.ValueOf(&boolVal))
} else {
field.SetBool(boolVal) field.SetBool(boolVal)
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
// ParseDuration will not return err for `0`, so check the type name
if vt.Name() == "Duration" {
durationVal, err := key.Duration() durationVal, err := key.Duration()
// Skip zero value if err != nil {
if err == nil && int64(durationVal) > 0 { if intVal, err := key.Int64(); err == nil {
field.SetInt(intVal)
return nil
}
return wrapStrictError(err, isStrict)
}
if isPtr {
field.Set(reflect.ValueOf(&durationVal))
} else if int64(durationVal) > 0 {
field.Set(reflect.ValueOf(durationVal)) field.Set(reflect.ValueOf(durationVal))
}
return nil return nil
} }
@ -175,13 +201,23 @@ func setWithProperType(t reflect.Type, key *Key, field reflect.Value, delim stri
if err != nil { if err != nil {
return wrapStrictError(err, isStrict) return wrapStrictError(err, isStrict)
} }
if isPtr {
pv := reflect.New(t.Elem())
pv.Elem().SetInt(intVal)
field.Set(pv)
} else {
field.SetInt(intVal) field.SetInt(intVal)
}
// byte is an alias for uint8, so supporting uint8 breaks support for byte // byte is an alias for uint8, so supporting uint8 breaks support for byte
case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
durationVal, err := key.Duration() durationVal, err := key.Duration()
// Skip zero value // Skip zero value
if err == nil && uint64(durationVal) > 0 { if err == nil && uint64(durationVal) > 0 {
if isPtr {
field.Set(reflect.ValueOf(&durationVal))
} else {
field.Set(reflect.ValueOf(durationVal)) field.Set(reflect.ValueOf(durationVal))
}
return nil return nil
} }
@ -189,41 +225,59 @@ func setWithProperType(t reflect.Type, key *Key, field reflect.Value, delim stri
if err != nil { if err != nil {
return wrapStrictError(err, isStrict) return wrapStrictError(err, isStrict)
} }
if isPtr {
pv := reflect.New(t.Elem())
pv.Elem().SetUint(uintVal)
field.Set(pv)
} else {
field.SetUint(uintVal) field.SetUint(uintVal)
}
case reflect.Float32, reflect.Float64: case reflect.Float32, reflect.Float64:
floatVal, err := key.Float64() floatVal, err := key.Float64()
if err != nil { if err != nil {
return wrapStrictError(err, isStrict) return wrapStrictError(err, isStrict)
} }
if isPtr {
pv := reflect.New(t.Elem())
pv.Elem().SetFloat(floatVal)
field.Set(pv)
} else {
field.SetFloat(floatVal) field.SetFloat(floatVal)
}
case reflectTime: case reflectTime:
timeVal, err := key.Time() timeVal, err := key.Time()
if err != nil { if err != nil {
return wrapStrictError(err, isStrict) return wrapStrictError(err, isStrict)
} }
if isPtr {
field.Set(reflect.ValueOf(&timeVal))
} else {
field.Set(reflect.ValueOf(timeVal)) field.Set(reflect.ValueOf(timeVal))
}
case reflect.Slice: case reflect.Slice:
return setSliceWithProperType(key, field, delim, allowShadow, isStrict) return setSliceWithProperType(key, field, delim, allowShadow, isStrict)
default: default:
return fmt.Errorf("unsupported type '%s'", t) return fmt.Errorf("unsupported type %q", t)
} }
return nil return nil
} }
func parseTagOptions(tag string) (rawName string, omitEmpty bool, allowShadow bool) { func parseTagOptions(tag string) (rawName string, omitEmpty bool, allowShadow bool, allowNonUnique bool, extends bool) {
opts := strings.SplitN(tag, ",", 3) opts := strings.SplitN(tag, ",", 5)
rawName = opts[0] rawName = opts[0]
if len(opts) > 1 { for _, opt := range opts[1:] {
omitEmpty = opts[1] == "omitempty" omitEmpty = omitEmpty || (opt == "omitempty")
allowShadow = allowShadow || (opt == "allowshadow")
allowNonUnique = allowNonUnique || (opt == "nonunique")
extends = extends || (opt == "extends")
} }
if len(opts) > 2 { return rawName, omitEmpty, allowShadow, allowNonUnique, extends
allowShadow = opts[2] == "allowshadow"
}
return rawName, omitEmpty, allowShadow
} }
func (s *Section) mapTo(val reflect.Value, isStrict bool) error { // mapToField maps the given value to the matching field of the given section.
// The sectionIndex is the index (if non unique sections are enabled) to which the value should be added.
func (s *Section) mapToField(val reflect.Value, isStrict bool, sectionIndex int, sectionName string) error {
if val.Kind() == reflect.Ptr { if val.Kind() == reflect.Ptr {
val = val.Elem() val = val.Elem()
} }
@ -238,64 +292,124 @@ func (s *Section) mapTo(val reflect.Value, isStrict bool) error {
continue continue
} }
rawName, _, allowShadow := parseTagOptions(tag) rawName, _, allowShadow, allowNonUnique, extends := parseTagOptions(tag)
fieldName := s.parseFieldName(tpField.Name, rawName) fieldName := s.parseFieldName(tpField.Name, rawName)
if len(fieldName) == 0 || !field.CanSet() { if len(fieldName) == 0 || !field.CanSet() {
continue continue
} }
isAnonymous := tpField.Type.Kind() == reflect.Ptr && tpField.Anonymous
isStruct := tpField.Type.Kind() == reflect.Struct isStruct := tpField.Type.Kind() == reflect.Struct
if isAnonymous { isStructPtr := tpField.Type.Kind() == reflect.Ptr && tpField.Type.Elem().Kind() == reflect.Struct
isAnonymousPtr := tpField.Type.Kind() == reflect.Ptr && tpField.Anonymous
if isAnonymousPtr {
field.Set(reflect.New(tpField.Type.Elem())) field.Set(reflect.New(tpField.Type.Elem()))
} }
if isAnonymous || isStruct { if extends && (isAnonymousPtr || (isStruct && tpField.Anonymous)) {
if sec, err := s.f.GetSection(fieldName); err == nil { if isStructPtr && field.IsNil() {
if err = sec.mapTo(field, isStrict); err != nil { field.Set(reflect.New(tpField.Type.Elem()))
return fmt.Errorf("error mapping field(%s): %v", fieldName, err) }
fieldSection := s
if rawName != "" {
sectionName = s.name + s.f.options.ChildSectionDelimiter + rawName
if secs, err := s.f.SectionsByName(sectionName); err == nil && sectionIndex < len(secs) {
fieldSection = secs[sectionIndex]
}
}
if err := fieldSection.mapToField(field, isStrict, sectionIndex, sectionName); err != nil {
return fmt.Errorf("map to field %q: %v", fieldName, err)
}
} else if isAnonymousPtr || isStruct || isStructPtr {
if secs, err := s.f.SectionsByName(fieldName); err == nil {
if len(secs) <= sectionIndex {
return fmt.Errorf("there are not enough sections (%d <= %d) for the field %q", len(secs), sectionIndex, fieldName)
}
// Only set the field to non-nil struct value if we have a section for it.
// Otherwise, we end up with a non-nil struct ptr even though there is no data.
if isStructPtr && field.IsNil() {
field.Set(reflect.New(tpField.Type.Elem()))
}
if err = secs[sectionIndex].mapToField(field, isStrict, sectionIndex, fieldName); err != nil {
return fmt.Errorf("map to field %q: %v", fieldName, err)
} }
continue continue
} }
} }
// Map non-unique sections
if allowNonUnique && tpField.Type.Kind() == reflect.Slice {
newField, err := s.mapToSlice(fieldName, field, isStrict)
if err != nil {
return fmt.Errorf("map to slice %q: %v", fieldName, err)
}
field.Set(newField)
continue
}
if key, err := s.GetKey(fieldName); err == nil { if key, err := s.GetKey(fieldName); err == nil {
delim := parseDelim(tpField.Tag.Get("delim")) delim := parseDelim(tpField.Tag.Get("delim"))
if err = setWithProperType(tpField.Type, key, field, delim, allowShadow, isStrict); err != nil { if err = setWithProperType(tpField.Type, key, field, delim, allowShadow, isStrict); err != nil {
return fmt.Errorf("error mapping field(%s): %v", fieldName, err) return fmt.Errorf("set field %q: %v", fieldName, err)
} }
} }
} }
return nil return nil
} }
// MapTo maps section to given struct. // mapToSlice maps all sections with the same name and returns the new value.
func (s *Section) MapTo(v interface{}) error { // The type of the Value must be a slice.
typ := reflect.TypeOf(v) func (s *Section) mapToSlice(secName string, val reflect.Value, isStrict bool) (reflect.Value, error) {
val := reflect.ValueOf(v) secs, err := s.f.SectionsByName(secName)
if typ.Kind() == reflect.Ptr { if err != nil {
typ = typ.Elem() return reflect.Value{}, err
val = val.Elem()
} else {
return errors.New("cannot map to non-pointer struct")
} }
return s.mapTo(val, false) typ := val.Type().Elem()
for i, sec := range secs {
elem := reflect.New(typ)
if err = sec.mapToField(elem, isStrict, i, sec.name); err != nil {
return reflect.Value{}, fmt.Errorf("map to field from section %q: %v", secName, err)
}
val = reflect.Append(val, elem.Elem())
}
return val, nil
} }
// MapTo maps section to given struct in strict mode, // mapTo maps a section to object v.
// which returns all possible error including value parsing error. func (s *Section) mapTo(v interface{}, isStrict bool) error {
func (s *Section) StrictMapTo(v interface{}) error {
typ := reflect.TypeOf(v) typ := reflect.TypeOf(v)
val := reflect.ValueOf(v) val := reflect.ValueOf(v)
if typ.Kind() == reflect.Ptr { if typ.Kind() == reflect.Ptr {
typ = typ.Elem() typ = typ.Elem()
val = val.Elem() val = val.Elem()
} else { } else {
return errors.New("cannot map to non-pointer struct") return errors.New("not a pointer to a struct")
} }
return s.mapTo(val, true) if typ.Kind() == reflect.Slice {
newField, err := s.mapToSlice(s.name, val, isStrict)
if err != nil {
return err
}
val.Set(newField)
return nil
}
return s.mapToField(val, isStrict, 0, s.name)
}
// MapTo maps section to given struct.
func (s *Section) MapTo(v interface{}) error {
return s.mapTo(v, false)
}
// StrictMapTo maps section to given struct in strict mode,
// which returns all possible error including value parsing error.
func (s *Section) StrictMapTo(v interface{}) error {
return s.mapTo(v, true)
} }
// MapTo maps file to given struct. // MapTo maps file to given struct.
@ -303,13 +417,13 @@ func (f *File) MapTo(v interface{}) error {
return f.Section("").MapTo(v) return f.Section("").MapTo(v)
} }
// MapTo maps file to given struct in strict mode, // StrictMapTo maps file to given struct in strict mode,
// which returns all possible error including value parsing error. // which returns all possible error including value parsing error.
func (f *File) StrictMapTo(v interface{}) error { func (f *File) StrictMapTo(v interface{}) error {
return f.Section("").StrictMapTo(v) return f.Section("").StrictMapTo(v)
} }
// MapTo maps data sources to given struct with name mapper. // MapToWithMapper maps data sources to given struct with name mapper.
func MapToWithMapper(v interface{}, mapper NameMapper, source interface{}, others ...interface{}) error { func MapToWithMapper(v interface{}, mapper NameMapper, source interface{}, others ...interface{}) error {
cfg, err := Load(source, others...) cfg, err := Load(source, others...)
if err != nil { if err != nil {
@ -342,14 +456,45 @@ func StrictMapTo(v, source interface{}, others ...interface{}) error {
} }
// reflectSliceWithProperType does the opposite thing as setSliceWithProperType. // reflectSliceWithProperType does the opposite thing as setSliceWithProperType.
func reflectSliceWithProperType(key *Key, field reflect.Value, delim string) error { func reflectSliceWithProperType(key *Key, field reflect.Value, delim string, allowShadow bool) error {
slice := field.Slice(0, field.Len()) slice := field.Slice(0, field.Len())
if field.Len() == 0 { if field.Len() == 0 {
return nil return nil
} }
sliceOf := field.Type().Elem().Kind()
if allowShadow {
var keyWithShadows *Key
for i := 0; i < field.Len(); i++ {
var val string
switch sliceOf {
case reflect.String:
val = slice.Index(i).String()
case reflect.Int, reflect.Int64:
val = fmt.Sprint(slice.Index(i).Int())
case reflect.Uint, reflect.Uint64:
val = fmt.Sprint(slice.Index(i).Uint())
case reflect.Float64:
val = fmt.Sprint(slice.Index(i).Float())
case reflect.Bool:
val = fmt.Sprint(slice.Index(i).Bool())
case reflectTime:
val = slice.Index(i).Interface().(time.Time).Format(time.RFC3339)
default:
return fmt.Errorf("unsupported type '[]%s'", sliceOf)
}
if i == 0 {
keyWithShadows = newKey(key.s, key.name, val)
} else {
_ = keyWithShadows.AddShadow(val)
}
}
*key = *keyWithShadows
return nil
}
var buf bytes.Buffer var buf bytes.Buffer
sliceOf := field.Type().Elem().Kind()
for i := 0; i < field.Len(); i++ { for i := 0; i < field.Len(); i++ {
switch sliceOf { switch sliceOf {
case reflect.String: case reflect.String:
@ -360,6 +505,8 @@ func reflectSliceWithProperType(key *Key, field reflect.Value, delim string) err
buf.WriteString(fmt.Sprint(slice.Index(i).Uint())) buf.WriteString(fmt.Sprint(slice.Index(i).Uint()))
case reflect.Float64: case reflect.Float64:
buf.WriteString(fmt.Sprint(slice.Index(i).Float())) buf.WriteString(fmt.Sprint(slice.Index(i).Float()))
case reflect.Bool:
buf.WriteString(fmt.Sprint(slice.Index(i).Bool()))
case reflectTime: case reflectTime:
buf.WriteString(slice.Index(i).Interface().(time.Time).Format(time.RFC3339)) buf.WriteString(slice.Index(i).Interface().(time.Time).Format(time.RFC3339))
default: default:
@ -367,12 +514,12 @@ func reflectSliceWithProperType(key *Key, field reflect.Value, delim string) err
} }
buf.WriteString(delim) buf.WriteString(delim)
} }
key.SetValue(buf.String()[:buf.Len()-1]) key.SetValue(buf.String()[:buf.Len()-len(delim)])
return nil return nil
} }
// reflectWithProperType does the opposite thing as setWithProperType. // reflectWithProperType does the opposite thing as setWithProperType.
func reflectWithProperType(t reflect.Type, key *Key, field reflect.Value, delim string) error { func reflectWithProperType(t reflect.Type, key *Key, field reflect.Value, delim string, allowShadow bool) error {
switch t.Kind() { switch t.Kind() {
case reflect.String: case reflect.String:
key.SetValue(field.String()) key.SetValue(field.String())
@ -387,9 +534,13 @@ func reflectWithProperType(t reflect.Type, key *Key, field reflect.Value, delim
case reflectTime: case reflectTime:
key.SetValue(fmt.Sprint(field.Interface().(time.Time).Format(time.RFC3339))) key.SetValue(fmt.Sprint(field.Interface().(time.Time).Format(time.RFC3339)))
case reflect.Slice: case reflect.Slice:
return reflectSliceWithProperType(key, field, delim) return reflectSliceWithProperType(key, field, delim, allowShadow)
case reflect.Ptr:
if !field.IsNil() {
return reflectWithProperType(t.Elem(), key, field.Elem(), delim, allowShadow)
}
default: default:
return fmt.Errorf("unsupported type '%s'", t) return fmt.Errorf("unsupported type %q", t)
} }
return nil return nil
} }
@ -417,6 +568,11 @@ func isEmptyValue(v reflect.Value) bool {
return false return false
} }
// StructReflector is the interface implemented by struct types that can extract themselves into INI objects.
type StructReflector interface {
ReflectINIStruct(*File) error
}
func (s *Section) reflectFrom(val reflect.Value) error { func (s *Section) reflectFrom(val reflect.Value) error {
if val.Kind() == reflect.Ptr { if val.Kind() == reflect.Ptr {
val = val.Elem() val = val.Elem()
@ -424,6 +580,10 @@ func (s *Section) reflectFrom(val reflect.Value) error {
typ := val.Type() typ := val.Type()
for i := 0; i < typ.NumField(); i++ { for i := 0; i < typ.NumField(); i++ {
if !val.Field(i).CanInterface() {
continue
}
field := val.Field(i) field := val.Field(i)
tpField := typ.Field(i) tpField := typ.Field(i)
@ -432,17 +592,28 @@ func (s *Section) reflectFrom(val reflect.Value) error {
continue continue
} }
opts := strings.SplitN(tag, ",", 2) rawName, omitEmpty, allowShadow, allowNonUnique, extends := parseTagOptions(tag)
if len(opts) == 2 && opts[1] == "omitempty" && isEmptyValue(field) { if omitEmpty && isEmptyValue(field) {
continue continue
} }
fieldName := s.parseFieldName(tpField.Name, opts[0]) if r, ok := field.Interface().(StructReflector); ok {
return r.ReflectINIStruct(s.f)
}
fieldName := s.parseFieldName(tpField.Name, rawName)
if len(fieldName) == 0 || !field.CanSet() { if len(fieldName) == 0 || !field.CanSet() {
continue continue
} }
if (tpField.Type.Kind() == reflect.Ptr && tpField.Anonymous) || if extends && tpField.Anonymous && (tpField.Type.Kind() == reflect.Ptr || tpField.Type.Kind() == reflect.Struct) {
if err := s.reflectFrom(field); err != nil {
return fmt.Errorf("reflect from field %q: %v", fieldName, err)
}
continue
}
if (tpField.Type.Kind() == reflect.Ptr && tpField.Type.Elem().Kind() == reflect.Struct) ||
(tpField.Type.Kind() == reflect.Struct && tpField.Type.Name() != "Time") { (tpField.Type.Kind() == reflect.Struct && tpField.Type.Name() != "Time") {
// Note: The only error here is section doesn't exist. // Note: The only error here is section doesn't exist.
sec, err := s.f.GetSection(fieldName) sec, err := s.f.GetSection(fieldName)
@ -457,12 +628,41 @@ func (s *Section) reflectFrom(val reflect.Value) error {
} }
if err = sec.reflectFrom(field); err != nil { if err = sec.reflectFrom(field); err != nil {
return fmt.Errorf("error reflecting field (%s): %v", fieldName, err) return fmt.Errorf("reflect from field %q: %v", fieldName, err)
} }
continue continue
} }
// Note: Same reason as secion. if allowNonUnique && tpField.Type.Kind() == reflect.Slice {
slice := field.Slice(0, field.Len())
if field.Len() == 0 {
return nil
}
sliceOf := field.Type().Elem().Kind()
for i := 0; i < field.Len(); i++ {
if sliceOf != reflect.Struct && sliceOf != reflect.Ptr {
return fmt.Errorf("field %q is not a slice of pointer or struct", fieldName)
}
sec, err := s.f.NewSection(fieldName)
if err != nil {
return err
}
// Add comment from comment tag
if len(sec.Comment) == 0 {
sec.Comment = tpField.Tag.Get("comment")
}
if err := sec.reflectFrom(slice.Index(i)); err != nil {
return fmt.Errorf("reflect from field %q: %v", fieldName, err)
}
}
continue
}
// Note: Same reason as section.
key, err := s.GetKey(fieldName) key, err := s.GetKey(fieldName)
if err != nil { if err != nil {
key, _ = s.NewKey(fieldName, "") key, _ = s.NewKey(fieldName, "")
@ -473,23 +673,58 @@ func (s *Section) reflectFrom(val reflect.Value) error {
key.Comment = tpField.Tag.Get("comment") key.Comment = tpField.Tag.Get("comment")
} }
if err = reflectWithProperType(tpField.Type, key, field, parseDelim(tpField.Tag.Get("delim"))); err != nil { delim := parseDelim(tpField.Tag.Get("delim"))
return fmt.Errorf("error reflecting field (%s): %v", fieldName, err) if err = reflectWithProperType(tpField.Type, key, field, delim, allowShadow); err != nil {
return fmt.Errorf("reflect field %q: %v", fieldName, err)
} }
} }
return nil return nil
} }
// ReflectFrom reflects secion from given struct. // ReflectFrom reflects section from given struct. It overwrites existing ones.
func (s *Section) ReflectFrom(v interface{}) error { func (s *Section) ReflectFrom(v interface{}) error {
typ := reflect.TypeOf(v) typ := reflect.TypeOf(v)
val := reflect.ValueOf(v) val := reflect.ValueOf(v)
if s.name != DefaultSection && s.f.options.AllowNonUniqueSections &&
(typ.Kind() == reflect.Slice || typ.Kind() == reflect.Ptr) {
// Clear sections to make sure none exists before adding the new ones
s.f.DeleteSection(s.name)
if typ.Kind() == reflect.Ptr {
sec, err := s.f.NewSection(s.name)
if err != nil {
return err
}
return sec.reflectFrom(val.Elem())
}
slice := val.Slice(0, val.Len())
sliceOf := val.Type().Elem().Kind()
if sliceOf != reflect.Ptr {
return fmt.Errorf("not a slice of pointers")
}
for i := 0; i < slice.Len(); i++ {
sec, err := s.f.NewSection(s.name)
if err != nil {
return err
}
err = sec.reflectFrom(slice.Index(i))
if err != nil {
return fmt.Errorf("reflect from %dth field: %v", i, err)
}
}
return nil
}
if typ.Kind() == reflect.Ptr { if typ.Kind() == reflect.Ptr {
typ = typ.Elem()
val = val.Elem() val = val.Elem()
} else { } else {
return errors.New("cannot reflect from non-pointer struct") return errors.New("not a pointer to a struct")
} }
return s.reflectFrom(val) return s.reflectFrom(val)
@ -500,7 +735,7 @@ func (f *File) ReflectFrom(v interface{}) error {
return f.Section("").ReflectFrom(v) return f.Section("").ReflectFrom(v)
} }
// ReflectFrom reflects data sources from given struct with name mapper. // ReflectFromWithMapper reflects data sources from given struct with name mapper.
func ReflectFromWithMapper(cfg *File, v interface{}, mapper NameMapper) error { func ReflectFromWithMapper(cfg *File, v interface{}, mapper NameMapper) error {
cfg.NameMapper = mapper cfg.NameMapper = mapper
return cfg.ReflectFrom(v) return cfg.ReflectFrom(v)

16
vendor/modules.txt vendored
View File

@ -1,16 +1,18 @@
# github.com/antchfx/xmlquery v1.0.0 # github.com/antchfx/xmlquery v1.3.3
github.com/antchfx/xmlquery github.com/antchfx/xmlquery
# github.com/antchfx/xpath v1.0.0 # github.com/antchfx/xpath v1.1.11
github.com/antchfx/xpath github.com/antchfx/xpath
# github.com/influxdata/influxdb1-client v0.0.0-20190402204710-8ff2fc3824fc # github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
github.com/golang/groupcache/lru
# github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab
github.com/influxdata/influxdb1-client/models github.com/influxdata/influxdb1-client/models
github.com/influxdata/influxdb1-client/pkg/escape github.com/influxdata/influxdb1-client/pkg/escape
github.com/influxdata/influxdb1-client/v2 github.com/influxdata/influxdb1-client/v2
# golang.org/x/net v0.0.0-20190607181551-461777fb6f67 # golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
golang.org/x/net/html golang.org/x/net/html
golang.org/x/net/html/atom golang.org/x/net/html/atom
golang.org/x/net/html/charset golang.org/x/net/html/charset
# golang.org/x/text v0.3.0 # golang.org/x/text v0.3.4
golang.org/x/text/encoding golang.org/x/text/encoding
golang.org/x/text/encoding/charmap golang.org/x/text/encoding/charmap
golang.org/x/text/encoding/htmlindex golang.org/x/text/encoding/htmlindex
@ -21,10 +23,12 @@ golang.org/x/text/encoding/korean
golang.org/x/text/encoding/simplifiedchinese golang.org/x/text/encoding/simplifiedchinese
golang.org/x/text/encoding/traditionalchinese golang.org/x/text/encoding/traditionalchinese
golang.org/x/text/encoding/unicode golang.org/x/text/encoding/unicode
golang.org/x/text/internal/language
golang.org/x/text/internal/language/compact
golang.org/x/text/internal/tag golang.org/x/text/internal/tag
golang.org/x/text/internal/utf8internal golang.org/x/text/internal/utf8internal
golang.org/x/text/language golang.org/x/text/language
golang.org/x/text/runes golang.org/x/text/runes
golang.org/x/text/transform golang.org/x/text/transform
# gopkg.in/ini.v1 v1.42.0 # gopkg.in/ini.v1 v1.62.0
gopkg.in/ini.v1 gopkg.in/ini.v1