Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
93
tools/license_checker/README.md
Normal file
93
tools/license_checker/README.md
Normal file
|
@ -0,0 +1,93 @@
|
|||
# Dependency license verification tool
|
||||
|
||||
This tool allows the verification of information in
|
||||
`docs/LICENSE_OF_DEPENDENCIES.md` against the linked license
|
||||
information. To do so, the license reported by the user is
|
||||
checked against the license classification of the downloaded
|
||||
license file for each dependency.
|
||||
|
||||
## Building
|
||||
|
||||
```shell
|
||||
make build_tools
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
The simplest way to run the verification tool is to execute
|
||||
|
||||
```shell
|
||||
telegraf$ ./tools/license_checker/license_checker
|
||||
```
|
||||
|
||||
which uses the current directory as telegraf's root directory and verifies
|
||||
all licenses. Only errors will be reported by default.
|
||||
|
||||
There are multiple options you can use to customize the verification.
|
||||
Take a look at
|
||||
|
||||
```shell
|
||||
telegraf$ ./tools/license_checker/license_checker --help
|
||||
```
|
||||
|
||||
to get an overview.
|
||||
|
||||
As the verification tool downloads each license file linked in the
|
||||
dependency license document, you should be careful not to exceed
|
||||
the access limits of e.g. GitHub by running the tool too frequently.
|
||||
|
||||
Some packages change the license for newer versions. As we always
|
||||
link to the latest license text the classification might not match
|
||||
the actual license of our used dependency. Furthermore, some license
|
||||
text might be wrongly classified, or not classified at all. In these
|
||||
cases, you can use a _whitelist_ to explicitly state the license
|
||||
SPDX classifier for those packages.
|
||||
See the [whitelist section](#whitelist) for more details.
|
||||
|
||||
The recommended use in telegraf is to run
|
||||
|
||||
```shell
|
||||
telegraf$ ./tools/license_checker/license_checker \
|
||||
-whitelist ./tools/license_checker/data/whitelist
|
||||
```
|
||||
|
||||
using the code-versioned whitelist. This command will report all
|
||||
non-matching entries with an `ERR:` prefix.
|
||||
|
||||
## Whitelist
|
||||
|
||||
Whitelist entries contain explicit license information for
|
||||
a set of packages to use instead of classification. Each entry
|
||||
in the whitelist is a line of the form
|
||||
|
||||
```text
|
||||
[comparison operator]<package name>[@vX.Y.Z] <license SPDX>
|
||||
```
|
||||
|
||||
where the _comparison operator_ is one of `>`, `>=`, `=`, `<=` or `<`
|
||||
and the _license SPDX_ is a [SPDX license identifier][spdx].
|
||||
In case no package version is specified, the entry matches all versions
|
||||
of the library. Furthermore, the comparison operator can be omitted
|
||||
which is equivalent to an exact match (`=`).
|
||||
|
||||
The entries are processed in order until the first match is found.
|
||||
|
||||
Here is an example of a whitelist. Assume that you have library
|
||||
`github.com/foo/bar` which started out with the `MIT` license
|
||||
until version 1.0.0 where it changed to `EFL-1.0` until it again
|
||||
changed to `EFL-2.0` starting __after__ version 2.3.0. In this case
|
||||
the whitelist should look like this
|
||||
|
||||
```text
|
||||
<github.com/foo/bar@v1.0.0 MIT
|
||||
<=github.com/foo/bar@v2.3.0 EFL-1.0
|
||||
github.com/foo/bar EFL-2.0
|
||||
```
|
||||
|
||||
All versions below 1.0.0 are matched by the first line and are thus
|
||||
classified as `MIT`. The second line matches everything that is
|
||||
at or above 1.0.0 (thus not matched by the first line) up to (and including)
|
||||
2.3.0. The last line will catch everything that passed the first
|
||||
two lines i.e. everything after 2.3.0.
|
||||
|
||||
[spdx]: https://spdx.org/licenses/
|
15
tools/license_checker/data/spdx_mapping.json
Normal file
15
tools/license_checker/data/spdx_mapping.json
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"Apache License 2.0": "Apache-2.0",
|
||||
"BSD 2-Clause with views sentence": "BSD-2-Clause-Views",
|
||||
"BSD 2-Clause \"Simplified\" License": "BSD-2-Clause",
|
||||
"BSD 3-Clause \"New\" or \"Revised\" License": "BSD-3-Clause",
|
||||
"BSD 3-Clause Clear License": "BSD-3-Clause",
|
||||
"BSD 3-Clause License": "BSD-3-Clause",
|
||||
"Eclipse Public License - v 1.0": "EPL-1.0",
|
||||
"Eclipse Public License - v 2.0": "EPL-2.0",
|
||||
"ISC License": "ISC",
|
||||
"MIT License": "MIT",
|
||||
"Mozilla Public License 2.0": "MPL-2.0",
|
||||
"The Unlicense": "Unlicense",
|
||||
"zlib License": "Zlib"
|
||||
}
|
2
tools/license_checker/data/whitelist
Normal file
2
tools/license_checker/data/whitelist
Normal file
|
@ -0,0 +1,2 @@
|
|||
<github.com/couchbase/goutils@v0.1.2 Apache-2.0
|
||||
<=github.com/segmentio/asm@v1.2.0 MIT
|
245
tools/license_checker/main.go
Normal file
245
tools/license_checker/main.go
Normal file
|
@ -0,0 +1,245 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/text"
|
||||
"golang.org/x/mod/modfile"
|
||||
)
|
||||
|
||||
//go:embed data/spdx_mapping.json
|
||||
var spdxMappingFile []byte
|
||||
|
||||
// debug enables the output of debugf; main binds it to the -debug flag.
var debug bool
|
||||
// nameToSPDX maps full license names to SPDX identifiers. main fills it by
// unmarshalling the embedded data/spdx_mapping.json.
var nameToSPDX map[string]string
|
||||
|
||||
func debugf(format string, v ...any) {
|
||||
if !debug {
|
||||
return
|
||||
}
|
||||
log.Printf("DEBUG: "+format, v...)
|
||||
}
|
||||
|
||||
func main() {
|
||||
var help, verbose bool
|
||||
var threshold float64
|
||||
var whitelistFn, userpkg string
|
||||
|
||||
flag.BoolVar(&debug, "debug", false, "output debugging information")
|
||||
flag.BoolVar(&help, "help", false, "output this help text")
|
||||
flag.BoolVar(&verbose, "verbose", false, "output verbose information instead of just errors")
|
||||
flag.Float64Var(&threshold, "threshold", 0.8, "threshold for license classification")
|
||||
flag.StringVar(&whitelistFn, "whitelist", "", "use the given white-list file for comparison")
|
||||
flag.StringVar(&userpkg, "package", "", "only test the given package (all by default)")
|
||||
flag.Parse()
|
||||
|
||||
if help || flag.NArg() > 1 {
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "Usage of %s [options] [telegraf root dir]\n", os.Args[0])
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "Options:\n")
|
||||
flag.PrintDefaults()
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "\n")
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "Arguments:\n")
|
||||
fmt.Fprintf(flag.CommandLine.Output(), " telegraf root dir (optional)\n")
|
||||
fmt.Fprintf(flag.CommandLine.Output(), " path to the root directory of telegraf (default: .)\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Setup full-name to license SPDX identifier mapping
|
||||
if err := json.Unmarshal(spdxMappingFile, &nameToSPDX); err != nil {
|
||||
log.Fatalf("Unmarshalling license name to SPDX mapping failed: %v", err)
|
||||
}
|
||||
|
||||
// Get required files
|
||||
path := "."
|
||||
if flag.NArg() == 1 {
|
||||
path = flag.Arg(0)
|
||||
}
|
||||
|
||||
moduleFilename := filepath.Join(path, "go.mod")
|
||||
licenseFilename := filepath.Join(path, "docs", "LICENSE_OF_DEPENDENCIES.md")
|
||||
|
||||
var override whitelist
|
||||
if whitelistFn != "" {
|
||||
log.Printf("Reading whitelist file %q...", whitelistFn)
|
||||
if err := override.Parse(whitelistFn); err != nil {
|
||||
log.Fatalf("Reading whitelist failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Reading module file %q...", moduleFilename)
|
||||
modbuf, err := os.ReadFile(moduleFilename)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
depModules, err := modfile.Parse(moduleFilename, modbuf, nil)
|
||||
if err != nil {
|
||||
log.Fatalf("Parsing modules failed: %f", err)
|
||||
}
|
||||
debugf("found %d required packages", len(depModules.Require))
|
||||
|
||||
dependencies := make(map[string]string)
|
||||
for _, d := range depModules.Require {
|
||||
dependencies[d.Mod.Path] = d.Mod.Version
|
||||
}
|
||||
|
||||
log.Printf("Reading license file %q...", licenseFilename)
|
||||
licensesMarkdown, err := os.ReadFile(licenseFilename)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Parse the markdown document
|
||||
parser := goldmark.DefaultParser()
|
||||
root := parser.Parse(text.NewReader(licensesMarkdown))
|
||||
|
||||
// Prepare a line parser
|
||||
lineParser := goldmark.DefaultParser()
|
||||
|
||||
// Collect the licenses
|
||||
// For each list we search for the items and parse them.
|
||||
// Expect a pattern of <package name> <link>.
|
||||
ignored := 0
|
||||
var packageInfos []packageInfo
|
||||
for node := root.FirstChild(); node != nil; node = node.NextSibling() {
|
||||
listNode, ok := node.(*ast.List)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
for inode := listNode.FirstChild(); inode != nil; inode = inode.NextSibling() {
|
||||
itemNode, ok := inode.(*ast.ListItem)
|
||||
if !ok || itemNode.ChildCount() != 1 {
|
||||
continue
|
||||
}
|
||||
textNode, ok := itemNode.FirstChild().(*ast.TextBlock)
|
||||
if !ok || textNode.Lines().Len() != 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
lineSegment := textNode.Lines().At(0)
|
||||
line := lineSegment.Value(licensesMarkdown)
|
||||
lineRoot := lineParser.Parse(text.NewReader(line))
|
||||
if lineRoot.ChildCount() != 1 || lineRoot.FirstChild().ChildCount() < 2 {
|
||||
log.Printf("WARN: Ignoring item %q due to wrong count (%d/%d)", string(line), lineRoot.ChildCount(), lineRoot.FirstChild().ChildCount())
|
||||
ignored++
|
||||
continue
|
||||
}
|
||||
|
||||
var name, license, link string
|
||||
for lineElementNode := lineRoot.FirstChild().FirstChild(); lineElementNode != nil; lineElementNode = lineElementNode.NextSibling() {
|
||||
switch v := lineElementNode.(type) {
|
||||
case *ast.Text:
|
||||
name += string(v.Value(line))
|
||||
case *ast.Link:
|
||||
license = string(v.FirstChild().(*ast.Text).Value(line))
|
||||
link = string(v.Destination)
|
||||
default:
|
||||
debugf("ignoring unknown element %T (%v)", v, v)
|
||||
}
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
|
||||
info := packageInfo{
|
||||
name: name,
|
||||
version: dependencies[name],
|
||||
url: strings.TrimSpace(link),
|
||||
license: strings.TrimSpace(license),
|
||||
}
|
||||
info.ToSPDX()
|
||||
if info.name == "" {
|
||||
log.Printf("WARN: Ignoring item %q due to empty package name", string(line))
|
||||
ignored++
|
||||
continue
|
||||
}
|
||||
if info.url == "" {
|
||||
log.Printf("WARN: Ignoring item %q due to empty url name", string(line))
|
||||
ignored++
|
||||
continue
|
||||
}
|
||||
if info.license == "" {
|
||||
log.Printf("WARN: Ignoring item %q due to empty license name", string(line))
|
||||
ignored++
|
||||
continue
|
||||
}
|
||||
debugf("adding %q with license %q (%s) and version %q at %q...", info.name, info.license, info.spdx, info.version, info.url)
|
||||
packageInfos = append(packageInfos, info)
|
||||
}
|
||||
}
|
||||
|
||||
// Get the superset of licenses
|
||||
if debug {
|
||||
licenseSet := make(map[string]bool, len(packageInfos))
|
||||
licenseNames := make([]string, 0, len(packageInfos))
|
||||
for _, info := range packageInfos {
|
||||
if found := licenseSet[info.license]; !found {
|
||||
licenseNames = append(licenseNames, info.license)
|
||||
}
|
||||
licenseSet[info.license] = true
|
||||
}
|
||||
sort.Strings(licenseNames)
|
||||
log.Println("Using licenses:")
|
||||
for _, license := range licenseNames {
|
||||
log.Println(" " + license)
|
||||
}
|
||||
}
|
||||
|
||||
// Check the licenses by matching their text and compare the classification result
|
||||
// with the information provided by the user
|
||||
var succeeded, warn, failed int
|
||||
for _, info := range packageInfos {
|
||||
// Ignore all packages except the ones given by the user (if any)
|
||||
if userpkg != "" && userpkg != info.name {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if we got a whitelist entry for the package
|
||||
if ok, found := override.Check(info.name, info.version, info.spdx); found {
|
||||
if ok {
|
||||
log.Printf("OK: \"%s@%s\" (%s) (whitelist)", info.name, info.version, info.license)
|
||||
succeeded++
|
||||
} else {
|
||||
log.Printf("ERR: \"%s@%s\" (%s) %s does not match whitelist", info.name, info.version, info.license, info.spdx)
|
||||
failed++
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Perform a text classification
|
||||
confidence, err := info.Classify()
|
||||
if err != nil {
|
||||
log.Printf("ERR: %q (%s) %v", info.name, info.license, err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
if confidence < threshold {
|
||||
log.Printf("WARN: %q (%s) has low matching confidence (%.2f%%)", info.name, info.license, confidence)
|
||||
warn++
|
||||
continue
|
||||
}
|
||||
if verbose {
|
||||
log.Printf("OK: %q (%s) (%.2f%%)", info.name, info.license, confidence)
|
||||
}
|
||||
succeeded++
|
||||
}
|
||||
if verbose {
|
||||
log.Printf("Checked %d licenses (%d ignored lines):", len(packageInfos), ignored)
|
||||
log.Printf(" %d successful", succeeded)
|
||||
log.Printf(" %d low confidence", warn)
|
||||
log.Printf(" %d errors", failed)
|
||||
}
|
||||
|
||||
if failed > 0 {
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
102
tools/license_checker/package.go
Normal file
102
tools/license_checker/package.go
Normal file
|
@ -0,0 +1,102 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/google/licensecheck"
|
||||
)
|
||||
|
||||
// packageInfo describes one dependency entry of the license document.
//
//   - name:    package name as listed in the document
//   - version: version looked up for that name among the go.mod requirements
//   - license: license name given as the link text
//   - url:     link destination pointing to the license text
//   - spdx:    SPDX identifier derived from license by ToSPDX
type packageInfo struct {
	name, version, license, url, spdx string
}
|
||||
|
||||
func (pkg *packageInfo) ToSPDX() {
|
||||
pkg.spdx = nameToSPDX[pkg.license]
|
||||
}
|
||||
|
||||
func (pkg *packageInfo) Classify() (float64, error) {
|
||||
// Check for a valid SPDX
|
||||
if pkg.spdx == "" {
|
||||
return 0.0, fmt.Errorf("empty SPDX for license %q", pkg.license)
|
||||
}
|
||||
|
||||
// Download the license text
|
||||
source, err := normalizeURL(pkg.url)
|
||||
if err != nil {
|
||||
return 0.0, fmt.Errorf("%q is not a valid URL: %w", pkg.url, err)
|
||||
}
|
||||
debugf("%q downloading from %q", pkg.name, source)
|
||||
|
||||
response, err := http.Get(source.String())
|
||||
if err != nil {
|
||||
return 0.0, fmt.Errorf("download from %q failed: %w", source, err)
|
||||
}
|
||||
if response.StatusCode < 200 || response.StatusCode > 299 {
|
||||
status := response.StatusCode
|
||||
return 0.0, fmt.Errorf("download from %q failed %d: %s", source, status, http.StatusText(status))
|
||||
}
|
||||
defer response.Body.Close()
|
||||
text, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return 0.0, fmt.Errorf("reading body failed: %w", err)
|
||||
}
|
||||
if len(text) < 1 {
|
||||
return 0.0, errors.New("empty body")
|
||||
}
|
||||
|
||||
// Classify the license text
|
||||
coverage := licensecheck.Scan(text)
|
||||
if len(coverage.Match) == 0 {
|
||||
return coverage.Percent, errors.New("no match found")
|
||||
}
|
||||
match := coverage.Match[0]
|
||||
debugf("%q found match: %q with confidence %f%%", pkg.name, match.ID, coverage.Percent)
|
||||
|
||||
if match.ID != pkg.spdx {
|
||||
return coverage.Percent, fmt.Errorf("classification %q does not match", match.ID)
|
||||
}
|
||||
return coverage.Percent, nil
|
||||
}
|
||||
|
||||
// normalizeURL parses raw and rewrites links of known hosting services so
// that they point to the plain-text version of the file instead of the HTML
// view:
//
//   - github.com:  the host becomes raw.githubusercontent.com and the "blob"
//     element following "/<owner>/<repo>" is dropped
//   - gitlab.com:  the first "/-/blob/" in the path becomes "/-/raw/"
//   - git.octo.it: the query element "a=blob" becomes "a=blob_plain"
//
// URLs of all other hosts are returned as parsed. An error is returned only
// if raw cannot be parsed.
func normalizeURL(raw string) (*url.URL, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}

	switch u.Hostname() {
	case "github.com":
		u.Host = "raw.githubusercontent.com"
		// The path has the form "/<owner>/<repo>/blob/<ref>/<file...>", so
		// after splitting the view marker sits at index 3 (index 0 is the
		// empty string before the leading slash). Only drop that element to
		// keep repositories, refs or directories named "blob" intact.
		parts := strings.Split(u.Path, "/")
		if len(parts) > 3 && parts[3] == "blob" {
			parts = append(parts[:3], parts[4:]...)
		}
		u.Path = strings.Join(parts, "/")
	case "gitlab.com":
		u.Path = strings.Replace(u.Path, "/-/blob/", "/-/raw/", 1)
	case "git.octo.it":
		// The query uses semicolon-separated elements
		parts := strings.Split(u.RawQuery, ";")
		for i, p := range parts {
			if p == "a=blob" {
				parts[i] = "a=blob_plain"
				break
			}
		}
		u.RawQuery = strings.Join(parts, ";")
	}

	return u, nil
}
|
119
tools/license_checker/whitelist.go
Normal file
119
tools/license_checker/whitelist.go
Normal file
|
@ -0,0 +1,119 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/coreos/go-semver/semver"
|
||||
)
|
||||
|
||||
// whitelist is an ordered list of explicit license entries. Check walks the
// entries in order and reports the first one matching a package.
type whitelist []whitelistEntry
|
||||
|
||||
type whitelistEntry struct {
|
||||
Name string
|
||||
Version *semver.Version
|
||||
Operator string
|
||||
License string
|
||||
}
|
||||
|
||||
// re matches a whitelist line. The capture groups are: (1) an optional run of
// the characters '<', '=' and '>' with trailing whitespace, (2) the package
// name, (3) an optional "@v" followed by digits and dots, and (4) the license
// identifier, separated from the rest by whitespace.
var re = regexp.MustCompile(`^([<=>]+\s*)?([-\.\/\w]+)(@v[\d\.]+)?\s+([-\.\w]+)$`)
|
||||
|
||||
func (w *whitelist) Parse(filename string) error {
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Read file line-by-line and split by semicolon
|
||||
lineno := 0
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
lineno++
|
||||
if strings.HasPrefix(line, "#") {
|
||||
// Comment
|
||||
continue
|
||||
}
|
||||
|
||||
groups := re.FindAllStringSubmatch(line, -1)
|
||||
if len(groups) != 1 {
|
||||
log.Printf("WARN: Ignoring not matching entry in line %d", lineno)
|
||||
continue
|
||||
}
|
||||
group := groups[0]
|
||||
if len(group) != 5 {
|
||||
// Malformed
|
||||
log.Printf("WARN: Ignoring malformed entry in line %d", lineno)
|
||||
continue
|
||||
}
|
||||
|
||||
// An entry has the form:
|
||||
// [operator]<package name>[@version] [license SPDX]
|
||||
var operator, version string
|
||||
if group[1] != "" {
|
||||
operator = strings.TrimSpace(group[1])
|
||||
}
|
||||
name := group[2]
|
||||
if group[3] != "" {
|
||||
version = strings.TrimSpace(group[3])
|
||||
version = strings.TrimLeft(version, "@v")
|
||||
}
|
||||
license := strings.TrimSpace(group[4])
|
||||
|
||||
entry := whitelistEntry{Name: name, License: license, Operator: operator}
|
||||
if version != "" {
|
||||
entry.Version, err = semver.NewVersion(version)
|
||||
if err != nil {
|
||||
// Malformed
|
||||
log.Printf("Ignoring malformed version in line %d: %v", lineno, err)
|
||||
continue
|
||||
}
|
||||
if entry.Operator == "" {
|
||||
entry.Operator = "="
|
||||
}
|
||||
}
|
||||
*w = append(*w, entry)
|
||||
}
|
||||
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
func (w *whitelist) Check(pkg, version, spdx string) (ok, found bool) {
|
||||
v := strings.TrimSpace(version)
|
||||
v = strings.TrimPrefix(v, "v")
|
||||
if v == "" {
|
||||
return false, false
|
||||
}
|
||||
pkgver := *semver.New(v)
|
||||
|
||||
for _, entry := range *w {
|
||||
if entry.Name != pkg {
|
||||
continue
|
||||
}
|
||||
|
||||
var match bool
|
||||
switch entry.Operator {
|
||||
case "":
|
||||
match = true
|
||||
case "=":
|
||||
match = pkgver.Equal(*entry.Version)
|
||||
case "<":
|
||||
match = pkgver.LessThan(*entry.Version)
|
||||
case "<=":
|
||||
match = pkgver.LessThan(*entry.Version) || pkgver.Equal(*entry.Version)
|
||||
case ">":
|
||||
match = !pkgver.LessThan(*entry.Version) && !pkgver.Equal(*entry.Version)
|
||||
case ">=":
|
||||
match = !pkgver.LessThan(*entry.Version)
|
||||
}
|
||||
if match {
|
||||
return entry.License == spdx, true
|
||||
}
|
||||
}
|
||||
|
||||
return false, false
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue