1
0
Fork 0

Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

View file

@ -0,0 +1,93 @@
# Dependency license verification tool
This tool allows the verification of information in
`docs/LICENSE_OF_DEPENDENCIES.md` against the linked license
information. To do so, the license reported by the user is
checked against the license classification of the downloaded
license file for each dependency.
## Building
```shell
make build_tools
```
## Running
The simplest way to run the verification tool is to execute
```shell
telegraf$ ./tools/license_checker/license_checker
```
which uses the current directory as telegraf's root directory and verifies
all licenses. Only errors will be reported by default.
There are multiple options you can use to customize the verification.
Take a look at
```shell
telegraf$ ./tools/license_checker/license_checker --help
```
to get an overview.
As the verification tool downloads each license file linked in the
dependency license document, be careful not to exceed
the access limits of e.g. GitHub by running the tool too frequently.
Some packages change the license for newer versions. As we always
link to the latest license text, the classification might not match
the actual license of the dependency version we use. Furthermore, some license
texts might be wrongly classified, or not classified at all. In these
cases, you can use a _whitelist_ to explicitly state the license
SPDX identifier for those packages.
See the [whitelist section](#whitelist) for more details.
The recommended use in telegraf is to run
```shell
telegraf$ ./tools/license_checker/license_checker \
-whitelist ./tools/license_checker/data/whitelist
```
using the code-versioned whitelist. This command will report all
non-matching entries with an `ERR:` prefix.
## Whitelist
Whitelist entries contain explicit license information for
a set of packages to use instead of classification. Each entry
in the whitelist is a line of the form
```text
[comparison operator]<package name>[@vX.Y.Z] <license SPDX>
```
where the _comparison operator_ is one of `>`, `>=`, `=`, `<=` or `<`
and the _license SPDX_ is a [SPDX license identifier][spdx].
In case no package version is specified, the entry matches all versions
of the library. Furthermore, the comparison operator can be omitted
which is equivalent to an exact match (`=`).
The entries are processed in order until the first match is found.
Here is an example of a whitelist. Assume that you have library
`github.com/foo/bar` which started out with the `MIT` license
until version 1.0.0 where it changed to `EFL-1.0` until it again
changed to `EFL-2.0` starting __after__ version 2.3.0. In this case
the whitelist should look like this
```text
<github.com/foo/bar@v1.0.0 MIT
<=github.com/foo/bar@v2.3.0 EFL-1.0
github.com/foo/bar EFL-2.0
```
All versions below 1.0.0 are matched by the first line and are thus
classified as `MIT`. The second line matches everything from
1.0.0 onwards (thus not matched by the first line) up to and including
2.3.0. The last line will catch everything that was not matched by the first
two lines, i.e. everything after 2.3.0.
[spdx]: https://spdx.org/licenses/

View file

@ -0,0 +1,15 @@
{
"Apache License 2.0": "Apache-2.0",
"BSD 2-Clause with views sentence": "BSD-2-Clause-Views",
"BSD 2-Clause \"Simplified\" License": "BSD-2-Clause",
"BSD 3-Clause \"New\" or \"Revised\" License": "BSD-3-Clause",
"BSD 3-Clause Clear License": "BSD-3-Clause",
"BSD 3-Clause License": "BSD-3-Clause",
"Eclipse Public License - v 1.0": "EPL-1.0",
"Eclipse Public License - v 2.0": "EPL-2.0",
"ISC License": "ISC",
"MIT License": "MIT",
"Mozilla Public License 2.0": "MPL-2.0",
"The Unlicense": "Unlicense",
"zlib License": "Zlib"
}

View file

@ -0,0 +1,2 @@
<github.com/couchbase/goutils@v0.1.2 Apache-2.0
<=github.com/segmentio/asm@v1.2.0 MIT

View file

@ -0,0 +1,245 @@
package main
import (
_ "embed"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
"golang.org/x/mod/modfile"
)
// spdxMappingFile holds the raw JSON content of data/spdx_mapping.json,
// embedded into the binary at build time.
//
//go:embed data/spdx_mapping.json
var spdxMappingFile []byte

// debug enables the output of debugf; it is set by the "debug" flag in main.
var debug bool

// nameToSPDX maps a full license name to its SPDX identifier. It is filled in
// main by unmarshalling spdxMappingFile.
var nameToSPDX map[string]string
// debugf writes a log message prefixed with "DEBUG: " using the given
// printf-style format and arguments. Nothing is logged unless the package-level
// debug switch is enabled.
func debugf(format string, v ...any) {
	if debug {
		log.Printf("DEBUG: "+format, v...)
	}
}
// main verifies the licenses documented in docs/LICENSE_OF_DEPENDENCIES.md.
//
// It parses the command-line flags, loads the embedded license-name to SPDX
// mapping, reads the optional whitelist, go.mod and the dependency license
// document below the telegraf root directory (first positional argument,
// "." by default) and then checks every listed package either against the
// whitelist or by classifying the linked license text.
//
// The process exits with status 1 if the usage text is shown or if at least one
// package failed the check, and with status 0 otherwise.
func main() {
	var help, verbose bool
	var threshold float64
	var whitelistFn, userpkg string

	flag.BoolVar(&debug, "debug", false, "output debugging information")
	flag.BoolVar(&help, "help", false, "output this help text")
	flag.BoolVar(&verbose, "verbose", false, "output verbose information instead of just errors")
	flag.Float64Var(&threshold, "threshold", 0.8, "threshold for license classification")
	flag.StringVar(&whitelistFn, "whitelist", "", "use the given white-list file for comparison")
	flag.StringVar(&userpkg, "package", "", "only test the given package (all by default)")
	flag.Parse()
	if help || flag.NArg() > 1 {
		fmt.Fprintf(flag.CommandLine.Output(), "Usage of %s [options] [telegraf root dir]\n", os.Args[0])
		fmt.Fprintf(flag.CommandLine.Output(), "Options:\n")
		flag.PrintDefaults()
		fmt.Fprintf(flag.CommandLine.Output(), "\n")
		fmt.Fprintf(flag.CommandLine.Output(), "Arguments:\n")
		fmt.Fprintf(flag.CommandLine.Output(), " telegraf root dir (optional)\n")
		fmt.Fprintf(flag.CommandLine.Output(), " path to the root directory of telegraf (default: .)\n")
		os.Exit(1)
	}

	// Setup full-name to license SPDX identifier mapping
	if err := json.Unmarshal(spdxMappingFile, &nameToSPDX); err != nil {
		log.Fatalf("Unmarshalling license name to SPDX mapping failed: %v", err)
	}

	// Get required files
	path := "."
	if flag.NArg() == 1 {
		path = flag.Arg(0)
	}
	moduleFilename := filepath.Join(path, "go.mod")
	licenseFilename := filepath.Join(path, "docs", "LICENSE_OF_DEPENDENCIES.md")

	var override whitelist
	if whitelistFn != "" {
		log.Printf("Reading whitelist file %q...", whitelistFn)
		if err := override.Parse(whitelistFn); err != nil {
			log.Fatalf("Reading whitelist failed: %v", err)
		}
	}

	log.Printf("Reading module file %q...", moduleFilename)
	modbuf, err := os.ReadFile(moduleFilename)
	if err != nil {
		log.Fatal(err)
	}
	depModules, err := modfile.Parse(moduleFilename, modbuf, nil)
	if err != nil {
		// Use %v here; an error value cannot be formatted with %f
		log.Fatalf("Parsing modules failed: %v", err)
	}
	debugf("found %d required packages", len(depModules.Require))

	// Map each required module path to the version pinned in go.mod
	dependencies := make(map[string]string)
	for _, d := range depModules.Require {
		dependencies[d.Mod.Path] = d.Mod.Version
	}

	log.Printf("Reading license file %q...", licenseFilename)
	licensesMarkdown, err := os.ReadFile(licenseFilename)
	if err != nil {
		log.Fatal(err)
	}

	// Parse the markdown document
	parser := goldmark.DefaultParser()
	root := parser.Parse(text.NewReader(licensesMarkdown))

	// Prepare a line parser
	lineParser := goldmark.DefaultParser()

	// Collect the licenses
	// For each list we search for the items and parse them.
	// Expect a pattern of <package name> <link>.
	ignored := 0
	var packageInfos []packageInfo
	for node := root.FirstChild(); node != nil; node = node.NextSibling() {
		listNode, ok := node.(*ast.List)
		if !ok {
			continue
		}

		for inode := listNode.FirstChild(); inode != nil; inode = inode.NextSibling() {
			itemNode, ok := inode.(*ast.ListItem)
			if !ok || itemNode.ChildCount() != 1 {
				continue
			}
			textNode, ok := itemNode.FirstChild().(*ast.TextBlock)
			if !ok || textNode.Lines().Len() != 1 {
				continue
			}

			lineSegment := textNode.Lines().At(0)
			line := lineSegment.Value(licensesMarkdown)
			lineRoot := lineParser.Parse(text.NewReader(line))

			// The first child is nil if the line produced no block at all, so
			// only ask it for its children when it actually exists.
			blocks := lineRoot.ChildCount()
			elements := 0
			if first := lineRoot.FirstChild(); first != nil {
				elements = first.ChildCount()
			}
			if blocks != 1 || elements < 2 {
				log.Printf("WARN: Ignoring item %q due to wrong count (%d/%d)", string(line), blocks, elements)
				ignored++
				continue
			}

			var name, license, link string
			for lineElementNode := lineRoot.FirstChild().FirstChild(); lineElementNode != nil; lineElementNode = lineElementNode.NextSibling() {
				switch v := lineElementNode.(type) {
				case *ast.Text:
					name += string(v.Value(line))
				case *ast.Link:
					// A link without a plain-text label (empty or e.g. emphasized)
					// must not panic; the license stays unset and the item is
					// rejected by the empty-license check below.
					if label, ok := v.FirstChild().(*ast.Text); ok {
						license = string(label.Value(line))
					} else {
						debugf("ignoring non-text link label in %q", string(line))
					}
					link = string(v.Destination)
				default:
					debugf("ignoring unknown element %T (%v)", v, v)
				}
			}
			name = strings.TrimSpace(name)

			info := packageInfo{
				name:    name,
				version: dependencies[name],
				url:     strings.TrimSpace(link),
				license: strings.TrimSpace(license),
			}
			info.ToSPDX()

			if info.name == "" {
				log.Printf("WARN: Ignoring item %q due to empty package name", string(line))
				ignored++
				continue
			}
			if info.url == "" {
				log.Printf("WARN: Ignoring item %q due to empty url name", string(line))
				ignored++
				continue
			}
			if info.license == "" {
				log.Printf("WARN: Ignoring item %q due to empty license name", string(line))
				ignored++
				continue
			}

			debugf("adding %q with license %q (%s) and version %q at %q...", info.name, info.license, info.spdx, info.version, info.url)
			packageInfos = append(packageInfos, info)
		}
	}

	// Get the superset of licenses
	if debug {
		licenseSet := make(map[string]bool, len(packageInfos))
		licenseNames := make([]string, 0, len(packageInfos))
		for _, info := range packageInfos {
			if found := licenseSet[info.license]; !found {
				licenseNames = append(licenseNames, info.license)
			}
			licenseSet[info.license] = true
		}
		sort.Strings(licenseNames)
		log.Println("Using licenses:")
		for _, license := range licenseNames {
			log.Println(" " + license)
		}
	}

	// Check the licenses by matching their text and compare the classification result
	// with the information provided by the user
	var succeeded, warn, failed int
	for _, info := range packageInfos {
		// Ignore all packages except the ones given by the user (if any)
		if userpkg != "" && userpkg != info.name {
			continue
		}

		// Check if we got a whitelist entry for the package
		if ok, found := override.Check(info.name, info.version, info.spdx); found {
			if ok {
				log.Printf("OK: \"%s@%s\" (%s) (whitelist)", info.name, info.version, info.license)
				succeeded++
			} else {
				log.Printf("ERR: \"%s@%s\" (%s) %s does not match whitelist", info.name, info.version, info.license, info.spdx)
				failed++
			}
			continue
		}

		// Perform a text classification
		confidence, err := info.Classify()
		if err != nil {
			log.Printf("ERR: %q (%s) %v", info.name, info.license, err)
			failed++
			continue
		}
		// NOTE(review): confidence is printed as a percentage while the default
		// threshold is 0.8 - confirm both use the same unit.
		if confidence < threshold {
			log.Printf("WARN: %q (%s) has low matching confidence (%.2f%%)", info.name, info.license, confidence)
			warn++
			continue
		}
		if verbose {
			log.Printf("OK: %q (%s) (%.2f%%)", info.name, info.license, confidence)
		}
		succeeded++
	}

	if verbose {
		log.Printf("Checked %d licenses (%d ignored lines):", len(packageInfos), ignored)
		log.Printf(" %d successful", succeeded)
		log.Printf(" %d low confidence", warn)
		log.Printf(" %d errors", failed)
	}

	if failed > 0 {
		os.Exit(1)
	}
	os.Exit(0)
}

View file

@ -0,0 +1,102 @@
package main
import (
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/google/licensecheck"
)
// packageInfo describes one dependency entry of the license document.
type packageInfo struct {
	// name is the package name as written in the list item of the license document
	name string
	// version is the version required in go.mod for name; empty if name is not listed there
	version string
	// license is the license name taken from the link text of the list item
	license string
	// url is the link destination pointing to the license text
	url string
	// spdx is the SPDX identifier looked up for license by ToSPDX; empty if unknown
	spdx string
}
// ToSPDX looks up the package's license name in the nameToSPDX mapping and
// stores the result in the spdx field. Unknown license names yield an empty
// SPDX identifier.
func (pkg *packageInfo) ToSPDX() {
	identifier := nameToSPDX[pkg.license]
	pkg.spdx = identifier
}
// Classify downloads the license text referenced by the package's URL and
// compares the classification of that text with the package's SPDX identifier.
//
// It returns the coverage percentage reported by licensecheck together with a
// nil error if the first match equals the expected SPDX identifier. An error is
// returned if the package has no SPDX identifier, the URL is invalid, the
// download fails or answers with a non-2xx status, the body is empty, nothing
// matches or the first match differs from the expected identifier.
func (pkg *packageInfo) Classify() (float64, error) {
	// Check for a valid SPDX
	if pkg.spdx == "" {
		return 0.0, fmt.Errorf("empty SPDX for license %q", pkg.license)
	}

	// Download the license text
	source, err := normalizeURL(pkg.url)
	if err != nil {
		return 0.0, fmt.Errorf("%q is not a valid URL: %w", pkg.url, err)
	}
	debugf("%q downloading from %q", pkg.name, source)

	response, err := http.Get(source.String())
	if err != nil {
		return 0.0, fmt.Errorf("download from %q failed: %w", source, err)
	}
	// Register the close before any further return so the body is also
	// released when the server answers with a non-2xx status.
	defer response.Body.Close()

	if response.StatusCode < 200 || response.StatusCode > 299 {
		status := response.StatusCode
		return 0.0, fmt.Errorf("download from %q failed %d: %s", source, status, http.StatusText(status))
	}

	text, err := io.ReadAll(response.Body)
	if err != nil {
		return 0.0, fmt.Errorf("reading body failed: %w", err)
	}
	if len(text) < 1 {
		return 0.0, errors.New("empty body")
	}

	// Classify the license text
	coverage := licensecheck.Scan(text)
	if len(coverage.Match) == 0 {
		return coverage.Percent, errors.New("no match found")
	}

	// Only the first reported match is compared with the expected license
	match := coverage.Match[0]
	debugf("%q found match: %q with confidence %f%%", pkg.name, match.ID, coverage.Percent)
	if match.ID != pkg.spdx {
		return coverage.Percent, fmt.Errorf("classification %q does not match", match.ID)
	}

	return coverage.Percent, nil
}
// normalizeURL parses raw and rewrites links to the HTML view of a file on
// known hosting services into links to the raw file content:
//
//   - github.com: the host becomes raw.githubusercontent.com and the "blob"
//     marker following "/<owner>/<repo>/" is removed
//   - gitlab.com: the first "/-/blob/" in the path becomes "/-/raw/"
//   - git.octo.it: the query element "a=blob" becomes "a=blob_plain"
//
// URLs of all other hosts are returned as parsed. An error is returned only if
// raw cannot be parsed as a URL.
func normalizeURL(raw string) (*url.URL, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}

	switch u.Hostname() {
	case "github.com":
		u.Host = "raw.githubusercontent.com"
		// The path has the form "/<owner>/<repo>/blob/<ref>/<file>", so after
		// splitting, the marker sits at index 3 (index 0 is the empty element
		// before the leading slash). Only that element is dropped so owners,
		// repositories, refs or files named "blob" stay intact.
		segments := strings.Split(u.Path, "/")
		if len(segments) > 3 && segments[3] == "blob" {
			segments = append(segments[:3], segments[4:]...)
		}
		u.Path = strings.Join(segments, "/")
	case "gitlab.com":
		u.Path = strings.Replace(u.Path, "/-/blob/", "/-/raw/", 1)
	case "git.octo.it":
		// gitweb separates its query arguments with semicolons
		parts := strings.Split(u.RawQuery, ";")
		for i, p := range parts {
			if p == "a=blob" {
				parts[i] = "a=blob_plain"
				break
			}
		}
		u.RawQuery = strings.Join(parts, ";")
	}

	return u, nil
}

View file

@ -0,0 +1,119 @@
package main
import (
"bufio"
"log"
"os"
"regexp"
"strings"
"github.com/coreos/go-semver/semver"
)
// whitelist is an ordered list of explicit license statements for packages.
// Entries are evaluated in order and the first matching entry wins.
type whitelist []whitelistEntry
// whitelistEntry is a single line of the whitelist file.
type whitelistEntry struct {
	// Name is the package name the entry applies to
	Name string
	// Version is the version the package version is compared against;
	// it is only set if the entry specifies a version
	Version *semver.Version
	// Operator is the comparison operator ("=", "<", "<=", ">" or ">=");
	// an empty operator makes the entry match every version
	Operator string
	// License is the SPDX identifier expected for matching packages
	License string
}
// re matches one whitelist line. The capture groups are:
//  1. the optional comparison operator including trailing whitespace
//  2. the package name
//  3. the optional version including the leading "@v"
//  4. the license SPDX identifier
var re = regexp.MustCompile(`^([<=>]+\s*)?([-\.\/\w]+)(@v[\d\.]+)?\s+([-\.\w]+)$`)
// Parse reads the whitelist file with the given name and appends all valid
// entries to w. Each entry has the form
//
//	[operator]<package name>[@vX.Y.Z] <license SPDX>
//
// Blank lines and lines starting with "#" are skipped. Lines not matching the
// format, using an unknown operator or carrying a malformed version are
// reported and ignored. An entry without a version matches all versions; an
// operator on such an entry is meaningless and dropped with a warning. An entry
// with a version but without operator uses "=".
//
// An error is returned if the file cannot be opened or read.
func (w *whitelist) Parse(filename string) error {
	file, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer file.Close()

	// Read the file line-by-line
	lineno := 0
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		lineno++
		if line == "" || strings.HasPrefix(line, "#") {
			// Empty line or comment
			continue
		}

		// The expression is anchored, so a line matches at most once
		group := re.FindStringSubmatch(line)
		if group == nil {
			log.Printf("WARN: Ignoring not matching entry in line %d", lineno)
			continue
		}

		operator := strings.TrimSpace(group[1])
		name := group[2]
		version := strings.TrimPrefix(group[3], "@v")
		license := group[4]

		// The expression accepts any sequence of '<', '=' and '>'; reject the
		// ones that cannot be evaluated instead of storing a dead entry.
		switch operator {
		case "", "=", "<", "<=", ">", ">=":
		default:
			log.Printf("WARN: Ignoring entry with unknown operator %q in line %d", operator, lineno)
			continue
		}

		entry := whitelistEntry{Name: name, License: license, Operator: operator}
		if version == "" {
			// Without a version there is nothing to compare against, so the
			// entry has to match all versions.
			if entry.Operator != "" {
				log.Printf("WARN: Ignoring operator %q without version in line %d", operator, lineno)
				entry.Operator = ""
			}
		} else {
			parsed, err := semver.NewVersion(version)
			if err != nil {
				// Malformed
				log.Printf("Ignoring malformed version in line %d: %v", lineno, err)
				continue
			}
			entry.Version = parsed
			if entry.Operator == "" {
				entry.Operator = "="
			}
		}
		*w = append(*w, entry)
	}

	return scanner.Err()
}
// Check looks up the first whitelist entry matching the package pkg in the
// given version and compares the entry's license with spdx.
//
// found reports whether a matching entry exists and ok whether the license of
// that entry equals spdx. Both are false if version is empty, if no entry
// matches or if version cannot be parsed as a semantic version when it has to
// be compared with an entry.
func (w *whitelist) Check(pkg, version, spdx string) (ok, found bool) {
	v := strings.TrimPrefix(strings.TrimSpace(version), "v")
	if v == "" {
		return false, false
	}

	// The package version is parsed lazily as it is only required for
	// entries that actually compare versions.
	var pkgver *semver.Version
	for _, entry := range *w {
		if entry.Name != pkg {
			continue
		}

		// Entries without operator or version match all versions
		if entry.Operator == "" || entry.Version == nil {
			return entry.License == spdx, true
		}

		if pkgver == nil {
			parsed, err := semver.NewVersion(v)
			if err != nil {
				// Report the problem instead of panicking on an odd version
				log.Printf("WARN: Ignoring whitelist for %q due to invalid version %q: %v", pkg, version, err)
				return false, false
			}
			pkgver = parsed
		}

		var match bool
		switch entry.Operator {
		case "=":
			match = pkgver.Equal(*entry.Version)
		case "<":
			match = pkgver.LessThan(*entry.Version)
		case "<=":
			match = pkgver.LessThan(*entry.Version) || pkgver.Equal(*entry.Version)
		case ">":
			match = !pkgver.LessThan(*entry.Version) && !pkgver.Equal(*entry.Version)
		case ">=":
			match = !pkgver.LessThan(*entry.Version)
		}
		if match {
			return entry.License == spdx, true
		}
	}

	return false, false
}