1
0
Fork 0
telegraf/tools/license_checker/package.go

103 lines
2.3 KiB
Go
Raw Normal View History

package main
import (
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/google/licensecheck"
)
// packageInfo carries the license metadata of a single package that the
// checker verifies.
type packageInfo struct {
	// name is the package name shown in debug output; version is not read by
	// the methods in this file section (presumably the dependency version).
	name, version string
	// license is the license name used as the lookup key in nameToSPDX; url
	// is the location the license text is downloaded from.
	license, url string
	// spdx is the identifier filled in by ToSPDX and compared against the
	// detected license in Classify.
	spdx string
}
// ToSPDX looks up pkg.license in nameToSPDX and stores the result in
// pkg.spdx.
func (pkg *packageInfo) ToSPDX() {
	id := nameToSPDX[pkg.license]
	pkg.spdx = id
}
// Classify downloads the license text referenced by pkg.url and checks that
// the license detected by licensecheck matches pkg.spdx.
//
// It returns the coverage percentage reported by licensecheck.Scan. The
// error is non-nil if pkg.spdx is empty, the URL cannot be parsed, the
// download fails or answers with a non-2xx status, the body cannot be read
// or is empty, no license is detected, or the first detected license differs
// from pkg.spdx. For failures that happen before scanning the returned
// percentage is 0.0.
func (pkg *packageInfo) Classify() (float64, error) {
	// Check for a valid SPDX
	if pkg.spdx == "" {
		return 0.0, fmt.Errorf("empty SPDX for license %q", pkg.license)
	}

	// Download the license text
	source, err := normalizeURL(pkg.url)
	if err != nil {
		return 0.0, fmt.Errorf("%q is not a valid URL: %w", pkg.url, err)
	}
	debugf("%q downloading from %q", pkg.name, source)

	response, err := http.Get(source.String())
	if err != nil {
		return 0.0, fmt.Errorf("download from %q failed: %w", source, err)
	}
	// Register the close before inspecting the status so the body is also
	// released on the non-2xx early return below.
	defer response.Body.Close()

	if response.StatusCode < 200 || response.StatusCode > 299 {
		status := response.StatusCode
		return 0.0, fmt.Errorf("download from %q failed %d: %s", source, status, http.StatusText(status))
	}

	text, err := io.ReadAll(response.Body)
	if err != nil {
		return 0.0, fmt.Errorf("reading body failed: %w", err)
	}
	if len(text) < 1 {
		return 0.0, errors.New("empty body")
	}

	// Classify the license text
	coverage := licensecheck.Scan(text)
	if len(coverage.Match) == 0 {
		return coverage.Percent, errors.New("no match found")
	}

	// Only the first reported match is compared against the expected SPDX.
	match := coverage.Match[0]
	debugf("%q found match: %q with confidence %f%%", pkg.name, match.ID, coverage.Percent)
	if match.ID != pkg.spdx {
		return coverage.Percent, fmt.Errorf("classification %q does not match", match.ID)
	}

	return coverage.Percent, nil
}
// normalizeURL parses raw and rewrites the web "view" URLs of known hosting
// sites into URLs that serve the plain file content.
//
// Handled hosts:
//   - github.com: the host becomes raw.githubusercontent.com and the "blob"
//     marker of /<owner>/<repo>/blob/<ref>/<path> is dropped.
//   - gitlab.com: the first "/-/blob/" in the path becomes "/-/raw/".
//   - git.octo.it: the first "a=blob" query element (semicolon separated)
//     becomes "a=blob_plain".
//
// URLs of any other host are returned as parsed. An error is returned only
// if raw cannot be parsed.
func normalizeURL(raw string) (*url.URL, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}

	switch u.Hostname() {
	case "github.com":
		u.Host = "raw.githubusercontent.com"
		// Splitting "/owner/repo/blob/ref/..." yields a leading empty
		// element, so the view marker sits at index 3. Remove only that
		// element; a repository, ref or directory that happens to be named
		// "blob" must stay in the path.
		segments := strings.Split(u.Path, "/")
		if len(segments) > 3 && segments[3] == "blob" {
			segments = append(segments[:3], segments[4:]...)
		}
		u.Path = strings.Join(segments, "/")
	case "gitlab.com":
		u.Path = strings.Replace(u.Path, "/-/blob/", "/-/raw/", 1)
	case "git.octo.it":
		parts := strings.Split(u.RawQuery, ";")
		for i, p := range parts {
			if p == "a=blob" {
				parts[i] = "a=blob_plain"
				break
			}
		}
		u.RawQuery = strings.Join(parts, ";")
	}

	return u, nil
}