102 lines
2.3 KiB
Go
102 lines
2.3 KiB
Go
package main
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/google/licensecheck"
|
|
)
|
|
|
|
// packageInfo describes one package whose declared license is verified against
// the license text it links to.
type packageInfo struct {
	// name identifies the package in debug output; version is carried along
	// with it (not read by the methods in this file section).
	name, version string

	// license is the declared license name, used as the lookup key by ToSPDX.
	// url points at the license text that Classify downloads.
	// spdx is the identifier filled in by ToSPDX and compared by Classify.
	license, url, spdx string
}
|
|
|
|
func (pkg *packageInfo) ToSPDX() {
|
|
pkg.spdx = nameToSPDX[pkg.license]
|
|
}
|
|
|
|
func (pkg *packageInfo) Classify() (float64, error) {
|
|
// Check for a valid SPDX
|
|
if pkg.spdx == "" {
|
|
return 0.0, fmt.Errorf("empty SPDX for license %q", pkg.license)
|
|
}
|
|
|
|
// Download the license text
|
|
source, err := normalizeURL(pkg.url)
|
|
if err != nil {
|
|
return 0.0, fmt.Errorf("%q is not a valid URL: %w", pkg.url, err)
|
|
}
|
|
debugf("%q downloading from %q", pkg.name, source)
|
|
|
|
response, err := http.Get(source.String())
|
|
if err != nil {
|
|
return 0.0, fmt.Errorf("download from %q failed: %w", source, err)
|
|
}
|
|
if response.StatusCode < 200 || response.StatusCode > 299 {
|
|
status := response.StatusCode
|
|
return 0.0, fmt.Errorf("download from %q failed %d: %s", source, status, http.StatusText(status))
|
|
}
|
|
defer response.Body.Close()
|
|
text, err := io.ReadAll(response.Body)
|
|
if err != nil {
|
|
return 0.0, fmt.Errorf("reading body failed: %w", err)
|
|
}
|
|
if len(text) < 1 {
|
|
return 0.0, errors.New("empty body")
|
|
}
|
|
|
|
// Classify the license text
|
|
coverage := licensecheck.Scan(text)
|
|
if len(coverage.Match) == 0 {
|
|
return coverage.Percent, errors.New("no match found")
|
|
}
|
|
match := coverage.Match[0]
|
|
debugf("%q found match: %q with confidence %f%%", pkg.name, match.ID, coverage.Percent)
|
|
|
|
if match.ID != pkg.spdx {
|
|
return coverage.Percent, fmt.Errorf("classification %q does not match", match.ID)
|
|
}
|
|
return coverage.Percent, nil
|
|
}
|
|
|
|
// normalizeURL parses raw and, for the hosting services it knows, rewrites the
// "view file" form of the URL into the form that serves the raw file content.
//
//   - github.com:  the host becomes raw.githubusercontent.com and the "blob"
//     marker in /<owner>/<repo>/blob/<ref>/<path> is removed.
//   - gitlab.com:  the first "/-/blob/" in the path becomes "/-/raw/".
//   - git.octo.it: the first ";"-separated query element equal to "a=blob"
//     becomes "a=blob_plain".
//
// URLs on any other host are returned as parsed. An error is returned only
// when raw cannot be parsed by url.Parse.
func normalizeURL(raw string) (*url.URL, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}

	switch u.Hostname() {
	case "github.com":
		u.Host = "raw.githubusercontent.com"
		// parts[0] is the empty string before the leading slash, so the
		// marker sits at index 3: "", owner, repo, "blob", ref, path...
		// Only that element is dropped; a file, directory or ref that is
		// itself named "blob" must survive the rewrite.
		parts := strings.Split(u.Path, "/")
		if len(parts) > 3 && parts[3] == "blob" {
			parts = append(parts[:3], parts[4:]...)
			u.Path = strings.Join(parts, "/")
		}
	case "gitlab.com":
		u.Path = strings.Replace(u.Path, "/-/blob/", "/-/raw/", 1)
	case "git.octo.it":
		// The query is split on ";" rather than "&" because that is the
		// separator these URLs use between key=value elements.
		parts := strings.Split(u.RawQuery, ";")
		for i, p := range parts {
			if p == "a=blob" {
				parts[i] = "a=blob_plain"
				break
			}
		}
		u.RawQuery = strings.Join(parts, ";")
	}

	return u, nil
}
|