//go:generate ../../../tools/readme_config_includer/generator
package timestream

import (
	"context"
	_ "embed"
	"errors"
	"fmt"
	"math"
	"reflect"
	"strconv"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/timestreamwrite"
	"github.com/aws/aws-sdk-go-v2/service/timestreamwrite/types"
	"github.com/aws/smithy-go"

	"github.com/influxdata/telegraf"
	common_aws "github.com/influxdata/telegraf/plugins/common/aws"
	"github.com/influxdata/telegraf/plugins/outputs"
)

//go:embed sample.conf
var sampleConfig string

type (
	Timestream struct {
		MappingMode             string `toml:"mapping_mode"`
		DescribeDatabaseOnStart bool   `toml:"describe_database_on_start"`
		DatabaseName            string `toml:"database_name"`

		SingleTableName                                    string `toml:"single_table_name"`
		SingleTableDimensionNameForTelegrafMeasurementName string `toml:"single_table_dimension_name_for_telegraf_measurement_name"`

		UseMultiMeasureRecords            bool   `toml:"use_multi_measure_records"`
		MeasureNameForMultiMeasureRecords string `toml:"measure_name_for_multi_measure_records"`

		CreateTableIfNotExists                        bool              `toml:"create_table_if_not_exists"`
		CreateTableMagneticStoreRetentionPeriodInDays int64             `toml:"create_table_magnetic_store_retention_period_in_days"`
		CreateTableMemoryStoreRetentionPeriodInHours  int64             `toml:"create_table_memory_store_retention_period_in_hours"`
		CreateTableTags                               map[string]string `toml:"create_table_tags"`
		MaxWriteGoRoutinesCount                       int               `toml:"max_write_go_routines"`

		Log telegraf.Logger
		svc WriteClient

		common_aws.CredentialConfig
	}

	WriteClient interface {
		CreateTable(context.Context, *timestreamwrite.CreateTableInput, ...func(*timestreamwrite.Options)) (*timestreamwrite.CreateTableOutput, error)
		WriteRecords(context.Context, *timestreamwrite.WriteRecordsInput, ...func(*timestreamwrite.Options)) (*timestreamwrite.WriteRecordsOutput, error)
		DescribeDatabase(
			context.Context,
			*timestreamwrite.DescribeDatabaseInput,
			...func(*timestreamwrite.Options),
		) (*timestreamwrite.DescribeDatabaseOutput, error)
	}
)

// Mapping modes specify how Telegraf model should be represented in Timestream model.
// See sample config for more details.
const (
	MappingModeSingleTable = "single-table"
	MappingModeMultiTable  = "multi-table"
)

// MaxRecordsPerCall reflects Timestream limit of WriteRecords API call
const MaxRecordsPerCall = 100

// Default value for maximum number of parallel go routines to ingest/write data
// when max_write_go_routines is not specified in the config
const MaxWriteRoutinesDefault = 1
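
// Illustration of the mapping modes above (hypothetical metric, not from the sample config):
// given a Telegraf metric such as `weather,location=us-east temperature=82`, the plugin maps it as follows:
//
//   - multi-table mode: table "weather", dimension "location", measure "temperature"
//   - single-table mode: table SingleTableName, dimension "location", plus an extra dimension
//     named SingleTableDimensionNameForTelegrafMeasurementName holding the value "weather"
//     (unless use_multi_measure_records is enabled, in which case the metric name becomes
//     the multi-measure name instead; see buildDimensions and buildMultiMeasureWriteRecords)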
// WriteFactory function provides a way to mock the client instantiation for testing purposes.
var WriteFactory = func(credentialConfig *common_aws.CredentialConfig) (WriteClient, error) {
	awsCreds, awsErr := credentialConfig.Credentials()
	if awsErr != nil {
		panic("Unable to load credentials config " + awsErr.Error())
	}

	cfg, cfgErr := config.LoadDefaultConfig(context.TODO())
	if cfgErr != nil {
		panic("Unable to load SDK config for Timestream " + cfgErr.Error())
	}
	cfg.Credentials = awsCreds.Credentials

	if credentialConfig.EndpointURL != "" && credentialConfig.Region != "" {
		return timestreamwrite.NewFromConfig(cfg, func(o *timestreamwrite.Options) {
			o.BaseEndpoint = &credentialConfig.EndpointURL
			o.Region = credentialConfig.Region
			o.EndpointDiscovery.EnableEndpointDiscovery = aws.EndpointDiscoveryDisabled
		}), nil
	}

	return timestreamwrite.NewFromConfig(cfg, func(o *timestreamwrite.Options) {
		o.Region = credentialConfig.Region
	}), nil
}

func (*Timestream) SampleConfig() string {
	return sampleConfig
}
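
// Illustrative sketch (hypothetical names, not part of the plugin): tests can replace
// WriteFactory with a factory returning a stub that satisfies WriteClient, e.g.:
//
//	type stubClient struct{}
//
//	func (stubClient) CreateTable(context.Context, *timestreamwrite.CreateTableInput,
//		...func(*timestreamwrite.Options)) (*timestreamwrite.CreateTableOutput, error) {
//		return &timestreamwrite.CreateTableOutput{}, nil
//	}
//
//	func (stubClient) WriteRecords(context.Context, *timestreamwrite.WriteRecordsInput,
//		...func(*timestreamwrite.Options)) (*timestreamwrite.WriteRecordsOutput, error) {
//		return &timestreamwrite.WriteRecordsOutput{}, nil
//	}
//
//	func (stubClient) DescribeDatabase(context.Context, *timestreamwrite.DescribeDatabaseInput,
//		...func(*timestreamwrite.Options)) (*timestreamwrite.DescribeDatabaseOutput, error) {
//		return &timestreamwrite.DescribeDatabaseOutput{}, nil
//	}
//
//	WriteFactory = func(*common_aws.CredentialConfig) (WriteClient, error) {
//		return stubClient{}, nil
//	}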
func (t *Timestream) Connect() error {
	if t.DatabaseName == "" {
		return errors.New("'database_name' key is required")
	}

	if t.MappingMode == "" {
		return errors.New("'mapping_mode' key is required")
	}

	if t.MappingMode != MappingModeSingleTable && t.MappingMode != MappingModeMultiTable {
		return fmt.Errorf("correct MappingMode key values are: %q, %q", MappingModeSingleTable, MappingModeMultiTable)
	}

	if t.MappingMode == MappingModeSingleTable {
		if t.SingleTableName == "" {
			return fmt.Errorf("in %q mapping mode, SingleTableName key is required", MappingModeSingleTable)
		}

		if t.SingleTableDimensionNameForTelegrafMeasurementName == "" && !t.UseMultiMeasureRecords {
			return fmt.Errorf("in %q mapping mode, SingleTableDimensionNameForTelegrafMeasurementName key is required", MappingModeSingleTable)
		}

		// When using MappingModeSingleTable with UseMultiMeasureRecords enabled,
		// measurementName (from line protocol) is mapped to the multi-measure name in Timestream.
		if t.UseMultiMeasureRecords && t.MeasureNameForMultiMeasureRecords != "" {
			return fmt.Errorf("in %q mapping mode, with multi-measure enabled, key MeasureNameForMultiMeasureRecords is invalid", MappingModeSingleTable)
		}
	}

	if t.MappingMode == MappingModeMultiTable {
		if t.SingleTableName != "" {
			return fmt.Errorf("in %q mapping mode, do not specify SingleTableName key", MappingModeMultiTable)
		}

		if t.SingleTableDimensionNameForTelegrafMeasurementName != "" {
			return fmt.Errorf("in %q mapping mode, do not specify SingleTableDimensionNameForTelegrafMeasurementName key", MappingModeMultiTable)
		}

		// When using MappingModeMultiTable (data is ingested to multiple tables) with
		// UseMultiMeasureRecords enabled, measurementName is used as the table name in Timestream,
		// so we require MeasureNameForMultiMeasureRecords to be configured.
		if t.UseMultiMeasureRecords && t.MeasureNameForMultiMeasureRecords == "" {
			return fmt.Errorf("in %q mapping mode, with multi-measure enabled, key MeasureNameForMultiMeasureRecords is required", MappingModeMultiTable)
		}
	}

	if t.CreateTableIfNotExists {
		if t.CreateTableMagneticStoreRetentionPeriodInDays < 1 {
			return errors.New("if Telegraf should create tables, CreateTableMagneticStoreRetentionPeriodInDays key should have a value greater than 0")
		}

		if t.CreateTableMemoryStoreRetentionPeriodInHours < 1 {
			return errors.New("if Telegraf should create tables, CreateTableMemoryStoreRetentionPeriodInHours key should have a value greater than 0")
		}
	}

	if t.MaxWriteGoRoutinesCount <= 0 {
		t.MaxWriteGoRoutinesCount = MaxWriteRoutinesDefault
	}

	t.Log.Infof("Constructing Timestream client for %q mode", t.MappingMode)

	svc, err := WriteFactory(&t.CredentialConfig)
	if err != nil {
		return err
	}

	if t.DescribeDatabaseOnStart {
		t.Log.Infof("Describing database %q in region %q", t.DatabaseName, t.Region)

		describeDatabaseInput := &timestreamwrite.DescribeDatabaseInput{
			DatabaseName: aws.String(t.DatabaseName),
		}
		describeDatabaseOutput, err := svc.DescribeDatabase(context.Background(), describeDatabaseInput)
		if err != nil {
			t.Log.Errorf("Couldn't describe database %q. Check error, fix permissions, connectivity, create database.", t.DatabaseName)
			return err
		}
		t.Log.Infof("Describe database %q returned %v", t.DatabaseName, describeDatabaseOutput)
	}

	t.svc = svc
	return nil
}

func (*Timestream) Close() error { return nil }

func init() {
	outputs.Add("timestream", func() telegraf.Output {
		return &Timestream{}
	})
}

func (t *Timestream) Write(metrics []telegraf.Metric) error {
	writeRecordsInputs := t.TransformMetrics(metrics)

	maxWriteJobs := t.MaxWriteGoRoutinesCount
	numberOfWriteRecordsInputs := len(writeRecordsInputs)

	if numberOfWriteRecordsInputs < maxWriteJobs {
		maxWriteJobs = numberOfWriteRecordsInputs
	}

	var wg sync.WaitGroup
	errs := make(chan error, numberOfWriteRecordsInputs)
	writeJobs := make(chan *timestreamwrite.WriteRecordsInput, maxWriteJobs)

	start := time.Now()
	for i := 0; i < maxWriteJobs; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for writeJob := range writeJobs {
				if err := t.writeToTimestream(writeJob, true); err != nil {
					errs <- err
				}
			}
		}()
	}

	for i := range writeRecordsInputs {
		writeJobs <- writeRecordsInputs[i]
	}

	// Close channel once all jobs are added
	close(writeJobs)

	wg.Wait()
	elapsed := time.Since(start)
	close(errs)

	t.Log.Infof("##WriteToTimestream - Metrics size: %d request size: %d time(ms): %d",
		len(metrics), len(writeRecordsInputs), elapsed.Milliseconds())

	// On partial failures, Telegraf will reject the entire batch of metrics and
	// retry. writeToTimestream will return retryable exceptions only.
	for err := range errs {
		if err != nil {
			return err
		}
	}

	return nil
}
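
// Worked example for Write (hypothetical numbers): with 250 records destined for one table,
// TransformMetrics yields 3 WriteRecordsInput requests (100 + 100 + 50 records, see
// MaxRecordsPerCall). With max_write_go_routines = 2, Write starts 2 workers that drain
// the 3 jobs from the writeJobs channel; any retryable error from writeToTimestream fails
// the whole batch so Telegraf retries it on the next flush interval.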
func (t *Timestream) writeToTimestream(writeRecordsInput *timestreamwrite.WriteRecordsInput, resourceNotFoundRetry bool) error {
	_, err := t.svc.WriteRecords(context.Background(), writeRecordsInput)
	if err != nil {
		// Telegraf will retry ingesting the metrics if an error is returned from the plugin.
		// Therefore, return an error only for retryable exceptions: ThrottlingException and 5xx exceptions.
		var notFound *types.ResourceNotFoundException
		if errors.As(err, &notFound) {
			if resourceNotFoundRetry {
				t.Log.Warnf("Failed to write to Timestream database %q table %q: %s",
					t.DatabaseName, *writeRecordsInput.TableName, notFound)
				return t.createTableAndRetry(writeRecordsInput)
			}
			t.logWriteToTimestreamError(notFound, writeRecordsInput.TableName)
			// Log the error and return it to Telegraf to retry in the next flush interval.
			// We need this to avoid data drop when there are no tables present in the database.
			return fmt.Errorf("failed to write to Timestream database %q table %q: %w",
				t.DatabaseName, *writeRecordsInput.TableName, err)
		}

		var rejected *types.RejectedRecordsException
		if errors.As(err, &rejected) {
			t.logWriteToTimestreamError(err, writeRecordsInput.TableName)
			for _, rr := range rejected.RejectedRecords {
				t.Log.Errorf("reject reason: %q, record index: '%d'", aws.ToString(rr.Reason), rr.RecordIndex)
			}
			return nil
		}

		var throttling *types.ThrottlingException
		if errors.As(err, &throttling) {
			return fmt.Errorf("unable to write to Timestream database %q table %q: %w",
				t.DatabaseName, *writeRecordsInput.TableName, throttling)
		}

		var internal *types.InternalServerException
		if errors.As(err, &internal) {
			return fmt.Errorf("unable to write to Timestream database %q table %q: %w",
				t.DatabaseName, *writeRecordsInput.TableName, internal)
		}

		var operation *smithy.OperationError
		if !errors.As(err, &operation) {
			// Retry other, non-AWS errors.
			return fmt.Errorf("unable to write to Timestream database %q table %q: %w",
				t.DatabaseName, *writeRecordsInput.TableName, err)
		}

		t.logWriteToTimestreamError(err, writeRecordsInput.TableName)
	}
	return nil
}

func (t *Timestream) logWriteToTimestreamError(err error, tableName *string) {
	t.Log.Errorf("Failed to write to Timestream database %q table %q: %s. Skipping metric!",
		t.DatabaseName, *tableName, err.Error())
}

func (t *Timestream) createTableAndRetry(writeRecordsInput *timestreamwrite.WriteRecordsInput) error {
	if t.CreateTableIfNotExists {
		t.Log.Infof("Trying to create table %q in database %q, as 'CreateTableIfNotExists' config key is 'true'.",
			*writeRecordsInput.TableName, t.DatabaseName)
		err := t.createTable(writeRecordsInput.TableName)
		if err == nil {
			t.Log.Infof("Table %q in database %q created. Retrying writing.", *writeRecordsInput.TableName, t.DatabaseName)
			return t.writeToTimestream(writeRecordsInput, false)
		}
		t.Log.Errorf("Failed to create table %q in database %q: %s. Skipping metric!",
			*writeRecordsInput.TableName, t.DatabaseName, err.Error())
	} else {
		t.Log.Errorf("Not trying to create table %q in database %q, as 'CreateTableIfNotExists' config key is 'false'. Skipping metric!",
			*writeRecordsInput.TableName, t.DatabaseName)
	}
	return nil
}
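
// Summary of the error classification in writeToTimestream above:
//
//	ResourceNotFoundException   -> create the table and retry once, else return the error (retryable)
//	RejectedRecordsException    -> log the rejected records and drop the batch (return nil)
//	ThrottlingException         -> return the error so Telegraf retries the batch
//	InternalServerException     -> return the error so Telegraf retries the batch
//	other smithy.OperationError -> log and drop the batch (return nil)
//	non-AWS errors              -> return the error so Telegraf retries the batch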
// createTable creates a Timestream table according to the configuration.
func (t *Timestream) createTable(tableName *string) error {
	createTableInput := &timestreamwrite.CreateTableInput{
		DatabaseName: aws.String(t.DatabaseName),
		TableName:    aws.String(*tableName),
		RetentionProperties: &types.RetentionProperties{
			MagneticStoreRetentionPeriodInDays: &t.CreateTableMagneticStoreRetentionPeriodInDays,
			MemoryStoreRetentionPeriodInHours:  &t.CreateTableMemoryStoreRetentionPeriodInHours,
		},
	}
	tags := make([]types.Tag, 0, len(t.CreateTableTags))
	for key, val := range t.CreateTableTags {
		tags = append(tags, types.Tag{
			Key:   aws.String(key),
			Value: aws.String(val),
		})
	}
	createTableInput.Tags = tags

	_, err := t.svc.CreateTable(context.Background(), createTableInput)
	if err != nil {
		var e *types.ConflictException
		if errors.As(err, &e) {
			// if the table was created in the meantime, it's ok.
			return nil
		}
		return err
	}
	return nil
}

// TransformMetrics transforms a collection of Telegraf Metrics into write requests to Timestream.
// Telegraf Metrics are grouped by Name, Tag Keys and Time to use Timestream CommonAttributes.
// Returns collection of write requests to be performed to Timestream.
func (t *Timestream) TransformMetrics(metrics []telegraf.Metric) []*timestreamwrite.WriteRecordsInput {
	writeRequests := make(map[string]*timestreamwrite.WriteRecordsInput, len(metrics))
	for _, m := range metrics {
		// build MeasureName, MeasureValue, MeasureValueType
		records := t.buildWriteRecords(m)
		if len(records) == 0 {
			continue
		}

		var tableName string
		if t.MappingMode == MappingModeSingleTable {
			tableName = t.SingleTableName
		}
		if t.MappingMode == MappingModeMultiTable {
			tableName = m.Name()
		}

		if curr, ok := writeRequests[tableName]; !ok {
			newWriteRecord := &timestreamwrite.WriteRecordsInput{
				DatabaseName:     aws.String(t.DatabaseName),
				TableName:        aws.String(tableName),
				Records:          records,
				CommonAttributes: &types.Record{},
			}
			writeRequests[tableName] = newWriteRecord
		} else {
			curr.Records = append(curr.Records, records...)
		}
	}

	// Create result as array of WriteRecordsInput. Split requests over records count limit to smaller requests.
	var result []*timestreamwrite.WriteRecordsInput
	for _, writeRequest := range writeRequests {
		if len(writeRequest.Records) > MaxRecordsPerCall {
			for _, recordsPartition := range partitionRecords(MaxRecordsPerCall, writeRequest.Records) {
				newWriteRecord := &timestreamwrite.WriteRecordsInput{
					DatabaseName:     writeRequest.DatabaseName,
					TableName:        writeRequest.TableName,
					Records:          recordsPartition,
					CommonAttributes: writeRequest.CommonAttributes,
				}
				result = append(result, newWriteRecord)
			}
		} else {
			result = append(result, writeRequest)
		}
	}
	return result
}

func (t *Timestream) buildDimensions(point telegraf.Metric) []types.Dimension {
	dimensions := make([]types.Dimension, 0, len(point.Tags()))
	for tagName, tagValue := range point.Tags() {
		dimension := types.Dimension{
			Name:  aws.String(tagName),
			Value: aws.String(tagValue),
		}
		dimensions = append(dimensions, dimension)
	}
	if t.MappingMode == MappingModeSingleTable && !t.UseMultiMeasureRecords {
		dimension := types.Dimension{
			Name:  aws.String(t.SingleTableDimensionNameForTelegrafMeasurementName),
			Value: aws.String(point.Name()),
		}
		dimensions = append(dimensions, dimension)
	}
	return dimensions
}
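
// Worked example for TransformMetrics (hypothetical metrics): in multi-table mode with
// use_multi_measure_records = false, three "cpu" metrics with two supported fields each
// yield six records grouped into one WriteRecordsInput for table "cpu"; a fourth metric
// named "mem" starts a second WriteRecordsInput for table "mem". Any request that grows
// beyond MaxRecordsPerCall (100) records is then split by partitionRecords.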
// buildWriteRecords builds the Timestream write records from Metric Fields only.
// Tags and time are not included - common attributes are built separately.
// Records with an unsupported Metric Field type are skipped.
// It returns an array of Timestream write records.
func (t *Timestream) buildWriteRecords(point telegraf.Metric) []types.Record {
	if t.UseMultiMeasureRecords {
		return t.buildMultiMeasureWriteRecords(point)
	}
	return t.buildSingleWriteRecords(point)
}

func (t *Timestream) buildSingleWriteRecords(point telegraf.Metric) []types.Record {
	dimensions := t.buildDimensions(point)

	records := make([]types.Record, 0, len(point.Fields()))
	for fieldName, fieldValue := range point.Fields() {
		stringFieldValue, stringFieldValueType, ok := convertValue(fieldValue)
		if !ok {
			t.Log.Warnf("Skipping field %q. The type %q is not supported in Timestream as MeasureValue. "+
				"Supported values are: [int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, bool]",
				fieldName, reflect.TypeOf(fieldValue))
			continue
		}

		timeUnit, timeValue := getTimestreamTime(point.Time())

		record := types.Record{
			MeasureName:      aws.String(fieldName),
			MeasureValueType: stringFieldValueType,
			MeasureValue:     aws.String(stringFieldValue),
			Dimensions:       dimensions,
			Time:             aws.String(timeValue),
			TimeUnit:         timeUnit,
		}
		records = append(records, record)
	}
	return records
}

func (t *Timestream) buildMultiMeasureWriteRecords(point telegraf.Metric) []types.Record {
	var records []types.Record
	dimensions := t.buildDimensions(point)

	multiMeasureName := t.MeasureNameForMultiMeasureRecords
	if t.MappingMode == MappingModeSingleTable {
		multiMeasureName = point.Name()
	}

	multiMeasures := make([]types.MeasureValue, 0, len(point.Fields()))
	for fieldName, fieldValue := range point.Fields() {
		stringFieldValue, stringFieldValueType, ok := convertValue(fieldValue)
		if !ok {
			t.Log.Warnf("Skipping field %q. The type %q is not supported in Timestream as MeasureValue. "+
				"Supported values are: [int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, bool]",
				fieldName, reflect.TypeOf(fieldValue))
			continue
		}
		multiMeasures = append(multiMeasures, types.MeasureValue{
			Name:  aws.String(fieldName),
			Type:  stringFieldValueType,
			Value: aws.String(stringFieldValue),
		})
	}

	timeUnit, timeValue := getTimestreamTime(point.Time())

	record := types.Record{
		MeasureName:      aws.String(multiMeasureName),
		MeasureValueType: "MULTI",
		MeasureValues:    multiMeasures,
		Dimensions:       dimensions,
		Time:             aws.String(timeValue),
		TimeUnit:         timeUnit,
	}
	records = append(records, record)
	return records
}

// partitionRecords splits the Timestream records into smaller slices of a max size
// so that they are under the limit for the Timestream WriteRecords API call.
// It returns an array of arrays of records.
func partitionRecords(size int, records []types.Record) [][]types.Record {
	numberOfPartitions := len(records) / size
	if len(records)%size != 0 {
		numberOfPartitions++
	}

	partitions := make([][]types.Record, 0, numberOfPartitions)
	for i := 0; i < numberOfPartitions; i++ {
		start := size * i
		end := size * (i + 1)
		if end > len(records) {
			end = len(records)
		}
		partitions = append(partitions, records[start:end])
	}
	return partitions
}
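
// Worked example for partitionRecords (hypothetical count): partitionRecords(100, records)
// with 250 records returns three partitions of 100, 100 and 50 records. The partitions are
// subslices sharing the backing array of the input, so no record is copied.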
// getTimestreamTime produces Timestream TimeUnit and TimeValue with minimum possible granularity
// while maintaining the same information.
func getTimestreamTime(t time.Time) (timeUnit types.TimeUnit, timeValue string) {
	nanosTime := t.UnixNano()
	if nanosTime%1e9 == 0 {
		timeUnit = types.TimeUnitSeconds
		timeValue = strconv.FormatInt(nanosTime/1e9, 10)
	} else if nanosTime%1e6 == 0 {
		timeUnit = types.TimeUnitMilliseconds
		timeValue = strconv.FormatInt(nanosTime/1e6, 10)
	} else if nanosTime%1e3 == 0 {
		timeUnit = types.TimeUnitMicroseconds
		timeValue = strconv.FormatInt(nanosTime/1e3, 10)
	} else {
		timeUnit = types.TimeUnitNanoseconds
		timeValue = strconv.FormatInt(nanosTime, 10)
	}
	return timeUnit, timeValue
}

// convertValue converts single Field value from Telegraf Metric and produces
// value, valueType Timestream representation.
func convertValue(v interface{}) (value string, valueType types.MeasureValueType, ok bool) {
	ok = true

	switch t := v.(type) {
	case int:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatInt(int64(t), 10)
	case int8:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatInt(int64(t), 10)
	case int16:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatInt(int64(t), 10)
	case int32:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatInt(int64(t), 10)
	case int64:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatInt(t, 10)
	case uint:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatUint(uint64(t), 10)
	case uint8:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatUint(uint64(t), 10)
	case uint16:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatUint(uint64(t), 10)
	case uint32:
		valueType = types.MeasureValueTypeBigint
		value = strconv.FormatUint(uint64(t), 10)
	case uint64:
		valueType = types.MeasureValueTypeBigint
		if t <= uint64(math.MaxInt64) {
			value = strconv.FormatUint(t, 10)
		} else {
			value = strconv.FormatUint(math.MaxInt64, 10)
		}
	case float32:
		valueType = types.MeasureValueTypeDouble
		value = strconv.FormatFloat(float64(t), 'f', -1, 32)
	case float64:
		valueType = types.MeasureValueTypeDouble
		value = strconv.FormatFloat(t, 'f', -1, 64)
	case bool:
		valueType = types.MeasureValueTypeBoolean
		if t {
			value = "true"
		} else {
			value = "false"
		}
	case string:
		valueType = types.MeasureValueTypeVarchar
		value = t
	default:
		// Skip unsupported type.
		ok = false
		return value, valueType, ok
	}
	return value, valueType, ok
}
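
// Worked examples (hypothetical values, derived from the functions above):
//
//	getTimestreamTime(time.Unix(1700000000, 0))         -> (TimeUnitSeconds, "1700000000")
//	getTimestreamTime(time.Unix(1700000000, 123000000)) -> (TimeUnitMilliseconds, "1700000000123")
//	convertValue(int32(42))              -> ("42", MeasureValueTypeBigint, true)
//	convertValue(uint64(math.MaxUint64)) -> clamped to "9223372036854775807" (MaxInt64), Bigint, true
//	convertValue([]byte("x"))            -> ("", "", false); unsupported type, field is skipped by the caller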