// Copyright 2013 The Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd.

package database

import (
	"path"
	"regexp"
	"strings"
	"unicode"

	"github.com/golang/gddo/doc"
	"github.com/golang/gddo/gosrc"
)

// isStandardPackage reports whether path contains no "." character. Import
// paths without a dot (for example "fmt" or "net/http") have no domain-name
// element and are treated as standard library packages.
func isStandardPackage(path string) bool {
	hasDot := strings.Contains(path, ".")
	return !hasDot
}

// isTermSep reports whether r separates two terms. White space and symbols
// always separate; punctuation separates too, with the single exception of
// '.', which is kept as part of a term.
func isTermSep(r rune) bool {
	switch {
	case unicode.IsSpace(r), unicode.IsSymbol(r):
		return true
	case r == '.':
		// The period is punctuation but deliberately not a separator.
		return false
	default:
		return unicode.IsPunct(r)
	}
}

// normalizeProjectRoot returns projectRoot unchanged unless it is empty, in
// which case it returns "go".
func normalizeProjectRoot(projectRoot string) string {
	if len(projectRoot) > 0 {
		return projectRoot
	}
	return "go"
}

// synonyms maps a lower-case word to the word that replaces it when terms are
// normalized. The function term consults this table and passes the
// replacement on to the stemmer, so related spellings end up as one term.
var synonyms = map[string]string{
	"redis":    "redisdb", // append db to avoid stemming to 'red'
	"rand":     "random",
	"postgres": "postgresql",
	"mongo":    "mongodb",
}

// term converts the word s to its normalized index term: the word is
// lower-cased, stripped of one trailing period, replaced by its entry in
// synonyms if it has one, and finally passed through stem.
func term(s string) string {
	// Trim the trailing period at the end of any sentence. This must happen
	// before the synonym lookup: '.' is not a term separator, so the last word
	// of a sentence arrives here as e.g. "redis." and would otherwise miss its
	// synonym and be stemmed in its raw form.
	s = strings.TrimSuffix(strings.ToLower(s), ".")
	if x, ok := synonyms[s]; ok {
		s = x
	}
	return stem(s)
}

// httpPat matches an http or https URL up to the next white space character.
// collectSynopsisTerms uses it to remove URLs from a synopsis before the text
// is split into terms.
var httpPat = regexp.MustCompile(`https?://\S+`)

// collectSynopsisTerms adds the terms found in synopsis to the set terms.
//
// URLs are removed first, then the text is split at term separators and
// lower-cased. Leading boilerplate is dropped so that only the subject and the
// description contribute terms. The recognized openings are:
//
//	Package foo ...
//	Command foo ...
//	Package foo implements ... (and provides, contains)
//	The foo package ...
//	The foo package implements ...
//	The foo command ...
//
// Words listed in stopWord are skipped; every other word is normalized with
// term before it is recorded.
func collectSynopsisTerms(terms map[string]bool, synopsis string) {
	words := strings.FieldsFunc(httpPat.ReplaceAllLiteralString(synopsis, ""), isTermSep)
	for i, w := range words {
		words[i] = strings.ToLower(w)
	}

	// Strip the leading keyword(s) while keeping the package or command name
	// as the first remaining word. Only the "package" forms may be followed
	// by a boilerplate verb.
	verbMayFollow := false
	if len(words) >= 1 && (words[0] == "package" || words[0] == "command") {
		verbMayFollow = words[0] == "package"
		words = words[1:]
	} else if len(words) >= 3 && words[0] == "the" &&
		(words[2] == "package" || words[2] == "command") {
		verbMayFollow = words[2] == "package"
		// Move the name over the keyword so it survives the reslice.
		words[2] = words[1]
		words = words[2:]
	}

	if verbMayFollow && len(words) >= 2 {
		switch words[1] {
		case "implements", "provides", "contains":
			// Overwrite the verb with the name and drop the old name slot.
			words[1] = words[0]
			words = words[1:]
		}
	}

	for _, w := range words {
		if stopWord[w] {
			continue
		}
		terms[term(w)] = true
	}
}

// termSlice returns the keys of terms as a slice. Keys are included whatever
// their value is, and their order is unspecified because it follows map
// iteration. The result is never nil, even for an empty or nil map.
func termSlice(terms map[string]bool) []string {
	keys := make([]string, len(terms))
	next := 0
	for key := range terms {
		keys[next] = key
		next++
	}
	return keys
}

// documentTerms returns the index terms for pdoc, in unspecified order.
//
// Every package receives a "project:" term built from its normalized project
// root, a "project:subrepo" term when its import path is below golang.org/x/,
// and an "import:" term for each import accepted by gosrc.IsValidPath.
//
// The searchable terms are added only when score is positive: the terms parsed
// from the import path, the terms collected from the synopsis and, for
// packages outside the standard library, the "all:" marker plus the terms
// parsed from the project name and the package name.
func documentTerms(pdoc *doc.Package, score float64) []string {
	set := make(map[string]bool)

	// Project membership.
	set["project:"+normalizeProjectRoot(pdoc.ProjectRoot)] = true
	if strings.HasPrefix(pdoc.ImportPath, "golang.org/x/") {
		set["project:subrepo"] = true
	}

	// Imports.
	for _, imp := range pdoc.Imports {
		if gosrc.IsValidPath(imp) {
			set["import:"+imp] = true
		}
	}

	// addQuery records every term that parseQuery extracts from q.
	addQuery := func(q string) {
		for _, t := range parseQuery(q) {
			set[t] = true
		}
	}

	if score > 0 {
		addQuery(pdoc.ImportPath)
		if !isStandardPackage(pdoc.ImportPath) {
			set["all:"] = true
			addQuery(pdoc.ProjectName)
			addQuery(pdoc.Name)
		}

		// Synopsis.
		collectSynopsisTerms(set, pdoc.Synopsis)
	}

	return termSlice(set)
}

// vendorPat matches the path of a vendored package: one of the directories
// that vendoring tools use, followed by a path element that looks like a
// domain name. documentScore uses it to give vendored packages a score of
// zero and to penalize packages that import a vendored package.
var vendorPat = regexp.MustCompile(
	// match directories used by tools to vendor packages.
	`/(?:_?third_party|vendors|Godeps/_workspace/src)/` +
		// match a domain name: a path element with at least one character
		// other than '.' or '/' before its first period.
		`[^./]+\.[^/]+`)

// documentScore returns the ranking score for pdoc. A score of zero means the
// package is not to be included in the index; otherwise the score starts at 1
// and is multiplied by a penalty factor for each undesirable property.
//
// The score is zero for packages that have no name, are not active, have
// errors, have an import path ending in ".go", live on gist.github.com, are
// internal or vendored, or import a path ending in ".go". It is also zero for
// commands without documentation and for non-truncated packages that export
// nothing.
func documentScore(pdoc *doc.Package) float64 {
	switch {
	case pdoc.Name == "",
		pdoc.Status != gosrc.Active,
		len(pdoc.Errors) > 0,
		strings.HasSuffix(pdoc.ImportPath, ".go"),
		strings.HasPrefix(pdoc.ImportPath, "gist.github.com/"),
		strings.HasSuffix(pdoc.ImportPath, "/internal"),
		strings.Contains(pdoc.ImportPath, "/internal/"),
		vendorPat.MatchString(pdoc.ImportPath):
		return 0
	}

	for _, imp := range pdoc.Imports {
		if strings.HasSuffix(imp, ".go") {
			return 0
		}
	}

	score := 1.0

	if pdoc.IsCmd {
		// Do not include command in index if it does not have documentation.
		if pdoc.Doc == "" {
			return 0
		}
		// Penalize commands that don't use the "go/*" packages.
		if !importsGoPackages(pdoc) {
			score *= 0.9
		}
		return score
	}

	// Do not include package in index if it does not have exports. A
	// truncated package is kept even when nothing was recorded for it.
	exported := len(pdoc.Consts) + len(pdoc.Vars) + len(pdoc.Funcs) +
		len(pdoc.Types) + len(pdoc.Examples)
	if exported == 0 && !pdoc.Truncated {
		return 0
	}

	// Penalty for no documentation.
	if pdoc.Doc == "" {
		score *= 0.95
	}

	// Penalty for last element of path != package name.
	if path.Base(pdoc.ImportPath) != pdoc.Name {
		score *= 0.9
	}

	// The remaining path checks look only at the part of the import path
	// that follows the project root.
	sub := pdoc.ImportPath[len(pdoc.ProjectRoot):]

	// Penalty for deeply nested packages: one factor per path separator.
	for depth := strings.Count(sub, "/"); depth > 0; depth-- {
		score *= 0.99
	}

	// Penalty for a "/src/" directory inside the project. A match at the very
	// start of sub (offset 0) is not penalized.
	if strings.Index(sub, "/src/") > 0 {
		score *= 0.95
	}

	// Penalize packages that import vendored packages; applied at most once.
	for _, imp := range pdoc.Imports {
		if vendorPat.MatchString(imp) {
			score *= 0.1
			break
		}
	}

	return score
}

// parseQuery splits the query q into normalized terms. The query is
// lower-cased and split at term separators; words listed in stopWord are
// dropped and each remaining word is normalized with term. The result is nil
// when no word remains.
func parseQuery(q string) []string {
	var result []string
	for _, word := range strings.FieldsFunc(strings.ToLower(q), isTermSep) {
		if stopWord[word] {
			continue
		}
		result = append(result, term(word))
	}
	return result
}

// importsGoPackages reports whether at least one of pdoc's imports has the
// "go/" path prefix.
func importsGoPackages(pdoc *doc.Package) bool {
	for i := range pdoc.Imports {
		if strings.HasPrefix(pdoc.Imports[i], "go/") {
			return true
		}
	}
	return false
}