diff options
| author | Felix Hanley <felix@userspace.com.au> | 2020-02-19 22:49:55 +0000 |
|---|---|---|
| committer | Felix Hanley <felix@userspace.com.au> | 2020-02-19 22:49:55 +0000 |
| commit | d17450c945cd859ee5839802a399dfb9f1e54bfa (patch) | |
| tree | c29320829d63d3715226f17c2c4dd14212bf3dcb /user_agent.go | |
| parent | c664ab56a4726690ed233a0d1a98aefd1d6a5ac9 (diff) | |
| download | sws-d17450c945cd859ee5839802a399dfb9f1e54bfa.tar.gz sws-d17450c945cd859ee5839802a399dfb9f1e54bfa.tar.bz2 | |
Outsource UA detection
Diffstat (limited to 'user_agent.go')
| -rw-r--r-- | user_agent.go | 78 |
1 file changed, 68 insertions, 10 deletions
diff --git a/user_agent.go b/user_agent.go index f65f45c..a259538 100644 --- a/user_agent.go +++ b/user_agent.go @@ -5,38 +5,96 @@ import ( "fmt" "net/http" "regexp" + "strings" "time" -) -var botRegex = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)") -var botFromSiteRegexp = regexp.MustCompile("http[s]?://.+\\.\\w+") + detector "github.com/mssola/user_agent" +) +// UserAgent of a hit. type UserAgent struct { Hash string `json:"hash"` Name string `json:"name"` LastSeenAt time.Time `json:"last_seen_at" db:"last_seen_at"` + Count int + + ua *detector.UserAgent } -func (ua UserAgent) Bot() bool { - // TODO a little naive ATM - return botRegex.MatchString(ua.Name) || botFromSiteRegexp.MatchString(ua.Name) +var ( + reBotWord, reBotSite *regexp.Regexp +) + +type browserMatcher func(string) (string, bool) + +func init() { + reBotWord = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)") + reBotSite = regexp.MustCompile("http[s]?://.+\\.\\w+") } +// UserAgentHash is the UA key. func UserAgentHash(s string) string { return fmt.Sprintf("%x", sha1.Sum([]byte(s))) } +// UserAgentFromRequest extracts a UA from a request. func UserAgentFromRequest(r *http.Request) (*UserAgent, error) { q := r.URL.Query() - agent := q.Get("u") - if agent == "" { - return nil, nil + ua := q.Get("u") + if ua == "" { + ua = r.UserAgent() } - ua := r.UserAgent() return &UserAgent{ Name: ua, LastSeenAt: time.Now(), Hash: UserAgentHash(ua), + ua: detector.New(ua), }, nil } + +// UserAgentsFromHits collects the browsers from provided hits. 
+func UserAgentsFromHits(hits []*Hit) map[string]*UserAgent { + out := make(map[string]*UserAgent) + for _, h := range hits { + if h.UserAgentHash != nil { + b, ok := out[*h.UserAgentHash] + if !ok { + b = &UserAgent{ + Name: h.UserAgent.Name, + LastSeenAt: h.CreatedAt, + ua: detector.New(h.UserAgent.Name), + } + } + if b.LastSeenAt.Before(h.CreatedAt) { + b.LastSeenAt = h.CreatedAt + } + b.Count++ + out[*h.UserAgentHash] = b + } + } + return out +} + +func (ua UserAgent) IsBot() bool { + return ua.ua.Bot() +} + +func (ua UserAgent) IsMobile() bool { + //return ua.ua.Mobile() + return strings.Contains(ua.Name, "Mobi") +} + +func (ua UserAgent) Platform() string { + return ua.ua.Platform() +} + +func (ua UserAgent) Browser() string { + n, _ := ua.ua.Browser() + return n +} + +func (ua UserAgent) BrowserVersion() string { + _, v := ua.ua.Browser() + return v +} |
