1
0
mirror of https://github.com/dutchcoders/transfer.sh.git synced 2020-11-18 19:53:40 -08:00
2019-03-17 20:19:56 +01:00

764 lines
20 KiB
Go

// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package har collects HTTP requests and responses and stores them in HAR format.
//
// For more information on HAR, see:
// https://w3c.github.io/web-performance/specs/HAR/Overview.html
package har
import (
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"mime"
"mime/multipart"
"net/http"
"net/url"
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/google/martian"
"github.com/google/martian/log"
"github.com/google/martian/messageview"
"github.com/google/martian/proxyutil"
)
// Logger maintains request and response log entries.
//
// Entries are indexed by ID and additionally chained through Entry.next so
// that they can be exported in the order in which they were recorded.
type Logger struct {
	// bodyLogging reports whether the body of the given response is logged.
	bodyLogging func(*http.Response) bool
	// postDataLogging reports whether the body of the given request is logged.
	postDataLogging func(*http.Request) bool
	// creator is reported as the creator of every exported log.
	creator *Creator
	// mu guards entries and tail.
	mu sync.Mutex
	// entries maps an entry ID to its log entry.
	entries map[string]*Entry
	// tail is the most recently recorded entry. Its next pointer leads back to
	// the oldest entry, forming a circular list. It is nil when the log is
	// empty.
	tail *Entry
}
// HAR is the top level object of a HAR log.
type HAR struct {
	// Log holds the exported log; it is serialized under the "log" key.
	Log *Log `json:"log"`
}
// Log is the HAR HTTP request and response log.
type Log struct {
	// Version is the version number of the HAR format.
	Version string `json:"version"`
	// Creator holds information about the log creator application.
	Creator *Creator `json:"creator"`
	// Entries is a list containing requests and responses.
	Entries []*Entry `json:"entries"`
}
// Creator is the program responsible for generating the log. Martian, in this case.
type Creator struct {
	// Name is the name of the log creator application.
	Name string `json:"name"`
	// Version is the version of the log creator application.
	Version string `json:"version"`
}
// Entry is an individual log entry for a request and, once it has been
// recorded, the matching response.
type Entry struct {
	// ID is the unique ID for the entry. It is serialized as the custom "_id"
	// field.
	ID string `json:"_id"`
	// StartedDateTime is the date and time stamp of the request start (ISO 8601).
	StartedDateTime time.Time `json:"startedDateTime"`
	// Time is the total elapsed time of the request in milliseconds.
	Time int64 `json:"time"`
	// Request contains the detailed information about the request.
	Request *Request `json:"request"`
	// Response contains the detailed information about the response. It is
	// omitted from the JSON output while it is nil.
	Response *Response `json:"response,omitempty"`
	// Cache contains information about a request coming from browser cache.
	Cache *Cache `json:"cache"`
	// Timings describes various phases within request-response round trip. All
	// times are specified in milliseconds.
	Timings *Timings `json:"timings"`
	// next links to the entry recorded after this one; the most recent entry
	// links back to the oldest one. It is unexported and never serialized.
	next *Entry
}
// Request holds data about an individual HTTP request.
type Request struct {
	// Method is the request method (GET, POST, ...).
	Method string `json:"method"`
	// URL is the absolute URL of the request (fragments are not included).
	URL string `json:"url"`
	// HTTPVersion is the Request HTTP version (HTTP/1.1).
	HTTPVersion string `json:"httpVersion"`
	// Cookies is a list of cookies.
	Cookies []Cookie `json:"cookies"`
	// Headers is a list of headers.
	Headers []Header `json:"headers"`
	// QueryString is a list of query parameters.
	QueryString []QueryString `json:"queryString"`
	// PostData is the posted data information. It is omitted from the JSON
	// output when nil.
	PostData *PostData `json:"postData,omitempty"`
	// HeadersSize is the total number of bytes from the start of the HTTP
	// request message until (and including) the double CRLF before the body.
	// Set to -1 if the info is not available.
	HeadersSize int64 `json:"headersSize"`
	// BodySize is the size of the request body (POST data payload) in bytes. Set
	// to -1 if the info is not available.
	BodySize int64 `json:"bodySize"`
}
// Response holds data about an individual HTTP response.
type Response struct {
	// Status is the response status code.
	Status int `json:"status"`
	// StatusText is the response status description.
	StatusText string `json:"statusText"`
	// HTTPVersion is the Response HTTP version (HTTP/1.1).
	HTTPVersion string `json:"httpVersion"`
	// Cookies is a list of cookies.
	Cookies []Cookie `json:"cookies"`
	// Headers is a list of headers.
	Headers []Header `json:"headers"`
	// Content contains the details of the response body.
	Content *Content `json:"content"`
	// RedirectURL is the target URL from the Location response header.
	RedirectURL string `json:"redirectURL"`
	// HeadersSize is the total number of bytes from the start of the HTTP
	// response message until (and including) the double CRLF before the body.
	// Set to -1 if the info is not available.
	HeadersSize int64 `json:"headersSize"`
	// BodySize is the size of the response body in bytes. Set to -1 if the info
	// is not available.
	BodySize int64 `json:"bodySize"`
}
// Cache contains information about a request coming from browser cache.
type Cache struct {
	// Cache has no fields because cache details are not supported, but HAR
	// requires the "cache" object to exist.
}
// Timings describes various phases within request-response round trip. All
// times are specified in milliseconds.
type Timings struct {
	// Send is the time required to send HTTP request to the server.
	Send int64 `json:"send"`
	// Wait is the time spent waiting for a response from the server.
	Wait int64 `json:"wait"`
	// Receive is the time required to read entire response from server or cache.
	Receive int64 `json:"receive"`
}
// Cookie is the data about a cookie on a request or response.
type Cookie struct {
	// Name is the cookie name.
	Name string `json:"name"`
	// Value is the cookie value.
	Value string `json:"value"`
	// Path is the path pertaining to the cookie.
	Path string `json:"path,omitempty"`
	// Domain is the host of the cookie.
	Domain string `json:"domain,omitempty"`
	// Expires contains cookie expiration time. It is excluded from the JSON
	// output; Expires8601 carries the serialized form.
	Expires time.Time `json:"-"`
	// Expires8601 contains cookie expiration time in ISO 8601 format. It is
	// serialized as "expires" and omitted when empty.
	Expires8601 string `json:"expires,omitempty"`
	// HTTPOnly is set to true if the cookie is HTTP only, false otherwise.
	HTTPOnly bool `json:"httpOnly,omitempty"`
	// Secure is set to true if the cookie was transmitted over SSL, false
	// otherwise.
	Secure bool `json:"secure,omitempty"`
}
// Header is an HTTP request or response header. A header that carries several
// values is represented by one Header per value.
type Header struct {
	// Name is the header name.
	Name string `json:"name"`
	// Value is the header value.
	Value string `json:"value"`
}
// QueryString is a query string parameter on a request. A parameter that
// carries several values is represented by one QueryString per value.
type QueryString struct {
	// Name is the query parameter name.
	Name string `json:"name"`
	// Value is the query parameter value.
	Value string `json:"value"`
}
// PostData describes posted data on a request.
//
// PostData has custom JSON methods: when Text is not valid UTF-8 it is
// serialized base64-encoded together with an "encoding" field (see
// MarshalJSON and UnmarshalJSON).
type PostData struct {
	// MimeType is the MIME type of the posted data.
	MimeType string `json:"mimeType"`
	// Params is a list of posted parameters (in case of URL encoded parameters).
	Params []Param `json:"params"`
	// Text contains the posted data. Although its type is string, it may contain
	// binary data.
	Text string `json:"text"`
}
// pdBinary is the JSON representation of binary PostData.
type pdBinary struct {
	// MimeType is the MIME type of the posted data.
	MimeType string `json:"mimeType"`
	// Params is a list of posted parameters (in case of URL encoded parameters).
	Params []Param `json:"params"`
	// Text is the raw posted data; encoding/json writes a []byte as a
	// base64-encoded string.
	Text []byte `json:"text"`
	// Encoding names the encoding applied to Text; MarshalJSON sets it to
	// "base64".
	Encoding string `json:"encoding"`
}
// MarshalJSON encodes the PostData as JSON. Text that is valid UTF-8 is
// written as a plain string; any other text is treated as binary and written
// base64-encoded with an accompanying "encoding" field.
func (p *PostData) MarshalJSON() ([]byte, error) {
	if !utf8.ValidString(p.Text) {
		bin := pdBinary{
			MimeType: p.MimeType,
			Params:   p.Params,
			Text:     []byte(p.Text),
			Encoding: "base64",
		}
		return json.Marshal(bin)
	}

	// plain has the same fields as PostData but none of its methods, so
	// marshaling it does not call back into MarshalJSON.
	type plain PostData
	return json.Marshal((*plain)(p))
}
// UnmarshalJSON decodes data into the PostData. A JSON null leaves the
// receiver untouched. When the object declares the "base64" encoding its text
// is base64-decoded; otherwise the text is taken verbatim.
func (p *PostData) UnmarshalJSON(data []byte) error {
	if bytes.Equal(data, []byte("null")) { // conform to json.Unmarshaler spec
		return nil
	}

	// Peek at the encoding field first to pick the representation to decode.
	probe := struct {
		Encoding string `json:"encoding"`
	}{}
	if err := json.Unmarshal(data, &probe); err != nil {
		return err
	}

	if probe.Encoding == "base64" {
		var bin pdBinary
		if err := json.Unmarshal(data, &bin); err != nil {
			return err
		}
		p.MimeType, p.Params, p.Text = bin.MimeType, bin.Params, string(bin.Text)
		return nil
	}

	// plain has the same fields as PostData but none of its methods, so
	// unmarshaling into it does not call back into UnmarshalJSON.
	type plain PostData
	return json.Unmarshal(data, (*plain)(p))
}
// Param describes an individual posted parameter.
type Param struct {
	// Name is the name of the posted parameter.
	Name string `json:"name"`
	// Value is the value of the posted parameter. It is omitted when empty.
	Value string `json:"value,omitempty"`
	// Filename is the name of a posted file. It is omitted when empty.
	Filename string `json:"fileName,omitempty"`
	// ContentType is the content type of a posted file. It is omitted when
	// empty.
	ContentType string `json:"contentType,omitempty"`
}
// Content describes details about response content.
type Content struct {
	// Size is the length of the returned content in bytes. Should be equal to
	// response.bodySize if there is no compression and bigger when the content
	// has been compressed.
	Size int64 `json:"size"`
	// MimeType is the MIME type of the response text (value of the Content-Type
	// response header).
	MimeType string `json:"mimeType"`
	// Text contains the response body sent from the server or loaded from the
	// browser cache. This field is populated with textual content only. The text
	// field is either HTTP decoded text or an encoded (e.g. "base64")
	// representation of the response body. Leave out this field if the
	// information is not available. Being a []byte, it is written by
	// encoding/json as a base64-encoded string.
	Text []byte `json:"text,omitempty"`
	// Encoding used for response text field e.g "base64". Leave out this field
	// if the text field is HTTP decoded (decompressed & unchunked), then
	// trans-coded from its original character set into UTF-8.
	Encoding string `json:"encoding,omitempty"`
}
// Option is a configurable setting for the logger. An Option is a function
// that mutates the Logger it is given; options are applied with
// Logger.SetOption.
type Option func(l *Logger)
// PostDataLogging returns an option that switches request post data logging
// on or off for every request, regardless of its content.
func PostDataLogging(enabled bool) Option {
	// The decision is the same for every request, so the request is ignored.
	decide := func(*http.Request) bool { return enabled }
	return func(lg *Logger) {
		lg.postDataLogging = decide
	}
}
// PostDataLoggingForContentTypes returns an option that logs request bodies based
// on opting in to the Content-Type of the request.
//
// A request body is logged when its Content-Type header starts with any of
// cts; the comparison is case-insensitive. With no cts, no request body is
// logged.
func PostDataLoggingForContentTypes(cts ...string) Option {
	// Normalize the configured prefixes once instead of on every request. The
	// copy also shields the option from later changes to the caller's slice.
	prefixes := make([]string, len(cts))
	for i, ct := range cts {
		prefixes[i] = strings.ToLower(ct)
	}

	return func(l *Logger) {
		l.postDataLogging = func(req *http.Request) bool {
			rct := strings.ToLower(req.Header.Get("Content-Type"))
			for _, prefix := range prefixes {
				if strings.HasPrefix(rct, prefix) {
					return true
				}
			}
			return false
		}
	}
}
// SkipPostDataLoggingForContentTypes returns an option that logs request bodies based
// on opting out of the Content-Type of the request.
//
// A request body is logged unless its Content-Type header starts with any of
// cts; the comparison is case-insensitive. With no cts, every request body is
// logged.
func SkipPostDataLoggingForContentTypes(cts ...string) Option {
	// Normalize the configured prefixes once instead of on every request. The
	// copy also shields the option from later changes to the caller's slice.
	prefixes := make([]string, len(cts))
	for i, ct := range cts {
		prefixes[i] = strings.ToLower(ct)
	}

	return func(l *Logger) {
		l.postDataLogging = func(req *http.Request) bool {
			rct := strings.ToLower(req.Header.Get("Content-Type"))
			for _, prefix := range prefixes {
				if strings.HasPrefix(rct, prefix) {
					return false
				}
			}
			return true
		}
	}
}
// BodyLogging returns an option that switches response body logging on or off
// for every response, regardless of its content.
func BodyLogging(enabled bool) Option {
	// The decision is the same for every response, so the response is ignored.
	decide := func(*http.Response) bool { return enabled }
	return func(lg *Logger) {
		lg.bodyLogging = decide
	}
}
// BodyLoggingForContentTypes returns an option that logs response bodies based
// on opting in to the Content-Type of the response.
//
// A response body is logged when its Content-Type header starts with any of
// cts; the comparison is case-insensitive. With no cts, no response body is
// logged.
func BodyLoggingForContentTypes(cts ...string) Option {
	// Normalize the configured prefixes once instead of on every response. The
	// copy also shields the option from later changes to the caller's slice.
	prefixes := make([]string, len(cts))
	for i, ct := range cts {
		prefixes[i] = strings.ToLower(ct)
	}

	return func(l *Logger) {
		l.bodyLogging = func(res *http.Response) bool {
			rct := strings.ToLower(res.Header.Get("Content-Type"))
			for _, prefix := range prefixes {
				if strings.HasPrefix(rct, prefix) {
					return true
				}
			}
			return false
		}
	}
}
// SkipBodyLoggingForContentTypes returns an option that logs response bodies based
// on opting out of the Content-Type of the response.
//
// A response body is logged unless its Content-Type header starts with any of
// cts; the comparison is case-insensitive. With no cts, every response body is
// logged.
func SkipBodyLoggingForContentTypes(cts ...string) Option {
	// Normalize the configured prefixes once instead of on every response. The
	// copy also shields the option from later changes to the caller's slice.
	prefixes := make([]string, len(cts))
	for i, ct := range cts {
		prefixes[i] = strings.ToLower(ct)
	}

	return func(l *Logger) {
		l.bodyLogging = func(res *http.Response) bool {
			rct := strings.ToLower(res.Header.Get("Content-Type"))
			for _, prefix := range prefixes {
				if strings.HasPrefix(rct, prefix) {
					return false
				}
			}
			return true
		}
	}
}
// NewLogger returns an empty HAR logger that identifies itself as
// "martian proxy" version "2.0.0". By default the returned logger logs all
// request post data and all response bodies.
func NewLogger() *Logger {
	l := new(Logger)
	l.creator = &Creator{Name: "martian proxy", Version: "2.0.0"}
	l.entries = map[string]*Entry{}

	// Options are applied in argument order: response bodies, then post data.
	l.SetOption(BodyLogging(true), PostDataLogging(true))
	return l
}
// SetOption applies the given options to the logger, in argument order.
func (l *Logger) SetOption(opts ...Option) {
	for i := range opts {
		opts[i](l)
	}
}
// ModifyRequest logs the request under the ID of its martian context. It does
// nothing when the context reports that logging is being skipped.
func (l *Logger) ModifyRequest(req *http.Request) error {
	if ctx := martian.NewContext(req); !ctx.SkippingLogging() {
		return l.RecordRequest(ctx.ID(), req)
	}
	return nil
}
// RecordRequest logs the HTTP request with the given ID. The ID should be unique
// per request/response pair.
//
// The request is converted with NewRequest before the ID is checked. An error
// is returned when that conversion fails or when an entry with the same ID
// already exists; in both cases the log is left unchanged.
func (l *Logger) RecordRequest(id string, req *http.Request) error {
	withBody := l.postDataLogging(req)
	hreq, err := NewRequest(req, withBody)
	if err != nil {
		return err
	}

	entry := &Entry{
		ID:              id,
		StartedDateTime: time.Now().UTC(),
		Request:         hreq,
		Cache:           &Cache{},
		Timings:         &Timings{},
	}

	l.mu.Lock()
	defer l.mu.Unlock()

	if _, dup := l.entries[id]; dup {
		return fmt.Errorf("Duplicate request ID: %s", id)
	}
	l.entries[id] = entry

	// Append to the circular list: the new entry becomes the tail and points
	// back at the oldest entry.
	if l.tail == nil {
		entry.next = entry
	} else {
		entry.next = l.tail.next
		l.tail.next = entry
	}
	l.tail = entry

	return nil
}
// NewRequest constructs and returns a Request from req. If withBody is true,
// req.Body is read to EOF and replaced with a copy in a bytes.Buffer. An error
// is returned (and req.Body may be in an intermediate state) if an error is
// returned from req.Body.Read.
//
// HeadersSize is always reported as -1 and BodySize is taken from
// req.ContentLength.
func NewRequest(req *http.Request, withBody bool) (*Request, error) {
	// Start from an empty, non-nil slice so that a request without query
	// parameters serializes as [] rather than null.
	query := []QueryString{}
	for name, values := range req.URL.Query() {
		for i := range values {
			query = append(query, QueryString{Name: name, Value: values[i]})
		}
	}

	hreq := &Request{
		Method:      req.Method,
		URL:         req.URL.String(),
		HTTPVersion: req.Proto,
		Cookies:     cookies(req.Cookies()),
		Headers:     headers(proxyutil.RequestHeader(req).Map()),
		QueryString: query,
		HeadersSize: -1,
		BodySize:    req.ContentLength,
	}

	pd, err := postData(req, withBody)
	if err != nil {
		return nil, err
	}
	hreq.PostData = pd

	return hreq, nil
}
// ModifyResponse logs the response under the ID of the martian context of its
// request. It does nothing when the context reports that logging is being
// skipped.
func (l *Logger) ModifyResponse(res *http.Response) error {
	if ctx := martian.NewContext(res.Request); !ctx.SkippingLogging() {
		return l.RecordResponse(ctx.ID(), res)
	}
	return nil
}
// RecordResponse logs an HTTP response, associating it with the previously-logged
// HTTP request with the same ID.
//
// The response is converted with NewResponse first and any conversion error
// is returned. A response whose ID matches no logged request is silently
// dropped.
func (l *Logger) RecordResponse(id string, res *http.Response) error {
	withBody := l.bodyLogging(res)
	hres, err := NewResponse(res, withBody)
	if err != nil {
		return err
	}

	l.mu.Lock()
	defer l.mu.Unlock()

	entry, found := l.entries[id]
	if !found {
		return nil
	}
	entry.Response = hres
	// Elapsed time since the request was recorded, in whole milliseconds.
	entry.Time = int64(time.Since(entry.StartedDateTime) / time.Millisecond)

	return nil
}
// NewResponse constructs and returns a Response from res. If withBody is true,
// res.Body is read to EOF and replaced with a copy in a bytes.Buffer. An error
// is returned (and res.Body may be in an intermediate state) if an error is
// returned from res.Body.Read.
//
// HeadersSize is always reported as -1 and BodySize is taken from
// res.ContentLength. The content encoding is always "base64"; the content
// text and size are only filled in when withBody is true.
func NewResponse(res *http.Response, withBody bool) (*Response, error) {
	hres := &Response{
		Status:      res.StatusCode,
		StatusText:  http.StatusText(res.StatusCode),
		HTTPVersion: res.Proto,
		Cookies:     cookies(res.Cookies()),
		Headers:     headers(proxyutil.ResponseHeader(res).Map()),
		Content: &Content{
			MimeType: res.Header.Get("Content-Type"),
			Encoding: "base64",
		},
		HeadersSize: -1,
		BodySize:    res.ContentLength,
	}

	// Only 3xx responses carry a redirect target.
	if code := res.StatusCode; code >= 300 && code < 400 {
		hres.RedirectURL = res.Header.Get("Location")
	}

	if !withBody {
		return hres, nil
	}

	view := messageview.New()
	if err := view.SnapshotResponse(res); err != nil {
		return nil, err
	}

	reader, err := view.BodyReader(messageview.Decode())
	if err != nil {
		return nil, err
	}

	body, err := ioutil.ReadAll(reader)
	if err != nil {
		return nil, err
	}

	hres.Content.Text = body
	hres.Content.Size = int64(len(body))

	return hres, nil
}
// Export returns the in-memory log with all entries, oldest first. The log
// itself is left unchanged.
func (l *Logger) Export() *HAR {
	l.mu.Lock()
	defer l.mu.Unlock()

	es := make([]*Entry, 0, len(l.entries))
	if l.tail != nil {
		// The list is circular: tail.next is the oldest entry, and the walk
		// ends once the tail itself has been collected.
		for e := l.tail.next; ; e = e.next {
			es = append(es, e)
			if e == nil || e == l.tail {
				break
			}
		}
	}

	return l.makeHAR(es)
}
// ExportAndReset returns the in-memory log for completed requests, clearing them.
//
// Entries that already have a response are returned, oldest first, and
// removed from the logger. Entries still waiting for a response stay in the
// logger in their original order.
func (l *Logger) ExportAndReset() *HAR {
	l.mu.Lock()
	defer l.mu.Unlock()
	es := make([]*Entry, 0, len(l.entries))
	curr := l.tail
	// prev trails the most recently kept entry (it starts at the old tail);
	// first is the oldest kept entry, which becomes the head of the rebuilt
	// list.
	prev := l.tail
	var first *Entry
	for curr != nil {
		// Advance before inspecting: the list is circular, so the first step
		// moves from the tail to the oldest entry.
		curr = curr.next
		if curr.Response != nil {
			// Completed: export the entry and drop it from the index.
			es = append(es, curr)
			delete(l.entries, curr.ID)
		} else {
			if first == nil {
				first = curr
			}
			// Still pending: link it directly after the previously kept
			// entry, bypassing any exported entries in between.
			prev.next = curr
			prev = curr
		}
		// The old tail is visited last; stop once it has been processed.
		if curr == l.tail {
			break
		}
	}
	if len(l.entries) == 0 {
		// Every entry was exported, so the list is empty.
		l.tail = nil
	} else {
		// Close the circle again: the last kept entry points at the first.
		l.tail = prev
		l.tail.next = first
	}
	return l.makeHAR(es)
}
// makeHAR wraps es in a HAR document that declares format version "1.2" and
// the logger's creator.
func (l *Logger) makeHAR(es []*Entry) *HAR {
	harLog := &Log{
		Version: "1.2",
		Creator: l.creator,
		Entries: es,
	}
	return &HAR{Log: harLog}
}
// Reset discards every logged entry, whether or not it has a response yet.
func (l *Logger) Reset() {
	l.mu.Lock()
	defer l.mu.Unlock()

	l.tail = nil
	l.entries = map[string]*Entry{}
}
// cookies converts HTTP cookies to their HAR representation, preserving
// order. The ISO 8601 expiry string is only filled in for cookies that carry
// an expiry time. The result is never nil.
func cookies(cs []*http.Cookie) []Cookie {
	out := make([]Cookie, len(cs))
	for i, c := range cs {
		hc := Cookie{
			Name:     c.Name,
			Value:    c.Value,
			Path:     c.Path,
			Domain:   c.Domain,
			Expires:  c.Expires,
			HTTPOnly: c.HttpOnly,
			Secure:   c.Secure,
		}
		if !c.Expires.IsZero() {
			hc.Expires8601 = c.Expires.Format(time.RFC3339)
		}
		out[i] = hc
	}
	return out
}
// headers flattens an http.Header into a list with one Header per value.
// Values of the same name keep their order; the order of different names
// follows map iteration and is therefore unspecified. The result is never
// nil.
func headers(hs http.Header) []Header {
	out := make([]Header, 0, len(hs))
	for name, values := range hs {
		for i := range values {
			out = append(out, Header{Name: name, Value: values[i]})
		}
	}
	return out
}
// postData builds the HAR post data for req.
//
// It returns nil, nil when the request has no body. When logBody is false
// only the MIME type is recorded. Otherwise the body is read through a
// messageview snapshot of the request and interpreted by MIME type:
// multipart/form-data and application/x-www-form-urlencoded bodies are
// expanded into Params, and any other body is stored verbatim in Text.
//
// A Content-Type header that cannot be parsed is logged and used as-is for
// the MIME type. Errors from snapshotting, reading or parsing the body are
// returned.
func postData(req *http.Request, logBody bool) (*PostData, error) {
	// If the request has no body (no Content-Length and Transfer-Encoding isn't
	// chunked), skip the post data.
	if req.ContentLength <= 0 && len(req.TransferEncoding) == 0 {
		return nil, nil
	}

	ct := req.Header.Get("Content-Type")
	mt, ps, err := mime.ParseMediaType(ct)
	if err != nil {
		log.Errorf("har: cannot parse Content-Type header %q: %v", ct, err)
		mt = ct
	}

	pd := &PostData{
		MimeType: mt,
		Params:   []Param{},
	}

	if !logBody {
		return pd, nil
	}

	mv := messageview.New()
	if err := mv.SnapshotRequest(req); err != nil {
		return nil, err
	}

	br, err := mv.BodyReader()
	if err != nil {
		return nil, err
	}

	switch mt {
	case "multipart/form-data":
		// ps is nil when the Content-Type could not be parsed; indexing a nil
		// map yields the empty boundary.
		mpr := multipart.NewReader(br, ps["boundary"])
		for {
			p, err := mpr.NextPart()
			if err == io.EOF {
				break
			}
			if err != nil {
				return nil, err
			}

			body, err := ioutil.ReadAll(p)
			// Close the part right away instead of deferring: a defer inside
			// this loop would keep every part open until the function
			// returns. The result is ignored, as it was with the deferred
			// call.
			p.Close()
			if err != nil {
				return nil, err
			}

			pd.Params = append(pd.Params, Param{
				Name:        p.FormName(),
				Filename:    p.FileName(),
				ContentType: p.Header.Get("Content-Type"),
				Value:       string(body),
			})
		}
	case "application/x-www-form-urlencoded":
		body, err := ioutil.ReadAll(br)
		if err != nil {
			return nil, err
		}

		form, err := url.ParseQuery(string(body))
		if err != nil {
			return nil, err
		}

		for name, values := range form {
			for _, value := range values {
				pd.Params = append(pd.Params, Param{
					Name:  name,
					Value: value,
				})
			}
		}
	default:
		body, err := ioutil.ReadAll(br)
		if err != nil {
			return nil, err
		}

		pd.Text = string(body)
	}

	return pd, nil
}