• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2020 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5package main
6
7import (
8	"context"
9	"errors"
10	"flag"
11	"fmt"
12	"time"
13
14	"cloud.google.com/go/datastore"
15
16	"go.skia.org/infra/autoroll/go/manual"
17	"go.skia.org/infra/go/auth"
18	"go.skia.org/infra/go/firestore"
19	"go.skia.org/infra/go/skerr"
20	"go.skia.org/infra/task_driver/go/lib/auth_steps"
21	"go.skia.org/infra/task_driver/go/lib/checkout"
22	"go.skia.org/infra/task_driver/go/td"
23)
24
25const (
26	MinWaitDuration = 4 * time.Minute
27)
28
29var (
30	canaryRollNotCreatedErr        = errors.New("Canary roll could not be created. Ask the Infra Gardener to investigate (or directly ping rmistry@) if this happens consistently.")
31	canaryRollSuccessTooQuicklyErr = fmt.Errorf("Canary roll returned success in less than %s. Failing canary due to skbug.com/10563.", MinWaitDuration)
32
33	// Lets add the roll link only once to step data.
34	addedRollLinkStepData = false
35)
36
37func main() {
38	var (
39		projectId = flag.String("project_id", "", "ID of the Google Cloud project.")
40		taskId    = flag.String("task_id", "", "ID of this task.")
41		taskName  = flag.String("task_name", "", "Name of the task.")
42		output    = flag.String("o", "", "If provided, dump a JSON blob of step data to the given file. Prints to stdout if '-' is given.")
43		local     = flag.Bool("local", true, "True if running locally (as opposed to on the bots)")
44
45		checkoutFlags = checkout.SetupFlags(nil)
46
47		rollerName = flag.String("roller_name", "", "The roller we will use to create the canary with.")
48	)
49	ctx := td.StartRun(projectId, taskId, taskName, output, local)
50	defer td.EndRun(ctx)
51	if *rollerName == "" {
52		td.Fatalf(ctx, "--roller_name must be specified")
53	}
54
55	rs, err := checkout.GetRepoState(checkoutFlags)
56	if err != nil {
57		td.Fatal(ctx, skerr.Wrap(err))
58	}
59	if rs.Issue == "" || rs.Patchset == "" {
60		td.Fatalf(ctx, "This task driver should be run only as a try bot")
61	}
62
63	// Create token source with scope for datastore access.
64	ts, err := auth_steps.Init(ctx, *local, auth.ScopeUserinfoEmail, datastore.ScopeDatastore)
65	if err != nil {
66		td.Fatal(ctx, skerr.Wrap(err))
67	}
68
69	// Add documentation link for canary rolls.
70	td.StepText(ctx, "Canary roll doc", "https://goto.google.com/autoroller-canary-bots")
71
72	// Instantiate firestore DB.
73	manualRollDB, err := manual.NewDBWithParams(ctx, firestore.FIRESTORE_PROJECT, "production", ts)
74	if err != nil {
75		td.Fatal(ctx, skerr.Wrap(err))
76	}
77
78	// Retry if canary roll could not be created or if canary roll returned
79	// success too quickly (skbug.com/10563).
80	retryAttempts := 3
81	for retry := 0; ; retry++ {
82		retryText := ""
83		if retry > 0 {
84			retryText = fmt.Sprintf(" (Retry #%d)", retry)
85		}
86
87		req := manual.ManualRollRequest{
88			Requester:  *rollerName,
89			RollerName: *rollerName,
90			Status:     manual.STATUS_PENDING,
91			Timestamp:  firestore.FixTimestamp(time.Now()),
92			Revision:   rs.GetPatchRef(),
93
94			DryRun:            true,
95			NoEmail:           true,
96			NoResolveRevision: true,
97			Canary:            true,
98		}
99		if err := td.Do(ctx, td.Props(fmt.Sprintf("Trigger canary roll%s", retryText)).Infra(), func(ctx context.Context) error {
100			return manualRollDB.Put(&req)
101		}); err != nil {
102			// Immediately fail for errors in triggering.
103			td.Fatal(ctx, skerr.Wrap(err))
104		}
105
106		if err := waitForCanaryRoll(ctx, manualRollDB, req.Id, fmt.Sprintf("Wait for canary roll%s", retryText)); err != nil {
107			// Retry these errors.
108			if err == canaryRollNotCreatedErr || err == canaryRollSuccessTooQuicklyErr {
109				if retry >= (retryAttempts - 1) {
110					td.Fatal(ctx, skerr.Wrapf(err, "failed inspite of 3 retries"))
111				}
112				time.Sleep(time.Minute)
113				continue
114			}
115			// Immediately fail for all other errors.
116			td.Fatal(ctx, skerr.Wrap(err))
117		} else {
118			// The canary roll was successful, break out of the
119			// retry loop.
120			break
121		}
122	}
123}
124
125func waitForCanaryRoll(parentCtx context.Context, manualRollDB manual.DB, rollId, stepName string) error {
126	ctx := td.StartStep(parentCtx, td.Props(stepName))
127	defer td.EndStep(ctx)
128	startTime := time.Now()
129
130	// For writing to the step's log stream.
131	stdout := td.NewLogStream(ctx, "stdout", td.SeverityInfo)
132	for {
133		roll, err := manualRollDB.Get(ctx, rollId)
134		if err != nil {
135			return td.FailStep(ctx, fmt.Errorf("Could not find canary roll with ID: %s", rollId))
136		}
137		cl := roll.Url
138		var rollStatus string
139		if cl == "" {
140			rollStatus = fmt.Sprintf("Canary roll has status %s", roll.Status)
141		} else {
142			if !addedRollLinkStepData {
143				// Add the roll link to both the current step and it's parent.
144				td.StepText(ctx, "Canary roll CL", cl)
145				td.StepText(parentCtx, "Canary roll CL", cl)
146				addedRollLinkStepData = true
147			}
148			rollStatus = fmt.Sprintf("Canary roll [ %s ] has status %s", roll.Url, roll.Status)
149		}
150		if _, err := stdout.Write([]byte(rollStatus)); err != nil {
151			return td.FailStep(ctx, fmt.Errorf("Could not write to stdout: %s", err))
152		}
153
154		if roll.Status == manual.STATUS_COMPLETE {
155			if roll.Result == manual.RESULT_SUCCESS {
156				// This is a hopefully temperory workaround for skbug.com/10563. Sometimes
157				// Canary-Chromium returns success immediately after creating a change and
158				// before the tryjobs have a chance to run. If we have waited
159				// for < MinWaitDuration then be cautious and assume failure.
160				if time.Now().Before(startTime.Add(MinWaitDuration)) {
161					return td.FailStep(ctx, canaryRollSuccessTooQuicklyErr)
162				}
163				return nil
164			} else if roll.Result == manual.RESULT_FAILURE {
165				if cl == "" {
166					return td.FailStep(ctx, canaryRollNotCreatedErr)
167				}
168				return td.FailStep(ctx, fmt.Errorf("Canary roll [ %s ] failed", cl))
169			} else if roll.Result == manual.RESULT_UNKNOWN {
170				return td.FailStep(ctx, fmt.Errorf("Canary roll [ %s ] completed with an unknown result", cl))
171			}
172		}
173		time.Sleep(30 * time.Second)
174	}
175}
176