From 48048ddd55478a45ef1603f3cc8332df242e13ed Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 4 Dec 2024 20:48:48 +0100 Subject: [PATCH] Postpone directory creation until upload start This ensure that "bundle validate" does not create remote directory. Extend empty_bundle_test.go to check if remote path is created prematurely --- internal/bundle/empty_bundle_test.go | 24 +++++++++++++++-- internal/sync_test.go | 19 +++++++++----- libs/sync/path.go | 39 ++++++++++++++++------------ libs/sync/sync.go | 14 +++++++++- 4 files changed, 70 insertions(+), 26 deletions(-) diff --git a/internal/bundle/empty_bundle_test.go b/internal/bundle/empty_bundle_test.go index 36883ae001..e6d3f97418 100644 --- a/internal/bundle/empty_bundle_test.go +++ b/internal/bundle/empty_bundle_test.go @@ -8,11 +8,17 @@ import ( "github.com/databricks/cli/internal/acc" "github.com/google/uuid" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestAccEmptyBundleDeploy(t *testing.T) { - ctx, _ := acc.WorkspaceTest(t) + ctx, w := acc.WorkspaceTest(t) + + uniqueId := uuid.New().String() + me, err := w.W.CurrentUser.Me(ctx) + require.NoError(t, err) + remoteRoot := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s", me.UserName, uniqueId) // create empty bundle tmpDir := t.TempDir() @@ -20,11 +26,20 @@ func TestAccEmptyBundleDeploy(t *testing.T) { require.NoError(t, err) bundleRoot := fmt.Sprintf(`bundle: - name: %s`, uuid.New().String()) + name: %s`, uniqueId) _, err = f.WriteString(bundleRoot) require.NoError(t, err) f.Close() + _, err = w.W.Workspace.GetStatusByPath(ctx, remoteRoot) + assert.ErrorContains(t, err, "doesn't exist") + + mustValidateBundle(t, ctx, tmpDir) + + // regression: "bundle validate" must not create a directory + _, err = w.W.Workspace.GetStatusByPath(ctx, remoteRoot) + require.ErrorContains(t, err, "doesn't exist") + // deploy empty bundle err = deployBundle(t, ctx, tmpDir) require.NoError(t, err) @@ -33,4 +48,9 @@ func TestAccEmptyBundleDeploy(t *testing.T) { err = destroyBundle(t, ctx, tmpDir) require.NoError(t, err) }) + + // verify that remoteRoot was actually relevant location to test + _, err = w.W.Workspace.GetStatusByPath(ctx, remoteRoot) + assert.NoError(t, err) + } diff --git a/internal/sync_test.go b/internal/sync_test.go index 6f8b1827be..aed6ba4cb3 100644 --- a/internal/sync_test.go +++ b/internal/sync_test.go @@ -509,13 +509,15 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) { // Hypothetical repo path doesn't exist. nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-")) - err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil) + remoteExists, err := sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil) assert.ErrorContains(t, err, " does not exist; please create it first") + assert.False(t, remoteExists) // Paths nested under a hypothetical repo path should yield the same error. nestedPath := path.Join(nonExistingRepoPath, "nested/directory") - err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil) + remoteExists, err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil) assert.ErrorContains(t, err, " does not exist; please create it first") + assert.False(t, remoteExists) } func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) { @@ -526,13 +528,15 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) { _, remoteRepoPath := setupRepo(t, wsc, ctx) // Repo itself is usable. - err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil) + remoteExists, err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil) assert.NoError(t, err) + assert.True(t, remoteExists) // Path nested under repo path is usable. nestedPath := path.Join(remoteRepoPath, "nested/directory") - err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil) + remoteExists, err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil) assert.NoError(t, err) + assert.False(t, remoteExists) // Verify that the directory has been created. info, err := wsc.Workspace.GetStatusByPath(ctx, nestedPath) @@ -549,8 +553,9 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) { require.NoError(t, err) remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-")) - err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me) + remoteExists, err := sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me) assert.NoError(t, err) + assert.False(t, remoteExists) // Clean up directory after test. defer func() { @@ -560,8 +565,8 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) { assert.NoError(t, err) }() - // Verify that the directory has been created. + // Verify that the directory has not been created. info, err := wsc.Workspace.GetStatusByPath(ctx, remotePath) - require.NoError(t, err) + require.ErrorContains(t, err, "not exist") require.Equal(t, workspace.ObjectTypeDirectory, info.ObjectType) } diff --git a/libs/sync/path.go b/libs/sync/path.go index 97a9089652..92d08cd54a 100644 --- a/libs/sync/path.go +++ b/libs/sync/path.go @@ -24,7 +24,8 @@ func repoPathForPath(me *iam.User, remotePath string) string { // EnsureRemotePathIsUsable checks if the specified path is nested under // expected base paths and if it is a directory or repository. -func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) error { +// Returns (doesRemoteExist, error) +func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) (bool, error) { var err error // TODO: we should cache CurrentUser.Me at the SDK level @@ -32,7 +33,7 @@ func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie if me == nil { me, err = wsc.CurrentUser.Me(ctx) if err != nil { - return err + return false, err } } @@ -43,7 +44,7 @@ func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie if err != nil { // We only deal with 404s below. if !apierr.IsMissing(err) { - return err + return false, err } // If the path is nested under a repo, the repo has to exist. @@ -51,19 +52,12 @@ func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie repoPath := repoPathForPath(me, remotePath) _, err = wsc.Workspace.GetStatusByPath(ctx, repoPath) if err != nil && apierr.IsMissing(err) { - return fmt.Errorf("%s does not exist; please create it first", repoPath) + return false, fmt.Errorf("%s does not exist; please create it first", repoPath) } } - // The workspace path doesn't exist. Create it and try again. - err = wsc.Workspace.MkdirsByPath(ctx, remotePath) - if err != nil { - return fmt.Errorf("unable to create directory at %s: %w", remotePath, err) - } - info, err = wsc.Workspace.GetStatusByPath(ctx, remotePath) - if err != nil { - return err - } + return false, nil + } log.Debugf( @@ -77,10 +71,23 @@ func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie // We expect the object at path to be a directory or a repo. switch info.ObjectType { case workspace.ObjectTypeDirectory: - return nil + return true, nil case workspace.ObjectTypeRepo: - return nil + return true, nil } - return fmt.Errorf("%s points to a %s", remotePath, strings.ToLower(info.ObjectType.String())) + return true, fmt.Errorf("%s points to a %s", remotePath, strings.ToLower(info.ObjectType.String())) +} + +func createRemotePath(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string) error { + // The workspace path doesn't exist. Create it and try again. + err := wsc.Workspace.MkdirsByPath(ctx, remotePath) + if err != nil { + return fmt.Errorf("unable to create directory at %s: %w", remotePath, err) + } + _, err = wsc.Workspace.GetStatusByPath(ctx, remotePath) + if err != nil { + return err + } + return nil } diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 6bd26f2241..54978e3af7 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -59,6 +59,9 @@ type Sync struct { // WaitGroup is automatically created when an output handler is provided in the SyncOptions. // Close call is required to ensure the output handler goroutine handles all events in time. outputWaitGroup *stdsync.WaitGroup + + // If this flag is not set, we'll create remote directory before starting upload + remoteExists bool } // New initializes and returns a new [Sync] instance. @@ -84,7 +87,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { } // Verify that the remote path we're about to synchronize to is valid and allowed. - err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser) + remoteExists, err := EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser) if err != nil { return nil, err } @@ -141,6 +144,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { notifier: notifier, outputWaitGroup: outputWaitGroup, seq: 0, + remoteExists: remoteExists, }, nil } @@ -180,6 +184,14 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) { // Returns the list of files tracked (and synchronized) by the syncer during the run, // and an error if any occurred. func (s *Sync) RunOnce(ctx context.Context) ([]fileset.File, error) { + if !s.remoteExists { + err := createRemotePath(ctx, s.WorkspaceClient, s.RemotePath) + if err != nil { + return nil, err + } + s.remoteExists = true + } + files, err := s.GetFileList(ctx) if err != nil { return files, err