diff --git a/.github/workflows/blob-publish.yml b/.github/workflows/blob-publish.yml new file mode 100644 index 000000000..a19e395cc --- /dev/null +++ b/.github/workflows/blob-publish.yml @@ -0,0 +1,70 @@ +name: Blob - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/blob + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. + # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "20" + cache: "pnpm" + cache-dependency-path: | + packages/blob/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/blob $BUMPED_VERSION" + git tag "blob-v$BUMPED_VERSION" + + - run: pnpm publish --no-git-checks . 
+ env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: git pull --rebase && git push --follow-tags + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "20" + registry-url: "https://npm.pkg.github.com" + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . + # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: "Update Doc" + uses: peter-evans/repository-dispatch@v2 + with: + event-type: doc-build + token: ${{ secrets.BOT_ACCESS_TOKEN }} diff --git a/packages/blob/README.md b/packages/blob/README.md index 3d87c15dc..9f4ade718 100644 --- a/packages/blob/README.md +++ b/packages/blob/README.md @@ -1,176 +1,86 @@ -# 🤗 Hugging Face Hub API +# 🤗 Hugging Face Blobs -Official utilities to use the Hugging Face Hub API. +Utilities to convert a string or URL to a [Blob](https://developer.mozilla.org/en-US/docs/Web/API/Blob) object, whether it represents a local file or remote URL. + +`fetch` can already produce a `Blob` for remote URLs (via `Response.blob()`), but that loads the entire file in memory. This utility instead makes ad-hoc HTTP range requests on demand, for example when calling `.slice()` on the blob. ## Install ```console -pnpm add @huggingface/hub +pnpm add @huggingface/blob -npm add @huggingface/hub +npm add @huggingface/blob -yarn add @huggingface/hub +yarn add @huggingface/blob ``` ### Deno ```ts // esm.sh -import { uploadFiles, listModels } from "https://esm.sh/@huggingface/hub" +import { FileBlob, WebBlob } from "https://esm.sh/@huggingface/blob"; // or npm: -import { uploadFiles, listModels } from "npm:@huggingface/hub" +import { FileBlob, WebBlob } from "npm:@huggingface/blob"; ``` -Check out the [full documentation](https://huggingface.co/docs/huggingface.js/hub/README). 
- ## Usage -For some of the calls, you need to create an account and generate an [access token](https://huggingface.co/settings/tokens). - -Learn how to find free models using the hub package in this [interactive tutorial](https://scrimba.com/scrim/c7BbVPcd?pl=pkVnrP7uP). ```ts -import * as hub from "@huggingface/hub"; -import type { RepoDesignation } from "@huggingface/hub"; - -const repo: RepoDesignation = { type: "model", name: "myname/some-model" }; - -const {name: username} = await hub.whoAmI({accessToken: "hf_..."}); - -for await (const model of hub.listModels({search: {owner: username}, accessToken: "hf_..."})) { - console.log("My model:", model); -} - -const specificModel = await hub.modelInfo({name: "openai-community/gpt2"}); -await hub.checkRepoAccess({repo, accessToken: "hf_..."}); - -await hub.createRepo({ repo, accessToken: "hf_...", license: "mit" }); - -await hub.uploadFiles({ - repo, - accessToken: "hf_...", - files: [ - // path + blob content - { - path: "file.txt", - content: new Blob(["Hello World"]), - }, - // Local file URL - pathToFileURL("./pytorch-model.bin"), - // Web URL - new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"), - // Path + Web URL - { - path: "myfile.bin", - content: new URL("https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin") - } - // Can also work with native File in browsers - ], -}); - -// or +import { FileBlob } from "@huggingface/blob/FileBlob"; +import { WebBlob } from "@huggingface/blob/WebBlob"; +import { createBlob } from "@huggingface/blob"; -for await (const progressEvent of await hub.uploadFilesWithProgress({ - repo, - accessToken: "hf_...", - files: [ - ... 
- ], -})) { - console.log(progressEvent); -} +const fileBlob = await FileBlob.create("path/to/file"); +const webBlob = await WebBlob.create("https://url/to/file"); -await hub.deleteFile({repo, accessToken: "hf_...", path: "myfile.bin"}); - -await (await hub.downloadFile({ repo, path: "README.md" })).text(); - -for await (const fileInfo of hub.listFiles({repo})) { - console.log(fileInfo); -} - -await hub.deleteRepo({ repo, accessToken: "hf_..." }); +const blob = await createBlob("..."); // Automatically detects if it's a file or web URL ``` -## OAuth Login - -It's possible to login using OAuth (["Sign in with HF"](https://huggingface.co/docs/hub/oauth)). - -This will allow you get an access token to use some of the API, depending on the scopes set inside the Space or the OAuth App. - -```ts -import { oauthLoginUrl, oauthHandleRedirectIfPresent } from "@huggingface/hub"; - -const oauthResult = await oauthHandleRedirectIfPresent(); - -if (!oauthResult) { - // If the user is not logged in, redirect to the login page - window.location.href = await oauthLoginUrl(); -} - -// You can use oauthResult.accessToken, oauthResult.accessTokenExpiresAt and oauthResult.userInfo -console.log(oauthResult); -``` - -Checkout the demo: https://huggingface.co/spaces/huggingfacejs/client-side-oauth - -## Hugging face cache - -The `@huggingface/hub` package provide basic capabilities to scan the cache directory. Learn more about [Manage huggingface_hub cache-system](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache). +## API -### `scanCacheDir` +### createBlob -You can get the list of cached repositories using the `scanCacheDir` function. +Creates a Blob object from a string or URL. Automatically detects if it's a file or web URL. 
```ts -import { scanCacheDir } from "@huggingface/hub"; - -const result = await scanCacheDir(); - -console.log(result); -``` -Note: this does not work in the browser - -### `downloadFileToCacheDir` - -You can cache a file of a repository using the `downloadFileToCacheDir` function. - -```ts -import { downloadFileToCacheDir } from "@huggingface/hub"; - -const file = await downloadFileToCacheDir({ - repo: 'foo/bar', - path: 'README.md' +await createBlob("...", { + /** + * Custom fetch function to use, in case it resolves to a Web Blob. + * + * Useful for adding headers, etc. + */ + fetch: ..., }); +``` -console.log(file); -``` -Note: this does not work in the browser - -### `snapshotDownload` - -You can download an entire repository at a given revision in the cache directory using the `snapshotDownload` function. +### FileBlob ```ts -import { snapshotDownload } from "@huggingface/hub"; - -const directory = await snapshotDownload({ - repo: 'foo/bar', -}); - -console.log(directory); +await FileBlob.create("path/to/file"); +await FileBlob.create(new URL("file:///path/to/file")); ``` -The code use internally the `downloadFileToCacheDir` function. -Note: this does not work in the browser +### WebBlob -## Performance considerations +Creates a Blob object from a URL. If the file is less than 1MB (as indicated by the Content-Length header), by default it will be cached in memory in its entirety upon blob creation. -When uploading large files, you may want to run the `commit` calls inside a worker, to offload the sha256 computations. +This class is useful for large files that do not need to be loaded all at once in memory, as it makes range requests for the data. -Remote resources and local files should be passed as `URL` whenever it's possible so they can be lazy loaded in chunks to reduce RAM usage. Passing a `File` inside the browser's context is fine, because it natively behaves as a `Blob`. - -Under the hood, `@huggingface/hub` uses a lazy blob implementation to load the file. 
- -## Dependencies - -- `@huggingface/tasks` : Typings only +```ts +await WebBlob.create("https://url/to/file"); +await WebBlob.create(new URL("https://url/to/file")); + +await WebBlob.create("https://url/to/file", { + /** + * Custom fetch function to use. Useful for adding headers, etc. + */ + fetch: ..., + /** + * If the file is less than the specified size, it will be cached in memory in its entirety upon blob creation, + * instead of doing range requests for the data. + * + * @default 1_000_000 + */ + cacheBelow: ... +}) +``` \ No newline at end of file diff --git a/packages/blob/package.json b/packages/blob/package.json index e3536c5a2..ad7289ee1 100644 --- a/packages/blob/package.json +++ b/packages/blob/package.json @@ -1,8 +1,8 @@ { "name": "@huggingface/blob", "packageManager": "pnpm@8.10.5", - "version": "0.21.0", - "description": "Utilities to interact with the Hugging Face hub", + "version": "0.0.1", + "description": "Utilities to convert URLs and files to Blobs, internally used by Hugging Face libs", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { "access": "public" @@ -18,11 +18,7 @@ } }, "browser": { - "./src/utils/sha256-node.ts": false, "./src/utils/FileBlob.ts": false, - "./src/lib/cache-management.ts": false, - "./src/lib/download-file-to-cache-dir.ts": false, - "./src/lib/snapshot-download.ts": false, "./dist/index.js": "./dist/browser/index.js", "./dist/index.mjs": "./dist/browser/index.mjs" }, @@ -50,11 +46,10 @@ ], "keywords": [ "huggingface", - "hub", - "api", - "client", "hugging", - "face" + "face", + "blob", + "lazy" ], "author": "Hugging Face", "license": "MIT",