-
Notifications
You must be signed in to change notification settings - Fork 241
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8b7a4ab
commit 926db9d
Showing
3 changed files
with
124 additions
and
149 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
name: Blob - Version and Release | ||
|
||
on: | ||
workflow_dispatch: | ||
inputs: | ||
newversion: | ||
type: choice | ||
description: "Semantic Version Bump Type" | ||
default: patch | ||
options: | ||
- patch | ||
- minor | ||
- major | ||
|
||
concurrency: | ||
group: "push-to-main" | ||
|
||
defaults: | ||
run: | ||
working-directory: packages/blob | ||
|
||
jobs: | ||
version_and_release: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
with: | ||
# Needed to push the tag and the commit on the main branch, otherwise we get: | ||
# > Run git push --follow-tags | ||
# remote: error: GH006: Protected branch update failed for refs/heads/main. | ||
# remote: error: Changes must be made through a pull request. Required status check "lint" is expected. | ||
token: ${{ secrets.BOT_ACCESS_TOKEN }} | ||
- run: corepack enable | ||
- uses: actions/setup-node@v3 | ||
with: | ||
node-version: "20" | ||
cache: "pnpm" | ||
cache-dependency-path: | | ||
packages/blob/pnpm-lock.yaml | ||
# setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED | ||
registry-url: "https://registry.npmjs.org" | ||
- run: pnpm install | ||
- run: git config --global user.name machineuser | ||
- run: git config --global user.email [email protected] | ||
- run: | | ||
PACKAGE_VERSION=$(node -p "require('./package.json').version") | ||
BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") | ||
# Update package.json with the new version | ||
node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" | ||
git commit . -m "🔖 @huggingface/blob $BUMPED_VERSION" | ||
git tag "blob-v$BUMPED_VERSION" | ||
- run: pnpm publish --no-git-checks . | ||
env: | ||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} | ||
- run: git pull --rebase && git push --follow-tags | ||
# hack - reuse actions/setup-node@v3 just to set a new registry | ||
- uses: actions/setup-node@v3 | ||
with: | ||
node-version: "20" | ||
registry-url: "https://npm.pkg.github.com" | ||
# Disabled for now, until GitHub supports PATs for writing GitHub packages (https://github.com/github/roadmap/issues/558) | ||
# - run: pnpm publish --no-git-checks . | ||
# env: | ||
# NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
- name: "Update Doc" | ||
uses: peter-evans/repository-dispatch@v2 | ||
with: | ||
event-type: doc-build | ||
token: ${{ secrets.BOT_ACCESS_TOKEN }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,176 +1,86 @@ | ||
# 🤗 Hugging Face Hub API | ||
# 🤗 Hugging Face Blobs | ||
|
||
Official utilities to use the Hugging Face Hub API. | ||
Utilities to convert a string or URL to a [Blob](https://developer.mozilla.org/en-US/docs/Web/API/Blob) object, whether it represents a local file or remote URL. | ||
|
||
`fetch` already returns a `Blob` object for remote URLs, but it loads the entire file in memory. This utility makes ad-hoc http range requests when calling `.slice()` on the blob, for example. | ||
|
||
## Install | ||
|
||
```console | ||
pnpm add @huggingface/hub | ||
pnpm add @huggingface/blob | ||
|
||
npm add @huggingface/hub | ||
npm add @huggingface/blob | ||
|
||
yarn add @huggingface/hub | ||
yarn add @huggingface/blob | ||
``` | ||
|
||
### Deno | ||
|
||
```ts | ||
// esm.sh | ||
import { uploadFiles, listModels } from "https://esm.sh/@huggingface/hub" | ||
import { FileBlob, WebBlob } from "https://esm.sh/@huggingface/blob"; | ||
// or npm: | ||
import { uploadFiles, listModels } from "npm:@huggingface/hub" | ||
import { FileBlob, WebBlob } from "npm:@huggingface/blob"; | ||
``` | ||
|
||
Check out the [full documentation](https://huggingface.co/docs/huggingface.js/hub/README). | ||
|
||
## Usage | ||
|
||
For some of the calls, you need to create an account and generate an [access token](https://huggingface.co/settings/tokens). | ||
|
||
Learn how to find free models using the hub package in this [interactive tutorial](https://scrimba.com/scrim/c7BbVPcd?pl=pkVnrP7uP). | ||
|
||
```ts | ||
import * as hub from "@huggingface/hub"; | ||
import type { RepoDesignation } from "@huggingface/hub"; | ||
|
||
const repo: RepoDesignation = { type: "model", name: "myname/some-model" }; | ||
|
||
const {name: username} = await hub.whoAmI({accessToken: "hf_..."}); | ||
|
||
for await (const model of hub.listModels({search: {owner: username}, accessToken: "hf_..."})) { | ||
console.log("My model:", model); | ||
} | ||
|
||
const specificModel = await hub.modelInfo({name: "openai-community/gpt2"}); | ||
await hub.checkRepoAccess({repo, accessToken: "hf_..."}); | ||
|
||
await hub.createRepo({ repo, accessToken: "hf_...", license: "mit" }); | ||
|
||
await hub.uploadFiles({ | ||
repo, | ||
accessToken: "hf_...", | ||
files: [ | ||
// path + blob content | ||
{ | ||
path: "file.txt", | ||
content: new Blob(["Hello World"]), | ||
}, | ||
// Local file URL | ||
pathToFileURL("./pytorch-model.bin"), | ||
// Web URL | ||
new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"), | ||
// Path + Web URL | ||
{ | ||
path: "myfile.bin", | ||
content: new URL("https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin") | ||
} | ||
// Can also work with native File in browsers | ||
], | ||
}); | ||
|
||
// or | ||
import { FileBlob } from "@huggingface/blob/FileBlob"; | ||
import { WebBlob } from "@huggingface/blob/WebBlob"; | ||
import { createBlob } from "@huggingface/blob"; | ||
|
||
for await (const progressEvent of await hub.uploadFilesWithProgress({ | ||
repo, | ||
accessToken: "hf_...", | ||
files: [ | ||
... | ||
], | ||
})) { | ||
console.log(progressEvent); | ||
} | ||
const fileBlob = await FileBlob.create("path/to/file"); | ||
const webBlob = await WebBlob.create("https://url/to/file"); | ||
|
||
await hub.deleteFile({repo, accessToken: "hf_...", path: "myfile.bin"}); | ||
|
||
await (await hub.downloadFile({ repo, path: "README.md" })).text(); | ||
|
||
for await (const fileInfo of hub.listFiles({repo})) { | ||
console.log(fileInfo); | ||
} | ||
|
||
await hub.deleteRepo({ repo, accessToken: "hf_..." }); | ||
const blob = await createBlob("..."); // Automatically detects if it's a file or web URL | ||
``` | ||
|
||
## OAuth Login | ||
|
||
It's possible to login using OAuth (["Sign in with HF"](https://huggingface.co/docs/hub/oauth)). | ||
|
||
This will allow you to get an access token to use some of the API, depending on the scopes set inside the Space or the OAuth App. | ||
|
||
```ts | ||
import { oauthLoginUrl, oauthHandleRedirectIfPresent } from "@huggingface/hub"; | ||
|
||
const oauthResult = await oauthHandleRedirectIfPresent(); | ||
|
||
if (!oauthResult) { | ||
// If the user is not logged in, redirect to the login page | ||
window.location.href = await oauthLoginUrl(); | ||
} | ||
|
||
// You can use oauthResult.accessToken, oauthResult.accessTokenExpiresAt and oauthResult.userInfo | ||
console.log(oauthResult); | ||
``` | ||
|
||
Check out the demo: https://huggingface.co/spaces/huggingfacejs/client-side-oauth | ||
|
||
## Hugging face cache | ||
|
||
The `@huggingface/hub` package provides basic capabilities to scan the cache directory. Learn more about [Manage huggingface_hub cache-system](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache). | ||
## API | ||
|
||
### `scanCacheDir` | ||
### createBlob | ||
|
||
You can get the list of cached repositories using the `scanCacheDir` function. | ||
Creates a Blob object from a string or URL. Automatically detects if it's a file or web URL. | ||
|
||
```ts | ||
import { scanCacheDir } from "@huggingface/hub"; | ||
|
||
const result = await scanCacheDir(); | ||
|
||
console.log(result); | ||
``` | ||
Note: this does not work in the browser | ||
|
||
### `downloadFileToCacheDir` | ||
|
||
You can cache a file of a repository using the `downloadFileToCacheDir` function. | ||
|
||
```ts | ||
import { downloadFileToCacheDir } from "@huggingface/hub"; | ||
|
||
const file = await downloadFileToCacheDir({ | ||
repo: 'foo/bar', | ||
path: 'README.md' | ||
await createBlob("...", { | ||
/** | ||
* Custom fetch function to use, in case it resolves to a Web Blob. | ||
* | ||
* Useful for adding headers, etc. | ||
*/ | ||
fetch: ..., | ||
}); | ||
|
||
console.log(file); | ||
``` | ||
Note: this does not work in the browser | ||
|
||
### `snapshotDownload` | ||
|
||
You can download an entire repository at a given revision in the cache directory using the `snapshotDownload` function. | ||
### FileBlob | ||
|
||
```ts | ||
import { snapshotDownload } from "@huggingface/hub"; | ||
|
||
const directory = await snapshotDownload({ | ||
repo: 'foo/bar', | ||
}); | ||
|
||
console.log(directory); | ||
await FileBlob.create("path/to/file"); | ||
await FileBlob.create(new URL("file:///path/to/file")); | ||
``` | ||
The code uses the `downloadFileToCacheDir` function internally. | ||
|
||
Note: this does not work in the browser | ||
### WebBlob | ||
|
||
## Performance considerations | ||
Creates a Blob object from a URL. If the file is less than 1MB (as indicated by the Content-Length header), by default it will be cached in memory in its entirety upon blob creation. | ||
|
||
When uploading large files, you may want to run the `commit` calls inside a worker, to offload the sha256 computations. | ||
This class is useful for large files that do not need to be loaded all at once in memory, as it makes range requests for the data. | ||
|
||
Remote resources and local files should be passed as `URL` whenever it's possible so they can be lazy loaded in chunks to reduce RAM usage. Passing a `File` inside the browser's context is fine, because it natively behaves as a `Blob`. | ||
|
||
Under the hood, `@huggingface/hub` uses a lazy blob implementation to load the file. | ||
|
||
## Dependencies | ||
|
||
- `@huggingface/tasks` : Typings only | ||
```ts | ||
await WebBlob.create("https://url/to/file"); | ||
await WebBlob.create(new URL("https://url/to/file")); | ||
|
||
await WebBlob.create("https://url/to/file", { | ||
/** | ||
* Custom fetch function to use. Useful for adding headers, etc. | ||
*/ | ||
fetch: ..., | ||
/** | ||
* If the file is less than the specified size, it will be cached in memory in entirety upon blob creation, | ||
* instead of doing range requests for the data. | ||
* | ||
* @default 1_000_000 | ||
*/ | ||
cacheBelow: ... | ||
}) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
{ | ||
"name": "@huggingface/blob", | ||
"packageManager": "[email protected]", | ||
"version": "0.21.0", | ||
"description": "Utilities to interact with the Hugging Face hub", | ||
"version": "0.0.1", | ||
"description": "Utilities to convert URLs and files to Blobs, internally used by Hugging Face libs", | ||
"repository": "https://github.com/huggingface/huggingface.js.git", | ||
"publishConfig": { | ||
"access": "public" | ||
|
@@ -18,11 +18,7 @@ | |
} | ||
}, | ||
"browser": { | ||
"./src/utils/sha256-node.ts": false, | ||
"./src/utils/FileBlob.ts": false, | ||
"./src/lib/cache-management.ts": false, | ||
"./src/lib/download-file-to-cache-dir.ts": false, | ||
"./src/lib/snapshot-download.ts": false, | ||
"./dist/index.js": "./dist/browser/index.js", | ||
"./dist/index.mjs": "./dist/browser/index.mjs" | ||
}, | ||
|
@@ -50,11 +46,10 @@ | |
], | ||
"keywords": [ | ||
"huggingface", | ||
"hub", | ||
"api", | ||
"client", | ||
"hugging", | ||
"face" | ||
"face", | ||
"blob", | ||
"lazy" | ||
], | ||
"author": "Hugging Face", | ||
"license": "MIT", | ||
|