Skip to content

Commit

Permalink
📝
Browse files Browse the repository at this point in the history
  • Loading branch information
coyotte508 committed Dec 3, 2024
1 parent 8b7a4ab commit 926db9d
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 149 deletions.
70 changes: 70 additions & 0 deletions .github/workflows/blob-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
name: Blob - Version and Release

on:
workflow_dispatch:
inputs:
newversion:
type: choice
description: "Semantic Version Bump Type"
default: patch
options:
- patch
- minor
- major

concurrency:
group: "push-to-main"

defaults:
run:
working-directory: packages/blob

jobs:
version_and_release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
# Needed to push the tag and the commit on the main branch, otherwise we get:
# > Run git push --follow-tags
# remote: error: GH006: Protected branch update failed for refs/heads/main.
# remote: error: Changes must be made through a pull request. Required status check "lint" is expected.
token: ${{ secrets.BOT_ACCESS_TOKEN }}
- run: corepack enable
- uses: actions/setup-node@v3
with:
node-version: "20"
cache: "pnpm"
cache-dependency-path: |
packages/blob/pnpm-lock.yaml
# setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED
registry-url: "https://registry.npmjs.org"
- run: pnpm install
- run: git config --global user.name machineuser
- run: git config --global user.email [email protected]
- run: |
PACKAGE_VERSION=$(node -p "require('./package.json').version")
BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')")
# Update package.json with the new version
node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
git commit . -m "🔖 @huggingface/blob $BUMPED_VERSION"
git tag "blob-v$BUMPED_VERSION"
- run: pnpm publish --no-git-checks .
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
- run: git pull --rebase && git push --follow-tags
# hack - reuse actions/setup-node@v3 just to set a new registry
- uses: actions/setup-node@v3
with:
node-version: "20"
registry-url: "https://npm.pkg.github.com"
# Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558)
# - run: pnpm publish --no-git-checks .
# env:
# NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: "Update Doc"
uses: peter-evans/repository-dispatch@v2
with:
event-type: doc-build
token: ${{ secrets.BOT_ACCESS_TOKEN }}
188 changes: 49 additions & 139 deletions packages/blob/README.md
Original file line number Diff line number Diff line change
@@ -1,176 +1,86 @@
# 🤗 Hugging Face Hub API
# 🤗 Hugging Face Blobs

Official utilities to use the Hugging Face Hub API.
Utilities to convert a string or URL to a [Blob](https://developer.mozilla.org/en-US/docs/Web/API/Blob) object, whether it represents a local file or remote URL.

`fetch` already returns a `Blob` object for remote URLs, but it loads the entire file in memory. This utility instead makes ad-hoc HTTP range requests, for example when calling `.slice()` on the blob.

## Install

```console
pnpm add @huggingface/hub
pnpm add @huggingface/blob

npm add @huggingface/hub
npm add @huggingface/blob

yarn add @huggingface/hub
yarn add @huggingface/blob
```

### Deno

```ts
// esm.sh
import { uploadFiles, listModels } from "https://esm.sh/@huggingface/hub"
import { FileBlob, WebBlob } from "https://esm.sh/@huggingface/blob";
// or npm:
import { uploadFiles, listModels } from "npm:@huggingface/hub"
import { FileBlob, WebBlob } from "npm:@huggingface/blob";
```

Check out the [full documentation](https://huggingface.co/docs/huggingface.js/hub/README).

## Usage

For some of the calls, you need to create an account and generate an [access token](https://huggingface.co/settings/tokens).

Learn how to find free models using the hub package in this [interactive tutorial](https://scrimba.com/scrim/c7BbVPcd?pl=pkVnrP7uP).

```ts
import * as hub from "@huggingface/hub";
import type { RepoDesignation } from "@huggingface/hub";

const repo: RepoDesignation = { type: "model", name: "myname/some-model" };

const {name: username} = await hub.whoAmI({accessToken: "hf_..."});

for await (const model of hub.listModels({search: {owner: username}, accessToken: "hf_..."})) {
console.log("My model:", model);
}

const specificModel = await hub.modelInfo({name: "openai-community/gpt2"});
await hub.checkRepoAccess({repo, accessToken: "hf_..."});

await hub.createRepo({ repo, accessToken: "hf_...", license: "mit" });

await hub.uploadFiles({
repo,
accessToken: "hf_...",
files: [
// path + blob content
{
path: "file.txt",
content: new Blob(["Hello World"]),
},
// Local file URL
pathToFileURL("./pytorch-model.bin"),
// Web URL
new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"),
// Path + Web URL
{
path: "myfile.bin",
content: new URL("https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin")
}
// Can also work with native File in browsers
],
});

// or
import { FileBlob } from "@huggingface/blob/FileBlob";
import { WebBlob } from "@huggingface/blob/WebBlob";
import { createBlob } from "@huggingface/blob";

for await (const progressEvent of await hub.uploadFilesWithProgress({
repo,
accessToken: "hf_...",
files: [
...
],
})) {
console.log(progressEvent);
}
const fileBlob = await FileBlob.create("path/to/file");
const webBlob = await WebBlob.create("https://url/to/file");

await hub.deleteFile({repo, accessToken: "hf_...", path: "myfile.bin"});

await (await hub.downloadFile({ repo, path: "README.md" })).text();

for await (const fileInfo of hub.listFiles({repo})) {
console.log(fileInfo);
}

await hub.deleteRepo({ repo, accessToken: "hf_..." });
const blob = await createBlob("..."); // Automatically detects if it's a file or web URL
```

## OAuth Login

It's possible to login using OAuth (["Sign in with HF"](https://huggingface.co/docs/hub/oauth)).

This will allow you to get an access token to use some of the API, depending on the scopes set inside the Space or the OAuth App.

```ts
import { oauthLoginUrl, oauthHandleRedirectIfPresent } from "@huggingface/hub";

const oauthResult = await oauthHandleRedirectIfPresent();

if (!oauthResult) {
// If the user is not logged in, redirect to the login page
window.location.href = await oauthLoginUrl();
}

// You can use oauthResult.accessToken, oauthResult.accessTokenExpiresAt and oauthResult.userInfo
console.log(oauthResult);
```

Check out the demo: https://huggingface.co/spaces/huggingfacejs/client-side-oauth

## Hugging Face cache

The `@huggingface/hub` package provides basic capabilities to scan the cache directory. Learn more about [Manage huggingface_hub cache-system](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache).
## API

### `scanCacheDir`
### createBlob

You can get the list of cached repositories using the `scanCacheDir` function.
Creates a Blob object from a string or URL. Automatically detects if it's a file or web URL.

```ts
import { scanCacheDir } from "@huggingface/hub";

const result = await scanCacheDir();

console.log(result);
```
Note: this does not work in the browser

### `downloadFileToCacheDir`

You can cache a file of a repository using the `downloadFileToCacheDir` function.

```ts
import { downloadFileToCacheDir } from "@huggingface/hub";

const file = await downloadFileToCacheDir({
repo: 'foo/bar',
path: 'README.md'
await createBlob("...", {
/**
* Custom fetch function to use, in case it resolves to a Web Blob.
*
* Useful for adding headers, etc.
*/
fetch: ...,
});

console.log(file);
```
Note: this does not work in the browser

### `snapshotDownload`

You can download an entire repository at a given revision in the cache directory using the `snapshotDownload` function.
### FileBlob

```ts
import { snapshotDownload } from "@huggingface/hub";

const directory = await snapshotDownload({
repo: 'foo/bar',
});

console.log(directory);
await FileBlob.create("path/to/file");
await FileBlob.create(new URL("file:///path/to/file"));
```
Internally, the code uses the `downloadFileToCacheDir` function.

Note: this does not work in the browser
### WebBlob

## Performance considerations
Creates a Blob object from a URL. If the file is smaller than 1MB (as indicated by the Content-Length header), by default it will be cached in memory in its entirety upon blob creation.

When uploading large files, you may want to run the `commit` calls inside a worker, to offload the sha256 computations.
This class is useful for large files that do not need to be loaded all at once in memory, as it makes range requests for the data.

Remote resources and local files should be passed as `URL` whenever it's possible so they can be lazy loaded in chunks to reduce RAM usage. Passing a `File` inside the browser's context is fine, because it natively behaves as a `Blob`.

Under the hood, `@huggingface/hub` uses a lazy blob implementation to load the file.

## Dependencies

- `@huggingface/tasks` : Typings only
```ts
await WebBlob.create("https://url/to/file");
await WebBlob.create(new URL("https://url/to/file"));

await WebBlob.create("https://url/to/file", {
/**
* Custom fetch function to use. Useful for adding headers, etc.
*/
fetch: ...,
/**
* If the file is less than the specified size, it will be cached in memory in its entirety upon blob creation,
* instead of doing range requests for the data.
*
* @default 1_000_000
*/
cacheBelow: ...
})
```
15 changes: 5 additions & 10 deletions packages/blob/package.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"name": "@huggingface/blob",
"packageManager": "[email protected]",
"version": "0.21.0",
"description": "Utilities to interact with the Hugging Face hub",
"version": "0.0.1",
"description": "Utilities to convert URLs and files to Blobs, internally used by Hugging Face libs",
"repository": "https://github.com/huggingface/huggingface.js.git",
"publishConfig": {
"access": "public"
Expand All @@ -18,11 +18,7 @@
}
},
"browser": {
"./src/utils/sha256-node.ts": false,
"./src/utils/FileBlob.ts": false,
"./src/lib/cache-management.ts": false,
"./src/lib/download-file-to-cache-dir.ts": false,
"./src/lib/snapshot-download.ts": false,
"./dist/index.js": "./dist/browser/index.js",
"./dist/index.mjs": "./dist/browser/index.mjs"
},
Expand Down Expand Up @@ -50,11 +46,10 @@
],
"keywords": [
"huggingface",
"hub",
"api",
"client",
"hugging",
"face"
"face",
"blob",
"lazy"
],
"author": "Hugging Face",
"license": "MIT",
Expand Down

0 comments on commit 926db9d

Please sign in to comment.