From d35b31ff713af3f72aa4cde2ac51fb937d64a2b5 Mon Sep 17 00:00:00 2001 From: Raymond Cheng Date: Mon, 11 Mar 2024 00:06:44 -0600 Subject: [PATCH] docs: add a router for `integrate` (#1018) * Also moved schema to data-model * Moved indexing pipeline to technical architecture --- apps/docs/docs/contribute/project-data.md | 4 ++-- apps/docs/docs/how-oso-works/_category_.json | 5 ++--- apps/docs/docs/how-oso-works/architecture.md | 12 +++++++++++- .../how-oso-works/data-model/_category_.json | 8 ++++++++ .../{schema => data-model}/artifact.md | 0 .../{schema => data-model}/collection.md | 0 .../{schema => data-model}/event.md | 0 .../{core-concepts.md => data-model/index.md} | 18 ++++-------------- .../{schema => data-model}/project.md | 0 .../{schema => data-model}/schema-updates.md | 0 .../impact-metrics/onchain_activity.md | 2 +- apps/docs/docs/how-oso-works/index.mdx | 4 ---- .../docs/how-oso-works/schema/_category_.json | 7 ------- apps/docs/docs/integrate/index.mdx | 8 ++++++++ apps/docs/docusaurus.config.ts | 2 +- apps/docs/src/pages/index.mdx | 2 +- 16 files changed, 38 insertions(+), 34 deletions(-) create mode 100644 apps/docs/docs/how-oso-works/data-model/_category_.json rename apps/docs/docs/how-oso-works/{schema => data-model}/artifact.md (100%) rename apps/docs/docs/how-oso-works/{schema => data-model}/collection.md (100%) rename apps/docs/docs/how-oso-works/{schema => data-model}/event.md (100%) rename apps/docs/docs/how-oso-works/{core-concepts.md => data-model/index.md} (81%) rename apps/docs/docs/how-oso-works/{schema => data-model}/project.md (100%) rename apps/docs/docs/how-oso-works/{schema => data-model}/schema-updates.md (100%) delete mode 100644 apps/docs/docs/how-oso-works/index.mdx delete mode 100644 apps/docs/docs/how-oso-works/schema/_category_.json diff --git a/apps/docs/docs/contribute/project-data.md b/apps/docs/docs/contribute/project-data.md index 9869ce305..e700e0761 100644 --- a/apps/docs/docs/contribute/project-data.md +++ 
b/apps/docs/docs/contribute/project-data.md @@ -55,14 +55,14 @@ If you run into issues, check out [GitHub's instructions](https://docs.github.co - If the project doesn't exist, you can create a new `.yaml` file under `./data/projects/` In most cases, you should simply use the GitHub organization name (eg, `my-new-org`) as the slug and filename (eg, `./data/projects/m/my-new-org.yaml`). - If the project is not associated with a GitHub organization, you can use the project name followed by the repo owner as the slug, separated by hyphens (eg, `my-repo-my-org`), and the same convention for the filename (eg, `./data/projects/m/my-repo-my-org.yaml`). - Initialize the new project with the following fields: - - `version`: The version of the schema you are using. The latest version is Version 3. You can learn more about the schema [here](../how-oso-works/schema/schema-updates). + - `version`: The version of the schema you are using. The latest version is Version 3. You can learn more about the schema [here](../how-oso-works/data-model/schema-updates). - `slug`: The unique identifier for the project. This is usually the GitHub organization name or the project name followed by the repo owner, separated by hyphens. - `name`: The name of the project. - `github`: The GitHub URL of the project. This is a list of URLs, as a project can have multiple GitHub URLs. In most cases, the first and only URL will be the main GitHub organization URL. You don't need to include all the repositories that belong to the organization, as we will automatically index all of them. ### 3. Link artifacts to the project -- Add artifacts to the project file. Artifacts are the different types of data that we index for a project. You can find the list of artifacts in the [schema](../how-oso-works/schema/artifact). Here's an example of a project file with artifacts: +- Add artifacts to the project file. Artifacts are the different types of data that we index for a project. 
You can find the list of artifacts in the [schema](../how-oso-works/data-model/artifact). Here's an example of a project file with artifacts: ```yaml version: diff --git a/apps/docs/docs/how-oso-works/_category_.json b/apps/docs/docs/how-oso-works/_category_.json index 168ea92bb..43d0d64f1 100644 --- a/apps/docs/docs/how-oso-works/_category_.json +++ b/apps/docs/docs/how-oso-works/_category_.json @@ -1,8 +1,7 @@ { - "label": "Learn How OSO Works", + "label": "How OSO Works", "position": 4, "link": { - "type": "doc", - "id": "index" + "type": "generated-index" } } diff --git a/apps/docs/docs/how-oso-works/architecture.md b/apps/docs/docs/how-oso-works/architecture.md index 7d9bbd453..2a08ae680 100644 --- a/apps/docs/docs/how-oso-works/architecture.md +++ b/apps/docs/docs/how-oso-works/architecture.md @@ -1,5 +1,5 @@ --- -title: Architecture +title: Technical Architecture sidebar_position: 5 --- @@ -75,6 +75,16 @@ dependencies are: the most used views from bigquery as using bigquery to serve the OSO website would become cost prohibitive. +## Indexing Pipeline + +--- + +OSO maintains an [ETL](https://en.wikipedia.org/wiki/Extract%2C_transform%2C_load) data pipeline that is continuously deployed from our [monorepo](https://github.com/opensource-observer/oso/) and regularly indexes all available event data about projects in the [oss-directory](https://github.com/opensource-observer/oss-directory). 
+ +- **Extract**: raw event data from a variety of public data sources (e.g., GitHub, blockchains, npm, Open Collective) +- **Transform**: the raw data into impact metrics and impact vectors per project (e.g., # of active developers) +- **Load**: the results into various OSO data products (e.g., our API, website, widgets) + ## Open Architecture for Open Source Data The architecture is designed to accommodate a collaborative data pipeline that diff --git a/apps/docs/docs/how-oso-works/data-model/_category_.json b/apps/docs/docs/how-oso-works/data-model/_category_.json new file mode 100644 index 000000000..e1f000595 --- /dev/null +++ b/apps/docs/docs/how-oso-works/data-model/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Data Model", + "position": 2, + "link": { + "type": "doc", + "id": "index" + } +} diff --git a/apps/docs/docs/how-oso-works/schema/artifact.md b/apps/docs/docs/how-oso-works/data-model/artifact.md similarity index 100% rename from apps/docs/docs/how-oso-works/schema/artifact.md rename to apps/docs/docs/how-oso-works/data-model/artifact.md diff --git a/apps/docs/docs/how-oso-works/schema/collection.md b/apps/docs/docs/how-oso-works/data-model/collection.md similarity index 100% rename from apps/docs/docs/how-oso-works/schema/collection.md rename to apps/docs/docs/how-oso-works/data-model/collection.md diff --git a/apps/docs/docs/how-oso-works/schema/event.md b/apps/docs/docs/how-oso-works/data-model/event.md similarity index 100% rename from apps/docs/docs/how-oso-works/schema/event.md rename to apps/docs/docs/how-oso-works/data-model/event.md diff --git a/apps/docs/docs/how-oso-works/core-concepts.md b/apps/docs/docs/how-oso-works/data-model/index.md similarity index 81% rename from apps/docs/docs/how-oso-works/core-concepts.md rename to apps/docs/docs/how-oso-works/data-model/index.md index aa4a04176..d39d42304 100644 --- a/apps/docs/docs/how-oso-works/core-concepts.md +++ b/apps/docs/docs/how-oso-works/data-model/index.md @@ -23,7 +23,7 @@ 
Collections are used to group projects together. For example, a collection may i A collection is validated by confirming that all of its projects are valid. -To learn more, check out the [collection schema](../how-oso-works/schema/collection) in our resources section. +To learn more, check out the [collection schema](./collection) in our resources section. ## Projects @@ -35,7 +35,7 @@ In order to instantiate a project, we require a unique slug and a GitHub URL tha Critically, artifacts can only belong to one project. We run validation checks to ensure that artifacts are not duplicated across projects. -To learn more, check out the [project schema](../how-oso-works/schema/project) in our resources section. +To learn more, check out the [project schema](./project) in our resources section. ## Artifacts @@ -49,7 +49,7 @@ Tags are also used to indicate the network the address is used on. For example, OSO creates additional artifacts when a user interacts with a project. For example, when a user interacts with a project's GitHub repository, OSO creates a GitHub user artifact for that user. When a user interacts with a project's blockchain address, OSO creates a blockchain address artifact for that user. -To learn more, check out the [artifact schema](../how-oso-works/schema/artifact) in our resources section. +To learn more, check out the [artifact schema](./artifact) in our resources section. ## Events @@ -61,17 +61,7 @@ Every event is associated with an artifact that belongs to a single project. For The `to` and `from` relationships between artifacts in an event are critical to OSO's ability to understand the impact of a project's activities and situate it in the context of overall network / ecosystem activity. -To learn more, check out the [event schema](../how-oso-works/schema/event) in our resources section. 
- -## Indexing Pipeline - ---- - -OSO maintains an [ETL](https://en.wikipedia.org/wiki/Extract%2C_load%2C_transform) data pipeline that is continuously deployed from our [monorepo](https://github.com/opensource-observer/oso/) and regularly indexes all available event data about projects in the [oss-directory](https://github.com/opensource-observer/oss-directory). - -- **Extract**: raw event data from a variety of public data sources (e.g., GitHub, blockchains, npm, Open Collective) -- **Transform**: the raw data into impact metrics and impact vectors per project (e.g., # of active developers) -- **Load**: the results into various OSO data products (e.g., our API, website, widgets) +To learn more, check out the [event schema](./event) in our resources section. ## Example diff --git a/apps/docs/docs/how-oso-works/schema/project.md b/apps/docs/docs/how-oso-works/data-model/project.md similarity index 100% rename from apps/docs/docs/how-oso-works/schema/project.md rename to apps/docs/docs/how-oso-works/data-model/project.md diff --git a/apps/docs/docs/how-oso-works/schema/schema-updates.md b/apps/docs/docs/how-oso-works/data-model/schema-updates.md similarity index 100% rename from apps/docs/docs/how-oso-works/schema/schema-updates.md rename to apps/docs/docs/how-oso-works/data-model/schema-updates.md diff --git a/apps/docs/docs/how-oso-works/impact-metrics/onchain_activity.md b/apps/docs/docs/how-oso-works/impact-metrics/onchain_activity.md index 923686548..89374e003 100644 --- a/apps/docs/docs/how-oso-works/impact-metrics/onchain_activity.md +++ b/apps/docs/docs/how-oso-works/impact-metrics/onchain_activity.md @@ -10,7 +10,7 @@ Onchain metrics are typically queried by project (e.g., `uniswap`) and network ( ## Addresses Owned -For information about how addresses are tagged, please see: [Address Tagging](../schema/artifact.md#tagging-addresses). +For information about how addresses are tagged, please see: [Address Tagging](../data-model/artifact.md#tagging-addresses). 
### num_contracts diff --git a/apps/docs/docs/how-oso-works/index.mdx b/apps/docs/docs/how-oso-works/index.mdx deleted file mode 100644 index a6a7b43c9..000000000 --- a/apps/docs/docs/how-oso-works/index.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: How OSO Works -sidebar_position: 0 ---- diff --git a/apps/docs/docs/how-oso-works/schema/_category_.json b/apps/docs/docs/how-oso-works/schema/_category_.json deleted file mode 100644 index e5a29440e..000000000 --- a/apps/docs/docs/how-oso-works/schema/_category_.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "label": "Schemas", - "position": 2, - "link": { - "type": "generated-index" - } -} diff --git a/apps/docs/docs/integrate/index.mdx b/apps/docs/docs/integrate/index.mdx index 0e13f8130..860f5e744 100644 --- a/apps/docs/docs/integrate/index.mdx +++ b/apps/docs/docs/integrate/index.mdx @@ -2,3 +2,11 @@ title: Get OSO Data sidebar_position: 0 --- + +There are a number of ways to access OSO data: + +- If you want to download a snapshot of the data, the easiest way is to download it directly from BigQuery. Check out the [Getting Started](../getting-started) guide. +- If you are trying to connect the latest OSO metrics into a live production application, then check out our [GraphQL API](./api). +- If you want to do data science over any dataset, check out this [guide](./data-science). +- If you want to just download the project info from OSS directory, we have [libraries and exports](./oss-directory) that you can use. 
+ diff --git a/apps/docs/docusaurus.config.ts b/apps/docs/docusaurus.config.ts index 40534ccd0..1a6e74a97 100644 --- a/apps/docs/docusaurus.config.ts +++ b/apps/docs/docusaurus.config.ts @@ -98,7 +98,7 @@ const config: Config = { }, { label: "How OSO Works", - to: "/docs/how-oso-works/", + to: "/docs/category/how-oso-works/", }, ], }, diff --git a/apps/docs/src/pages/index.mdx b/apps/docs/src/pages/index.mdx index ebf9cdc24..df6900204 100644 --- a/apps/docs/src/pages/index.mdx +++ b/apps/docs/src/pages/index.mdx @@ -41,7 +41,7 @@ We are [open source, open data, and open infrastructure](/blog/open-source-open- All code and devops is continuously-deployed and fully-transparent, constantly evolving with community contributions. -Learn more +Learn more ---