From 34e0a30f193a6d0091cde4d0f2cf62efb1d44e59 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 3 May 2024 21:09:31 +0530 Subject: [PATCH 1/9] Refine Symbol and String collection doc Signed-off-by: Keshav Priyadarshi --- docs/source/purldb/index.rst | 2 +- docs/source/purldb/purl2sym.rst | 78 ------------------- .../purldb/symbol_and_string_collection.rst | 34 ++++++++ 3 files changed, 35 insertions(+), 79 deletions(-) delete mode 100644 docs/source/purldb/purl2sym.rst create mode 100644 docs/source/purldb/symbol_and_string_collection.rst diff --git a/docs/source/purldb/index.rst b/docs/source/purldb/index.rst index b07e0373..21a31890 100644 --- a/docs/source/purldb/index.rst +++ b/docs/source/purldb/index.rst @@ -5,4 +5,4 @@ PURLdb :maxdepth: 2 overview - purl2sym + symbol_and_string_collection diff --git a/docs/source/purldb/purl2sym.rst b/docs/source/purldb/purl2sym.rst deleted file mode 100644 index c4cac435..00000000 --- a/docs/source/purldb/purl2sym.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. _purl2sym: - -Purl2Sym -============ - -Purl2Sym collects the core package metadata along with symbols and strings -from source code and stores them in the ``extra_data`` field of the resource. - -How it works ------------- - -When PurlDB receives an index request for a PURL via the ``/api/collect`` -endpoint, it fetches the archive download_url and creates a package for -the PURL with relevant metadata. Thereafter, a scan job is scheduled which -downloads the archive of the PURL and runs the `scan_single_package `_ -package pipeline. Thereafter, the scan job also runs the two addon pipelines: -`collect_symbols `_ -and `collect_source_strings `_ -for symbol and string collection respectively. Upon completion of the scan -job, the package is updated with resource data along with the ``source_symbols`` -and ``source_strings`` in the ``extra_data`` field of resources. 
- -source-inspector ------------------- - -source-inspector is a set of utilities to inspect and analyze source -code and collect interesting data using various tools such as code symbols and strings. -This is also a ScanCode-toolkit plugin. - -Requirements -~~~~~~~~~~~~~ - -This utility is designed to work on Linux and POSIX OS with these utilities: - -- xgettext that comes with GNU gettext. -- universal ctags, version 5.9 or higher, built with JSON support. - -On Debian systems run this:: - - sudo apt-get install universal-ctags gettext - -On MacOS systems run this:: - - brew install universal-ctags gettext - -To get started: -~~~~~~~~~~~~~~~~ - -1. Clone this repo - -2. Run:: - - ./configure --dev - source venv/bin/activate - -3. Run tests with:: - - pytest -vvs - -4. Run a basic scan to collect symbols and display as YAML on screen:: - - scancode --source-symbol tests/data/symbols_ctags/test3.cpp --yaml - - -5. Run a basic scan to collect strings and display as YAML on screen:: - - scancode --source-string tests/data/symbols_ctags/test3.cpp --yaml - - -Pipeline in scancode.io -------------------------- - -There is a ``collect_symbols`` pipeline in scancode.io to get symbols -using the ``source-inspector`` library for codebases. - -See the `pipeline `_ for more details. - -This is also available in the standard scancode.io pipelines used to scan packages -in purldb and symbols are stored in the ``extra_data`` field for all scanned resources -available in purldb. diff --git a/docs/source/purldb/symbol_and_string_collection.rst b/docs/source/purldb/symbol_and_string_collection.rst new file mode 100644 index 00000000..8906b0f5 --- /dev/null +++ b/docs/source/purldb/symbol_and_string_collection.rst @@ -0,0 +1,34 @@ +.. _symbol_and_string_collection: + +Symbol and String Collection +============================ + +The package indexing endpoint now also supports the symbol and string collection +pipeline and stores them in the ``extra_data`` field of the resource. 
+ +How it works +------------ + +When PurlDB receives an index request for a PURL via the ``/api/collect`` +endpoint along with the symbol/string addon_pipeline, it fetches the archive +download_url and creates a package for the PURL with relevant metadata. +Thereafter, a scan job is scheduled which downloads the archive of the PURL +and runs the `scan_single_package `_ +package pipeline. The scan job also runs the requested addon pipelines. +Upon completion of the scan job, the package is updated with resource data along +with the ``source_symbols`` and ``source_strings`` in the ``extra_data`` field of +resources. + +Currently PurlDB supports these addon pipelines for symbol/string collection: + +- ``collect_symbols`` +- ``collect_source_strings`` +- ``collect_tree_sitter_symbols`` +- ``collect_pygments_symbols`` + +See the detailed tutorial on :ref:`tutorial_symbol_and_string_collection` in PurlDB. + +.. line-block:: + + To use these pipeline on ScanCode.io refer to `Symbol and String Collection `_. + For more details on these plugins refer to `source-inspector `_. 
From bc4c4889f42dd5dbc8f67e4d83168886fa2286a7 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 3 May 2024 21:10:13 +0530 Subject: [PATCH 2/9] Add tutorial for symbol and string collection Signed-off-by: Keshav Priyadarshi --- docs/source/how-to-guides/index.rst | 2 +- docs/source/how-to-guides/purl2sym.rst | 3 - .../tutorial_symbol_and_string_collection.rst | 395 ++++++++++++++++++ 3 files changed, 396 insertions(+), 4 deletions(-) delete mode 100644 docs/source/how-to-guides/purl2sym.rst create mode 100644 docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst diff --git a/docs/source/how-to-guides/index.rst b/docs/source/how-to-guides/index.rst index 064a46fd..47d33636 100644 --- a/docs/source/how-to-guides/index.rst +++ b/docs/source/how-to-guides/index.rst @@ -11,4 +11,4 @@ thourgh specifica use cases: :maxdepth: 2 matchcode - purl2sym + tutorial_symbol_and_string_collection diff --git a/docs/source/how-to-guides/purl2sym.rst b/docs/source/how-to-guides/purl2sym.rst deleted file mode 100644 index f7bbf2f6..00000000 --- a/docs/source/how-to-guides/purl2sym.rst +++ /dev/null @@ -1,3 +0,0 @@ -How To get symbols from a PURL/package -====================================== - diff --git a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst new file mode 100644 index 00000000..0fdad619 --- /dev/null +++ b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst @@ -0,0 +1,395 @@ +.. _tutorial_symbol_and_string_collection: + +How To get symbols and strings from a PURL/package +================================================== + +In this tutorial we'll introduce the different addon pipelines that can be used for +collecting symbols and strings from codebase resources. + +.. note:: + This tutorial assumes that you have a working installation of PurlDB. + If you don't, please refer to the `installation <../purldb/overview.html#installation>`_ page. 
+ + +Through out this tutorial we will use ``pkg:github/llvm/llvm-project@10.0.0`` and will show +the symbol and string for `llvm-project/clang/include/clang/Analysis/BodyFarm.h `_ +resource. + +.. code-block:: c + :name: BodyFarm.h + + //== BodyFarm.h - Factory for conjuring up fake bodies -------------*- C++ -*-// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + // + // BodyFarm is a factory for creating faux implementations for functions/methods + // for analysis purposes. + // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H + #define LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H + + #include "clang/AST/DeclBase.h" + #include "clang/Basic/LLVM.h" + #include "llvm/ADT/DenseMap.h" + #include "llvm/ADT/Optional.h" + + namespace clang { + + class ASTContext; + class FunctionDecl; + class ObjCMethodDecl; + class ObjCPropertyDecl; + class Stmt; + class CodeInjector; + + class BodyFarm { + public: + BodyFarm(ASTContext &C, CodeInjector *injector) : C(C), Injector(injector) {} + + /// Factory method for creating bodies for ordinary functions. + Stmt *getBody(const FunctionDecl *D); + + /// Factory method for creating bodies for Objective-C properties. + Stmt *getBody(const ObjCMethodDecl *D); + + /// Remove copy constructor to avoid accidental copying. + BodyFarm(const BodyFarm &other) = delete; + + private: + typedef llvm::DenseMap> BodyMap; + + ASTContext &C; + BodyMap Bodies; + CodeInjector *Injector; + }; + } // namespace clang + + #endif + + +Ctags Symbols +------------- + +- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 + &addon_pipelines=collect_symbols``. + +.. 
warning:: + The ``collect_symbols`` pipeline requires ``universal-ctags``. + +- Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` + to get the ``source_symbols`` for resources. + +- Below is the Ctags symbol for ``clang/include/clang/Analysis/BodyFarm.h`` + file in ``extra_data`` field. + +.. code-block:: json + + { + "package": "http://127.0.0.1:8001/api/packages//", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", + "type": "file", + "name": "BodyFarm.h", + "extension": ".h", + "size": 1509, + "md5": "808b7438da9841d95ae3a8135e7bf61f", + "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", + "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c++", + "file_type": "C++ source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_symbols": [ + "Bodies", + "BodyFarm", + "BodyFarm", + "BodyMap", + "C", + "Injector", + "LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H", + "clang" + ] + } + } + + +Xgettext Strings +---------------- + +- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 + &addon_pipelines=collect_source_strings``. + +.. warning:: + The ``collect_source_strings`` pipeline requires ``gettext``. + +- Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` + to get the ``source_strings`` for resources. 
+ +- Below is the Xgettext strings for ``clang/include/clang/Analysis/BodyFarm.h`` + file in ``extra_data`` field. + +.. code-block:: json + + { + "package": "http://127.0.0.1:8001/api/packages//", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", + "type": "file", + "name": "BodyFarm.h", + "extension": ".h", + "size": 1509, + "md5": "808b7438da9841d95ae3a8135e7bf61f", + "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", + "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c++", + "file_type": "C++ source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_strings": [] + } + } + +Tree-Sitter Symbols and Strings +------------------------------- + +- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 + &addon_pipelines=collect_tree_sitter_symbols``. + +- Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` + to get the ``source_symbols`` and ``source_strings`` for resources. + +- Below is the Tree-Sitter symbols and strings for ``clang/include/clang/Analysis/BodyFarm.h`` file + in ``extra_data`` field. + +.. 
code-block:: json + + { + "package": "http://127.0.0.1:8001/api/packages//", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", + "type": "file", + "name": "BodyFarm.h", + "extension": ".h", + "size": 1509, + "md5": "808b7438da9841d95ae3a8135e7bf61f", + "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", + "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c++", + "file_type": "C++ source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_symbols": [ + "LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H", + "LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H", + "clang", + "ASTContext", + "FunctionDecl", + "ObjCMethodDecl", + "ObjCPropertyDecl", + "Stmt", + "CodeInjector", + "BodyFarm", + "BodyFarm", + "ASTContext", + "C", + "CodeInjector", + "injector", + "C", + "C", + "Injector", + "injector", + "getBody", + "D", + "getBody", + "D", + "BodyFarm", + "other", + "delete", + "llvm", + "DenseMap", + "const", + "Decl", + "Optional", + "Stmt", + "BodyMap", + "ASTContext", + "C", + "Bodies", + "Injector" + ], + "source_strings": [ + "clang/AST/DeclBase.h", + "clang/Basic/LLVM.h", + "llvm/ADT/DenseMap.h", + "llvm/ADT/Optional.h" + ] + } + } + +Pygments Symbols and Strings +------------------------------- + +- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 + &addon_pipelines=collect_source_strings``. + +.. 
warning:: + The ``collect_source_strings`` pipeline requires ``gettext``. + +- Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` + to get the ``source_strings`` for resources. + +- Below is the Xgettext strings for ``clang/include/clang/Analysis/BodyFarm.h`` + file in ``extra_data`` field. + +.. code-block:: json + + { + "package": "http://127.0.0.1:8001/api/packages//", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", + "type": "file", + "name": "BodyFarm.h", + "extension": ".h", + "size": 1509, + "md5": "808b7438da9841d95ae3a8135e7bf61f", + "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", + "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c++", + "file_type": "C++ source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_strings": [] + } + } + +Tree-Sitter Symbols and Strings +------------------------------- + +- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 + &addon_pipelines=collect_pygments_symbols``. + +- Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` + to get the ``source_symbols`` and ``source_strings`` for resources. + +- Below is the Pygments symbols and strings for ``clang/include/clang/Analysis/BodyFarm.h`` file + in ``extra_data`` field. + +.. 
code-block:: json + + { + "package": "http://127.0.0.1:8001/api/packages//", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", + "type": "file", + "name": "BodyFarm.h", + "extension": ".h", + "size": 1509, + "md5": "808b7438da9841d95ae3a8135e7bf61f", + "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", + "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c++", + "file_type": "C++ source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_symbols": [ + "getBody" + ], + "source_strings": [] + } + } From a7533b48ee5fd91ec6d3fb4ea090cdc9c1d66c8c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 3 May 2024 21:18:51 +0530 Subject: [PATCH 3/9] Use correct url for SCIO tutorial Signed-off-by: Keshav Priyadarshi --- docs/source/purldb/symbol_and_string_collection.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/purldb/symbol_and_string_collection.rst b/docs/source/purldb/symbol_and_string_collection.rst index 8906b0f5..13cfa9ed 100644 --- a/docs/source/purldb/symbol_and_string_collection.rst +++ b/docs/source/purldb/symbol_and_string_collection.rst @@ -30,5 +30,5 @@ See the detailed tutorial on :ref:`tutorial_symbol_and_string_collection` in Pur .. line-block:: - To use these pipeline on ScanCode.io refer to `Symbol and String Collection `_. + To use these pipelines on ScanCode.io refer to `Symbol and String Collection `_. 
For more details on these plugins refer to `source-inspector `_. From b444d3a15f4d8bd26f91f0a7d88a6d88cb17958e Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 May 2024 12:55:50 +0530 Subject: [PATCH 4/9] Use larger resource file for tutorial Signed-off-by: Keshav Priyadarshi --- .../tutorial_symbol_and_string_collection.rst | 1000 ++++++++++++----- 1 file changed, 745 insertions(+), 255 deletions(-) diff --git a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst index 0fdad619..42d32f64 100644 --- a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst +++ b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst @@ -12,13 +12,13 @@ collecting symbols and strings from codebase resources. Through out this tutorial we will use ``pkg:github/llvm/llvm-project@10.0.0`` and will show -the symbol and string for `llvm-project/clang/include/clang/Analysis/BodyFarm.h `_ +the symbol and string for `llvm-project/clang/lib/Basic/Builtins.cpp `_ resource. .. code-block:: c - :name: BodyFarm.h + :name: Builtins.cpp - //== BodyFarm.h - Factory for conjuring up fake bodies -------------*- C++ -*-// + //===--- Builtins.cpp - Builtin function implementation -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -26,58 +26,199 @@ resource. // //===----------------------------------------------------------------------===// // - // BodyFarm is a factory for creating faux implementations for functions/methods - // for analysis purposes. + // This file implements various things for builtin functions. 
// //===----------------------------------------------------------------------===// - #ifndef LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H - #define LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H + #include "clang/Basic/Builtins.h" + #include "clang/Basic/IdentifierTable.h" + #include "clang/Basic/LangOptions.h" + #include "clang/Basic/TargetInfo.h" + #include "llvm/ADT/StringRef.h" + using namespace clang; + + static const Builtin::Info BuiltinInfo[] = { + { "not a builtin function", nullptr, nullptr, nullptr, ALL_LANGUAGES,nullptr}, + #define BUILTIN(ID, TYPE, ATTRS) \ + { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, + #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ + { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, + #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ + { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, + #include "clang/Basic/Builtins.def" + }; + + const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const { + if (ID < Builtin::FirstTSBuiltin) + return BuiltinInfo[ID]; + assert(((ID - Builtin::FirstTSBuiltin) < + (TSRecords.size() + AuxTSRecords.size())) && + "Invalid builtin ID!"); + if (isAuxBuiltinID(ID)) + return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin]; + return TSRecords[ID - Builtin::FirstTSBuiltin]; + } + + void Builtin::Context::InitializeTarget(const TargetInfo &Target, + const TargetInfo *AuxTarget) { + assert(TSRecords.empty() && "Already initialized target?"); + TSRecords = Target.getTargetBuiltins(); + if (AuxTarget) + AuxTSRecords = AuxTarget->getTargetBuiltins(); + } + + bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) { + for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; ++i) + if (FuncName.equals(BuiltinInfo[i].Name)) + return strchr(BuiltinInfo[i].Attributes, 'f') != nullptr; - #include "clang/AST/DeclBase.h" - #include "clang/Basic/LLVM.h" - #include "llvm/ADT/DenseMap.h" - #include "llvm/ADT/Optional.h" + return false; + } - namespace clang { + bool 
Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, + const LangOptions &LangOpts) { + bool BuiltinsUnsupported = + (LangOpts.NoBuiltin || LangOpts.isNoBuiltinFunc(BuiltinInfo.Name)) && + strchr(BuiltinInfo.Attributes, 'f'); + bool MathBuiltinsUnsupported = + LangOpts.NoMathBuiltin && BuiltinInfo.HeaderName && + llvm::StringRef(BuiltinInfo.HeaderName).equals("math.h"); + bool GnuModeUnsupported = !LangOpts.GNUMode && (BuiltinInfo.Langs & GNU_LANG); + bool MSModeUnsupported = + !LangOpts.MicrosoftExt && (BuiltinInfo.Langs & MS_LANG); + bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; + bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; + bool OclC2Unsupported = + (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; + bool OclCUnsupported = !LangOpts.OpenCL && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); + bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; + bool CPlusPlusUnsupported = + !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG; + return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && + !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && + !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported && + !CPlusPlusUnsupported; + } - class ASTContext; - class FunctionDecl; - class ObjCMethodDecl; - class ObjCPropertyDecl; - class Stmt; - class CodeInjector; + /// initializeBuiltins - Mark the identifiers for all the builtins with their + /// appropriate builtin ID # and mark any non-portable builtin identifiers as + /// such. + void Builtin::Context::initializeBuiltins(IdentifierTable &Table, + const LangOptions& LangOpts) { + // Step #1: mark all target-independent builtins with their ID's. 
+ for (unsigned i = Builtin::NotBuiltin+1; i != Builtin::FirstTSBuiltin; ++i) + if (builtinIsSupported(BuiltinInfo[i], LangOpts)) { + Table.get(BuiltinInfo[i].Name).setBuiltinID(i); + } - class BodyFarm { - public: - BodyFarm(ASTContext &C, CodeInjector *injector) : C(C), Injector(injector) {} + // Step #2: Register target-specific builtins. + for (unsigned i = 0, e = TSRecords.size(); i != e; ++i) + if (builtinIsSupported(TSRecords[i], LangOpts)) + Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin); - /// Factory method for creating bodies for ordinary functions. - Stmt *getBody(const FunctionDecl *D); + // Step #3: Register target-specific builtins for AuxTarget. + for (unsigned i = 0, e = AuxTSRecords.size(); i != e; ++i) + Table.get(AuxTSRecords[i].Name) + .setBuiltinID(i + Builtin::FirstTSBuiltin + TSRecords.size()); + } - /// Factory method for creating bodies for Objective-C properties. - Stmt *getBody(const ObjCMethodDecl *D); + void Builtin::Context::forgetBuiltin(unsigned ID, IdentifierTable &Table) { + Table.get(getRecord(ID).Name).setBuiltinID(0); + } - /// Remove copy constructor to avoid accidental copying. 
- BodyFarm(const BodyFarm &other) = delete; + unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const { + const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V'); + if (!WidthPos) + return 0; - private: - typedef llvm::DenseMap> BodyMap; + ++WidthPos; + assert(*WidthPos == ':' && + "Vector width specifier must be followed by a ':'"); + ++WidthPos; - ASTContext &C; - BodyMap Bodies; - CodeInjector *Injector; - }; - } // namespace clang + char *EndPos; + unsigned Width = ::strtol(WidthPos, &EndPos, 10); + assert(*EndPos == ':' && "Vector width specific must end with a ':'"); + return Width; + } + + bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx, + bool &HasVAListArg, const char *Fmt) const { + assert(Fmt && "Not passed a format string"); + assert(::strlen(Fmt) == 2 && + "Format string needs to be two characters long"); + assert(::toupper(Fmt[0]) == Fmt[1] && + "Format string is not in the form \"xX\""); + + const char *Like = ::strpbrk(getRecord(ID).Attributes, Fmt); + if (!Like) + return false; + + HasVAListArg = (*Like == Fmt[1]); + + ++Like; + assert(*Like == ':' && "Format specifier must be followed by a ':'"); + ++Like; + + assert(::strchr(Like, ':') && "Format specifier must end with a ':'"); + FormatIdx = ::strtol(Like, nullptr, 10); + return true; + } + + bool Builtin::Context::isPrintfLike(unsigned ID, unsigned &FormatIdx, + bool &HasVAListArg) { + return isLike(ID, FormatIdx, HasVAListArg, "pP"); + } + + bool Builtin::Context::isScanfLike(unsigned ID, unsigned &FormatIdx, + bool &HasVAListArg) { + return isLike(ID, FormatIdx, HasVAListArg, "sS"); + } - #endif + bool Builtin::Context::performsCallback(unsigned ID, + SmallVectorImpl &Encoding) const { + const char *CalleePos = ::strchr(getRecord(ID).Attributes, 'C'); + if (!CalleePos) + return false; + + ++CalleePos; + assert(*CalleePos == '<' && + "Callback callee specifier must be followed by a '<'"); + ++CalleePos; + + char *EndPos; + int CalleeIdx = ::strtol(CalleePos, 
&EndPos, 10); + assert(CalleeIdx >= 0 && "Callee index is supposed to be positive!"); + Encoding.push_back(CalleeIdx); + + while (*EndPos == ',') { + const char *PayloadPos = EndPos + 1; + + int PayloadIdx = ::strtol(PayloadPos, &EndPos, 10); + Encoding.push_back(PayloadIdx); + } + + assert(*EndPos == '>' && "Callback callee specifier must end with a '>'"); + return true; + } + + bool Builtin::Context::canBeRedeclared(unsigned ID) const { + return ID == Builtin::NotBuiltin || + ID == Builtin::BI__va_start || + (!hasReferenceArgsOrResult(ID) && + !hasCustomTypechecking(ID)); + } Ctags Symbols ------------- -- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 - &addon_pipelines=collect_symbols``. +- Send GET request to PurlDB with:: + + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_symbols .. warning:: The ``collect_symbols`` pipeline requires ``universal-ctags``. @@ -85,63 +226,72 @@ Ctags Symbols - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` for resources. -- Below is the Ctags symbol for ``clang/include/clang/Analysis/BodyFarm.h`` +- Below is the Ctags symbol for ``clang/lib/Basic/Builtins.cpp`` file in ``extra_data`` field. .. 
code-block:: json { - "package": "http://127.0.0.1:8001/api/packages//", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", - "type": "file", - "name": "BodyFarm.h", - "extension": ".h", - "size": 1509, - "md5": "808b7438da9841d95ae3a8135e7bf61f", - "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", - "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c++", - "file_type": "C++ source, ASCII text", - "programming_language": "C", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], - "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_symbols": [ - "Bodies", - "BodyFarm", - "BodyFarm", - "BodyMap", - "C", - "Injector", - "LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H", - "clang" - ] - } + "package": "http://127.0.0.1:8001/api/packages/ddedb539-32fd-43fd-b2c7-d50e5b718711/", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Builtins.cpp", + "type": "file", + "name": "Builtins.cpp", + "extension": ".cpp", + "size": 7566, + "md5": "6afa8fe94d28fb1926851fa7eaf2cffa", + "sha1": "5cf1719199d3183d7811a3f133d2a4bfdd2d7da4", + "sha256": "9ba7fe01cb504dd97c7694ab716291e1b9584ee6646219469c14d6724da7292b", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + 
"detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_symbols": [ + "BUILTIN", + "BuiltinInfo", + "InitializeTarget", + "LANGBUILTIN", + "LIBBUILTIN", + "builtinIsSupported", + "canBeRedeclared", + "forgetBuiltin", + "getRecord", + "getRequiredVectorWidth", + "initializeBuiltins", + "isBuiltinFunc", + "isLike", + "isPrintfLike", + "isScanfLike", + "performsCallback" + ] + } } Xgettext Strings ---------------- -- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 - &addon_pipelines=collect_source_strings``. +- Send GET request to PurlDB with:: + + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_source_strings .. warning:: The ``collect_source_strings`` pipeline requires ``gettext``. @@ -149,58 +299,76 @@ Xgettext Strings - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_strings`` for resources. -- Below is the Xgettext strings for ``clang/include/clang/Analysis/BodyFarm.h`` +- Below is the Xgettext strings for ``clang/lib/Basic/Builtins.cpp`` file in ``extra_data`` field. .. 
code-block:: json { - "package": "http://127.0.0.1:8001/api/packages//", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", - "type": "file", - "name": "BodyFarm.h", - "extension": ".h", - "size": 1509, - "md5": "808b7438da9841d95ae3a8135e7bf61f", - "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", - "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c++", - "file_type": "C++ source, ASCII text", - "programming_language": "C", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], - "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_strings": [] - } + "package": "http://127.0.0.1:8001/api/packages/ddedb539-32fd-43fd-b2c7-d50e5b718711/", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Builtins.cpp", + "type": "file", + "name": "Builtins.cpp", + "extension": ".cpp", + "size": 7566, + "md5": "6afa8fe94d28fb1926851fa7eaf2cffa", + "sha1": "5cf1719199d3183d7811a3f133d2a4bfdd2d7da4", + "sha256": "9ba7fe01cb504dd97c7694ab716291e1b9584ee6646219469c14d6724da7292b", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + 
"copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_strings": [ + "not a builtin function" + "Invalid builtin ID!" + "Already initialized target?" + "math.h" + "Vector width specifier must be followed by a ':'" + "Vector width specific must end with a ':'" + "Not passed a format string" + "Format string needs to be two characters long" + "Format string is not in the form \\\"xX\\" + "Format specifier must be followed by a ':'" + "Format specifier must end with a ':'" + "pP" + "sS" + "Callback callee specifier must be followed by a '<'" + "Callee index is supposed to be positive!" + "Callback callee specifier must end with a '>'" + ] + } } Tree-Sitter Symbols and Strings ------------------------------- -- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 - &addon_pipelines=collect_tree_sitter_symbols``. +- Send GET request to PurlDB with:: + + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_tree_sitter_symbols - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` and ``source_strings`` for resources. -- Below is the Tree-Sitter symbols and strings for ``clang/include/clang/Analysis/BodyFarm.h`` file +- Below is the Tree-Sitter symbols and strings for ``clang/lib/Basic/Builtins.cpp`` file in ``extra_data`` field. .. 
code-block:: json @@ -239,49 +407,308 @@ Tree-Sitter Symbols and Strings "urls": [], "extra_data": { "source_symbols": [ - "LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H", - "LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H", "clang", - "ASTContext", - "FunctionDecl", - "ObjCMethodDecl", - "ObjCPropertyDecl", - "Stmt", - "CodeInjector", - "BodyFarm", - "BodyFarm", - "ASTContext", - "C", - "CodeInjector", - "injector", - "C", - "C", - "Injector", - "injector", - "getBody", - "D", - "getBody", - "D", - "BodyFarm", - "other", - "delete", - "llvm", - "DenseMap", - "const", - "Decl", - "Optional", - "Stmt", - "BodyMap", - "ASTContext", - "C", - "Bodies", - "Injector" + "BuiltinInfo", + "ALL_LANGUAGES", + "BUILTIN", + "ID", + "TYPE", + "ATTRS", + "TYPE", + "ATTRS", + "ALL_LANGUAGES", + "LANGBUILTIN", + "LIBBUILTIN", + "ID", + "TYPE", + "ATTRS", + "HEADER", + "LANGS", + "getRecord", + "ID", + "ID", + "FirstTSBuiltin", + "BuiltinInfo", + "ID", + "assert", + "ID", + "FirstTSBuiltin", + "TSRecords", + "AuxTSRecords", + "isAuxBuiltinID", + "ID", + "AuxTSRecords", + "getAuxBuiltinID", + "ID", + "FirstTSBuiltin", + "TSRecords", + "ID", + "FirstTSBuiltin", + "InitializeTarget", + "Target", + "AuxTarget", + "assert", + "TSRecords", + "TSRecords", + "Target", + "AuxTarget", + "AuxTSRecords", + "AuxTarget", + "isBuiltinFunc", + "FuncName", + "i", + "NotBuiltin", + "i", + "FirstTSBuiltin", + "i", + "FuncName", + "BuiltinInfo", + "i", + "strchr", + "BuiltinInfo", + "i", + "builtinIsSupported", + "BuiltinInfo", + "LangOpts", + "BuiltinsUnsupported", + "LangOpts", + "LangOpts", + "BuiltinInfo", + "strchr", + "BuiltinInfo", + "MathBuiltinsUnsupported", + "LangOpts", + "BuiltinInfo", + "StringRef", + "BuiltinInfo", + "GnuModeUnsupported", + "LangOpts", + "BuiltinInfo", + "GNU_LANG", + "MSModeUnsupported", + "LangOpts", + "BuiltinInfo", + "MS_LANG", + "ObjCUnsupported", + "LangOpts", + "BuiltinInfo", + "OBJC_LANG", + "OclC1Unsupported", + "LangOpts", + "BuiltinInfo", + "ALL_OCLC_LANGUAGES", + "OCLC1X_LANG", + 
"OclC2Unsupported", + "LangOpts", + "LangOpts", + "BuiltinInfo", + "ALL_OCLC_LANGUAGES", + "OCLC20_LANG", + "OclCUnsupported", + "LangOpts", + "BuiltinInfo", + "ALL_OCLC_LANGUAGES", + "OpenMPUnsupported", + "LangOpts", + "BuiltinInfo", + "OMP_LANG", + "CPlusPlusUnsupported", + "LangOpts", + "BuiltinInfo", + "CXX_LANG", + "BuiltinsUnsupported", + "MathBuiltinsUnsupported", + "OclCUnsupported", + "OclC1Unsupported", + "OclC2Unsupported", + "OpenMPUnsupported", + "GnuModeUnsupported", + "MSModeUnsupported", + "ObjCUnsupported", + "CPlusPlusUnsupported", + "initializeBuiltins", + "Table", + "LangOpts", + "i", + "NotBuiltin", + "i", + "FirstTSBuiltin", + "i", + "builtinIsSupported", + "BuiltinInfo", + "i", + "LangOpts", + "Table", + "BuiltinInfo", + "i", + "i", + "i", + "e", + "TSRecords", + "i", + "e", + "i", + "builtinIsSupported", + "TSRecords", + "i", + "LangOpts", + "Table", + "TSRecords", + "i", + "i", + "FirstTSBuiltin", + "i", + "e", + "AuxTSRecords", + "i", + "e", + "i", + "Table", + "AuxTSRecords", + "i", + "i", + "FirstTSBuiltin", + "TSRecords", + "forgetBuiltin", + "ID", + "Table", + "Table", + "getRecord", + "ID", + "getRequiredVectorWidth", + "ID", + "WidthPos", + "strchr", + "getRecord", + "ID", + "WidthPos", + "WidthPos", + "assert", + "WidthPos", + "WidthPos", + "EndPos", + "Width", + "strtol", + "WidthPos", + "EndPos", + "assert", + "EndPos", + "Width", + "isLike", + "ID", + "FormatIdx", + "HasVAListArg", + "Fmt", + "assert", + "Fmt", + "assert", + "strlen", + "Fmt", + "assert", + "toupper", + "Fmt", + "Fmt", + "Like", + "strpbrk", + "getRecord", + "ID", + "Fmt", + "Like", + "HasVAListArg", + "Like", + "Fmt", + "Like", + "assert", + "Like", + "Like", + "assert", + "strchr", + "Like", + "FormatIdx", + "strtol", + "Like", + "isPrintfLike", + "ID", + "FormatIdx", + "HasVAListArg", + "isLike", + "ID", + "FormatIdx", + "HasVAListArg", + "isScanfLike", + "ID", + "FormatIdx", + "HasVAListArg", + "isLike", + "ID", + "FormatIdx", + "HasVAListArg", + 
"performsCallback", + "ID", + "Encoding", + "CalleePos", + "strchr", + "getRecord", + "ID", + "CalleePos", + "CalleePos", + "assert", + "CalleePos", + "CalleePos", + "EndPos", + "CalleeIdx", + "strtol", + "CalleePos", + "EndPos", + "assert", + "CalleeIdx", + "Encoding", + "CalleeIdx", + "EndPos", + "PayloadPos", + "EndPos", + "PayloadIdx", + "strtol", + "PayloadPos", + "EndPos", + "Encoding", + "PayloadIdx", + "assert", + "EndPos", + "canBeRedeclared", + "ID", + "ID", + "NotBuiltin", + "ID", + "BI__va_start", + "hasReferenceArgsOrResult", + "ID", + "hasCustomTypechecking", + "ID" ], "source_strings": [ - "clang/AST/DeclBase.h", - "clang/Basic/LLVM.h", - "llvm/ADT/DenseMap.h", - "llvm/ADT/Optional.h" + "clang/Basic/Builtins.h", + "clang/Basic/IdentifierTable.h", + "clang/Basic/LangOptions.h", + "clang/Basic/TargetInfo.h", + "llvm/ADT/StringRef.h", + "not a builtin function", + "clang/Basic/Builtins.def", + "Invalid builtin ID!", + "Already initialized target?", + "math.h", + "Vector width specifier must be followed by a ':'", + "Vector width specific must end with a ':'", + "Not passed a format string", + "Format string needs to be two characters long", + "Format string is not in the form xX", + "Format specifier must be followed by a ':'", + "Format specifier must end with a ':'", + "pP", + "sS", + "Callback callee specifier must be followed by a '<'", + "Callee index is supposed to be positive!", + "Callback callee specifier must end with a '>'" ] } } @@ -289,107 +716,170 @@ Tree-Sitter Symbols and Strings Pygments Symbols and Strings ------------------------------- -- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 - &addon_pipelines=collect_source_strings``. - -.. warning:: - The ``collect_source_strings`` pipeline requires ``gettext``. - -- Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` - to get the ``source_strings`` for resources. 
- -- Below is the Xgettext strings for ``clang/include/clang/Analysis/BodyFarm.h`` - file in ``extra_data`` field. - -.. code-block:: json - - { - "package": "http://127.0.0.1:8001/api/packages//", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", - "type": "file", - "name": "BodyFarm.h", - "extension": ".h", - "size": 1509, - "md5": "808b7438da9841d95ae3a8135e7bf61f", - "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", - "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c++", - "file_type": "C++ source, ASCII text", - "programming_language": "C", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], - "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_strings": [] - } - } - -Tree-Sitter Symbols and Strings -------------------------------- - -- Send GET request to PurlDB with ``/api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0 - &addon_pipelines=collect_pygments_symbols``. +- Send GET request to PurlDB with:: + + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_pygments_symbols - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` and ``source_strings`` for resources. -- Below is the Pygments symbols and strings for ``clang/include/clang/Analysis/BodyFarm.h`` file +- Below is the Pygments symbols and strings for ``clang/lib/Basic/Builtins.cpp`` file in ``extra_data`` field. .. 
code-block:: json { - "package": "http://127.0.0.1:8001/api/packages//", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", - "type": "file", - "name": "BodyFarm.h", - "extension": ".h", - "size": 1509, - "md5": "808b7438da9841d95ae3a8135e7bf61f", - "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", - "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c++", - "file_type": "C++ source, ASCII text", - "programming_language": "C", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], - "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_symbols": [ - "getBody" - ], - "source_strings": [] - } + "package": "http://127.0.0.1:8001/api/packages/ddedb539-32fd-43fd-b2c7-d50e5b718711/", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Builtins.cpp", + "type": "file", + "name": "Builtins.cpp", + "extension": ".cpp", + "size": 7566, + "md5": "6afa8fe94d28fb1926851fa7eaf2cffa", + "sha1": "5cf1719199d3183d7811a3f133d2a4bfdd2d7da4", + "sha256": "9ba7fe01cb504dd97c7694ab716291e1b9584ee6646219469c14d6724da7292b", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], 
+ "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_symbols": [ + "clang", + "Builtin::Context::getRecord", + "Builtin::Context::InitializeTarget", + "Builtin::Context::isBuiltinFunc", + "Builtin::Context::builtinIsSupported", + "Builtin::Context::initializeBuiltins", + "Builtin::Context::forgetBuiltin", + "Builtin::Context::getRequiredVectorWidth", + "Builtin::Context::isLike", + "Builtin::Context::isPrintfLike", + "Builtin::Context::isScanfLike", + "Builtin::Context::performsCallback", + "Builtin::Context::canBeRedeclared" + ], + "source_strings": [ + "\"", + "not a builtin function", + "\"", + "\"", + "Invalid builtin ID!", + "\"", + "\"", + "Already initialized target?", + "\"", + "1", + "'", + "f", + "'", + "'", + "f", + "'", + "\"", + "math.h", + "\"", + "100", + "1", + "200", + "1", + "0", + "0", + "0", + "'", + "V", + "'", + "0", + "'", + ":", + "'", + "\"", + "Vector width specifier must be followed by a ':'", + "\"", + "10", + "'", + ":", + "'", + "\"", + "Vector width specific must end with a ':'", + "\"", + "\"", + "Not passed a format string", + "\"", + "2", + "\"", + "Format string needs to be two characters long", + "\"", + "0", + "1", + "\"", + "Format string is not in the form", + "\\\"", + "xX", + "\\\"", + "\"", + "1", + "'", + ":", + "'", + "\"", + "Format specifier must be followed by a ':'", + "\"", + "'", + ":", + "'", + "\"", + "Format specifier must end with a ':'", + "\"", + "10", + "\"", + "pP", + "\"", + "\"", + "sS", + "\"", + "'", + "C", + "'", + "'", + "<", + "'", + "\"", + "Callback callee specifier must be followed by a '<'", + "\"", + "10", + "0", + "\"", + "Callee index is supposed to be positive!", + "\"", + "'", + ",", + "'", + "1", + "10", + "'", + ">", + "'", + "\"", + "Callback callee specifier must end with a '>'", + "\"" + ] + } } From b0cfbe86aa947a5430fce7f41e909adc9aa7ec26 Mon Sep 17 00:00:00 2001 From: 
Keshav Priyadarshi Date: Tue, 7 May 2024 12:58:15 +0530 Subject: [PATCH 5/9] Remove trailing whitespace Signed-off-by: Keshav Priyadarshi --- .../tutorial_symbol_and_string_collection.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst index 42d32f64..58c06983 100644 --- a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst +++ b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst @@ -217,7 +217,7 @@ Ctags Symbols ------------- - Send GET request to PurlDB with:: - + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_symbols .. warning:: @@ -290,7 +290,7 @@ Xgettext Strings ---------------- - Send GET request to PurlDB with:: - + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_source_strings .. warning:: @@ -362,7 +362,7 @@ Tree-Sitter Symbols and Strings ------------------------------- - Send GET request to PurlDB with:: - + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_tree_sitter_symbols - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` @@ -717,7 +717,7 @@ Pygments Symbols and Strings ------------------------------- - Send GET request to PurlDB with:: - + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_pygments_symbols - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` From 97646427ca1988838dd37150fcbbdea89a7cc94c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 May 2024 13:20:49 +0530 Subject: [PATCH 6/9] Reduce the size of example data in tutorial Signed-off-by: Keshav Priyadarshi --- .../tutorial_symbol_and_string_collection.rst | 969 +++++------------- 1 file changed, 231 insertions(+), 738 deletions(-) diff --git 
a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst index 58c06983..18b40376 100644 --- a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst +++ b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst @@ -12,13 +12,13 @@ collecting symbols and strings from codebase resources. Through out this tutorial we will use ``pkg:github/llvm/llvm-project@10.0.0`` and will show -the symbol and string for `llvm-project/clang/lib/Basic/Builtins.cpp `_ +the symbol and string for `llvm-project/clang/lib/Basic/Targets/BPF.cpp `_ resource. -.. code-block:: c - :name: Builtins.cpp +.. code-block:: cpp + :caption: BPF.cpp - //===--- Builtins.cpp - Builtin function implementation -------------------===// + //===--- BPF.cpp - Implement BPF target feature support -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -26,190 +26,45 @@ resource. // //===----------------------------------------------------------------------===// // - // This file implements various things for builtin functions. + // This file implements BPF TargetInfo objects. 
// //===----------------------------------------------------------------------===// - #include "clang/Basic/Builtins.h" - #include "clang/Basic/IdentifierTable.h" - #include "clang/Basic/LangOptions.h" - #include "clang/Basic/TargetInfo.h" + #include "BPF.h" + #include "Targets.h" + #include "clang/Basic/MacroBuilder.h" + #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringRef.h" + using namespace clang; + using namespace clang::targets; - static const Builtin::Info BuiltinInfo[] = { - { "not a builtin function", nullptr, nullptr, nullptr, ALL_LANGUAGES,nullptr}, + const Builtin::Info BPFTargetInfo::BuiltinInfo[] = { #define BUILTIN(ID, TYPE, ATTRS) \ - { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, - #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ - { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, - #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ - { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, - #include "clang/Basic/Builtins.def" + {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, + #include "clang/Basic/BuiltinsBPF.def" }; - const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const { - if (ID < Builtin::FirstTSBuiltin) - return BuiltinInfo[ID]; - assert(((ID - Builtin::FirstTSBuiltin) < - (TSRecords.size() + AuxTSRecords.size())) && - "Invalid builtin ID!"); - if (isAuxBuiltinID(ID)) - return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin]; - return TSRecords[ID - Builtin::FirstTSBuiltin]; - } - - void Builtin::Context::InitializeTarget(const TargetInfo &Target, - const TargetInfo *AuxTarget) { - assert(TSRecords.empty() && "Already initialized target?"); - TSRecords = Target.getTargetBuiltins(); - if (AuxTarget) - AuxTSRecords = AuxTarget->getTargetBuiltins(); - } - - bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) { - for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; ++i) - if (FuncName.equals(BuiltinInfo[i].Name)) - return strchr(BuiltinInfo[i].Attributes, 'f') != nullptr; - 
- return false; - } - - bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, - const LangOptions &LangOpts) { - bool BuiltinsUnsupported = - (LangOpts.NoBuiltin || LangOpts.isNoBuiltinFunc(BuiltinInfo.Name)) && - strchr(BuiltinInfo.Attributes, 'f'); - bool MathBuiltinsUnsupported = - LangOpts.NoMathBuiltin && BuiltinInfo.HeaderName && - llvm::StringRef(BuiltinInfo.HeaderName).equals("math.h"); - bool GnuModeUnsupported = !LangOpts.GNUMode && (BuiltinInfo.Langs & GNU_LANG); - bool MSModeUnsupported = - !LangOpts.MicrosoftExt && (BuiltinInfo.Langs & MS_LANG); - bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; - bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; - bool OclC2Unsupported = - (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; - bool OclCUnsupported = !LangOpts.OpenCL && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); - bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; - bool CPlusPlusUnsupported = - !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG; - return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && - !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && - !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported && - !CPlusPlusUnsupported; + void BPFTargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + Builder.defineMacro("__bpf__"); + Builder.defineMacro("__BPF__"); } - /// initializeBuiltins - Mark the identifiers for all the builtins with their - /// appropriate builtin ID # and mark any non-portable builtin identifiers as - /// such. - void Builtin::Context::initializeBuiltins(IdentifierTable &Table, - const LangOptions& LangOpts) { - // Step #1: mark all target-independent builtins with their ID's. 
- for (unsigned i = Builtin::NotBuiltin+1; i != Builtin::FirstTSBuiltin; ++i) - if (builtinIsSupported(BuiltinInfo[i], LangOpts)) { - Table.get(BuiltinInfo[i].Name).setBuiltinID(i); - } - - // Step #2: Register target-specific builtins. - for (unsigned i = 0, e = TSRecords.size(); i != e; ++i) - if (builtinIsSupported(TSRecords[i], LangOpts)) - Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin); + static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2", + "v3", "probe"}; - // Step #3: Register target-specific builtins for AuxTarget. - for (unsigned i = 0, e = AuxTSRecords.size(); i != e; ++i) - Table.get(AuxTSRecords[i].Name) - .setBuiltinID(i + Builtin::FirstTSBuiltin + TSRecords.size()); + bool BPFTargetInfo::isValidCPUName(StringRef Name) const { + return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); } - void Builtin::Context::forgetBuiltin(unsigned ID, IdentifierTable &Table) { - Table.get(getRecord(ID).Name).setBuiltinID(0); + void BPFTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const { + Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); } - unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const { - const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V'); - if (!WidthPos) - return 0; - - ++WidthPos; - assert(*WidthPos == ':' && - "Vector width specifier must be followed by a ':'"); - ++WidthPos; - - char *EndPos; - unsigned Width = ::strtol(WidthPos, &EndPos, 10); - assert(*EndPos == ':' && "Vector width specific must end with a ':'"); - return Width; - } - - bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx, - bool &HasVAListArg, const char *Fmt) const { - assert(Fmt && "Not passed a format string"); - assert(::strlen(Fmt) == 2 && - "Format string needs to be two characters long"); - assert(::toupper(Fmt[0]) == Fmt[1] && - "Format string is not in the form \"xX\""); - - const char *Like = ::strpbrk(getRecord(ID).Attributes, Fmt); - if (!Like) -
return false; - - HasVAListArg = (*Like == Fmt[1]); - - ++Like; - assert(*Like == ':' && "Format specifier must be followed by a ':'"); - ++Like; - - assert(::strchr(Like, ':') && "Format specifier must end with a ':'"); - FormatIdx = ::strtol(Like, nullptr, 10); - return true; - } - - bool Builtin::Context::isPrintfLike(unsigned ID, unsigned &FormatIdx, - bool &HasVAListArg) { - return isLike(ID, FormatIdx, HasVAListArg, "pP"); - } - - bool Builtin::Context::isScanfLike(unsigned ID, unsigned &FormatIdx, - bool &HasVAListArg) { - return isLike(ID, FormatIdx, HasVAListArg, "sS"); - } - - bool Builtin::Context::performsCallback(unsigned ID, - SmallVectorImpl<int> &Encoding) const { - const char *CalleePos = ::strchr(getRecord(ID).Attributes, 'C'); - if (!CalleePos) - return false; - - ++CalleePos; - assert(*CalleePos == '<' && - "Callback callee specifier must be followed by a '<'"); - ++CalleePos; - - char *EndPos; - int CalleeIdx = ::strtol(CalleePos, &EndPos, 10); - assert(CalleeIdx >= 0 && "Callee index is supposed to be positive!"); - Encoding.push_back(CalleeIdx); - - while (*EndPos == ',') { - const char *PayloadPos = EndPos + 1; - - int PayloadIdx = ::strtol(PayloadPos, &EndPos, 10); - Encoding.push_back(PayloadIdx); - } - - assert(*EndPos == '>' && "Callback callee specifier must end with a '>'"); - return true; - } - - bool Builtin::Context::canBeRedeclared(unsigned ID) const { - return ID == Builtin::NotBuiltin || - ID == Builtin::BI__va_start || - (!hasReferenceArgsOrResult(ID) && - !hasCustomTypechecking(ID)); + ArrayRef<Builtin::Info> BPFTargetInfo::getTargetBuiltins() const { + return llvm::makeArrayRef(BuiltinInfo, clang::BPF::LastTSBuiltin - + Builtin::FirstTSBuiltin); } @@ -226,63 +81,53 @@ Ctags Symbols - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` for resources. -- Below is the Ctags symbol for ``clang/lib/Basic/Builtins.cpp`` - file in ``extra_data`` field. - ..
code-block:: json + :caption: Ctags symbol for ``clang/lib/Basic/Targets/BPF.cpp`` in ``extra_data`` field + :emphasize-lines: 35-41 { - "package": "http://127.0.0.1:8001/api/packages/ddedb539-32fd-43fd-b2c7-d50e5b718711/", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Builtins.cpp", - "type": "file", - "name": "Builtins.cpp", - "extension": ".cpp", - "size": 7566, - "md5": "6afa8fe94d28fb1926851fa7eaf2cffa", - "sha1": "5cf1719199d3183d7811a3f133d2a4bfdd2d7da4", - "sha256": "9ba7fe01cb504dd97c7694ab716291e1b9584ee6646219469c14d6724da7292b", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c", - "file_type": "C source, ASCII text", - "programming_language": "C++", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], - "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_symbols": [ - "BUILTIN", - "BuiltinInfo", - "InitializeTarget", - "LANGBUILTIN", - "LIBBUILTIN", - "builtinIsSupported", - "canBeRedeclared", - "forgetBuiltin", - "getRecord", - "getRequiredVectorWidth", - "initializeBuiltins", - "isBuiltinFunc", - "isLike", - "isPrintfLike", - "isScanfLike", - "performsCallback" - ] - } + "package": "http://127.0.0.1:8001/api/packages/", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Targets/BPF.cpp", + "type": "file", + "name": "BPF.cpp", + "extension": ".cpp", + "size": 1788, + "md5": "382b406d1023d12cd8f28106043774ee", + "sha1": "366146c8228c4e2cd46c47618fa3211ce48d96e2", + "sha256": "d7609c502c7d462dcee1b631a80eb765ad7d10597991d88c3d4cd2ae0370eeba", + 
"sha512": null, + "git_sha1": null, + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_symbols": [ + "BUILTIN", + "BuiltinInfo", + "ValidCPUNames", + "fillValidCPUList", + "getTargetBuiltins", + "getTargetDefines", + "isValidCPUName" + ] + } } @@ -299,63 +144,53 @@ Xgettext Strings - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_strings`` for resources. -- Below is the Xgettext strings for ``clang/lib/Basic/Builtins.cpp`` - file in ``extra_data`` field. - .. code-block:: json + :caption: Xgettext strings for ``clang/lib/Basic/Targets/BPF.cpp`` in ``extra_data`` field + :emphasize-lines: 35-41 { - "package": "http://127.0.0.1:8001/api/packages/ddedb539-32fd-43fd-b2c7-d50e5b718711/", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Builtins.cpp", - "type": "file", - "name": "Builtins.cpp", - "extension": ".cpp", - "size": 7566, - "md5": "6afa8fe94d28fb1926851fa7eaf2cffa", - "sha1": "5cf1719199d3183d7811a3f133d2a4bfdd2d7da4", - "sha256": "9ba7fe01cb504dd97c7694ab716291e1b9584ee6646219469c14d6724da7292b", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c", - "file_type": "C source, ASCII text", - "programming_language": "C++", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], 
- "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_strings": [ - "not a builtin function" - "Invalid builtin ID!" - "Already initialized target?" - "math.h" - "Vector width specifier must be followed by a ':'" - "Vector width specific must end with a ':'" - "Not passed a format string" - "Format string needs to be two characters long" - "Format string is not in the form \\\"xX\\" - "Format specifier must be followed by a ':'" - "Format specifier must end with a ':'" - "pP" - "sS" - "Callback callee specifier must be followed by a '<'" - "Callee index is supposed to be positive!" - "Callback callee specifier must end with a '>'" - ] - } + "package": "http://127.0.0.1:8001/api/packages/", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Targets/BPF.cpp", + "type": "file", + "name": "BPF.cpp", + "extension": ".cpp", + "size": 1788, + "md5": "382b406d1023d12cd8f28106043774ee", + "sha1": "366146c8228c4e2cd46c47618fa3211ce48d96e2", + "sha256": "d7609c502c7d462dcee1b631a80eb765ad7d10597991d88c3d4cd2ae0370eeba", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + "source_strings": [ + "__bpf__", + "__BPF__", + "generic", + "v", + "v", + "v", + "probe" + ] + } } Tree-Sitter Symbols and Strings @@ -368,27 +203,26 @@ Tree-Sitter Symbols and Strings - Once the 
indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` and ``source_strings`` for resources. -- Below is the Tree-Sitter symbols and strings for ``clang/lib/Basic/Builtins.cpp`` file - in ``extra_data`` field. - .. code-block:: json + :caption: Tree-Sitter symbols and strings for ``clang/lib/Basic/Targets/BPF.cpp`` in ``extra_data`` field + :emphasize-lines: 35-69, 72-84 { - "package": "http://127.0.0.1:8001/api/packages//", + "package": "http://127.0.0.1:8001/api/packages/", "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/include/clang/Analysis/BodyFarm.h", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Targets/BPF.cpp", "type": "file", - "name": "BodyFarm.h", - "extension": ".h", - "size": 1509, - "md5": "808b7438da9841d95ae3a8135e7bf61f", - "sha1": "38093fc0f043d0e639cc0b225e1acc038ffb7020", - "sha256": "83693b005ba387627ad10cef752d2559fe724cc0c7d4e86c4947f22403273e0c", + "name": "BPF.cpp", + "extension": ".cpp", + "size": 1788, + "md5": "382b406d1023d12cd8f28106043774ee", + "sha1": "366146c8228c4e2cd46c47618fa3211ce48d96e2", + "sha256": "d7609c502c7d462dcee1b631a80eb765ad7d10597991d88c3d4cd2ae0370eeba", "sha512": null, "git_sha1": null, - "mime_type": "text/x-c++", - "file_type": "C++ source, ASCII text", - "programming_language": "C", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", "is_binary": false, "is_text": true, "is_archive": false, @@ -408,8 +242,8 @@ Tree-Sitter Symbols and Strings "extra_data": { "source_symbols": [ "clang", + "targets", "BuiltinInfo", - "ALL_LANGUAGES", "BUILTIN", "ID", "TYPE", @@ -417,298 +251,46 @@ Tree-Sitter Symbols and Strings "TYPE", "ATTRS", "ALL_LANGUAGES", - "LANGBUILTIN", - "LIBBUILTIN", - "ID", - "TYPE", - "ATTRS", - "HEADER", - "LANGS", - "getRecord", - "ID", - "ID", - 
"FirstTSBuiltin", - "BuiltinInfo", - "ID", - "assert", - "ID", - "FirstTSBuiltin", - "TSRecords", - "AuxTSRecords", - "isAuxBuiltinID", - "ID", - "AuxTSRecords", - "getAuxBuiltinID", - "ID", - "FirstTSBuiltin", - "TSRecords", - "ID", - "FirstTSBuiltin", - "InitializeTarget", - "Target", - "AuxTarget", - "assert", - "TSRecords", - "TSRecords", - "Target", - "AuxTarget", - "AuxTSRecords", - "AuxTarget", - "isBuiltinFunc", - "FuncName", - "i", - "NotBuiltin", - "i", - "FirstTSBuiltin", - "i", - "FuncName", - "BuiltinInfo", - "i", - "strchr", - "BuiltinInfo", - "i", - "builtinIsSupported", + "getTargetDefines", + "Opts", + "Builder", + "Builder", + "Builder", + "ValidCPUNames", + "isValidCPUName", + "Name", + "find", + "ValidCPUNames", + "Name", + "end", + "ValidCPUNames", + "fillValidCPUList", + "Values", + "Values", + "begin", + "ValidCPUNames", + "end", + "ValidCPUNames", + "getTargetBuiltins", + "makeArrayRef", "BuiltinInfo", - "LangOpts", - "BuiltinsUnsupported", - "LangOpts", - "LangOpts", - "BuiltinInfo", - "strchr", - "BuiltinInfo", - "MathBuiltinsUnsupported", - "LangOpts", - "BuiltinInfo", - "StringRef", - "BuiltinInfo", - "GnuModeUnsupported", - "LangOpts", - "BuiltinInfo", - "GNU_LANG", - "MSModeUnsupported", - "LangOpts", - "BuiltinInfo", - "MS_LANG", - "ObjCUnsupported", - "LangOpts", - "BuiltinInfo", - "OBJC_LANG", - "OclC1Unsupported", - "LangOpts", - "BuiltinInfo", - "ALL_OCLC_LANGUAGES", - "OCLC1X_LANG", - "OclC2Unsupported", - "LangOpts", - "LangOpts", - "BuiltinInfo", - "ALL_OCLC_LANGUAGES", - "OCLC20_LANG", - "OclCUnsupported", - "LangOpts", - "BuiltinInfo", - "ALL_OCLC_LANGUAGES", - "OpenMPUnsupported", - "LangOpts", - "BuiltinInfo", - "OMP_LANG", - "CPlusPlusUnsupported", - "LangOpts", - "BuiltinInfo", - "CXX_LANG", - "BuiltinsUnsupported", - "MathBuiltinsUnsupported", - "OclCUnsupported", - "OclC1Unsupported", - "OclC2Unsupported", - "OpenMPUnsupported", - "GnuModeUnsupported", - "MSModeUnsupported", - "ObjCUnsupported", - 
"CPlusPlusUnsupported", - "initializeBuiltins", - "Table", - "LangOpts", - "i", - "NotBuiltin", - "i", - "FirstTSBuiltin", - "i", - "builtinIsSupported", - "BuiltinInfo", - "i", - "LangOpts", - "Table", - "BuiltinInfo", - "i", - "i", - "i", - "e", - "TSRecords", - "i", - "e", - "i", - "builtinIsSupported", - "TSRecords", - "i", - "LangOpts", - "Table", - "TSRecords", - "i", - "i", - "FirstTSBuiltin", - "i", - "e", - "AuxTSRecords", - "i", - "e", - "i", - "Table", - "AuxTSRecords", - "i", - "i", - "FirstTSBuiltin", - "TSRecords", - "forgetBuiltin", - "ID", - "Table", - "Table", - "getRecord", - "ID", - "getRequiredVectorWidth", - "ID", - "WidthPos", - "strchr", - "getRecord", - "ID", - "WidthPos", - "WidthPos", - "assert", - "WidthPos", - "WidthPos", - "EndPos", - "Width", - "strtol", - "WidthPos", - "EndPos", - "assert", - "EndPos", - "Width", - "isLike", - "ID", - "FormatIdx", - "HasVAListArg", - "Fmt", - "assert", - "Fmt", - "assert", - "strlen", - "Fmt", - "assert", - "toupper", - "Fmt", - "Fmt", - "Like", - "strpbrk", - "getRecord", - "ID", - "Fmt", - "Like", - "HasVAListArg", - "Like", - "Fmt", - "Like", - "assert", - "Like", - "Like", - "assert", - "strchr", - "Like", - "FormatIdx", - "strtol", - "Like", - "isPrintfLike", - "ID", - "FormatIdx", - "HasVAListArg", - "isLike", - "ID", - "FormatIdx", - "HasVAListArg", - "isScanfLike", - "ID", - "FormatIdx", - "HasVAListArg", - "isLike", - "ID", - "FormatIdx", - "HasVAListArg", - "performsCallback", - "ID", - "Encoding", - "CalleePos", - "strchr", - "getRecord", - "ID", - "CalleePos", - "CalleePos", - "assert", - "CalleePos", - "CalleePos", - "EndPos", - "CalleeIdx", - "strtol", - "CalleePos", - "EndPos", - "assert", - "CalleeIdx", - "Encoding", - "CalleeIdx", - "EndPos", - "PayloadPos", - "EndPos", - "PayloadIdx", - "strtol", - "PayloadPos", - "EndPos", - "Encoding", - "PayloadIdx", - "assert", - "EndPos", - "canBeRedeclared", - "ID", - "ID", - "NotBuiltin", - "ID", - "BI__va_start", - "hasReferenceArgsOrResult", 
- "ID", - "hasCustomTypechecking", - "ID" + "LastTSBuiltin", + "FirstTSBuiltin" ], "source_strings": [ - "clang/Basic/Builtins.h", - "clang/Basic/IdentifierTable.h", - "clang/Basic/LangOptions.h", - "clang/Basic/TargetInfo.h", + "BPF.h", + "Targets.h", + "clang/Basic/MacroBuilder.h", + "clang/Basic/TargetBuiltins.h", "llvm/ADT/StringRef.h", - "not a builtin function", - "clang/Basic/Builtins.def", - "Invalid builtin ID!", - "Already initialized target?", - "math.h", - "Vector width specifier must be followed by a ':'", - "Vector width specific must end with a ':'", - "Not passed a format string", - "Format string needs to be two characters long", - "Format string is not in the form xX", - "Format specifier must be followed by a ':'", - "Format specifier must end with a ':'", - "pP", - "sS", - "Callback callee specifier must be followed by a '<'", - "Callee index is supposed to be positive!", - "Callback callee specifier must end with a '>'" + "clang/Basic/BuiltinsBPF.def", + "__bpf__", + "__BPF__", + "generic", + "v1", + "v2", + "v3", + "probe" ] } } @@ -723,163 +305,74 @@ Pygments Symbols and Strings - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` and ``source_strings`` for resources. -- Below is the Pygments symbols and strings for ``clang/lib/Basic/Builtins.cpp`` file - in ``extra_data`` field. .. 
code-block:: json + :caption: Pygments symbols and strings for ``clang/lib/Basic/Targets/BPF.cpp`` in ``extra_data`` field + :emphasize-lines: 35-40, 43-63 { - "package": "http://127.0.0.1:8001/api/packages/ddedb539-32fd-43fd-b2c7-d50e5b718711/", - "purl": "pkg:github/llvm/llvm-project@10.0.0", - "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Builtins.cpp", - "type": "file", - "name": "Builtins.cpp", - "extension": ".cpp", - "size": 7566, - "md5": "6afa8fe94d28fb1926851fa7eaf2cffa", - "sha1": "5cf1719199d3183d7811a3f133d2a4bfdd2d7da4", - "sha256": "9ba7fe01cb504dd97c7694ab716291e1b9584ee6646219469c14d6724da7292b", - "sha512": null, - "git_sha1": null, - "mime_type": "text/x-c", - "file_type": "C source, ASCII text", - "programming_language": "C++", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_media": false, - "is_key_file": false, - "detected_license_expression": "", - "detected_license_expression_spdx": "", - "license_detections": [], - "license_clues": [], - "percentage_of_license_text": null, - "copyrights": [], - "holders": [], - "authors": [], - "package_data": [], - "emails": [], - "urls": [], - "extra_data": { - "source_symbols": [ - "clang", - "Builtin::Context::getRecord", - "Builtin::Context::InitializeTarget", - "Builtin::Context::isBuiltinFunc", - "Builtin::Context::builtinIsSupported", - "Builtin::Context::initializeBuiltins", - "Builtin::Context::forgetBuiltin", - "Builtin::Context::getRequiredVectorWidth", - "Builtin::Context::isLike", - "Builtin::Context::isPrintfLike", - "Builtin::Context::isScanfLike", - "Builtin::Context::performsCallback", - "Builtin::Context::canBeRedeclared" - ], - "source_strings": [ - "\"", - "not a builtin function", - "\"", - "\"", - "Invalid builtin ID!", - "\"", - "\"", - "Already initialized target?", - "\"", - "1", - "'", - "f", - "'", - "'", - "f", - "'", - "\"", - "math.h", - "\"", - "100", - "1", - "200", - "1", - "0", - "0", - "0", - "'", - 
"V", - "'", - "0", - "'", - ":", - "'", - "\"", - "Vector width specifier must be followed by a ':'", - "\"", - "10", - "'", - ":", - "'", - "\"", - "Vector width specific must end with a ':'", - "\"", - "\"", - "Not passed a format string", - "\"", - "2", - "\"", - "Format string needs to be two characters long", - "\"", - "0", - "1", - "\"", - "Format string is not in the form", - "\\\"", - "xX", - "\\\"", - "\"", - "1", - "'", - ":", - "'", - "\"", - "Format specifier must be followed by a ':'", - "\"", - "'", - ":", - "'", - "\"", - "Format specifier must end with a ':'", - "\"", - "10", - "\"", - "pP", - "\"", - "\"", - "sS", - "\"", - "'", - "C", - "'", - "'", - "<", - "'", - "\"", - "Callback callee specifier must be followed by a '<'", - "\"", - "10", - "0", - "\"", - "Callee index is supposed to be positive!", - "\"", - "'", - ",", - "'", - "1", - "10", - "'", - ">", - "'", - "\"", - "Callback callee specifier must end with a '>'", - "\"" - ] - } + "package": "http://127.0.0.1:8001/api/packages/", + "purl": "pkg:github/llvm/llvm-project@10.0.0", + "path": "llvm-project-llvmorg-10.0.0.tar.gz-extract/llvm-project-llvmorg-10.0.0/clang/lib/Basic/Targets/BPF.cpp", + "type": "file", + "name": "BPF.cpp", + "extension": ".cpp", + "size": 1788, + "md5": "382b406d1023d12cd8f28106043774ee", + "sha1": "366146c8228c4e2cd46c47618fa3211ce48d96e2", + "sha256": "d7609c502c7d462dcee1b631a80eb765ad7d10597991d88c3d4cd2ae0370eeba", + "sha512": null, + "git_sha1": null, + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C++", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "extra_data": { + 
"source_symbols": [ + "clang", + "clang", + "targets", + "BPFTargetInfo::getTargetDefines", + "BPFTargetInfo::isValidCPUName", + "BPFTargetInfo::fillValidCPUList" + ], + "source_strings": [ + "\"", + "__bpf__", + "\"", + "\"", + "__BPF__", + "\"", + "\"", + "generic", + "\"", + "\"", + "v1", + "\"", + "\"", + "v2", + "\"", + "\"", + "v3", + "\"", + "\"", + "probe", + "\"" + ] + } } From b5302c68c6b0cbd950627c38cee8abf3a8cbd8a6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 9 May 2024 13:41:44 +0530 Subject: [PATCH 7/9] Add toggle to show/hide code Signed-off-by: Keshav Priyadarshi --- .../tutorial_symbol_and_string_collection.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst index 18b40376..bf39daef 100644 --- a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst +++ b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst @@ -15,8 +15,13 @@ Through out this tutorial we will use ``pkg:github/llvm/llvm-project@10.0.0`` an the symbol and string for `llvm-project/clang/lib/Basic/Targets/BPF.cpp `_ resource. +.. raw:: html + +
+ BPF.cpp +
+ .. code-block:: cpp - :caption: BPF.cpp //===--- BPF.cpp - Implement BPF target feature support -------------------===// // @@ -67,6 +72,12 @@ resource. Builtin::FirstTSBuiltin); } +.. raw:: html + +
+
+ + Ctags Symbols ------------- From 6f926f072393b8c39b1d10b3a467fceef04bf03f Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 9 May 2024 16:13:14 +0530 Subject: [PATCH 8/9] Rename symbols pipelines Reference: https://github.com/nexB/scancode.io/issues/1220 Signed-off-by: Keshav Priyadarshi --- .../tutorial_symbol_and_string_collection.rst | 13 ++++++------- docs/source/purldb/symbol_and_string_collection.rst | 12 ++++++------ minecode/model_utils.py | 8 ++++---- packagedb/api.py | 6 +++--- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst index bf39daef..661798bf 100644 --- a/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst +++ b/docs/source/how-to-guides/tutorial_symbol_and_string_collection.rst @@ -78,16 +78,15 @@ resource.
- Ctags Symbols ------------- - Send GET request to PurlDB with:: - /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_symbols + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_symbols_ctags .. warning:: - The ``collect_symbols`` pipeline requires ``universal-ctags``. + The ``collect_symbols_ctags`` pipeline requires ``universal-ctags``. - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` for resources. @@ -147,10 +146,10 @@ Xgettext Strings - Send GET request to PurlDB with:: - /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_source_strings + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_strings_gettext .. warning:: - The ``collect_source_strings`` pipeline requires ``gettext``. + The ``collect_strings_gettext`` pipeline requires ``gettext``. - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_strings`` for resources. @@ -209,7 +208,7 @@ Tree-Sitter Symbols and Strings - Send GET request to PurlDB with:: - /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_tree_sitter_symbols + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_symbols_tree_sitter - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` and ``source_strings`` for resources. 
@@ -311,7 +310,7 @@ Pygments Symbols and Strings - Send GET request to PurlDB with:: - /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_pygments_symbols + /api/collect/?purl=pkg:github/llvm/llvm-project@10.0.0&addon_pipelines=collect_symbols_pygments - Once the indexing has completed visit ``/api/resources/?purl=pkg:github/llvm/llvm-project@10.0.0`` to get the ``source_symbols`` and ``source_strings`` for resources. diff --git a/docs/source/purldb/symbol_and_string_collection.rst b/docs/source/purldb/symbol_and_string_collection.rst index 13cfa9ed..dc674fa4 100644 --- a/docs/source/purldb/symbol_and_string_collection.rst +++ b/docs/source/purldb/symbol_and_string_collection.rst @@ -14,17 +14,17 @@ endpoint along with the symbol/string addon_pipeline, it fetches the archive download_url and creates a package for the PURL with relevant metadata. Thereafter, a scan job is scheduled which downloads the archive of the PURL and runs the `scan_single_package `_ -package pipeline. Scan job also runs the requested addon pipelines. +package pipeline. Scan job also runs the requested addon_pipelines. Upon completion of the scan job, the package is updated with resource data along with the ``source_symbols`` and ``source_strings`` in the ``extra_data`` field of resources. -Currently PurlDB supports these addon pipeline for symbol/string collection. +Currently PurlDB supports these addon_pipelines for symbol/string collection. -- ``collect_symbols`` -- ``collect_source_strings`` -- ``collect_tree_sitter_symbol`` -- ``collect_pygments_symbols`` +- ``collect_symbols_ctags`` +- ``collect_strings_gettext`` +- ``collect_symbols_tree_sitter`` +- ``collect_symbols_pygments`` See the detailed tutorial on :ref:`tutorial_symbol_and_string_collection` in PurlDB. 
diff --git a/minecode/model_utils.py b/minecode/model_utils.py index d43aa816..882d4cb8 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -35,10 +35,10 @@ # These are the list of supported addon pipelines to run when we scan a Package for # indexing. SUPPORTED_ADDON_PIPELINES = ( - 'collect_pygments_symbols', - 'collect_source_strings', - 'collect_symbols', - 'collect_tree_sitter_symbols', + 'collect_strings_gettext', + 'collect_symbols_ctags', + 'collect_symbols_pygments', + 'collect_symbols_tree_sitter', 'inspect_elf_binaries', ) diff --git a/packagedb/api.py b/packagedb/api.py index 1fcb9046..12c5f4f6 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -679,7 +679,7 @@ class CollectViewSet(viewsets.ViewSet): **Example:** - /api/collect/?purl=pkg:npm/foo@1.2.3&addon_pipelines=collect_symbols&addon_pipelines=inspect_elf_binaries + /api/collect/?purl=pkg:npm/foo@1.2.3&addon_pipelines=collect_symbols_ctags&addon_pipelines=inspect_elf_binaries **Note:** Use `Index packages` for bulk indexing/reindexing of packages. 
@@ -773,7 +773,7 @@ def index_packages(self, request, *args, **kwargs): "purl": "pkg:npm/less@1.0.32", "vers": null, "source_purl": None, - "addon_pipelines": ['collect_symbols'] + "addon_pipelines": ['collect_symbols_ctags'] }, { "purl": "pkg:npm/less", @@ -785,7 +785,7 @@ def index_packages(self, request, *args, **kwargs): "purl": "pkg:npm/foobar", "vers": null, "source_purl": None, - "addon_pipelines": ['inspect_elf_binaries', 'collect_symbols'] + "addon_pipelines": ['inspect_elf_binaries', 'collect_symbols_ctags'] } ] "reindex": true, From 7db678c90bdd1be695a2e77ff496f0be2534a0be Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 9 May 2024 16:25:41 +0530 Subject: [PATCH 9/9] Add CHANGELOG for symbol and string collection Signed-off-by: Keshav Priyadarshi --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2e1e556c..13dbb88e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,11 @@ Next Release - Add `/api/from_purl/purl2git` endpoint to get a git repo for a purl. - Add `/api/to_purl/go` endpoint to get a purl from a golang import string or a package string in go.mod. +- Support indexing of PURLs listed in https://github.com/nexB/purldb/issues/326, + https://github.com/nexB/purldb/issues/327, https://github.com/nexB/purldb/issues/328, + https://github.com/nexB/purldb/issues/329 and https://github.com/nexB/purldb/issues/356. +- Support ``addon_pipelines`` for symbol and string collection in ``/api/collect`` endpoint. https://github.com/nexB/purldb/pull/393 +- Store ``source_symbols`` and ``source_strings`` in ``extra_data`` field. https://github.com/nexB/purldb/pull/351 v4.0.0