Import llama.cpp

https://github.com/ggerganov/llama.cpp 0b2da20538d01926b77ea237dd1c930c4d20b686 See third_party/ggml/README.cosmo for changes
jart · Apr 27, 2023 · e8b4390 · e8b4390
1 parent f42089d
commit e8b4390
Show file tree

Hide file tree

Showing 14 changed files with 18,313 additions and 2 deletions.
diff --git a/Makefile b/Makefile
@@ -144,6 +144,7 @@ include libc/stdio/stdio.mk			# │
 include third_party/libcxx/libcxx.mk		# │
 include net/net.mk				# │
 include third_party/vqsort/vqsort.mk		# │
+include third_party/ggml/ggml.mk		# │
 include libc/log/log.mk				# │
 include third_party/bzip2/bzip2.mk		# │
 include dsp/core/core.mk			# │

diff --git a/third_party/ggml/LICENSE b/third_party/ggml/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/third_party/ggml/README.cosmo b/third_party/ggml/README.cosmo
@@ -0,0 +1,21 @@
+DESCRIPTION
+
+  ggml is a machine learning library useful for LLM inference on CPUs
+
+LICENSE
+
+  MIT
+
+ORIGIN
+
+  https://github.com/ggerganov/llama.cpp
+  commit 0b2da20538d01926b77ea237dd1c930c4d20b686
+  Author: Stephan Walter <[email protected]>
+  Date:   Wed Apr 26 20:26:42 2023 +0000
+  ggml : slightly faster AVX2 implementation for Q5 (#1197)
+
+LOCAL CHANGES
+
+  - Refactor headers per cosmo convention
+  - Replace code like 'ggjt' with READ32BE("ggjt")
+  - Remove C++ exceptions; use Die() function instead
diff --git a/third_party/ggml/common.cc b/third_party/ggml/common.cc
diff --git a/third_party/ggml/common.h b/third_party/ggml/common.h
@@ -0,0 +1,103 @@
+// -*- c++ -*-
+// clang-format off
+#ifndef COSMOPOLITAN_THIRD_PARTY_GGML_COMMON_H_
+#define COSMOPOLITAN_THIRD_PARTY_GGML_COMMON_H_
+#include "third_party/ggml/llama.h"
+#include "third_party/libcxx/string"
+#include "third_party/libcxx/vector"
+#include "third_party/libcxx/random"
+#include "third_party/libcxx/thread"
+#if !(__ASSEMBLER__ + __LINKER__ + 0)
+// clang-format off
+// Various helper functions and utilities
+
+//
+// CLI argument parsing
+//
+
+struct gpt_params { // settings bundle; each member's initializer below is its default value
+    int32_t seed          = -1;   // RNG seed
+    int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency()); // capped at 4; NOTE(review): hardware_concurrency() may return 0 when the count is unknown, which makes this default 0 - confirm callers cope
+    int32_t n_predict     = 128;  // new tokens to predict
+    int32_t repeat_last_n = 64;   // last n tokens to penalize
+    int32_t n_parts       = -1;   // number of model parts (-1 = determine from model dimensions)
+    int32_t n_ctx         = 512;  // context size
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_keep        = 0;    // number of tokens to keep from initial prompt
+
+    // sampling parameters
+    int32_t top_k = 40;
+    float   top_p = 0.95f;
+    float   temp  = 0.80f;
+    float   repeat_penalty  = 1.10f;
+
+    std::string model  = "models/lamma-7B/ggml-model.bin"; // model path; NOTE(review): "lamma" looks like a typo for "llama" - confirm before changing the default
+    std::string prompt = "";
+    std::string input_prefix = "";       // string to prefix user inputs with
+    std::vector<std::string> antiprompt; // when one of these strings is seen, more user input is prompted
+
+    std::string lora_adapter = "";  // lora adapter path
+    std::string lora_base = "";     // base model path for the lora adapter
+
+    bool memory_f16        = true;  // use f16 instead of f32 for the kv memory
+    bool random_prompt     = false; // if true, randomize the prompt when none is provided (default: do not)
+    bool use_color         = false; // use color to distinguish generations and inputs
+    bool interactive       = false; // interactive mode
+
+    bool embedding         = false; // get only sentence embedding
+    bool interactive_first = false; // wait for user input immediately
+
+    bool instruct          = false; // instruction mode (used for Alpaca models)
+    bool ignore_eos        = false; // do not stop generating after eos
+    bool perplexity        = false; // compute perplexity over the prompt
+    bool use_mmap          = true;  // use mmap for faster loads
+    bool use_mlock         = false; // use mlock to keep model in memory
+    bool mem_test          = false; // compute maximum memory usage
+    bool verbose_prompt    = false; // print prompt tokens before generation
+};
+
+bool gpt_params_parse(int argc, char ** argv, gpt_params & params); // params is a non-const reference, so the callee can write into it; meaning of the bool result is not shown in this header - see the definition
+
+void gpt_print_usage(int argc, char ** argv, const gpt_params & params); // params is const here (read-only); presumably its values are shown as defaults - confirm in the definition
+
+std::string gpt_random_prompt(std::mt19937 & rng); // takes the caller's RNG by non-const reference and returns a string by value
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos); // returns the tokens by value; add_bos presumably requests a leading beginning-of-sequence token - confirm in the definition
+
+//
+// Console utils
+//
+
+#define ANSI_COLOR_RED     "\x1b[31m" // ESC [ 31 m : SGR foreground red
+#define ANSI_COLOR_GREEN   "\x1b[32m" // ESC [ 32 m : SGR foreground green
+#define ANSI_COLOR_YELLOW  "\x1b[33m" // ESC [ 33 m : SGR foreground yellow
+#define ANSI_COLOR_BLUE    "\x1b[34m" // ESC [ 34 m : SGR foreground blue
+#define ANSI_COLOR_MAGENTA "\x1b[35m" // ESC [ 35 m : SGR foreground magenta
+#define ANSI_COLOR_CYAN    "\x1b[36m" // ESC [ 36 m : SGR foreground cyan
+#define ANSI_COLOR_RESET   "\x1b[0m"  // ESC [ 0 m  : SGR reset of all attributes
+#define ANSI_BOLD          "\x1b[1m"  // ESC [ 1 m  : SGR bold / increased intensity
+
+enum console_color_t { // color selector accepted by set_console_color() and stored in console_state
+    CONSOLE_COLOR_DEFAULT=0, // explicitly 0; also the initial value of console_state::color
+    CONSOLE_COLOR_PROMPT, // implicitly 1
+    CONSOLE_COLOR_USER_INPUT // implicitly 2
+};
+
+struct console_state { // state object passed by reference to set_console_color()
+    bool use_color = false; // off by default
+    console_color_t color = CONSOLE_COLOR_DEFAULT; // starts as the default color; presumably tracks the color currently in effect - confirm in the definition of set_console_color()
+};
+
+void set_console_color(console_state & con_st, console_color_t color); // con_st is a non-const reference, so the callee can update it; the definition is not in this header
+
+#if defined (_WIN32) // the two declarations below exist only when _WIN32 is defined
+void win32_console_init(bool enable_color);
+void win32_utf8_encode(const std::wstring & wstr, std::string & str); // wstr is the read-only input, str the writable output; presumably a wide-string to UTF-8 conversion - confirm in the definition
+#endif // defined (_WIN32)
+
+#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
+#endif /* COSMOPOLITAN_THIRD_PARTY_GGML_COMMON_H_ */