diff --git a/Makefile b/Makefile
index 40187c4a25e62..1273eb5511b58 100644
--- a/Makefile
+++ b/Makefile
@@ -569,6 +569,9 @@ perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+cmap-example: examples/cmap-example/cmap-example.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

diff --git a/common/common.cpp b/common/common.cpp
index 0f55c33a713a7..1a6156473ab93 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -626,6 +626,9 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     return true;
 }
 
+// Some items were missing from this help list, so the wording needs checking (they were all inserted at the end, so they need repositioning too):
+// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill, --prompt-file
+// some corresponding changes to the sequence of the printf() calls may be needed
 void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("usage: %s [options]\n", argv[0]);
     printf("\n");
@@ -672,7 +675,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
     printf(" --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
     printf(" --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
-    printf(" -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n");
+    printf(" -l T, --logit-bias T T = TOKEN_ID(plus/minus)BIAS\n");
     printf(" modifies the likelihood of token appearing in the completion,\n");
     printf(" i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n");
     printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
@@ -687,7 +690,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n");
     printf(" --rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)\n");
     printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
-    printf(" --no-penalize-nl do not penalize newline token\n");
+    printf(" --no-penalize-nl do not penalize newline token (by default the newline token IS penalized)\n");
     printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
     printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
     printf(" --temp N temperature (default: %.1f)\n", (double)params.temp);
@@ -734,6 +737,18 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" draft model for speculative decoding (default: %s)\n", params.model.c_str());
     printf(" -ld LOGDIR, --logdir LOGDIR\n");
     printf(" path under which to save YAML logs (no logging if unset)\n");
+    printf(" --ppl-stride stride for perplexity calculations. 0 (default): the pre-existing approach is used.\n");
+    printf(" --ppl-output-type 0 (default): ppl output as usual, 1: output num_tokens, ppl, one pair per line\n");
+    printf(" --embedding 0 (default): if 1, output only the sentence embedding\n");
+    printf(" --beams N 0 (default): if non-zero, use beam search of width N.\n");
+    printf(" --memory-f32 0 (default): if 1, use f32 instead of f16 for memory key+value\n");
+    printf(" --no-mmap 0 (default): if 1, do not memory-map the model (mmap gives faster loads)\n");
+    printf(" --mlock 0 (default): if 1, keep the model locked in RAM\n");
+    printf(" --use-color 0 (default): if 1, use color to distinguish generations from inputs\n");
+    printf(" --nprobs N if > 0, output the probabilities of the top N tokens\n");
+    printf(" --alias model alias (default: 'unknown')\n");
+    printf(" --infill 0 (default): if 1, use infill mode\n");
+    printf(" --prompt-file name of external prompt file\n");
     printf("\n");
 }
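The TODO above notes that these late additions still need repositioning inside gpt_print_usage(). One way to make reordering (and future insertions) cheap is to drive the usage text from a table rather than a fixed sequence of printf() calls. The following is only a sketch of that idea, not part of the patch, and the entries shown are an abbreviated subset:

// Sketch (not part of the patch): a table-driven gpt_print_usage() variant,
// so repositioning a help line means moving an array entry, not a printf call.
#include <cstdio>

struct help_entry {
    const char * flag;
    const char * text;
};

static const help_entry k_help[] = {
    // abbreviated subset of the options listed above
    { "--ppl-stride",      "stride for perplexity calculations (0 = pre-existing approach)" },
    { "--ppl-output-type", "0: ppl output as usual, 1: num_tokens, ppl, one pair per line"  },
    { "--infill",          "0 (default): if 1, use infill mode"                             },
};

static void print_usage_table(void) {
    for (const help_entry & e : k_help) {
        printf(" %-18s %s\n", e.flag, e.text);
    }
}

int main() {
    print_usage_table();
    return 0;
}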
diff --git a/common/common.h b/common/common.h
index c802152791797..ee5c1909414b9 100644
--- a/common/common.h
+++ b/common/common.h
@@ -35,21 +35,21 @@ int32_t get_num_physical_cores();
 struct gpt_params {
     uint32_t seed = -1; // RNG seed
-    int32_t n_threads = get_num_physical_cores();
-    int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
+    int32_t n_threads = get_num_physical_cores(); // user-defined or the number of physical cores
+    int32_t n_threads_batch = -1; // num threads for batch proc (-1 = use n_threads)
     int32_t n_predict = -1; // new tokens to predict
     int32_t n_ctx = 512; // context size
-    int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_batch = 512; // batch size for prompt proc (>=32 to use BLAS)
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
     int32_t n_draft = 16; // number of tokens to draft during speculative decoding
     int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
     int32_t n_parallel = 1; // number of parallel sequences to decode
     int32_t n_sequences = 1; // number of sequences to decode
-    int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
-    int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
+    int32_t n_gpu_layers = -1; // num layers stored in VRAM (-1 for default)
+    int32_t n_gpu_layers_draft = -1; // num layers stored in VRAM for the draft model (-1 for default)
     int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
-    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t n_probs = 0; // if > 0, output probabilities of top n_probs tokens.
     int32_t n_beams = 0; // if non-zero then use beam search of given width.
     float rope_freq_base = 0.0f; // RoPE base frequency
     float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
@@ -61,7 +61,7 @@ struct gpt_params {
     float typical_p = 1.00f; // 1.0 = disabled
     float temp = 0.80f; // 1.0 = disabled
     float repeat_penalty = 1.10f; // 1.0 = disabled
-    int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable, -1 = ctx size)
     float frequency_penalty = 0.00f; // 0.0 = disabled
     float presence_penalty = 0.00f; // 0.0 = disabled
     int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
@@ -75,10 +75,11 @@ struct gpt_params {
     std::string cfg_negative_prompt; // string to help guidance
     float cfg_scale = 1.f; // How strong is guidance
 
+    std::string help = ""; // universal help parameter
     std::string model = "models/7B/ggml-model-f16.gguf"; // model path
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
-    std::string prompt = "";
+    std::string prompt = ""; // user-provided single prompt
     std::string prompt_file = ""; // store the external prompt file name
     std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
     std::string input_prefix = ""; // string to prefix user inputs with
@@ -90,11 +91,11 @@ struct gpt_params {
     std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
     std::string lora_base = ""; // base model path for the lora adapter
 
-    int ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
+    int ppl_stride = 0; // stride for ppl calcs. 0: the pre-existing approach will be used.
-    int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
+    int ppl_output_type = 0; // 0: ppl output as usual, 1: ppl output = num_tokens, ppl, one per line
                              // (which is more convenient to use for plotting)
     //
-    bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt
+    bool hellaswag = false; // compute HellaSwag score from datafile given in prompt
     size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score
 
     bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS
@@ -109,7 +110,7 @@ struct gpt_params {
     bool escape = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool interactive_first = false; // wait for user input immediately
     bool multiline_input = false; // reverse the usage of `\`
-    bool simple_io = false; // improves compatibility with subprocesses and limited consoles
+    bool simple_io = false; // improves compat. with subprocesses and limited consoles
     bool cont_batching = false; // insert new sequences for decoding on-the-fly
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index de4cf7a691768..b32706b0337e0 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -30,6 +30,7 @@ else()
     add_subdirectory(embd-input)
     add_subdirectory(llama-bench)
     add_subdirectory(beam-search)
+    add_subdirectory(cmap-example)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()

diff --git a/examples/cmap-example/CMakeLists.txt b/examples/cmap-example/CMakeLists.txt
new file mode 100644
index 0000000000000..c5820f7b1bfcd
--- /dev/null
+++ b/examples/cmap-example/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(TARGET cmap-example)
+add_executable(${TARGET} cmap-example.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)

diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp
new file mode 100644
index 0000000000000..d06699c864b94
--- /dev/null
+++ b/examples/cmap-example/cmap-example.cpp
@@ -0,0 +1,124 @@
+// example of a C/C++ equivalent data structure to the python dict in readcommonh.py
+
+#include <cstdio>
+#include <fstream>
+#include <string>
+#include <vector>
+// there may be good reasons not to sort the parameters, but here we use a sorted map
+#include <map>
+
+std::vector<std::string> split_string(const std::string& str, const std::string& delimiter) {
+    std::vector<std::string> tokens;
+    std::size_t start = 0, end = 0;
+    bool inside_tags = false; // flag to track if we are inside "<>"
+
+    while ((end = str.find(delimiter, start)) != std::string::npos) {
+        std::string token = str.substr(start, end - start);
+
+        if (!inside_tags && !token.empty()) { // exclude empty substrings and anything inside "<>"
+            tokens.push_back(token);
+        }
+        // deal with cases where the split character occurs inside <>
+        // update the inside_tags flag based on "<" and ">"
+        size_t open_tag_pos = str.find("<", start);
+        size_t close_tag_pos = str.find(">", start);
+        if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) {
+            inside_tags = true;
+        } else if (close_tag_pos != std::string::npos && close_tag_pos < end) {
+            inside_tags = false;
+        }
+        start = end + delimiter.length();
+    }
+    tokens.push_back(str.substr(start));
+    return tokens;
+}
+void print_parameters(const std::map<std::string, std::vector<std::string>>& parameters) {
+    for (const auto& pair : parameters) {
+        const std::string& key = pair.first;
+        const std::vector<std::string>& value = pair.second; // usually has multiple elements
+        printf("key: %25s: values: ", key.c_str());
+        for (const std::string& element : value) {
+            printf("%s ", element.c_str());
+        }
+        printf("\n");
+    }
+}
+
+std::map<std::string, std::vector<std::string>> extract_parameters() {
+    std::ifstream file("common/common.h");
+    std::string line;
+    std::vector<std::string> lines;
+    while (std::getline(file, line)) {
+        lines.push_back(line);
+    }
+
+    std::map<std::string, std::vector<std::string>> parameters;
+    // fix up failure to match logit_bias; may also need to add lora_adapter; now dealt with below and ready for deletion
+    // parameters["logit_bias"] = {"std::unordered_map", "logit_bias", "=", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"};
+    // parameters["lora_adapter"] = {"std::vector", "lora_adapter", "=", "", "//", "lora", "adapter", "path", "with", "user-defined", "scale"};
+
+    // this for loop finds all the params inside struct gpt_params
+    bool inside = false;
+    for (const std::string& line : lines) {
+        std::vector<std::string> nws_elements = split_string(line, " ");
+        printf("nwe = ");
+        for (const std::string& element : nws_elements) {
+            printf("%s ", element.c_str());
+        }
+        printf("\n");
+
+        if (nws_elements.size() > 1 && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") {
+            inside = true;
+        }
+
+        if (nws_elements.size() > 2 && inside) {
+            // cannot use nws_elements[0] as key because types do not generate unique keys and so overwrite
+            // here we deliberately keep the whole line so we can manually change the key when it differs (remove eventually)
+            std::vector<std::string> copy = nws_elements; // create a copy of nws_elements
+            parameters[nws_elements[1]] = copy;           // assign the copy to parameters
+
+            // remove spurious entry caused by the eccentric status of logit_bias
+            if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") {
+                parameters.erase("float>");
+            }
+            // remove spurious entry caused by the eccentric status of lora_adapter
+            if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") {
+                parameters.erase("float>>");
+            }
+        }
+
+        // terminate the harvest; TODO: not robust, needs a better terminator; this is just a crude hack for now
+        if (nws_elements.size() > 2 && nws_elements[2] == "infill") {
+            inside = false;
+            break;
+        }
+    }
+    // display them here (unnecessary operationally; kept during development)
+    print_parameters(parameters);
+
+    // return the results (will eventually become a void function)
+    return parameters;
+}
+
+int main() {
+
+    // replicate readcommonh.py: extract_parameters() already prints the map once;
+    // printing again here is deliberate, to exercise print_parameters() directly
+    std::map<std::string, std::vector<std::string>> parameters = extract_parameters();
+    print_parameters(parameters);
+
+    return 0;
+}
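As a quick sanity check of split_string()'s angle-bracket handling, a small driver like the following (hypothetical, to be compiled together with the split_string() definition above) shows what happens when a templated declaration is split on spaces:

// Hypothetical driver (not part of the patch); link against split_string() above.
#include <cstdio>
#include <string>
#include <vector>

std::vector<std::string> split_string(const std::string& str, const std::string& delimiter);

int main() {
    const std::string decl = "std::map<std::string, std::vector<std::string>> parameters;";
    for (const std::string & tok : split_string(decl, " ")) {
        printf("[%s] ", tok.c_str());
    }
    printf("\n");
    // prints: [std::map<std::string,] [parameters;]
    // the middle token is dropped because inside_tags is already set when it is seen;
    // the flag is only updated after a token is emitted, so the first token still
    // carries the opening '<', and the final substring is always appended
    return 0;
}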
diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py
new file mode 100644
index 0000000000000..c48c8c5a3d82c
--- /dev/null
+++ b/examples/cmap-example/find-implemented-args.py
@@ -0,0 +1,228 @@
+# search the specified directory for files that use command-line arguments;
+# these are almost always of the form params.argument; "logit_bias" is one exception
+# have yet to investigate fully what "lora_adapter" in server.cpp does, since it is not apparently
+# accessible from the command-line arg/parameter sequence
+# there is also an issue with -ngl, which does not appear in some help menus even when apparently implemented, e.g. in parallel.cpp
+
+import os
+import re
+import collections
+import readcommonh
+
+# regenerate the help file (file_to, usually 'help_list.txt', hence the default) in case the source (file_from) has changed
+def update_file(file_from, file_to = "help_list.txt"):
+    # Open the file_from file
+    with open(file_from, "r") as file:
+        lines = file.readlines()
+
+    # Find lines starting with "printf(" and ending with ");" (assumes file_from is written in C/C++)
+    pattern = r'printf\("\s(.*?)\);'
+    matched_lines = [re.search(pattern, line).group(1) for line in lines if re.search(pattern, line)]
+
+    # Save matched lines to file_to
+    with open(file_to, "w") as file:
+        for line in matched_lines:
+            file.write(line + '\n')
+
+# helper fn to make the hyphenated words in a file snake_case for searching
+def replace_dashes_with_underscores(filename):
+    with open(filename, 'r') as file:
+        content = file.read()
+
+    # Match '-' surrounded by word characters on both sides and replace with '_'
+    replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content)
+
+    with open(filename, 'w') as file:
+        file.write(replaced_content)
+
+# helper fn to make the underscored words in a parameter hyphenated for printing
+def replace_underscores_with_dashes(parameter):
+    # Match '_' surrounded by word characters on both sides and replace with '-'
+    return re.sub(r'(\w)_(\w)', r'\1-\2', parameter)
+
+
+# find all instances of "params." or "params->" in the *.cpp files in a directory
+def find_arguments(directory):
+    arguments = {}
+
+    # Use os.walk() to traverse through files in directory and subdirectories
+    for root, dirs, files in os.walk(directory):
+        for file in files:
+            if file.endswith('.cpp'):
+                filepath = os.path.join(root, file)
+                with open(filepath, 'r') as file:
+                    content = file.read()
+
+                # Search for "params." or "params->" and read the attribute without trailing detritus
+                # matches = re.findall(r'(?:^|\s)params\.(.*)(?=[\). <,;}]|\Z)', content)
+                matches = set(re.findall(r'(?:^|\b)params(?:->|\.)([a-zA-Z_0-9]*)(?=[\). <,;}]|\Z)', content))
+
+                # Add the matches to the dictionary
+                arguments[filepath] = matches
+
+    return arguments
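Since cmap-example exists to port this tooling to C++, the regex harvest in find_arguments() has a fairly direct std::regex counterpart. A rough sketch under that assumption (ECMAScript regexes have no \Z, so $ stands in for it; the input string and use_blas name are made up for illustration):

// Sketch (not part of the patch): a std::regex counterpart to find_arguments().
#include <cstdio>
#include <regex>
#include <set>
#include <string>

std::set<std::string> find_params_in(const std::string & content) {
    std::set<std::string> matches;
    // same idea as the Python pattern above, with $ in place of \Z
    static const std::regex pattern(R"(\bparams(?:->|\.)([A-Za-z0-9_]*)(?=[\). <,;}]|$))");
    for (std::sregex_iterator it(content.begin(), content.end(), pattern), end; it != end; ++it) {
        matches.insert((*it)[1].str());
    }
    return matches;
}

int main() {
    const std::string code = "if (params.n_batch >= 32) { use_blas(params.n_batch); }";
    for (const std::string & name : find_params_in(code)) {
        printf("found: %s\n", name.c_str()); // prints: found: n_batch
    }
    return 0;
}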
+# output a list of the params.attributes for each file
+def output_results(result):
+    sorted_result = collections.OrderedDict(sorted(result.items()))
+    all_of_them = set()
+    for filename, arguments in sorted_result.items():
+        arguments.add("help")
+        print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n")
+        for argument in arguments:
+            if argument not in all_of_them:
+                all_of_them.add(argument)
+    print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.")
+    return sorted_result
+
+# put all the words after "//" in a dict entry back together with spaces
+def concatenate(v):
+    concatenated_element = ""
+    for i, element in enumerate(v):
+        if element == "//":
+            concatenated_element = " ".join(v[i:])
+    return concatenated_element
+
+def title_print(filename):
+    title = filename.split('/')[-1]
+    print("\n\n" + "#" * (10 + len(title)))
+    print(f"Filename: \033[32m{title}\033[0m")
+    print("#" * (10 + len(title)))
+
+# list all the equivalences between the declarations in common.h and the help text defined in common.cpp
+# these are used to substitute the searched params.attributes (keys) with help attributes (values)
+def substitution_list(parameters):
+    # store untrapped parameters as identicals in case we need to change them later
+    sub_dict = {"n_threads": "threads",
+                "n_ctx": "ctx_size",
+                "n_draft": "draft",
+                "n_threads_batch": "threads_batch",
+                "n_chunks": "chunks",
+                "n_batch": "batch_size",
+                "n_sequences": "sequences",
+                "n_parallel": "parallel",
+                "n_beams": "beams",
+                "n_keep": "keep",
+                "n_probs": "nprobs",
+                "path_prompt_cache": "prompt_cache",
+                "prompt_file": "prompt_file",
+                "input_prefix": "in_prefix",
+                "input_suffix": "in_suffix",
+                "input_prefix_bos": "in_prefix_bos",
+                "antiprompt": "reverse_prompt",
+                "mul_mat_q": "no_mul_mat_q",
+                "use_mmap": "no_mmap",
+                "use_mlock": "mlock",
+                "model_alias": "alias",
+                "tfs_z": "tfs",
+                "use_color": "color",
+                "logit_bias": "logit_bias",
+                "ignore_eos": "ignore_eos",
+                "mirostat_tau": "mirostat_ent",
+                "mirostat_eta": "mirostat_lr",
+                "penalize_nl": "no_penalize_nl",
+                "typical_p": "typical",
+                "mem_size": "mem_size",
+                "mem_buffer": "mem_buffer",
+                "no_alloc": "no_alloc"
+                }
+    new_parameters = []
+    for parameter in parameters:
+        if parameter in sub_dict:
+            # we need both forms for future reference
+            new_parameters.append(parameter)
+            new_parameters.append(sub_dict[parameter])
+        else:
+            new_parameters.append(parameter)
+    return new_parameters
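The same equivalence table translates directly to the C++ side; a minimal sketch (assumed naming, only a few entries shown) of how sub_dict and the substitution loop might look in cmap-example:

// Sketch (assumed naming, not part of the patch): sub_dict as a C++ map,
// mirroring substitution_list() for the planned cmap-example tooling.
#include <map>
#include <string>
#include <vector>

static const std::map<std::string, std::string> k_sub_dict = {
    { "n_threads",   "threads"  },
    { "n_ctx",       "ctx_size" },
    { "use_mmap",    "no_mmap"  },
    { "model_alias", "alias"    },
    // ... remaining entries exactly as in substitution_list()
};

// keep the original common.h name and append its help-text twin, as the Python version does
std::vector<std::string> substitute(const std::vector<std::string> & parameters) {
    std::vector<std::string> out;
    for (const std::string & p : parameters) {
        out.push_back(p);
        auto it = k_sub_dict.find(p);
        if (it != k_sub_dict.end()) {
            out.push_back(it->second);
        }
    }
    return out;
}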
+# check the lines of the help file against the implemented arguments
+def find_parameters(file, sorted_result):
+    with open(file, "r") as helpfile:
+        lines = helpfile.read().split("\n")
+    for filename, arguments in sorted_result.items():
+        # we try to fix up some variant labelling in help_list.txt
+        arguments = substitution_list(arguments)
+        parameters = []
+        for line in lines:
+            for argument in arguments:
+                # build the pattern carefully to avoid spurious matches
+                # pattern = r"(?:--{}\s)|(?:params\.{}[\s.,\.();])".format(argument, argument.split('n_')[-1])
+                pattern = r"(?:--{}\s)|(?:params\.{}(?=[\s.,\.\(\);]|\.+\w))".format(argument, argument.split('n_')[-1])
+                # pattern = r"(?<=params\.)\w+(?=\.\w+|\.|,|;|\}|\{|\(|\)|\.)"
+                # bit of a hack: search only the first 50 characters to exclude --attributes at the end of help comment lines
+                if re.search(pattern, line[:50]):
+                    parameters.append(line)
+
+        all_parameters = set(parameters)
+
+        title_print(filename)
+        print("\nCommand-line arguments available and gpt_params functions implemented (TODO: multi-line helps NEED SOME WORK):\n")
+
+        if not all_parameters:
+            print("  \033[032mNone\033[0m\n")
+
+        # first do it the original way
+        help_count = 0
+        for parameter in all_parameters:
+            # reverse the hyphen/underscore substitution just for printing
+            replaced_param = replace_underscores_with_dashes(parameter)
+            if not parameter.startswith(" "):
+                help_count += 1
+                print(f"{help_count:>2} help: \033[33m{replaced_param:<30}\033[0m")
+            else:
+                print(f"   help: \033[33m{replaced_param:<30}\033[0m")
+
+        # now do it the new way
+        print("\nNow we extract the original gpt_params definition from common.h with the defaults for implemented arguments:\n")
+        gpt_count = 0
+        for k, v in readcommonh.parameters.items():
+            if k in arguments:
+                # print(f"gpt_params: \033[33m{k:>20}\033[0m values: {v}")
+                concatenated_element = concatenate(v)
+                gpt_count += 1
+                print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ")
+
+        # searching the other way round is quicker:
+        print("\nSearching the other way round is more efficient:\n")
+        key_count = 0
+        for argument in set(arguments):
+            if argument in readcommonh.parameters:
+                key_count += 1
+                print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(readcommonh.parameters[argument]):<60}; default: {readcommonh.parameters[argument][1]:<10}")
+        if help_count == gpt_count and gpt_count == key_count:
+            print(f"\n\033[032mNo unresolved help-list incompatibilities with \033[33m{filename.split('/')[-1]}\033[0m")
+        else:
+            print("\n\033[031mThis app requires some attention regarding help-function consistency.\033[0m")
+
+# Specify the directory you want to search for cpp files
+directory = '/Users/edsilm2/llama.cpp/examples'
+
+if __name__ == '__main__':
+
+    # update the source help file from the C++ source (this works exactly as required)
+    update_file("common/common.cpp", "help_list.txt")
+
+    # get the parameters from the common.h file utility we import
+    print(readcommonh.parameters)
+    # So now we've got the gpt_parameters in this parameters dict
+
+    # First we alter all the hyphenated help words in help_list.txt to underscores;
+    # we later reverse these changes before printing the help lines
+    replace_dashes_with_underscores('help_list.txt')
+
+    print("\n####################### find parameters #################################")
+    # Call the find function to collect all the params.attributes and output the result
+    result = find_arguments(directory)
+
+    print("\n######################################## output_results #################################")
+    # sort the results and output them
+    sorted_results = output_results(result)
+
+    print("\n######################## find help context parameters #################################")
+    # analyse the files and what they contain
+    find_parameters("help_list.txt", sorted_results)
diff --git a/examples/cmap-example/readcommonh.py b/examples/cmap-example/readcommonh.py
new file mode 100644
index 0000000000000..422edea8fd252
--- /dev/null
+++ b/examples/cmap-example/readcommonh.py
@@ -0,0 +1,39 @@
+# read common.h and extract the parameter name list
+
+import re
+
+# Read the file into separate lines
+with open('common/common.h', 'r') as file:
+    lines = file.read().split('\n')
+
+parameters = {}
+# we add the logit_bias parameter, which otherwise is not found
+parameters['logit_bias'] = ['logit_bias', '0', '//', 'way', 'to', 'alter', 'prob', 'of', 'particular', 'words']
+
+inside = False
+for line in lines:
+    # non_whitespace_elements = re.findall(r"\S+", line)
+    non_whitespace_elements = re.findall(r"[^\s}{=;]+", line)
+    print(f"nwe = \033[33m{non_whitespace_elements}\033[0m")
+    if non_whitespace_elements and non_whitespace_elements[0] == "struct":
+        inside = True
+    if len(non_whitespace_elements) > 2 and inside:
+        # note: cannot use nwe[0] because types do not generate unique keys and so overwrite
+        # here we deliberately add back the key so we can make a manual change when it is different
+        parameters[non_whitespace_elements[1]] = non_whitespace_elements[1:]
+        # remove spurious entry caused by the eccentric status of logit_bias
+        if "float>" in parameters and parameters["float>"][1] == 'logit_bias':
+            del parameters["float>"]
+
+    # this is a bit of a hack to terminate the harvest
+    if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill":
+        inside = False
+        break
+
+for k, v in parameters.items():
+    print(f"key: {k:<20}; values: {v}")
+    concatenated_element = ""
+    for i, element in enumerate(v):
+        if element == "//":
+            concatenated_element = " ".join(v[i:])
+            # break
+    print(" " * 10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcomment: \033[33m{concatenated_element:80}\033[0m")
(default: %d, 0 = disabled)\n", params.top_k + --top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p + --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z + --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p + --repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n + --repeat_penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty + --presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty + --frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty + --mirostat N use Mirostat sampling.\n" + Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n" + (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat + --mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta + --mirostat_ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau + -l T, --logit_bias T T = TOKEN_ID(plus/minus)BIAS\n" + modifies the likelihood of token appearing in the completion,\n" + i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello',\n" + or `--logit_bias 15043_1` to decrease likelihood of token ' Hello'\n" + --grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir)\n" + --grammar_file FNAME file to read grammar from\n" + --cfg_negative_prompt PROMPT\n" + negative prompt to use for guidance. (default: empty)\n" + --cfg_negative_prompt_file FNAME\n" + negative prompt file to use for guidance. 
(default: empty)\n" + --cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale + --rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale\n" + --rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model)\n" + --rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model)\n" + --ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf)\n" + --no_penalize_nl do not penalize newline token (default is DO penalise nl token)\n" + --memory_f32 use f32 instead of f16 for memory key+value (default: disabled)\n" + not recommended: doubles context memory required and no measurable increase in quality\n" + --temp N temperature (default: %.1f)\n", (double)params.temp + --logits_all return logits for all tokens in the batch (default: disabled)\n" + --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n" + --hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks + --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep + --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft + --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks + -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel + -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences + -cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n" + --mlock force system to keep model in RAM rather than swapping or compressing\n" + --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock)\n" + --numa attempt optimizations that help on some NUMA systems\n" + if run without this previously, it is recommended to drop the system page cache before using this\n" + see https://github.com/ggerganov/llama.cpp/issues/1437\n" + -ngl N, --n_gpu_layers N\n" + number of layers to store in VRAM\n" + -ngld N, --n_gpu_layers_draft N\n" + number of layers to store in VRAM for the draft model\n" + -ts SPLIT --tensor_split SPLIT\n" + how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 3,1\n" + -mg i, --main_gpu i the GPU to use for scratch and small tensors\n" + -nommq, --no_mul_mat_q\n" + use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n" + Not recommended since this is both slower and uses more VRAM.\n" + --verbose_prompt print prompt before generation\n" + --lora FNAME apply LoRA adapter (implies --no_mmap)\n" + --lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap)\n" + --lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n" + -m FNAME, --model FNAME\n" + model path (default: %s)\n", params.model.c_str() + -md FNAME, --model_draft FNAME\n" + draft model for speculative decoding (default: %s)\n", params.model.c_str() + -ld LOGDIR, --logdir LOGDIR\n" + path under which to save YAML logs (no logging if unset)\n" + --ppl_stride stride for ppl calcs. 
+ --ppl_stride stride for perplexity calculations. 0 (default): the pre_existing approach is used.\n"
+ --ppl_output_type 0 (default): ppl output as usual, 1: output num_tokens, ppl, one pair per line\n"
+ --embedding 0 (default): if 1, output only the sentence embedding\n"
+ --beams N 0 (default): if non_zero, use beam search of width N.\n"
+ --memory_f32 0 (default): if 1, use f32 instead of f16 for memory key+value\n"
+ --no_mmap 0 (default): if 1, do not memory_map the model (mmap gives faster loads)\n"
+ --mlock 0 (default): if 1, keep the model locked in RAM\n"
+ --use_color 0 (default): if 1, use color to distinguish generations from inputs\n"
+ --nprobs N if > 0, output the probabilities of the top N tokens\n"
+ --alias model alias (default: 'unknown')\n"
+ --infill 0 (default): if 1, use infill mode\n"
+ --prompt_file name of external prompt file\n"