From 97398a2e1dc2c0e5d4ef6606d619645a1323d848 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Tue, 4 Jun 2024 16:46:55 -0700 Subject: [PATCH 1/4] Periodic commit --- pgml-cms/docs/api/client-sdk/README.md | 159 ++++++++++- pgml-cms/docs/api/client-sdk/collections.md | 233 +++++++++++++++++ pgml-cms/docs/api/client-sdk/pipelines.md | 196 +++++++++++++- pgml-cms/docs/api/client-sdk/search.md | 275 ++++++++++++++++++-- 4 files changed, 836 insertions(+), 27 deletions(-) diff --git a/pgml-cms/docs/api/client-sdk/README.md b/pgml-cms/docs/api/client-sdk/README.md index 866610b92..0ccddb9f0 100644 --- a/pgml-cms/docs/api/client-sdk/README.md +++ b/pgml-cms/docs/api/client-sdk/README.md @@ -12,17 +12,39 @@ The client SDK can be installed using standard package managers for JavaScript, Installing the SDK into your project is as simple as: {% tabs %} -{% tab title="JavaScript " %} +{% tab title="JavaScript" %} ```bash npm i pgml ``` {% endtab %} -{% tab title="Python " %} +{% tab title="Python" %} ```bash pip install pgml ``` {% endtab %} + +{% tab title="Rust" %} +```bash +cargo add pgml +``` +{% endtab %} + +{% tab title="C" %} + +First clone the `postgresml` repository and navigate to the `pgml-sdks/pgml/c` directory: +```bash +git clone https://github.com/postgresml/postgresml +cd postgresml/pgml-sdks/pgml/c +``` + +Then build the bindings +```bash +make bindings +``` + +This will generate the `pgml.h` file and a `.so` on Linux and `.dylib` on MacOS. +{% endtab %} {% endtabs %} ## Getting started To connect the SDK to your database, you need to set the `PGML_DATABASE_URL` environment variable: ```bash export PGML_DATABASE_URL=postgres://user:password@sql.cloud.postgresml.org:6432/your_database ``` ### Create a collection -The SDK is written in asynchronous code, so you need to run it inside an async runtime. Both Python and JavaScript support async functions natively. +The SDK is written in asynchronous code, so you need to run it inside an async runtime. Python, JavaScript, and Rust all support async functions natively. 
{% tabs %} -{% tab title="JavaScript " %} +{% tab title="JavaScript" %} ```javascript const pgml = require("pgml"); @@ -63,6 +85,28 @@ async def main(): collection = Collection("sample_collection") ``` {% endtab %} + +{% tab title="Rust" %} +```rust +use pgml::{Collection, Pipeline}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let mut collection = Collection::new("sample_collection", None)?; +} +``` +{% endtab %} + +{% tab title="C" %} +```c +#include +#include "pgml.h" + +int main() { + CollectionC * collection = pgml_collectionc_new("sample_collection", NULL); +} +``` +{% endtab %} {% endtabs %} The above example imports the `pgml` module and creates a collection object. By itself, the collection only tracks document contents and identifiers, but once we add a pipeline, we can instruct the SDK to perform additional tasks when documents and are inserted and retrieved. @@ -93,7 +137,7 @@ await collection.add_pipeline(pipeline); ```python # Add this code to the end of the main function from the above example. pipeline = Pipeline( - "test_pipeline", + "sample_pipeline", { "text": { "splitter": { "model": "recursive_character" }, @@ -107,6 +151,37 @@ pipeline = Pipeline( await collection.add_pipeline(pipeline) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +// Add this code to the end of the main function from the above example. +let mut pipeline = Pipeline::new( + "sample_pipeline", + Some( + serde_json::json!({ + "text": { + "splitter": { "model": "recursive_character" }, + "semantic_search": { + "model": "Alibaba-NLP/gte-base-en-v1.5", + }, + }, + }) + .into(), + ), +)?; + +collection.add_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +// Add this code to the end of the main function from the above example. 
+PipelineC * pipeline = pgml_pipelinec_new("sample_pipeline", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"Alibaba-NLP/gte-base-en-v1.5\"}}}"); + +pgml_collectionc_add_pipeline(collection, pipeline); +``` +{% endtab %} {% endtabs %} The pipeline configuration is a key/value object, where the key is the name of a column in a document, and the value is the action the SDK should perform on that column. @@ -153,9 +228,36 @@ documents = [ await collection.upsert_documents(documents) ``` {% endtab %} -{% endtabs %} -If the same document `id` is used, the SDK computes the difference between existing and new documents and only updates the chunks that have changed. +{% tab title="Rust" %} +```rust +// Add this code to the end of the main function in the above example. +let documents = vec![ + serde_json::json!({ + "id": "Document One", + "text": "document one contents...", + }) + .into(), + serde_json::json!({ + "id": "Document Two", + "text": "document two contents...", + }) + .into(), +]; + +collection.upsert_documents(documents, None).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +// Add this code to the end of the main function in the above example. +char * documents_to_upsert[2] = {"{\"id\": \"Document One\", \"text\": \"document one contents...\"}", "{\"id\": \"Document Two\", \"text\": \"document two contents...\"}"}; + +pgml_collectionc_upsert_documents(collection, documents_to_upsert, 2, NULL); +``` +{% endtab %} +{% endtabs %} ### Search documents @@ -203,6 +305,47 @@ results = await collection.vector_search( print(results) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +// Add this code to the end of the main function in the above example. 
+let results = collection + .vector_search( + serde_json::json!({ + "query": { + "fields": { + "text": { + "query": "Something about a document...", + }, + }, + }, + "limit": 2, + }) + .into(), + &mut pipeline, + ) + .await?; + +println!("{:?}", results); + +Ok(()) +``` +{% endtab %} + +{% tab title="C" %} +```c +// Add this code to the end of the main function in the above example. +r_size = 0; +char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Something about a document...\"}}}, \"limit\": 2}", pipeline, &r_size); +printf("\n\nPrinting results:\n"); +for (i = 0; i < r_size; ++i) { + printf("Result %u -> %s\n", i, results[i]); +} + +pgml_pipelinec_delete(pipeline); +pgml_collectionc_delete(collection); +``` +{% endtab %} {% endtabs %} We are using built-in vector search, powered by embeddings and the PostgresML [pgml.embed()](../sql-extension/pgml.embed) function, which embeds the `query` argument, compares it to the embeddings stored in the database, and returns the top two results, ranked by cosine similarity. @@ -228,6 +371,8 @@ if __name__ == "__main__": {% endtab %} {% endtabs %} +Note that the `Rust` and `C` examples do not require any additional code to run correctly. 
+ Once you run the example, you should see something like this in the terminal: ```bash diff --git a/pgml-cms/docs/api/client-sdk/collections.md b/pgml-cms/docs/api/client-sdk/collections.md index 14c64ad5c..ebd63afca 100644 --- a/pgml-cms/docs/api/client-sdk/collections.md +++ b/pgml-cms/docs/api/client-sdk/collections.md @@ -26,6 +26,18 @@ const collection = pgml.newCollection("test_collection") collection = Collection("test_collection") ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut collection = Collection::new("test_collection", None)?; +``` +{% endtab %} + +{% tab title="C" %} +```c +CollectionC * collection = pgml_collectionc_new("test_collection", NULL); +``` +{% endtab %} {% endtabs %} ### Custom `PGML_DATABASE_URL` @@ -44,6 +56,18 @@ const collection = pgml.newCollection("test_collection", CUSTOM_DATABASE_URL) collection = Collection("test_collection", CUSTOM_DATABASE_URL) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut collection = Collection::new("test_collection", Some(CUSTOM_DATABASE_URL))?; +``` +{% endtab %} + +{% tab title="C" %} +```c +CollectionC * collection = pgml_collectionc_new("test_collection", CUSTOM_DATABASE_URL); +``` +{% endtab %} {% endtabs %} ## Upserting Documents @@ -90,6 +114,38 @@ documents = [ await collection.upsert_documents(documents) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents: Vec = vec![ + serde_json::json!({ + "id": "document_one", + "title": "Document One", + "text": "Here are the contents of Document 1", + "random_key": "here is some random data", + }) + .into(), + serde_json::json!({ + "id": "document_two", + "title": "Document Two", + "text": "Here are the contents of Document 2", + "random_key": "here is some random data", + }) + .into(), +]; +collection.upsert_documents(documents, None).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +char * documents[2] = { + "{\"id\": \"document_one\", \"title\": \"Document One\", \"text\": \"Here are the contents of Document 
1\", \"random_key\": \"here is some random data\"}", + "{\"id\": \"document_two\", \"title\": \"Document Two\", \"text\": \"Here are the contents of Document 2\", \"random_key\": \"here is some random data\"}" +}; +pgml_collectionc_upsert_documents(collection, documents, 2, NULL); +``` +{% endtab %} {% endtabs %} Documents can be replaced by upserting documents with the same `id`. @@ -134,6 +190,38 @@ documents = [ await collection.upsert_documents(documents) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents: Vec = vec![ + serde_json::json!({ + "id": "document_one", + "title": "Document One", + "text": "Here is some new text for document one", + "random_key": "here is some random data", + }) + .into(), + serde_json::json!({ + "id": "document_two", + "title": "Document Two", + "text": "Here is some new text for document two", + "random_key": "here is some random data", + }) + .into(), +]; +collection.upsert_documents(documents, None).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +char * documents[2] = { + "{\"id\": \"document_one\", \"title\": \"Document One\", \"text\": \"Here is some new text for document one\", \"random_key\": \"here is some random data\"}", + "{\"id\": \"document_two\", \"title\": \"Document Two\", \"text\": \"Here is some new text for document two\", \"random_key\": \"here is some random data\"}" +}; +pgml_collectionc_upsert_documents(collection, documents, 2, NULL); +``` +{% endtab %} {% endtabs %} Documents can be merged by setting the `merge` option. On conflict, new document keys will override old document keys. 
@@ -176,6 +264,38 @@ documents = [ await collection.upsert_documents(documents, {"merge": True}) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents: Vec = vec![ + serde_json::json!({ + "id": "document_one", + "new_key": "this will be a new key in document one", + "random_key": "this will replace old random_key" + }) + .into(), + serde_json::json!({ + "id": "document_two", + "new_key": "this will be a new key in document two", + "random_key": "this will replace old random_key" + }) + .into(), +]; +collection + .upsert_documents(documents, Some(serde_json::json!({"merge": true}).into())) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +char * documents[2] = { + "{\"id\": \"document_one\", \"new_key\": \"this will be a new key in document one\", \"random_key\": \"this will replace old random_key\"}", + "{\"id\": \"document_two\", \"new_key\": \"this will be a new key in document two\", \"random_key\": \"this will replace old random_key\"}" +}; +pgml_collectionc_upsert_documents(collection, documents, 2, "{\"merge\": true}"); +``` +{% endtab %} {% endtabs %} ## Getting Documents @@ -194,6 +314,21 @@ const documents = await collection.get_documents({limit: 100 }) documents = await collection.get_documents({ "limit": 100 }) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents = collection + .get_documents(Some(serde_json::json!({"limit": 100}).into())) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +unsigned long r_size = 0; +char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100}", &r_size); +``` +{% endtab %} {% endtabs %} ### Paginating Documents @@ -214,6 +349,21 @@ const documents = await collection.get_documents({ limit: 100, offset: 10 }) documents = await collection.get_documents({ "limit": 100, "offset": 10 }) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents = collection + .get_documents(Some(serde_json::json!({"limit": 100, "offset": 10}).into())) + .await?; +``` +{% 
endtab %} + +{% tab title="C" %} +```c +unsigned long r_size = 0; +char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"offset\": 10}", &r_size); +``` +{% endtab %} {% endtabs %} #### Keyset Pagination @@ -230,6 +380,21 @@ const documents = await collection.get_documents({ limit: 100, last_row_id: 10 } documents = await collection.get_documents({ "limit": 100, "last_row_id": 10 }) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents = collection + .get_documents(Some(serde_json::json!({"limit": 100, "last_row_id": 10}).into())) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +unsigned long r_size = 0; +char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"last_row_id\": 10}", &r_size); +``` +{% endtab %} {% endtabs %} The `last_row_id` can be taken from the `row_id` field in the returned document's dictionary. Keyset pagination does not currently work when specifying the `order_by` key. @@ -264,6 +429,29 @@ documents = await collection.get_documents( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents = collection + .get_documents(Some( + serde_json::json!({ + "limit": 100, + "filter": { + "id": {"$eq": "document_one"}, + } + }) + .into(), + )) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +unsigned long r_size = 0; +char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, \"filter\": {\"id\": {\"$eq\": \"document_one\"}}}", &r_size); +``` +{% endtab %} {% endtabs %} ### Sorting Documents @@ -294,6 +482,30 @@ documents = await collection.get_documents({ }) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents = collection + .get_documents(Some( + serde_json::json!({ + "limit": 100, + "offset": 10, + "order_by": { + "id": "desc" + } + }) + .into(), + )) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +unsigned long r_size = 0; +char** documents = pgml_collectionc_get_documents(collection, "{\"limit\": 100, 
\"offset\": 10, \"order_by\": {\"id\": \"desc\"}}", &r_size); +``` +{% endtab %} {% endtabs %} ### Deleting Documents @@ -320,4 +532,25 @@ documents = await collection.delete_documents( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let documents = collection + .delete_documents( + serde_json::json!({ + "id": { + "$eq": 1 + } + }) + .into(), + ) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +pgml_collectionc_delete_documents(collection, "{\"id\": { \"$eq\": 1}}"); +``` +{% endtab %} {% endtabs %} diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index c51987cad..6c3ed57cd 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -57,6 +57,48 @@ pipeline = Pipeline( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!({ + "title": { + "full_text_search": {"configuration": "english"}, + }, + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "Alibaba-NLP/gte-base-en-v1.5", + }, + }, + }) + .into(), + ), +)?; + +``` +{% endtab %} + +{% tab title="C" %} +```c +PipelineC * pipeline = pgml_pipelinec_new( + "test_pipeline", + "{\ + \"title\": {\ + \"full_text_search\": {\"configuration\": \"english\"},\ + },\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"Alibaba-NLP/gte-base-en-v1.5\"\ + }\ + }\ + }" +); +``` +{% endtab %} {% endtabs %} This `Pipeline` does two things. For each document in the `Collection`, it converts all `title`s into tsvectors enabling full text search, and splits and embeds the `body` text enabling semantic search using vectors. This kind of `Pipeline` would be great for site search utilizing hybrid keyword and semantic search. 
@@ -92,6 +134,42 @@ pipeline = Pipeline( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!({ + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "Alibaba-NLP/gte-base-en-v1.5", + }, + }, + }) + .into(), + ), +)?; + +``` +{% endtab %} + +{% tab title="C" %} +```c +PipelineC * pipeline = pgml_pipelinec_new( + "test_pipeline", + "{\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"Alibaba-NLP/gte-base-en-v1.5\"\ + }\ + }\ + }" +); +``` +{% endtab %} {% endtabs %} This `Pipeline` splits and embeds the `body` text enabling semantic search using vectors. This is a very popular `Pipeline` for RAG. @@ -166,6 +244,44 @@ pipeline = Pipeline( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!({ + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "Alibaba-NLP/gte-base-en-v1.5", + "hnsw": {"m": 100, "ef_construction": 200} + }, + }, + }) + .into(), + ), +)?; + +``` +{% endtab %} + +{% tab title="C" %} +```c +PipelineC * pipeline = pgml_pipelinec_new( + "test_pipeline", + "{\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"Alibaba-NLP/gte-base-en-v1.5\",\ + \"hnsw\": {\"m\": 100, \"ef_construction\": 200}\ + }\ + }\ + }" +); +``` +{% endtab %} {% endtabs %} ## Adding Pipelines to a Collection @@ -184,6 +300,18 @@ await collection.add_pipeline(pipeline) await collection.add_pipeline(pipeline) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +collection.add_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +pgml_collectionc_add_pipeline(collection, pipeline); +``` +{% endtab %} {% endtabs %} > Note: After a `Pipeline` has been added to a `Collection` instances of the 
`Pipeline` object can be created without specifying a schema: @@ -200,6 +328,18 @@ const pipeline = pgml.newPipeline("test_pipeline") pipeline = Pipeline("test_pipeline") ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new("test_pipeline", None)?; +``` +{% endtab %} + +{% tab title="C" %} +```c +PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); +``` +{% endtab %} {% endtabs %} ## Searching with Pipelines @@ -231,6 +371,22 @@ collection = Collection("test_collection") await collection.disable_pipeline(pipeline) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut collection = Collection::new("test_collection", None)?; +let mut pipeline = Pipeline::new("test_pipeline", None)?; +collection.disable_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +CollectionC * collection = pgml_collectionc_new("test_collection", NULL); +PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); +pgml_collectionc_disable_pipeline(collection, pipeline); +``` +{% endtab %} {% endtabs %} Disabling a `Pipeline` prevents it from running automatically, but leaves all tsvectors, chunks, and embeddings already created by that `Pipeline` in the database. @@ -255,6 +411,22 @@ collection = Collection("test_collection") await collection.enable_pipeline(pipeline) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut collection = Collection::new("test_collection", None)?; +let mut pipeline = Pipeline::new("test_pipeline", None)?; +collection.enable_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +CollectionC * collection = pgml_collectionc_new("test_collection", NULL); +PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); +pgml_collectionc_enable_pipeline(collection, pipeline); +``` +{% endtab %} {% endtabs %} Enabling a `Pipeline` will cause it to automatically run on all documents it may have missed while disabled. 
@@ -263,10 +435,10 @@ Enabling a `Pipeline` will cause it to automatically run on all documents it may {% tabs %} {% tab title="JavaScript" %} -
const pipeline = pgml.newPipeline("test_pipeline")
-const collection = pgml.newCollection("test_collection")
-await collection.remove_pipeline(pipeline)
-
+```javascript +const pipeline = pgml.newPipeline("test_pipeline") +const collection = pgml.newCollection("test_collection") +await collection.remove_pipeline(pipeline) {% endtab %} {% tab title="Python" %} @@ -276,6 +448,22 @@ collection = Collection("test_collection") await collection.remove_pipeline(pipeline) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut collection = Collection::new("test_collection", None)?; +let mut pipeline = Pipeline::new("test_pipeline", None)?; +collection.remove_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +CollectionC * collection = pgml_collectionc_new("test_collection", NULL); +PipelineC * pipeline = pgml_pipelinec_new("test_pipeline", NULL); +pgml_collectionc_remove_pipeline(collection, pipeline); +``` +{% endtab %} {% endtabs %} Removing a `Pipeline` deletes it and all associated data from the database. Removed `Pipelines` cannot be re-enabled but can be recreated. diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 8318a8bee..3fc564c55 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -10,14 +10,14 @@ This section will assume we have previously ran the following code: const pipeline = pgml.newPipeline("test_pipeline", { abstract: { semantic_search: { - model: "Alibaba-NLP/gte-base-en-v1.5", + model: "mixedbread-ai/mxbai-embed-large-v1", }, full_text_search: { configuration: "english" }, }, body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "Alibaba-NLP/gte-base-en-v1.5", + model: "mixedbread-ai/mxbai-embed-large-v1", }, }, }); @@ -33,19 +33,70 @@ pipeline = Pipeline( { "abstract": { "semantic_search": { - "model": "Alibaba-NLP/gte-base-en-v1.5", + "model": "mixedbread-ai/mxbai-embed-large-v1", }, "full_text_search": {"configuration": "english"}, }, "body": { "splitter": {"model": "recursive_character"}, "semantic_search": { - "model": 
"Alibaba-NLP/gte-base-en-v1.5", + "model": "mixedbread-ai/mxbai-embed-large-v1", }, }, }, ) collection = Collection("test_collection") +await collection.add_pipeline(pipeline); +``` +{% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!( + { + "abstract": { + "semantic_search": { + "model": "mixedbread-ai/mxbai-embed-large-v1", + }, + "full_text_search": {"configuration": "english"}, + }, + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "mixedbread-ai/mxbai-embed-large-v1", + }, + }, + } + ) + .into(), + ), +)?; +let mut collection = Collection::new("test_collection", None)?; +collection.add_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +PipelineC *pipeline = pgml_pipelinec_new("test_pipeline", "{\ + \"abstract\": {\ + \"semantic_search\": {\ + \"model\": \"mixedbread-ai/mxbai-embed-large-v1\"\ + },\ + \"full_text_search\": {\"configuration\": \"english\"}\ + },\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"mixedbread-ai/mxbai-embed-large-v1\"\ + }\ + }\ +}"); +CollectionC * collection = pgml_collectionc_new("test_collection", NULL); +pgml_collectionc_add_pipeline(collection, pipeline); ``` {% endtab %} {% endtabs %} @@ -63,8 +114,8 @@ const results = await collection.vector_search( fields: { body: { query: "What is the best database?", parameters: { - instruction: - "Represent the Wikipedia question for retrieving supporting documents: ", + prompt: + "Represent this sentence for searching relevant passages: ", } }, }, @@ -85,7 +136,7 @@ results = await collection.vector_search( "body": { "query": "What is the best database?", "parameters": { - "instruction": "Represent the Wikipedia question for retrieving supporting documents: ", + "prompt": "Represent this sentence for searching relevant passages: ", }, }, }, @@ -96,9 +147,56 @@ results = await 
collection.vector_search( ) ``` {% endtab %} +{% tab title="Rust" %} +```rust +let results = collection + .vector_search( + serde_json::json!({ + "query": { + "fields": { + "body": { + "query": "What is the best database?", + "parameters": { + "prompt": "Represent this sentence for searching relevant passages: ", + }, + }, + }, + }, + "limit": 5, + }) + .into(), + &mut pipeline, + ) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +r_size = 0; +char **results = pgml_collectionc_vector_search(collection, "{\ + \"query\": {\ + \"fields\": {\ + \"body\": {\ + \"query\": \"What is the best database?\",\ + \"parameters\": {\ + \"prompt\": \"Represent this sentence for searching relevant passages: \"\ + }\ + }\ + }\ + },\ + \"limit\": 5\ +}", +pipeline, &r_size); +``` +{% endtab %} {% endtabs %} -Let's break this down. `vector_search` takes in a `JSON` object and a `Pipeline`. The `JSON` object currently supports two keys: `query` and `limit` . The `limit` limits how many chunks should be returned, the `query` specifies the actual query to perform. Let's see another more complicated example: +Let's break this down. `vector_search` takes in a `JSON` object and a `Pipeline`. The `JSON` object currently supports two keys: `query` and `limit` . The `limit` limits how many chunks should be returned, the `query` specifies the actual query to perform. + +Note that `mixedbread-ai/mxbai-embed-large-v1` takes in a prompt when creating embeddings for searching against a corpus which we provide in the `parameters`. 
+ +Let's see another more complicated example: {% tabs %} {% tab title="JavaScript" %} @@ -115,7 +213,7 @@ const results = await collection.vector_search( body: { query: query, parameters: { instruction: - "Represent the Wikipedia question for retrieving supporting documents: ", + "Represent this sentence for searching relevant passages: ", } }, }, @@ -141,7 +239,7 @@ results = await collection.vector_search( "body": { "query": query, "parameters": { - "instruction": "Represent the Wikipedia question for retrieving supporting documents: ", + "instruction": "Represent this sentence for searching relevant passages: ", }, }, }, @@ -151,6 +249,59 @@ results = await collection.vector_search( pipeline, ) +``` +{% endtab %} + +{% endtab %} +{% tab title="Rust" %} +```rust +let query = "What is the best database?"; +let results = collection + .vector_search( + serde_json::json!({ + "query": { + "fields": { + "abstract": { + "query": query, + "full_text_filter": "database", + }, + "body": { + "query": query, + "parameters": { + "instruction": "Represent this sentence for searching relevant passages: ", + }, + }, + }, + }, + "limit": 5, + }) + .into(), + &mut pipeline, + ) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +r_size = 0; +char **results = pgml_collectionc_vector_search(collection, "{\ + \"query\": {\ + \"fields\": {\ + \"abstract\": {\ + \"query\": \"What is the best database?\",\ + \"full_text_filter\": \"database\"\ + },\ + \"body\": {\ + \"query\": \"What is the best database?\",\ + \"parameters\": {\ + \"instruction\": \"Represent this sentence for searching relevant passages: \"\ + }\ + }\ + }\ + },\ + \"limit\": 5\ +}", pipeline, &r_size); ``` {% endtab %} {% endtabs %} @@ -173,7 +324,7 @@ const results = await collection.vector_search( body: { query: "What is the best database?", parameters: { instruction: - "Represent the Wikipedia question for retrieving supporting documents: ", + "Represent this sentence for searching relevant passages: ", 
} }, }, @@ -199,7 +350,7 @@ results = await collection.vector_search( "body": { "query": "What is the best database?", "parameters": { - "instruction": "Represent the Wikipedia question for retrieving supporting documents: ", + "instruction": "Represent this sentence for searching relevant passages: ", }, }, }, @@ -211,6 +362,52 @@ results = await collection.vector_search( ) ``` {% endtab %} + +{% endtab %} +{% tab title="Rust" %} +```rust +let results = collection + .vector_search( + serde_json::json!({ + "query": { + "fields": { + "body": { + "query": "What is the best database?", + "parameters": { + "instruction": "Represent this sentence for searching relevant passages: ", + }, + }, + }, + "filter": {"user_id": {"$eq": 1}}, + }, + "limit": 5, + }) + .into(), + &mut pipeline, + ) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +r_size = 0; +char **results = pgml_collectionc_vector_search(collection, "{\ + \"query\": {\ + \"fields\": {\ + \"body\": {\ + \"query\": \"What is the best database?\",\ + \"parameters\": {\ + \"instruction\": \"Represent this sentence for searching relevant passages: \"\ + }\ + }\ + },\ + \"filter\": {\"user_id\": {\"$eq\": 1}}\ + },\ + \"limit\": 5\ +}", pipeline, &r_size); +``` +{% endtab %} {% endtabs %} The above query would filter out all chunks from documents that do not contain a key `user_id` equal to `1`. 
@@ -227,7 +424,7 @@ const results = await collection.vector_search( body: { query: "What is the best database?", parameters: { instruction: - "Represent the Wikipedia question for retrieving supporting documents: ", + "Represent this sentence for searching relevant passages: ", } }, }, @@ -253,7 +450,7 @@ results = await collection.vector_search( "body": { "query": "What is the best database?", "parameters": { - "instruction": "Represent the Wikipedia question for retrieving supporting documents: ", + "instruction": "Represent this sentence for searching relevant passages: ", }, }, }, @@ -265,6 +462,52 @@ results = await collection.vector_search( ) ``` {% endtab %} + +{% endtab %} +{% tab title="Rust" %} +```rust +let results = collection + .vector_search( + serde_json::json!({ + "query": { + "fields": { + "body": { + "query": "What is the best database?", + "parameters": { + "instruction": "Represent this sentence for searching relevant passages: ", + }, + }, + }, + "filter": {"user_id": {"$gte": 1}}, + }, + "limit": 5, + }) + .into(), + &mut pipeline, + ) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +r_size = 0; +char **results = pgml_collectionc_vector_search(collection, "{\ + \"query\": {\ + \"fields\": {\ + \"body\": {\ + \"query\": \"What is the best database?\",\ + \"parameters\": {\ + \"instruction\": \"Represent this sentence for searching relevant passages: \"\ + }\ + }\ + },\ + \"filter\": {\"user_id\": {\"$eq\": 1}}\ + },\ + \"limit\": 5\ +}", pipeline, &r_size); +``` +{% endtab %} {% endtabs %} The above query would filter out all documents that do not contain a key `user_id` with a value greater than or equal to `1`. 
@@ -281,7 +524,7 @@ const results = await collection.vector_search( body: { query: "What is the best database?", parameters: { instruction: - "Represent the Wikipedia question for retrieving supporting documents: ", + "Represent this sentence for searching relevant passages: ", } }, }, @@ -325,7 +568,7 @@ results = await collection.vector_search( "body": { "query": "What is the best database?", "parameters": { - "instruction": "Represent the Wikipedia question for retrieving supporting documents: ", + "instruction": "Represent this sentence for searching relevant passages: ", }, }, }, From 7efe6d9fc8d973452381140e9c5852119033a360 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 09:48:35 -0700 Subject: [PATCH 2/4] Updated everything to have rust and c --- .../docs/api/client-sdk/document-search.md | 117 +++++++++++++++++- pgml-cms/docs/api/client-sdk/search.md | 59 ++++++++- 2 files changed, 169 insertions(+), 7 deletions(-) diff --git a/pgml-cms/docs/api/client-sdk/document-search.md b/pgml-cms/docs/api/client-sdk/document-search.md index cf91f95ee..4ada75d7f 100644 --- a/pgml-cms/docs/api/client-sdk/document-search.md +++ b/pgml-cms/docs/api/client-sdk/document-search.md @@ -10,14 +10,14 @@ This section will assume we have previously ran the following code: const pipeline = pgml.newPipeline("test_pipeline", { abstract: { semantic_search: { - model: "Alibaba-NLP/gte-base-en-v1.5", + model: "mixedbread-ai/mxbai-embed-large-v1", }, full_text_search: { configuration: "english" }, }, body: { splitter: { model: "recursive_character" }, semantic_search: { - model: "Alibaba-NLP/gte-base-en-v1.5", + model: "mixedbread-ai/mxbai-embed-large-v1", }, }, }); @@ -33,14 +33,14 @@ pipeline = Pipeline( { "abstract": { "semantic_search": { - "model": "Alibaba-NLP/gte-base-en-v1.5", + "model": "mixedbread-ai/mxbai-embed-large-v1", }, "full_text_search": {"configuration": "english"}, }, "body": { "splitter": {"model": 
"recursive_character"}, "semantic_search": { - "model": "Alibaba-NLP/gte-base-en-v1.5", + "model": "mixedbread-ai/mxbai-embed-large-v1", }, }, }, @@ -48,8 +48,60 @@ pipeline = Pipeline( collection = Collection("test_collection") ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let mut pipeline = Pipeline::new( + "test_pipeline", + Some( + serde_json::json!( + { + "abstract": { + "semantic_search": { + "model": "mixedbread-ai/mxbai-embed-large-v1", + }, + "full_text_search": {"configuration": "english"}, + }, + "body": { + "splitter": {"model": "recursive_character"}, + "semantic_search": { + "model": "mixedbread-ai/mxbai-embed-large-v1", + }, + }, + } + ) + .into(), + ), +)?; +let mut collection = Collection::new("test_collection", None)?; +collection.add_pipeline(&mut pipeline).await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +PipelineC *pipeline = pgml_pipelinec_new("test_pipeline", "{\ + \"abstract\": {\ + \"semantic_search\": {\ + \"model\": \"mixedbread-ai/mxbai-embed-large-v1\"\ + },\ + \"full_text_search\": {\"configuration\": \"english\"}\ + },\ + \"body\": {\ + \"splitter\": {\"model\": \"recursive_character\"},\ + \"semantic_search\": {\ + \"model\": \"mixedbread-ai/mxbai-embed-large-v1\"\ + }\ + }\ +}"); +CollectionC * collection = pgml_collectionc_new("test_collection", NULL); +pgml_collectionc_add_pipeline(collection, pipeline); +``` +{% endtab %} {% endtabs %} +This creates a `Pipeline` that is capable of full text search and semantic search on the `abstract` and semantic search on the `body` of documents. 
+ ## Doing Document Search {% tabs %} @@ -108,6 +160,63 @@ results = await collection.search( ) ``` {% endtab %} + + +{% tab title="Rust" %} +```rust +let results = collection + .search(serde_json::json!({ + "query": { + "full_text_search": { + "abstract": {"query": "What is the best database?", "boost": 1.2} + }, + "semantic_search": { + "abstract": { + "query": "What is the best database?", + "boost": 2.0, + }, + "body": { + "query": "What is the best database?", + "boost": 1.25, + "parameters": { + "instruction": "Represent the Wikipedia question for retrieving supporting documents: ", + }, + }, + }, + "filter": {"user_id": {"$eq": 1}}, + }, + "limit": 10, + }).into(), &mut pipeline) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +char * results = pgml_collectionc_search(collection, "{\ + \"query\": {\ + \"full_text_search\": {\ + \"abstract\": {\"query\": \"What is the best database?\", \"boost\": 1.2}\ + },\ + \"semantic_search\": {\ + \"abstract\": {\ + \"query\": \"What is the best database?\",\ + \"boost\": 2.0\ + },\ + \"body\": {\ + \"query\": \"What is the best database?\",\ + \"boost\": 1.25,\ + \"parameters\": {\ + \"instruction\": \"Represent the Wikipedia question for retrieving supporting documents: \"\ + }\ + }\ + },\ + \"filter\": {\"user_id\": {\"$eq\": 1}}\ + },\ + \"limit\": 10\ +}", pipeline); +``` +{% endtab %} {% endtabs %} Just like `vector_search`, `search` takes in two arguments. The first is a `JSON` object specifying the `query` and `limit` and the second is the `Pipeline`. The `query` object can have three fields: `full_text_search`, `semantic_search` and `filter`. Both `full_text_search` and `semantic_search` function similarly. They take in the text to compare against, titled`query`, an optional `boost` parameter used to boost the effectiveness of the ranking, and `semantic_search` also takes in an optional `parameters` key which specify parameters to pass to the embedding model when embedding the passed in text. 
diff --git a/pgml-cms/docs/api/client-sdk/search.md b/pgml-cms/docs/api/client-sdk/search.md index 3fc564c55..2d5b5ce41 100644 --- a/pgml-cms/docs/api/client-sdk/search.md +++ b/pgml-cms/docs/api/client-sdk/search.md @@ -147,6 +147,7 @@ results = await collection.vector_search( ) ``` {% endtab %} + {% tab title="Rust" %} ```rust let results = collection @@ -252,7 +253,6 @@ results = await collection.vector_search( ``` {% endtab %} -{% endtab %} {% tab title="Rust" %} ```rust let query = "What is the best database?"; @@ -363,7 +363,6 @@ results = await collection.vector_search( ``` {% endtab %} -{% endtab %} {% tab title="Rust" %} ```rust let results = collection @@ -463,7 +462,6 @@ results = await collection.vector_search( ``` {% endtab %} -{% endtab %} {% tab title="Rust" %} ```rust let results = collection @@ -585,6 +583,61 @@ results = await collection.vector_search( ) ``` {% endtab %} + +{% tab title="Rust" %} +```rust +let results = collection + .vector_search( + serde_json::json!({ + "query": { + "fields": { + "body": { + "query": "What is the best database?", + "parameters": { + "instruction": "Represent this sentence for searching relevant passages: ", + }, + }, + }, + "filter": { + "$or": [ + {"$and": [{"$eq": {"user_id": 1}}, {"$lt": {"user_score": 100}}]}, + {"special": {"$ne": true}}, + ], + }, + }, + "limit": 5, + }) + .into(), + &mut pipeline, + ) + .await?; +``` +{% endtab %} + +{% tab title="C" %} +```c +r_size = 0; +char **results = pgml_collectionc_vector_search(collection, "{\ + \"query\": {\ + \"fields\": {\ + \"body\": {\ + \"query\": \"What is the best database?\",\ + \"parameters\": {\ + \"instruction\": \"Represent this sentence for searching relevant passages: \"\ + }\ + }\ + },\ + \"filter\": {\ + \"$or\": [\ + {\"$and\": [{\"$eq\": {\"user_id\": 1}}, {\"$lt\": {\"user_score\": 100}}]},\ + {\"special\": {\"$ne\": true}}\ + ]\ + }\ + },\ + \"limit\": 5\ +}", pipeline, &r_size); +``` +{% endtab %} {% endtabs %} The above query would filter 
out all documents that do not have a key `special` with a value `True` or (have a key `user_id` equal to 1 and a key `user_score` less than 100). From b494857dc00f6935740f24e214fbbcd4b226ec95 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:05:43 -0700 Subject: [PATCH 3/4] Rust and c docs ready to go --- pgml-cms/docs/api/client-sdk/README.md | 3 ++- pgml-cms/docs/api/client-sdk/pipelines.md | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pgml-cms/docs/api/client-sdk/README.md b/pgml-cms/docs/api/client-sdk/README.md index 0ccddb9f0..5e6fc56a0 100644 --- a/pgml-cms/docs/api/client-sdk/README.md +++ b/pgml-cms/docs/api/client-sdk/README.md @@ -89,9 +89,10 @@ async def main(): {% tab title="Rust" %} ```rust use pgml::{Collection, Pipeline}; +use anyhow::Error; #[tokio::main] -async fn main() -> Result<(), Box> { +async fn main() -> Result<(), Error> { let mut collection = Collection::new("sample_collection", None)?; } ``` diff --git a/pgml-cms/docs/api/client-sdk/pipelines.md b/pgml-cms/docs/api/client-sdk/pipelines.md index 6c3ed57cd..dccf3f2b7 100644 --- a/pgml-cms/docs/api/client-sdk/pipelines.md +++ b/pgml-cms/docs/api/client-sdk/pipelines.md @@ -439,6 +439,7 @@ Enabling a `Pipeline` will cause it to automatically run on all documents it may const pipeline = pgml.newPipeline("test_pipeline") const collection = pgml.newCollection("test_collection") await collection.remove_pipeline(pipeline) +``` {% endtab %} {% tab title="Python" %} From f9803076957752a69c3d49de3792df94ad661ac7 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:24:38 -0700 Subject: [PATCH 4/4] Updated to make highlighting work --- pgml-dashboard/package-lock.json | 20 +++++++++++++++++++ pgml-dashboard/package.json | 1 + .../code_block/code_block_controller.js | 3 +++ pgml-dashboard/src/utils/markdown.rs | 2 ++ 4 files changed, 26 insertions(+) diff 
--git a/pgml-dashboard/package-lock.json b/pgml-dashboard/package-lock.json index 4fe4783c7..1da57fd91 100644 --- a/pgml-dashboard/package-lock.json +++ b/pgml-dashboard/package-lock.json @@ -5,6 +5,7 @@ "packages": { "": { "dependencies": { + "@codemirror/lang-cpp": "^6.0.2", "@codemirror/lang-javascript": "^6.2.1", "@codemirror/lang-json": "^6.0.1", "@codemirror/lang-python": "^6.1.3", @@ -46,6 +47,15 @@ "@lezer/common": "^1.1.0" } }, + "node_modules/@codemirror/lang-cpp": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-cpp/-/lang-cpp-6.0.2.tgz", + "integrity": "sha512-6oYEYUKHvrnacXxWxYa6t4puTlbN3dgV662BDfSH8+MfjQjVmP697/KYTDOqpxgerkvoNm7q5wlFMBeX8ZMocg==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/cpp": "^1.0.0" + } + }, "node_modules/@codemirror/lang-javascript": { "version": "6.2.2", "resolved": "https://registry.npmjs.org/@codemirror/lang-javascript/-/lang-javascript-6.2.2.tgz", @@ -143,6 +153,16 @@ "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.2.1.tgz", "integrity": "sha512-yemX0ZD2xS/73llMZIK6KplkjIjf2EvAHcinDi/TfJ9hS25G0388+ClHt6/3but0oOxinTcQHJLDXh6w1crzFQ==" }, + "node_modules/@lezer/cpp": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@lezer/cpp/-/cpp-1.1.2.tgz", + "integrity": "sha512-macwKtyeUO0EW86r3xWQCzOV9/CF8imJLpJlPv3sDY57cPGeUZ8gXWOWNlJr52TVByMV3PayFQCA5SHEERDmVQ==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, "node_modules/@lezer/highlight": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.0.tgz", diff --git a/pgml-dashboard/package.json b/pgml-dashboard/package.json index bc2860eaa..be19da478 100644 --- a/pgml-dashboard/package.json +++ b/pgml-dashboard/package.json @@ -3,6 +3,7 @@ "@codemirror/lang-javascript": "^6.2.1", "@codemirror/lang-python": "^6.1.3", "@codemirror/lang-rust": "^6.0.1", + "@codemirror/lang-cpp": "^6.0.2", 
"postgresml-lang-sql": "^6.6.3-5", "@codemirror/lang-json": "^6.0.1", "@codemirror/state": "^6.2.1", diff --git a/pgml-dashboard/src/components/code_block/code_block_controller.js b/pgml-dashboard/src/components/code_block/code_block_controller.js index 8817ea08c..25b06a97e 100644 --- a/pgml-dashboard/src/components/code_block/code_block_controller.js +++ b/pgml-dashboard/src/components/code_block/code_block_controller.js @@ -4,6 +4,7 @@ import { sql } from "postgresml-lang-sql"; import { python } from "@codemirror/lang-python"; import { javascript } from "@codemirror/lang-javascript"; import { rust } from "@codemirror/lang-rust"; +import { cpp } from "@codemirror/lang-cpp"; import { json } from "@codemirror/lang-json"; import { EditorView, ViewPlugin, Decoration } from "@codemirror/view"; import { RangeSetBuilder, Facet } from "@codemirror/state"; @@ -84,6 +85,8 @@ const language = (element) => { return rust; case "json": return json; + case "cpp": + return cpp; default: return null; } diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs index 3863dae2e..f55e0ee7a 100644 --- a/pgml-dashboard/src/utils/markdown.rs +++ b/pgml-dashboard/src/utils/markdown.rs @@ -208,6 +208,8 @@ impl<'a> From<&str> for CodeFence<'a> { "postgresql-line-nums" } else if options.starts_with("rust") { "rust" + } else if options.starts_with("cpp") { + "cpp" } else if options.starts_with("json") { "json" } else {