From 6d18877cd191a952b97cf9286d51a88652f81831 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:46:33 -0800 Subject: [PATCH 1/4] Allow for filtering out of documents to add to search --- pgml-dashboard/src/utils/markdown.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs index 2d4deea6a..9683f1aff 100644 --- a/pgml-dashboard/src/utils/markdown.rs +++ b/pgml-dashboard/src/utils/markdown.rs @@ -21,6 +21,9 @@ use std::fmt; use std::sync::Mutex; use url::Url; +// Excluded paths in the pgml-cms directory +const EXCLUDED_DOCUMENT_PATHS: [&str; 1] = ["blog/README.md"]; + pub struct MarkdownHeadings { header_map: Arc>>, } @@ -1334,7 +1337,7 @@ impl SiteSearch { results["results"] .as_array() .context("Error getting results from search")? - .into_iter() + .iter() .map(|r| { let SearchResultWithoutSnippet { title, contents, path } = serde_json::from_value(r["document"].clone())?; @@ -1358,6 +1361,14 @@ impl SiteSearch { .map(|path| async move { Document::from_path(&path).await }), ) .await?; + let documents: Vec = documents + .into_iter() + .filter(|f| { + !EXCLUDED_DOCUMENT_PATHS + .iter() + .any(|p| f.path == config::cms_dir().join(p)) + }) + .collect(); let documents: Vec = documents .into_iter() .map(|d| { From d8281b600177a53d02b430dbbaafeaf250c92e86 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 29 Feb 2024 09:12:38 -0800 Subject: [PATCH 2/4] Debounce search by 250ms --- pgml-dashboard/static/js/search.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pgml-dashboard/static/js/search.js b/pgml-dashboard/static/js/search.js index b08237435..02bd989b9 100644 --- a/pgml-dashboard/static/js/search.js +++ b/pgml-dashboard/static/js/search.js @@ -15,11 +15,16 @@ export default class extends Controller { this.target.addEventListener('shown.bs.modal', this.focusSearchInput) this.target.addEventListener('hidden.bs.modal', this.updateSearch) this.searchInput.addEventListener('input', (e) => this.search(e)) + + this.timer; } search(e) { + clearTimeout(this.timer); const query = e.currentTarget.value - this.searchFrame.src = `/search?query=${query}` + this.timer = setTimeout(() => { + this.searchFrame.src = `/search?query=${query}` + }, 250); } focusSearchInput = (e) => { From b546878f6c84a1874234bac213984afb5e31224a Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:22:46 -0800 Subject: [PATCH 3/4] Added some better filtering on documents and a better way to show the descriptions in search results --- pgml-dashboard/src/utils/markdown.rs | 36 ++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs index 9683f1aff..a64f321e1 100644 --- a/pgml-dashboard/src/utils/markdown.rs +++ b/pgml-dashboard/src/utils/markdown.rs @@ -1,6 +1,7 @@ use crate::api::cms::{DocType, Document}; use crate::{templates::docs::TocLink, utils::config}; use anyhow::Context; +use comrak::{format_html_with_plugins, parse_document, ComrakPlugins}; use std::cell::RefCell; use std::collections::HashMap; use std::path::PathBuf; @@ -1341,14 +1342,29 @@ impl SiteSearch { .map(|r| { let SearchResultWithoutSnippet { title, contents, path } = serde_json::from_value(r["document"].clone())?; + let snippet = if let Some(description) = r["document"]["description"].as_str().map(|t| t.to_owned()) { + description + } else { + let title = r["document"]["title"].as_str().unwrap_or("xzxzxz"); + let author = r["document"]["title"].as_str().unwrap_or("xzxzxz"); + // The heuristics used here are ok, not the best it will be better when we can just use the description field + contents + .lines() + .take(100) + .filter(|l| !l.is_empty() && !l.contains(title) && !l.contains(author) && l.len() > 30) + .take(1) + .collect::>() + .join("") + .split(' ') + .take(20) + .collect::>() + .join(" ") + + " ..." + }; let path = path .replace(".md", "") .replace(&config::static_dir().display().to_string(), ""); - Ok(SearchResult { - title, - path, - snippet: contents.split(' ').take(20).collect::>().join(" ") + " ...", - }) + Ok(SearchResult { title, path, snippet }) }) .collect() } @@ -1361,12 +1377,22 @@ impl SiteSearch { .map(|path| async move { Document::from_path(&path).await }), ) .await?; + // Filter out documents who only have 1 line (this is usually just an empty document with the title as the first line) + // and documents that are in our excluded paths list let documents: Vec = documents .into_iter() .filter(|f| { !EXCLUDED_DOCUMENT_PATHS .iter() .any(|p| f.path == config::cms_dir().join(p)) + && !f + .contents + .lines() + .skip(1) + .collect::>() + .join("") + .trim() + .is_empty() }) .collect(); let documents: Vec = documents From fa2199223564612dd8ebed3bd6531582d071853f Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Fri, 1 Mar 2024 11:01:46 -0800 Subject: [PATCH 4/4] Changed return type of search --- pgml-dashboard/src/api/cms.rs | 36 +++++++++++++++++++++++++++- pgml-dashboard/src/utils/markdown.rs | 35 ++++----------------------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/pgml-dashboard/src/api/cms.rs b/pgml-dashboard/src/api/cms.rs index ee1060d02..56e46757a 100644 --- a/pgml-dashboard/src/api/cms.rs +++ b/pgml-dashboard/src/api/cms.rs @@ -16,7 +16,7 @@ use crate::{ guards::Cluster, responses::{Response, ResponseOk, Template}, templates::docs::*, - utils::config, + utils::{config, markdown::SearchResult}, }; use serde::{Deserialize, Serialize}; use std::fmt; @@ -561,6 +561,40 @@ impl Collection { #[get("/search?", rank = 20)] async fn search(query: &str, site_search: &State) -> ResponseOk { let results = site_search.search(query, None).await.expect("Error performing search"); + + let results: Vec = results + .into_iter() + .map(|document| { + let snippet = if let Some(description) = document.description { + description + } else { + let author = document.author.unwrap_or_else(|| String::from("xzxzxz")); + // The heuristics used here are ok, not the best it will be better when we can just use the description field + document + .contents + .lines() + .find(|l| !l.is_empty() && !l.contains(&document.title) && !l.contains(&author) && l.len() > 30) + .unwrap_or("") + .split(' ') + .take(20) + .collect::>() + .join(" ") + + " ..." + }; + let path = document + .path + .to_str() + .unwrap_or_default() + .replace(".md", "") + .replace(&config::static_dir().display().to_string(), ""); + SearchResult { + title: document.title, + path, + snippet, + } + }) + .collect(); + ResponseOk( Template(Search { query: query.to_string(), diff --git a/pgml-dashboard/src/utils/markdown.rs b/pgml-dashboard/src/utils/markdown.rs index a64f321e1..5a674e836 100644 --- a/pgml-dashboard/src/utils/markdown.rs +++ b/pgml-dashboard/src/utils/markdown.rs @@ -1295,7 +1295,7 @@ impl SiteSearch { .collect() } - pub async fn search(&self, query: &str, doc_type: Option) -> anyhow::Result> { + pub async fn search(&self, query: &str, doc_type: Option) -> anyhow::Result> { let mut search = serde_json::json!({ "query": { // "full_text_search": { @@ -1327,10 +1327,8 @@ impl SiteSearch { "limit": 10 }); if let Some(doc_type) = doc_type { - search["query"]["filter"] = serde_json::json!({ - "doc_type": { - "$eq": doc_type - } + search["query"]["filter"]["doc_type"] = serde_json::json!({ + "$eq": doc_type }); } let results = self.collection.search_local(search.into(), &self.pipeline).await?; @@ -1340,31 +1338,8 @@ impl SiteSearch { .context("Error getting results from search")? .iter() .map(|r| { - let SearchResultWithoutSnippet { title, contents, path } = - serde_json::from_value(r["document"].clone())?; - let snippet = if let Some(description) = r["document"]["description"].as_str().map(|t| t.to_owned()) { - description - } else { - let title = r["document"]["title"].as_str().unwrap_or("xzxzxz"); - let author = r["document"]["title"].as_str().unwrap_or("xzxzxz"); - // The heuristics used here are ok, not the best it will be better when we can just use the description field - contents - .lines() - .take(100) - .filter(|l| !l.is_empty() && !l.contains(title) && !l.contains(author) && l.len() > 30) - .take(1) - .collect::>() - .join("") - .split(' ') - .take(20) - .collect::>() - .join(" ") - + " ..." - }; - let path = path - .replace(".md", "") - .replace(&config::static_dir().display().to_string(), ""); - Ok(SearchResult { title, path, snippet }) + let document: Document = serde_json::from_value(r["document"].clone())?; + Ok(document) }) .collect() }