11use crate :: api:: cms:: { DocType , Document } ;
22use crate :: { templates:: docs:: TocLink , utils:: config} ;
33use anyhow:: Context ;
4+ use comrak:: { format_html_with_plugins, parse_document, ComrakPlugins } ;
45use std:: cell:: RefCell ;
56use std:: collections:: HashMap ;
67use std:: path:: PathBuf ;
@@ -21,6 +22,9 @@ use std::fmt;
2122use std:: sync:: Mutex ;
2223use url:: Url ;
2324
25+ // Excluded paths in the pgml-cms directory
26+ const EXCLUDED_DOCUMENT_PATHS : [ & str ; 1 ] = [ "blog/README.md" ] ;
27+
2428pub struct MarkdownHeadings {
2529 header_map : Arc < Mutex < HashMap < String , usize > > > ,
2630}
@@ -1291,7 +1295,7 @@ impl SiteSearch {
12911295 . collect ( )
12921296 }
12931297
1294- pub async fn search ( & self , query : & str , doc_type : Option < DocType > ) -> anyhow:: Result < Vec < SearchResult > > {
1298+ pub async fn search ( & self , query : & str , doc_type : Option < DocType > ) -> anyhow:: Result < Vec < Document > > {
12951299 let mut search = serde_json:: json!( {
12961300 "query" : {
12971301 // "full_text_search": {
@@ -1323,29 +1327,19 @@ impl SiteSearch {
13231327 "limit" : 10
13241328 } ) ;
13251329 if let Some ( doc_type) = doc_type {
1326- search[ "query" ] [ "filter" ] = serde_json:: json!( {
1327- "doc_type" : {
1328- "$eq" : doc_type
1329- }
1330+ search[ "query" ] [ "filter" ] [ "doc_type" ] = serde_json:: json!( {
1331+ "$eq" : doc_type
13301332 } ) ;
13311333 }
13321334 let results = self . collection . search_local ( search. into ( ) , & self . pipeline ) . await ?;
13331335
13341336 results[ "results" ]
13351337 . as_array ( )
13361338 . context ( "Error getting results from search" ) ?
1337- . into_iter ( )
1339+ . iter ( )
13381340 . map ( |r| {
1339- let SearchResultWithoutSnippet { title, contents, path } =
1340- serde_json:: from_value ( r[ "document" ] . clone ( ) ) ?;
1341- let path = path
1342- . replace ( ".md" , "" )
1343- . replace ( & config:: static_dir ( ) . display ( ) . to_string ( ) , "" ) ;
1344- Ok ( SearchResult {
1345- title,
1346- path,
1347- snippet : contents. split ( ' ' ) . take ( 20 ) . collect :: < Vec < & str > > ( ) . join ( " " ) + " ..." ,
1348- } )
1341+ let document: Document = serde_json:: from_value ( r[ "document" ] . clone ( ) ) ?;
1342+ Ok ( document)
13491343 } )
13501344 . collect ( )
13511345 }
@@ -1358,6 +1352,24 @@ impl SiteSearch {
13581352 . map ( |path| async move { Document :: from_path ( & path) . await } ) ,
13591353 )
13601354 . await ?;
1355+ // Filter out documents that have only 1 line (this is usually just an empty document with the title as the first line)
1356+ // and documents that are in our excluded paths list
1357+ let documents: Vec < Document > = documents
1358+ . into_iter ( )
1359+ . filter ( |f| {
1360+ !EXCLUDED_DOCUMENT_PATHS
1361+ . iter ( )
1362+ . any ( |p| f. path == config:: cms_dir ( ) . join ( p) )
1363+ && !f
1364+ . contents
1365+ . lines ( )
1366+ . skip ( 1 )
1367+ . collect :: < Vec < & str > > ( )
1368+ . join ( "" )
1369+ . trim ( )
1370+ . is_empty ( )
1371+ } )
1372+ . collect ( ) ;
13611373 let documents: Vec < pgml:: types:: Json > = documents
13621374 . into_iter ( )
13631375 . map ( |d| {
0 commit comments