🌐 AI搜索 & 代理 主页
Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6c14956
Remote save
SilasMarvin Jul 14, 2023
3b2e3b5
Working remote embeddings
SilasMarvin Jul 17, 2023
f1d6bf7
Compiling
SilasMarvin Jul 19, 2023
22f280e
Commit before moving everything to lazy
SilasMarvin Jul 19, 2023
ec090ca
Working lazy python
SilasMarvin Jul 21, 2023
58c01a3
Commit before moving adjusting Javascript macros
SilasMarvin Jul 21, 2023
9e7b146
Working javascript sdk
SilasMarvin Jul 22, 2023
abb4f5e
Working javascript sdk
SilasMarvin Jul 26, 2023
76ccf3a
The start of working pipelines
SilasMarvin Jul 28, 2023
cfcc66b
Working pipelines in python
SilasMarvin Aug 3, 2023
a9dcbc9
Uncomment
SilasMarvin Aug 3, 2023
8b48750
Added to_dict function
SilasMarvin Aug 3, 2023
6e3f1e6
Small changes and prep for progress bars
SilasMarvin Aug 4, 2023
5365557
Working progress bars and many other small but exciting things
SilasMarvin Aug 4, 2023
66476ff
Prepping to push to test pypi
SilasMarvin Aug 7, 2023
f2613d7
Prepping for javascript
SilasMarvin Aug 8, 2023
92c9623
Improvements to javascript and updates to the python sdk deploy script
SilasMarvin Aug 8, 2023
8a4e3cf
Prepping for real tests
SilasMarvin Aug 8, 2023
12bb3a8
Updated sql
SilasMarvin Aug 9, 2023
2b5b68b
Python examples translated to use pipelines
SilasMarvin Aug 9, 2023
447fc80
Mostly cleaned up and documented crate, and cleaned up python README …
SilasMarvin Aug 10, 2023
333c5e6
Ready for test deployments
SilasMarvin Aug 10, 2023
11bcce2
Updated manual build file for python
SilasMarvin Aug 10, 2023
845bf02
Build fast
SilasMarvin Aug 11, 2023
4904a1a
Small tweaks
SilasMarvin Aug 11, 2023
64dc7e2
Prepping for another test release
SilasMarvin Aug 11, 2023
c3b274c
Prepping to expand query_builder
SilasMarvin Aug 11, 2023
cb143a5
Massive cleanups to macros
SilasMarvin Aug 11, 2023
c66b07b
Massive cleanups to macros
SilasMarvin Aug 11, 2023
b7d4c2d
Ready to release
SilasMarvin Aug 11, 2023
a2c87b1
Formatting
SilasMarvin Aug 13, 2023
dd9c3ab
Renamed files
SilasMarvin Aug 21, 2023
5568608
Added removed file
SilasMarvin Aug 21, 2023
4a2e98d
Removed unnecessary file
SilasMarvin Aug 21, 2023
e673af4
Updated sdk version to 0.9
SilasMarvin Aug 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Massive cleanups to macros
  • Loading branch information
SilasMarvin committed Aug 21, 2023
commit cb143a5229837604c5fc6fc81d9c29b3876655f4
17 changes: 10 additions & 7 deletions pgml-sdks/rust/pgml/javascript/tests/typescript-tests/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ it("can vector search with local embeddings", async () => {
let model = pgml.newModel();
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline("test_j_p_cvswle_0", model, splitter);
let collection = pgml.newCollection("test_j_c_cvswle_2");
let collection = pgml.newCollection("test_j_c_cvswle_3");
await collection.upsert_documents(generate_dummy_documents(3));
await collection.add_pipeline(pipeline);
let results = await collection.vector_search("Here is some query", pipeline);
Expand All @@ -86,33 +86,36 @@ it("can vector search with remote embeddings", async() => {
let model = pgml.newModel("text-embedding-ada-002", "openai");
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline("test_j_p_cvswre_0", model, splitter);
let collection = pgml.newCollection("test_j_c_cvswre_0");
let collection = pgml.newCollection("test_j_c_cvswre_1");
await collection.upsert_documents(generate_dummy_documents(3));
await collection.add_pipeline(pipeline);
let results = await collection.vector_search("Here is some query", pipeline);
expect(results).toHaveLength(3);
await collection.archive();
});

it("can vector search with query builder", async() => {
let model = pgml.newModel();
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline("test_j_p_cvswqb_0", model, splitter);
let collection = pgml.newCollection("test_j_c_cvswqb_0");
let collection = pgml.newCollection("test_j_c_cvswqb_1");
await collection.upsert_documents(generate_dummy_documents(3));
await collection.add_pipeline(pipeline);
let results = await collection.query().vector_recall("Here is some query", pipeline).limit(10).run();
let results = await collection.query().vector_recall("Here is some query", pipeline).limit(10).fetch_all();
expect(results).toHaveLength(3);
await collection.archive();
});

it("can vector search with query builder with remote embeddings", async() => {
let model = pgml.newModel("text-embedding-ada-002", "openai");
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline("test_j_p_cvswqbwre_0", model, splitter);
let collection = pgml.newCollection("test_j_c_cvswqbwre_0");
let collection = pgml.newCollection("test_j_c_cvswqbwre_1");
await collection.upsert_documents(generate_dummy_documents(3));
await collection.add_pipeline(pipeline);
let results = await collection.query().vector_recall("Here is some query", pipeline).limit(10).run();
let results = await collection.query().vector_recall("Here is some query", pipeline).limit(10).fetch_all();
expect(results).toHaveLength(3);
await collection.archive();
});


Expand All @@ -124,7 +127,7 @@ it("pipeline to dict", async () => {
let model = pgml.newModel("text-embedding-ada-002", "openai");
let splitter = pgml.newSplitter();
let pipeline = pgml.newPipeline("test_j_p_ptd_0", model, splitter);
let collection = pgml.newCollection("test_j_c_ptd_0");
let collection = pgml.newCollection("test_j_c_ptd_1");
await collection.add_pipeline(pipeline);
let pipeline_dict = await pipeline.to_dict();
console.log(JSON.stringify(pipeline_dict))
Expand Down
26 changes: 7 additions & 19 deletions pgml-sdks/rust/pgml/python/pgml/pgml.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,12 @@ DateTime = int
from typing import List, Dict, Optional, Self, Any


# Top of file key: A12BECOD!
from typing import List, Dict, Optional, Self, Any


class Builtins:
def __init__(self, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self
...
def query(self, query: str) -> QueryRunner
...
async def transform(self, task: Json, inputs: List[str], args: Optional[Json] = {}) -> Json
...

class Builtins:
def __init__(self, database_url: Optional[str] = "Default set in Rust. Please check the documentation.") -> Self
...
def query(self, query: str) -> QueryRunner
...
async def transform(self, task: Json, inputs: List[str], args: Optional[Json] = {}) -> Json
async def transform(self, task: Json, inputs: List[str], args: Optional[Json] = Any) -> Json
...

class Collection:
Expand All @@ -43,7 +31,7 @@ class Collection:
...
async def get_documents(self, last_id: Optional[int] = 1, limit: Optional[int] = 1) -> List[Json]
...
async def vector_search(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = {}, top_k: Optional[int] = 1) -> List[tuple[float, str, Json]]
async def vector_search(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = Any, top_k: Optional[int] = 1) -> List[tuple[float, str, Json]]
...
async def archive(self) -> None
...
Expand All @@ -57,13 +45,13 @@ class Collection:
...

class Model:
def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", source: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = {}) -> Self
def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", source: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = Any) -> Self
...

class Pipeline:
def __init__(self, name: str, model: Optional[Model] = None, splitter: Optional[Splitter] = None, parameters: Optional[Json] = {}) -> Self
def __init__(self, name: str, model: Optional[Model] = Any, splitter: Optional[Splitter] = Any, parameters: Optional[Json] = Any) -> Self
...
async def get_status(self) -> Any
async def get_status(self) -> PipelineSyncData
...
async def to_dict(self) -> Json
...
Expand All @@ -73,7 +61,7 @@ class QueryBuilder:
...
def filter(self, filter: Json) -> Self
...
def vector_recall(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = {}) -> Self
def vector_recall(self, query: str, pipeline: Pipeline, query_parameters: Optional[Json] = Any) -> Self
...
async def fetch_all(self) -> List[tuple[float, str, Json]]
...
Expand All @@ -97,5 +85,5 @@ class QueryRunner:
...

class Splitter:
def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = {}) -> Self
def __init__(self, name: Optional[str] = "Default set in Rust. Please check the documentation.", parameters: Optional[Json] = Any) -> Self
...
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/builtins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::python::*, query_runner::QueryRunnerPython};
use crate::{languages::python::*, query_runner::QueryRunnerPython, types::JsonPython};

#[custom_methods(new, query, transform)]
impl Builtins {
Expand Down
5 changes: 4 additions & 1 deletion pgml-sdks/rust/pgml/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ use crate::utils;
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::python::*, pipeline::PipelinePython, query_builder::QueryBuilderPython};
use crate::{
types::JsonPython, languages::python::*, pipeline::PipelinePython,
query_builder::QueryBuilderPython,
};

/// Our project tasks
#[derive(Debug, Clone)]
Expand Down
39 changes: 20 additions & 19 deletions pgml-sdks/rust/pgml/src/languages/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,12 @@ use pyo3::types::{PyDict, PyFloat, PyInt, PyList, PyString};
use pyo3::{prelude::*, types::PyBool};
use std::collections::HashMap;

use crate::{
pipeline::PipelineSyncData,
types::{DateTime, Json},
};
use crate::{pipeline::PipelineSyncData, types::Json};

////////////////////////////////////////////////////////////////////////////////
// Rust to PY //////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

/// Exposes a `DateTime` to Python as the value returned by `timestamp()` on its inner field.
impl ToPyObject for DateTime {
    fn to_object(&self, py: Python) -> PyObject {
        // NOTE(review): presumably seconds since the Unix epoch — confirm against the wrapped type.
        let stamp = self.0.timestamp();
        stamp.to_object(py)
    }
}

impl IntoPy<PyObject> for DateTime {
fn into_py(self, py: Python) -> PyObject {
self.to_object(py)
}
}

impl ToPyObject for Json {
fn to_object(&self, py: Python) -> PyObject {
match &self.0 {
Expand Down Expand Up @@ -65,6 +50,18 @@ impl IntoPy<PyObject> for Json {
}
}

impl ToPyObject for PipelineSyncData {
fn to_object(&self, py: Python) -> PyObject {
Json::from(self.clone()).to_object(py)
}
}

impl IntoPy<PyObject> for PipelineSyncData {
fn into_py(self, py: Python) -> PyObject {
self.to_object(py)
}
}

////////////////////////////////////////////////////////////////////////////////
// PY to Rust //////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -107,6 +104,13 @@ impl FromPyObject<'_> for Json {
}
}

/// Extracts a `PipelineSyncData` from a Python object by first extracting a `Json`.
impl FromPyObject<'_> for PipelineSyncData {
    fn extract(ob: &PyAny) -> PyResult<Self> {
        // An extraction error is returned as-is; a successful `Json` is converted via `From<Json>`.
        Json::extract(ob).map(Self::from)
    }
}

////////////////////////////////////////////////////////////////////////////////
// Rust to Rust //////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -196,9 +200,6 @@ gen_custom_into!(());

gen_custom_into!(bool);

gen_custom_into!(Json);
gen_custom_into!(DateTime);

gen_custom_into!(i8);
gen_custom_into!(i16);
gen_custom_into!(i32);
Expand Down
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::languages::python::*;
use crate::{languages::python::*, types::JsonPython};

/// A few notes on the following enums:
/// - Sqlx does provide type derivation for enums, but it's not very good
Expand Down
31 changes: 27 additions & 4 deletions pgml-sdks/rust/pgml/src/pipeline.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Context;
use indicatif::MultiProgress;
use pgml_macros::{custom_derive, custom_methods};
use pgml_macros::{custom_derive, custom_methods, pgml_alias};
use sqlx::{Executor, PgConnection, PgPool};
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::Relaxed;
Expand All @@ -22,7 +22,9 @@ use crate::{
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::python::*, model::ModelPython, splitter::SplitterPython};
use crate::{
languages::python::*, model::ModelPython, splitter::SplitterPython, types::JsonPython,
};

#[derive(Debug, Clone)]
pub struct InvividualSyncStatus {
Expand All @@ -42,7 +44,17 @@ impl From<InvividualSyncStatus> for Json {
}
}

#[derive(Debug, Clone)]
/// Rebuilds an `InvividualSyncStatus` from JSON carrying `synced`, `not_synced` and `total`.
///
/// # Panics
///
/// Panics if `as_i64()` returns `None` for any of the three fields.
impl From<Json> for InvividualSyncStatus {
    fn from(value: Json) -> Self {
        // Fields are read in declaration order, so the first failing field is the one reported.
        let synced = value["synced"]
            .as_i64()
            .expect("The synced field is not an integer");
        let not_synced = value["not_synced"]
            .as_i64()
            .expect("The not_synced field is not an integer");
        let total = value["total"]
            .as_i64()
            .expect("The total field is not an integer");

        Self {
            synced,
            not_synced,
            total,
        }
    }
}

#[derive(pgml_alias, Debug, Clone)]
pub struct PipelineSyncData {
pub chunks_status: InvividualSyncStatus,
pub embeddings_status: InvividualSyncStatus,
Expand All @@ -60,6 +72,16 @@ impl From<PipelineSyncData> for Json {
}
}

impl From<Json> for PipelineSyncData {
fn from(mut value: Json) -> Self {
Self {
chunks_status: Json::from(std::mem::take(&mut value["chunks_status"])).into(),
embeddings_status: Json::from(std::mem::take(&mut value["embeddings_status"])).into(),
tsvectors_status: Json::from(std::mem::take(&mut value["tsvectors_status"])).into(),
}
}
}

#[derive(Debug, Clone)]
pub struct PipelineDatabaseData {
pub id: i64,
Expand Down Expand Up @@ -168,7 +190,8 @@ impl Pipeline {
format!("{}.chunks", project_name)
))
.bind(database_data.splitter_id)
.fetch_one(&pool).await?;
.fetch_one(&pool)
.await?;
let embeddings_status = InvividualSyncStatus {
synced: embeddings_status.0.unwrap_or(0),
not_synced: embeddings_status.1.unwrap_or(0) - embeddings_status.0.unwrap_or(0),
Expand Down
8 changes: 2 additions & 6 deletions pgml-sdks/rust/pgml/src/query_builder/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::{
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::{languages::python::*, pipeline::PipelinePython};
use crate::{languages::python::*, pipeline::PipelinePython, types::JsonPython};

#[derive(Clone)]
enum SIden<'a> {
Expand Down Expand Up @@ -53,9 +53,7 @@ impl IntoTableNameAndSchema for String {
}

#[derive(Clone, Debug)]
struct QueryBuilderState {

}
struct QueryBuilderState {}

#[derive(custom_derive, Clone, Debug)]
pub struct QueryBuilder {
Expand Down Expand Up @@ -265,8 +263,6 @@ impl QueryBuilder {
Err(e) => match e.as_database_error() {
Some(d) => {
if d.code() == Some(Cow::from("XX000")) {
println!("WHAT: {:?}", d);

// Explicitly get and set the model
let project_info = self.collection.get_project_info().await?;
let pipeline = self
Expand Down
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/query_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{get_or_initialize_pool, types::Json};
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::languages::python::*;
use crate::{languages::python::*, types::JsonPython};

#[derive(Clone, Debug)]
enum BindValue {
Expand Down
2 changes: 1 addition & 1 deletion pgml-sdks/rust/pgml/src/splitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
use crate::languages::javascript::*;

#[cfg(feature = "python")]
use crate::languages::python::*;
use crate::{languages::python::*, types::JsonPython};

#[derive(Debug, Clone)]
pub(crate) struct SplitterDatabaseData {
Expand Down
6 changes: 5 additions & 1 deletion pgml-sdks/rust/pgml/src/types.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use pgml_macros::pgml_alias;
use serde::Serialize;
use std::ops::{Deref, DerefMut};

#[cfg(feature = "python")]
use crate::languages::python::*;

/// A wrapper around serde_json::Value
// #[derive(sqlx::Type, sqlx::FromRow, Debug)]
#[derive(sqlx::Type, Debug, Clone)]
#[derive(pgml_alias, sqlx::Type, Debug, Clone)]
#[sqlx(transparent)]
pub struct Json(pub serde_json::Value);

Expand Down