diff --git a/README.md b/README.md index 9e6a5c1a2..6d0caf828 100644 --- a/README.md +++ b/README.md @@ -195,69 +195,9 @@ Colocating the compute with the data inside the database removes one of the most -### Installation in WSL or Ubuntu +### Development -Install Python3, pip, and Pl/Python3: - -```bash -sudo apt update -sudo apt install -y postgresql-plpython3-12 python3 python3-pip -``` - -Restart the Postgres server: - -```bash -sudo service postgresql restart -``` - -Create the extension: - -```sql -CREATE EXTENSION plpython3u; -``` - -Install Scikit globally (I didn't bother setup Postgres with a virtualenv, but it's possible): - -``` -sudo pip3 install sklearn -``` - -### Run the example - -```bash -psql -f scikit_train_and_predict.sql -``` - -Example output: - -``` -psql:scikit_train_and_predict.sql:4: NOTICE: drop cascades to view scikit_train_view -DROP TABLE -CREATE TABLE -psql:scikit_train_and_predict.sql:14: NOTICE: view "scikit_train_view" does not exist, skipping -DROP VIEW -CREATE VIEW -INSERT 0 500 -CREATE FUNCTION - scikit_learn_train_example ----------------------------- - OK -(1 row) - -CREATE FUNCTION - value | weight | prediction --------+--------+------------ - 1 | 5 | 5 - 2 | 5 | 5 - 3 | 5 | 5 - 4 | 5 | 5 - 5 | 5 | 5 -(5 rows) -``` - -### Run the linear model - -Install our PgML package globally: +Follow the installation instructions to create a local working Postgres environment, then install your PgML package from the git repository: ``` cd pgml diff --git a/scikit_import.sql b/scikit_import.sql deleted file mode 100644 index 1afcc6e66..000000000 --- a/scikit_import.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS plpython3u; - -CREATE OR REPLACE FUNCTION pgml_version() -RETURNS TEXT -AS $$ - import pgml - return pgml.version() -$$ LANGUAGE plpython3u; - -SELECT pgml_version(); diff --git a/scikit_train_and_predict.sql b/scikit_train_and_predict.sql deleted file mode 100644 index 6f8b5c990..000000000 --- a/scikit_train_and_predict.sql +++ /dev/null @@ -1,71 +0,0 @@ --- --- CREATE EXTENSION --- -CREATE EXTENSION IF NOT EXISTS plpython3u; - --- --- Data table. --- -DROP TABLE IF EXISTS scikit_train_data CASCADE; -CREATE TABLE scikit_train_data ( - id BIGSERIAL PRIMARY KEY, - value BIGINT, - weight DOUBLE PRECISION -); - --- --- View of the data table, just to demonstrate that views work. --- -DROP VIEW IF EXISTS scikit_train_view; -CREATE VIEW scikit_train_view AS SELECT * FROM scikit_train_data; - --- --- Insert some dummy data into the data table. --- -INSERT INTO scikit_train_data (value, weight) SELECT generate_series(1, 500), 5.0; - - -CREATE OR REPLACE FUNCTION scikit_learn_train_example() -RETURNS BYTEA -AS $$ - from sklearn.ensemble import RandomForestClassifier - import pickle - - cursor = plpy.cursor("SELECT value, weight FROM scikit_train_view") - X = [] - y = [] - - while True: - rows = cursor.fetch(5) - if not rows: - break - for row in rows: - X.append([row["value"],]) - y.append(row["weight"]) - rfc = RandomForestClassifier() - rfc.fit(X, y) - - return pickle.dumps(rfc) - -$$ LANGUAGE plpython3u; - -; - -CREATE OR REPLACE FUNCTION scikit_learn_predict_example(model BYTEA, value INT) -RETURNS DOUBLE PRECISION -AS $$ - import pickle - - m = pickle.loads(model) - - r = m.predict([[value,]]) - return r[0] -$$ LANGUAGE plpython3u; - -WITH model as ( - SELECT scikit_learn_train_example() AS pickle -) -SELECT value, - weight, - scikit_learn_predict_example((SELECT model.pickle FROM model), value::int) AS prediction -FROM scikit_train_view LIMIT 5; diff --git a/benchmarks.sql b/sql/benchmarks.sql similarity index 100% rename from benchmarks.sql rename to sql/benchmarks.sql