diff --git a/src/main/java/de/unknownreality/dataframe/DataFrameColumn.java b/src/main/java/de/unknownreality/dataframe/DataFrameColumn.java index 08b7fb1..9b6a850 100644 --- a/src/main/java/de/unknownreality/dataframe/DataFrameColumn.java +++ b/src/main/java/de/unknownreality/dataframe/DataFrameColumn.java @@ -25,6 +25,7 @@ package de.unknownreality.dataframe; import de.unknownreality.dataframe.common.Row; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.transform.ColumnDataFrameTransform; import de.unknownreality.dataframe.transform.ColumnTransform; import de.unknownreality.dataframe.type.ValueType; @@ -44,7 +45,15 @@ public abstract class DataFrameColumn> implem private String name; private DefaultDataFrame dataFrame; private boolean dataFrameAppend = false; + private final ColumnSettings settings = new ColumnSettings(); + protected DataFrameColumn(String name) { + this.name = name; + } + + public ColumnSettings getSettings() { + return settings; + } /** * Used to return the right column type for @@ -53,6 +62,19 @@ public abstract class DataFrameColumn> implem */ protected abstract C getThis(); + /** + * Returns the index of this column in the header of its dataframe. + * Returns null if the column is not added to a dataframe. + * + * @return index of column + */ + public Integer getColumnIndex() { + if (dataFrame == null) { + return null; + } + return dataFrame.getHeader().getIndex(getName()); + } + /** * Sets the capacity of this column. * Can be used during dataframe creation if the size is known. 
diff --git a/src/main/java/de/unknownreality/dataframe/DataFrameConverter.java b/src/main/java/de/unknownreality/dataframe/DataFrameConverter.java index 0ee9275..8a21f16 100644 --- a/src/main/java/de/unknownreality/dataframe/DataFrameConverter.java +++ b/src/main/java/de/unknownreality/dataframe/DataFrameConverter.java @@ -31,6 +31,7 @@ import de.unknownreality.dataframe.filter.FilterPredicate; import de.unknownreality.dataframe.io.ColumnInformation; import de.unknownreality.dataframe.io.DataIterator; +import de.unknownreality.dataframe.settings.DataFrameSettings; import de.unknownreality.dataframe.type.DataFrameTypeManager; import de.unknownreality.dataframe.type.ValueType; import org.slf4j.Logger; @@ -71,8 +72,31 @@ private DataFrameConverter() { * @param filterPredicate row filter * @return created data frame */ - public static > DataFrame fromDataIterator(DataIterator dataIterator, FilterPredicate filterPredicate) { - return fromDataIterator(dataIterator, null, filterPredicate); + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + FilterPredicate filterPredicate + ) { + return fromDataIterator(dataIterator, null, new DataFrameSettings(), filterPredicate); + } + + /** + * Converts a parent data container to a data frame. + * The required column information is provided by a column information object. + * Column information specified by the dataIterator is used. 
+ * Only rows validated by the filter are appended to the resulting data frame + * + * @param row type + * @param dataIterator parent data container + * @param dataFrameSettings column settings + * @param filterPredicate row filter + * @return created data frame + */ + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + DataFrameSettings dataFrameSettings, + FilterPredicate filterPredicate + ) { + return fromDataIterator(dataIterator, null, dataFrameSettings, filterPredicate); } /** @@ -87,8 +111,40 @@ private DataFrameConverter() { * @param filterPredicate row filter * @return created data frame */ - public static > DataFrame fromDataIterator(DataIterator dataIterator, List columnsInformation, FilterPredicate filterPredicate) { - return fromDataIterator(dataIterator, -1, columnsInformation, filterPredicate); + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + List columnsInformation, + FilterPredicate filterPredicate + ) { + return fromDataIterator( + dataIterator, + -1, + columnsInformation, + new DataFrameSettings(), + filterPredicate + ); + } + + /** + * Converts a parent data container to a data frame. + * The required column information is provided by a column information object. + * If no column information is defined, the one specified by the dataIterator is used. 
+ * Only rows validated by the filter are appended to the resulting data frame + * + * @param row type + * @param dataIterator parent data container + * @param columnsInformation column information + * @param dataFrameSettings column settings + * @param filterPredicate row filter + * @return created data frame + */ + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + List columnsInformation, + DataFrameSettings dataFrameSettings, + FilterPredicate filterPredicate + ) { + return fromDataIterator(dataIterator, -1, columnsInformation, dataFrameSettings, filterPredicate); } /** @@ -105,7 +161,37 @@ private DataFrameConverter() { * @return created data frame */ @SuppressWarnings("unchecked") - public static > DataFrame fromDataIterator(DataIterator dataIterator, int expectedSize, List columnsInformation, FilterPredicate filterPredicate) { + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + int expectedSize, + List columnsInformation, + FilterPredicate filterPredicate + ) { + return fromDataIterator(dataIterator, expectedSize, columnsInformation, new DataFrameSettings(), filterPredicate); + } + + /** + * Converts a parent data container to a data frame. + * The required column information is provided by a column information object. + * If no column information is defined, the one specified by the dataIterator is used. 
+ * Only rows validated by the filter are appended to the resulting data frame + * + * @param row type + * @param dataIterator parent data container + * @param expectedSize expected size of the resulting dataframe + * @param columnsInformation column information + * @param dataFrameSettings column settings + * @param filterPredicate row filter + * @return created data frame + */ + @SuppressWarnings("unchecked") + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + int expectedSize, + List columnsInformation, + DataFrameSettings dataFrameSettings, + FilterPredicate filterPredicate + ) { if (columnsInformation == null) { columnsInformation = new ArrayList<>(dataIterator.getColumnsInformation()); @@ -129,6 +215,7 @@ private DataFrameConverter() { } col.setName(columnInformation.getName()); dataFrame.addColumn(col); + dataFrameSettings.applyToColumn(col); columns[i] = col; autodetect[i] = columnInformation.isAutodetect() && columnInformation.getColumnType().equals(StringColumn.class); @@ -173,7 +260,7 @@ private DataFrameConverter() { r++; } if (hasAutodetect) { - replaceAutodetectColumns(dataFrame, valueTypes, autodetect, types); + replaceAutodetectColumns(dataFrame, valueTypes, autodetect, types, dataFrameSettings); if (filterPredicate != null && filterPredicate != FilterPredicate.EMPTY_FILTER) { dataFrame.filter(filterPredicate); } @@ -207,8 +294,13 @@ private static boolean doSample(int row) { return row % 10000000 == 0; } - private static void replaceAutodetectColumns(DataFrame dataFrame, ValueType[] valueTypes, - boolean[] autodetect, boolean[][] types) { + private static void replaceAutodetectColumns( + DataFrame dataFrame, + ValueType[] valueTypes, + boolean[] autodetect, + boolean[][] types, + DataFrameSettings dataFrameSettings + ) { DataFrameColumn[] newColumns = new DataFrameColumn[autodetect.length]; List columnNames = new ArrayList<>(dataFrame.getColumnNames()); for (int i = 0; i < autodetect.length; i++) { @@ -227,6 +319,9 @@ 
private static void replaceAutodetectColumns(DataFrame dataFrame, ValueType[] newColumn.setName(columnNames.get(i)); newColumn.setCapacity(dataFrame.size()); newColumns[i] = newColumn; + DataFrameColumn oldColumn = dataFrame.getColumn(columnNames.get(i)); + oldColumn.getSettings().applyTo(newColumn.getSettings()); + dataFrameSettings.applyToColumn(newColumn); } } String currentVal; @@ -269,12 +364,16 @@ private static void replaceAutodetectColumns(DataFrame dataFrame, ValueType[] * Keys in this map are name of the column in the parent data container. * Values are the corresponding data frame columns. * - * @param row type - * @param dataIterator parent data container + * @param row type + * @param dataIterator parent data container + * @param dataFrameSettings column settings * @return created data frame */ - public static > DataFrame fromDataIterator(DataIterator dataIterator) { - return fromDataIterator(dataIterator, FilterPredicate.EMPTY_FILTER); + public static > DataFrame fromDataIterator( + DataIterator dataIterator, + DataFrameSettings dataFrameSettings + ) { + return fromDataIterator(dataIterator, dataFrameSettings, FilterPredicate.EMPTY_FILTER); } private static ValueType[] getValueTypes() { diff --git a/src/main/java/de/unknownreality/dataframe/DataFrameLoader.java b/src/main/java/de/unknownreality/dataframe/DataFrameLoader.java index bde5954..ffce9e6 100644 --- a/src/main/java/de/unknownreality/dataframe/DataFrameLoader.java +++ b/src/main/java/de/unknownreality/dataframe/DataFrameLoader.java @@ -523,7 +523,7 @@ public static DataFrame load(Reader r, DataReader reader) { * @return resulting dataframe */ public static DataFrame load(DataIterator dataIterator) { - return DataFrameConverter.fromDataIterator(dataIterator, FilterPredicate.EMPTY_FILTER); + return load(dataIterator, FilterPredicate.EMPTY_FILTER); } /** @@ -537,7 +537,6 @@ public static DataFrame load(DataIterator dataIterator, FilterPredicate predi return 
DataFrameConverter.fromDataIterator(dataIterator, predicate); } - /** * Loads a data frame from a file. * The matching data frame meta file must be present. @@ -666,7 +665,6 @@ public static DataFrame loadResource(String path, String metaPath, ClassLoader c return reader; } - /** * Loads a data frame from a resource and the corresponding meta resource. * @@ -678,6 +676,4 @@ public static DataFrame loadResource(String path, String metaPath, ClassLoader c public static DataFrame loadResource(String path, String metaPath, ClassLoader classLoader) { return loadResource(path, metaPath, classLoader, FilterPredicate.EMPTY_FILTER); } - - } diff --git a/src/main/java/de/unknownreality/dataframe/column/BasicColumn.java b/src/main/java/de/unknownreality/dataframe/column/BasicColumn.java index 8a98bb4..a47591b 100644 --- a/src/main/java/de/unknownreality/dataframe/column/BasicColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/BasicColumn.java @@ -46,8 +46,8 @@ public abstract class BasicColumn> extends DataFr @SuppressWarnings("unchecked") public BasicColumn(String name, Class cl) { + super(name); this.size = 0; - setName(name); values = (T[]) Array.newInstance(cl, INIT_SIZE); } @@ -56,8 +56,8 @@ public BasicColumn(Class cl) { } public BasicColumn(String name, T[] values, int size) { + super(name); this.values = values; - setName(name); this.size = size; } diff --git a/src/main/java/de/unknownreality/dataframe/column/BooleanColumn.java b/src/main/java/de/unknownreality/dataframe/column/BooleanColumn.java index 69020be..1dadb7d 100644 --- a/src/main/java/de/unknownreality/dataframe/column/BooleanColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/BooleanColumn.java @@ -33,7 +33,7 @@ */ public class BooleanColumn extends BasicColumn { - private final BooleanType valueType = new BooleanType(); + private final BooleanType valueType = new BooleanType(getSettings()); public BooleanColumn() { super(Boolean.class); diff --git 
a/src/main/java/de/unknownreality/dataframe/column/ByteColumn.java b/src/main/java/de/unknownreality/dataframe/column/ByteColumn.java index 4099208..1c38d37 100644 --- a/src/main/java/de/unknownreality/dataframe/column/ByteColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/ByteColumn.java @@ -33,7 +33,7 @@ */ public class ByteColumn extends NumberColumn { - private static final ByteType valueType = new ByteType(); + private final ByteType valueType = new ByteType(getSettings()); @Override public ValueType getValueType() { diff --git a/src/main/java/de/unknownreality/dataframe/column/CharacterColumn.java b/src/main/java/de/unknownreality/dataframe/column/CharacterColumn.java index 4ad5eeb..3a86935 100644 --- a/src/main/java/de/unknownreality/dataframe/column/CharacterColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/CharacterColumn.java @@ -32,7 +32,7 @@ */ public class CharacterColumn extends BasicColumn { - private final static CharacterType valueType = new CharacterType(); + private final CharacterType valueType = new CharacterType(getSettings()); public CharacterColumn() { super(Character.class); diff --git a/src/main/java/de/unknownreality/dataframe/column/DoubleColumn.java b/src/main/java/de/unknownreality/dataframe/column/DoubleColumn.java index b4aebe6..52e88e1 100644 --- a/src/main/java/de/unknownreality/dataframe/column/DoubleColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/DoubleColumn.java @@ -32,8 +32,7 @@ */ public class DoubleColumn extends NumberColumn { - private final DoubleType valueType = new DoubleType(); - + private final DoubleType valueType = new DoubleType(getSettings()); @Override public DoubleType getValueType() { diff --git a/src/main/java/de/unknownreality/dataframe/column/FloatColumn.java b/src/main/java/de/unknownreality/dataframe/column/FloatColumn.java index a65333f..70126b7 100644 --- a/src/main/java/de/unknownreality/dataframe/column/FloatColumn.java +++ 
b/src/main/java/de/unknownreality/dataframe/column/FloatColumn.java @@ -32,7 +32,7 @@ */ public class FloatColumn extends NumberColumn { - private final FloatType valueType = new FloatType(); + private final FloatType valueType = new FloatType(getSettings()); @Override public FloatType getValueType() { diff --git a/src/main/java/de/unknownreality/dataframe/column/IntegerColumn.java b/src/main/java/de/unknownreality/dataframe/column/IntegerColumn.java index 8b873cc..becef05 100644 --- a/src/main/java/de/unknownreality/dataframe/column/IntegerColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/IntegerColumn.java @@ -32,7 +32,7 @@ */ public class IntegerColumn extends NumberColumn { - private final IntegerType valueType = new IntegerType(); + private final IntegerType valueType = new IntegerType(getSettings()); public IntegerColumn() { super(Integer.class); diff --git a/src/main/java/de/unknownreality/dataframe/column/LongColumn.java b/src/main/java/de/unknownreality/dataframe/column/LongColumn.java index 62c79d0..b89df80 100644 --- a/src/main/java/de/unknownreality/dataframe/column/LongColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/LongColumn.java @@ -32,7 +32,7 @@ */ public class LongColumn extends NumberColumn { - private final LongType valueType = new LongType(); + private final LongType valueType = new LongType(getSettings()); public LongColumn() { super(Long.class); diff --git a/src/main/java/de/unknownreality/dataframe/column/ShortColumn.java b/src/main/java/de/unknownreality/dataframe/column/ShortColumn.java index 6b83e7a..4730207 100644 --- a/src/main/java/de/unknownreality/dataframe/column/ShortColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/ShortColumn.java @@ -32,7 +32,7 @@ */ public class ShortColumn extends NumberColumn { - private final ShortType valueType = new ShortType(); + private final ShortType valueType = new ShortType(getSettings()); public ShortColumn() { super(Short.class); diff --git 
a/src/main/java/de/unknownreality/dataframe/column/StringColumn.java b/src/main/java/de/unknownreality/dataframe/column/StringColumn.java index 36ce122..52de61a 100644 --- a/src/main/java/de/unknownreality/dataframe/column/StringColumn.java +++ b/src/main/java/de/unknownreality/dataframe/column/StringColumn.java @@ -32,8 +32,7 @@ * Created by Alex on 09.03.2016. */ public class StringColumn extends BasicColumn { - - private final static StringType valueType = new StringType(); + private final StringType valueType = new StringType(getSettings()); public StringColumn() { super(String.class); @@ -51,7 +50,6 @@ public StringColumn(String name, String[] values, int size) { super(name, values, size); } - @Override public StringType getValueType() { return valueType; diff --git a/src/main/java/de/unknownreality/dataframe/common/row/StringRow.java b/src/main/java/de/unknownreality/dataframe/common/row/StringRow.java index 65a5575..53a5745 100644 --- a/src/main/java/de/unknownreality/dataframe/common/row/StringRow.java +++ b/src/main/java/de/unknownreality/dataframe/common/row/StringRow.java @@ -27,6 +27,7 @@ import de.unknownreality.dataframe.DataFrameRuntimeException; import de.unknownreality.dataframe.common.Row; import de.unknownreality.dataframe.common.header.Header; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.DataFrameTypeManager; import de.unknownreality.dataframe.type.ValueType; import de.unknownreality.dataframe.type.ValueTypeNotFoundException; @@ -44,7 +45,7 @@ public class StringRow> implements Row, Iterable { private static final Logger log = LoggerFactory.getLogger(StringRow.class); - private final static StringType STRING_VALUE_TYPE = new StringType(); + private final static StringType STRING_VALUE_TYPE = new StringType(new ColumnSettings()); private static final ValueType BOOLEAN_VALUE_READER = DataFrameTypeManager.get().findValueTypeOrThrow(Boolean.class); private static final ValueType 
DOUBLE_VALUE_READER = DataFrameTypeManager.get().findValueTypeOrThrow(Double.class); @@ -262,7 +263,7 @@ public C getOrNull(int index, Class cl) { * @param resulting type * @return converted value */ - protected C getValueAs(String value, Class cl) { + protected C getValueAs(String value, Class cl) { try { return DataFrameTypeManager.get().parse(cl, value); } catch (ParseException | ValueTypeNotFoundException e) { @@ -314,8 +315,8 @@ public boolean hasNext() { @Override public String next() { - if(index >= values.length){ - throw new NoSuchElementException(String.format("element not found: index out of bounds %s >= %s]",index,values.length)); + if (index >= values.length) { + throw new NoSuchElementException(String.format("element not found: index out of bounds %s >= %s]", index, values.length)); } return values[index++]; } diff --git a/src/main/java/de/unknownreality/dataframe/csv/ColumnSettings.java b/src/main/java/de/unknownreality/dataframe/csv/CSVColumnSettings.java similarity index 98% rename from src/main/java/de/unknownreality/dataframe/csv/ColumnSettings.java rename to src/main/java/de/unknownreality/dataframe/csv/CSVColumnSettings.java index 4e91549..8a9f9ee 100644 --- a/src/main/java/de/unknownreality/dataframe/csv/ColumnSettings.java +++ b/src/main/java/de/unknownreality/dataframe/csv/CSVColumnSettings.java @@ -32,7 +32,7 @@ /** * Created by Alex on 17.06.2017. 
*/ -public class ColumnSettings { +public class CSVColumnSettings { private final List ignoreColumns = new ArrayList<>(); private final List selectColumns = new ArrayList<>(); private final Map> columnTypeMap = new HashMap<>(); diff --git a/src/main/java/de/unknownreality/dataframe/csv/CSVIterator.java b/src/main/java/de/unknownreality/dataframe/csv/CSVIterator.java index 281a1e7..2868260 100644 --- a/src/main/java/de/unknownreality/dataframe/csv/CSVIterator.java +++ b/src/main/java/de/unknownreality/dataframe/csv/CSVIterator.java @@ -44,7 +44,7 @@ public class CSVIterator extends BufferedStreamIterator implements DataI private int lineNumber = 0; private final CSVSettings csvSettings; - private final ColumnSettings columnSettings; + private final CSVColumnSettings csvColumnSettings; private final CSVHeader header = new CSVHeader(); private int cols = -1; private final Set ignoredColumns; @@ -56,17 +56,17 @@ public class CSVIterator extends BufferedStreamIterator implements DataI private boolean[] skipIndices; private final StringSplitter stringSplitter = new StringSplitter(); - public CSVIterator(BufferedReader reader, CSVSettings csvSettings, ColumnSettings columnSettings) { + public CSVIterator(BufferedReader reader, CSVSettings csvSettings, CSVColumnSettings csvColumnSettings) { super(reader); this.csvSettings = csvSettings; - this.columnSettings = columnSettings; - ignoredColumns = new HashSet<>(columnSettings.getIgnoreColumns()); - includedColumns = new HashSet<>(columnSettings.getSelectColumns()); - colTypes = new HashMap<>(columnSettings.getColumnTypeMap()); + this.csvColumnSettings = csvColumnSettings; + ignoredColumns = new HashSet<>(csvColumnSettings.getIgnoreColumns()); + includedColumns = new HashSet<>(csvColumnSettings.getSelectColumns()); + colTypes = new HashMap<>(csvColumnSettings.getColumnTypeMap()); this.stringSplitter.setDetectQuotes(csvSettings.isQuoteDetection()); this.stringSplitter.setDetectSingleQuotes(csvSettings.isSingleQuoteDetection()); 
int j = 0; - for (String col : columnSettings.getSelectColumns()) { + for (String col : csvColumnSettings.getSelectColumns()) { selectedColumnsIndex.put(col, j++); } //loadNext(); diff --git a/src/main/java/de/unknownreality/dataframe/csv/CSVReader.java b/src/main/java/de/unknownreality/dataframe/csv/CSVReader.java index 6a09eee..6fddfac 100644 --- a/src/main/java/de/unknownreality/dataframe/csv/CSVReader.java +++ b/src/main/java/de/unknownreality/dataframe/csv/CSVReader.java @@ -34,15 +34,15 @@ */ public class CSVReader extends DataReader { private CSVSettings settings; - private final ColumnSettings columnSettings; + private final CSVColumnSettings csvColumnSettings; - protected CSVReader(CSVSettings settings, ColumnSettings columnSettings) { + protected CSVReader(CSVSettings settings, CSVColumnSettings csvColumnSettings) { this.settings = settings; - this.columnSettings = columnSettings; + this.csvColumnSettings = csvColumnSettings; } @Override public CSVIterator load(Reader reader) { - return new CSVIterator(new BufferedReader(reader), settings, columnSettings); + return new CSVIterator(new BufferedReader(reader), settings, csvColumnSettings); } } diff --git a/src/main/java/de/unknownreality/dataframe/csv/CSVReaderBuilder.java b/src/main/java/de/unknownreality/dataframe/csv/CSVReaderBuilder.java index a01331b..6aa8755 100644 --- a/src/main/java/de/unknownreality/dataframe/csv/CSVReaderBuilder.java +++ b/src/main/java/de/unknownreality/dataframe/csv/CSVReaderBuilder.java @@ -59,6 +59,7 @@ public CSVReaderBuilder withSeparator(char separator) { /** * Detect quoted values e.g. val1 "val 2" val3 + * * @param quoteDetection quoteDetection * @return self for method chaining */ @@ -69,6 +70,7 @@ public CSVReaderBuilder withQuoteDetection(boolean quoteDetection) { /** * Detect single quoted values e.g. 
val1 'val 2' val3 + * * @param singleQuoteDetection singleQuoteDetection * @return self for method chaining */ @@ -77,7 +79,7 @@ public CSVReaderBuilder withSingleQuoteDetection(boolean singleQuoteDetection) { return this; } - public CSVReaderBuilder addSkipPrefix(String prefix){ + public CSVReaderBuilder addSkipPrefix(String prefix) { skipPrefixes.add(prefix); return this; } @@ -128,8 +130,6 @@ public CSVReaderBuilder containsHeader(boolean header) { } - - /** * Creates a {@link CSVIterator} for the specified file * @@ -138,7 +138,7 @@ public CSVReaderBuilder containsHeader(boolean header) { * @deprecated use {@link DataFrame#fromCSV} or {@link DataFrame#load} instead. */ @Deprecated - public CSVIterator load(File file){ + public CSVIterator load(File file) { return build().load(file); } @@ -151,7 +151,7 @@ public CSVIterator load(File file){ * @deprecated use {@link DataFrame#fromCSV} or {@link DataFrame#load} instead. */ @Deprecated - public CSVIterator load(String content){ + public CSVIterator load(String content) { return build().load(content); } @@ -166,7 +166,7 @@ public CSVIterator load(String content){ */ @Deprecated public CSVIterator loadResource(String resourcePath, ClassLoader classLoader) { - return build().load(resourcePath,classLoader); + return build().load(resourcePath, classLoader); } /** @@ -179,7 +179,7 @@ public CSVIterator loadResource(String resourcePath, ClassLoader classLoader) { */ @Deprecated public CSVIterator loadResource(String resourcePath) { - return build().load(resourcePath,CSVReaderBuilder.class.getClassLoader()); + return build().load(resourcePath, CSVReaderBuilder.class.getClassLoader()); } @Override @@ -191,20 +191,18 @@ public CSVReader build() { settings.setSkipPrefixes(skipPrefixes); settings.setQuoteDetection(quoteDetection); settings.setSingleQuoteDetection(singleQuoteDetection); - ColumnSettings columnSettings = new ColumnSettings(); - columnSettings.getColumnTypeMap().putAll(columnTypeMap); - 
columnSettings.getIgnoreColumns().addAll(ignoreColumns); - columnSettings.getSelectColumns().addAll(selectColumns); - return new CSVReader(settings, columnSettings); + CSVColumnSettings csvColumnSettings = new CSVColumnSettings(); + csvColumnSettings.getColumnTypeMap().putAll(columnTypeMap); + csvColumnSettings.getIgnoreColumns().addAll(ignoreColumns); + csvColumnSettings.getSelectColumns().addAll(selectColumns); + return new CSVReader(settings, csvColumnSettings); } @Override - public ReaderBuilder loadSettings(Map attributes) throws Exception { + public CSVReaderBuilder loadSettings(Map attributes) throws Exception { this.separator = DataFrameTypeManager.get().parse(Character.class, attributes.get("separator")); this.headerPrefix = attributes.get("headerPrefix"); this.containsHeader = DataFrameTypeManager.get().parse(Boolean.class, attributes.get("containsHeader")); return this; } - - } diff --git a/src/main/java/de/unknownreality/dataframe/io/ReaderBuilder.java b/src/main/java/de/unknownreality/dataframe/io/ReaderBuilder.java index 39ddb2c..d94c868 100644 --- a/src/main/java/de/unknownreality/dataframe/io/ReaderBuilder.java +++ b/src/main/java/de/unknownreality/dataframe/io/ReaderBuilder.java @@ -32,6 +32,7 @@ * Created by Alex on 17.06.2017. 
*/ public interface ReaderBuilder, D extends DataReader>> { + D build(); ReaderBuilder loadSettings(Map map) throws Exception; diff --git a/src/main/java/de/unknownreality/dataframe/print/Printer.java b/src/main/java/de/unknownreality/dataframe/print/Printer.java index 9453ada..a9bf626 100644 --- a/src/main/java/de/unknownreality/dataframe/print/Printer.java +++ b/src/main/java/de/unknownreality/dataframe/print/Printer.java @@ -34,6 +34,7 @@ import de.unknownreality.dataframe.common.header.TypeHeader; import de.unknownreality.dataframe.io.DataWriter; import de.unknownreality.dataframe.io.ReadFormat; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.ValueType; import de.unknownreality.dataframe.type.impl.StringType; @@ -67,7 +68,7 @@ public class Printer extends DataWriter { private final Map columnSettings = new HashMap<>(); private ValueFormatter defaultValueFormatter = new DefaultValueFormatter(); private ValueFormatter defaultHeaderFormatter = (t, v, m) -> "#" + v.toString(); - private final StringType headerType = new StringType(); + private final StringType headerType = new StringType(new ColumnSettings()); private ValueFormatter defaultNumberFormatter = new DefaultNumberFormatter(); diff --git a/src/main/java/de/unknownreality/dataframe/settings/ColumnMatcher.java b/src/main/java/de/unknownreality/dataframe/settings/ColumnMatcher.java new file mode 100644 index 0000000..fd30cfd --- /dev/null +++ b/src/main/java/de/unknownreality/dataframe/settings/ColumnMatcher.java @@ -0,0 +1,8 @@ +package de.unknownreality.dataframe.settings; + +import de.unknownreality.dataframe.DataFrameColumn; + +@FunctionalInterface +public interface ColumnMatcher { + boolean match(DataFrameColumn column); +} \ No newline at end of file diff --git a/src/main/java/de/unknownreality/dataframe/settings/ColumnMatchers.java b/src/main/java/de/unknownreality/dataframe/settings/ColumnMatchers.java new file mode 100644 index 0000000..97503aa 
--- /dev/null +++ b/src/main/java/de/unknownreality/dataframe/settings/ColumnMatchers.java @@ -0,0 +1,49 @@ +package de.unknownreality.dataframe.settings; + +import de.unknownreality.dataframe.DataFrameColumn; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +public class ColumnMatchers { + + public static ColumnMatcher all() { + return (c) -> true; + } + + public static ColumnMatcher byName(String... names) { + final Set nameSet = new HashSet<>(Arrays.asList(names)); + return (c) -> nameSet.contains(c.getName()); + } + + public static ColumnMatcher byIndex(Integer... columnIndices) { + final Set idxSet = new HashSet<>(Arrays.asList(columnIndices)); + return (c) -> idxSet.contains(c.getColumnIndex()); + } + + public static ColumnMatcher byValueType(Class... types) { + final Set> clSet = new HashSet<>(Arrays.asList(types)); + return (c) -> { + return clSet.contains(c.getValueType().getType()) || + clSet.contains(c.getValueType().getClass()); + }; + } + + @SafeVarargs + public static ColumnMatcher byColumnType(Class>... types) { + final Set> clSet = new HashSet<>(Arrays.asList(types)); + return (c) -> clSet.contains(c.getClass()); + } + + public static ColumnMatcher all(ColumnMatcher... 
matchers) { + return (c) -> { + for (ColumnMatcher matcher : matchers) { + if (matcher.match(c)) { + return true; + } + } + return false; + }; + } +} \ No newline at end of file diff --git a/src/main/java/de/unknownreality/dataframe/settings/ColumnSetting.java b/src/main/java/de/unknownreality/dataframe/settings/ColumnSetting.java new file mode 100644 index 0000000..2b73b7c --- /dev/null +++ b/src/main/java/de/unknownreality/dataframe/settings/ColumnSetting.java @@ -0,0 +1,5 @@ +package de.unknownreality.dataframe.settings; + +public abstract class ColumnSetting { + +} diff --git a/src/main/java/de/unknownreality/dataframe/settings/ColumnSettings.java b/src/main/java/de/unknownreality/dataframe/settings/ColumnSettings.java new file mode 100644 index 0000000..adc4d2a --- /dev/null +++ b/src/main/java/de/unknownreality/dataframe/settings/ColumnSettings.java @@ -0,0 +1,55 @@ +package de.unknownreality.dataframe.settings; + +import java.util.*; + +public class ColumnSettings { + private final Map, ColumnSetting> settingMap = new HashMap<>(); + + public ColumnSettings() { + + } + + public T remove(T setting) { + Set settingList = new HashSet<>(settingMap.values()); + if (settingList.contains(setting)) { + settingMap.remove(setting.getClass()); + return setting; + } + return null; + } + + public T remove(Class cl) { + Object o = settingMap.remove(cl); + if (!cl.isInstance(o)) { + return null; + } + return cl.cast(o); + } + + public void applyTo(ColumnSettings settings) { + settingMap.values().forEach(settings::add); + } + + public void add(ColumnSetting setting) { + settingMap.put(setting.getClass(), setting); + } + + public T get(Class cl) { + Object o = settingMap.get(cl); + if (cl.isInstance(o)) { + return cl.cast(o); + } + return null; + } + + public T getOrDefault(Class cl, T defaultSetting) { + T v = get(cl); + return v == null ? 
defaultSetting : v; + } + + public static ColumnSettings create(Collection settings) { + ColumnSettings columnSettings = new ColumnSettings(); + settings.forEach(columnSettings::add); + return columnSettings; + } +} diff --git a/src/main/java/de/unknownreality/dataframe/settings/DataFrameSettings.java b/src/main/java/de/unknownreality/dataframe/settings/DataFrameSettings.java new file mode 100644 index 0000000..0117ec2 --- /dev/null +++ b/src/main/java/de/unknownreality/dataframe/settings/DataFrameSettings.java @@ -0,0 +1,104 @@ +package de.unknownreality.dataframe.settings; + +import de.unknownreality.dataframe.DataFrameColumn; + +import java.util.*; + +public class DataFrameSettings { + private final Map> columnSettings = new HashMap<>(); + + public DataFrameSettings() { + } + + public DataFrameSettings(Map> columnSettings) { + columnSettings.forEach((s, m) -> this.columnSettings.put(s, new ArrayList<>(m))); + } + + public void applyToColumn(DataFrameColumn column) { + HashSet settings = new HashSet<>(); + columnSettings.forEach((setting, matchers) -> { + for (ColumnMatcher matcher : matchers) { + if (matcher.match(column)) { + settings.add(setting); + break; + } + } + }); + settings.forEach((s) -> column.getSettings().add(s)); + } + + /*public DataFrameSettings add(ColumnSetting setting, String... names) { + add(byName(names), setting); + return this; + } + + public DataFrameSettings add(ColumnSetting setting, Integer... columnIndices) { + add(byIndex(columnIndices), setting); + return this; + } + + public DataFrameSettings addByValueType(ColumnSetting setting, Class... types) { + add(byValueType(types), setting); + return this; + } + + public DataFrameSettings addByColumnType(ColumnSetting setting, Class>... types) { + add(byColumnType(types), setting); + return this; + } +*/ + public DataFrameSettings addColumnSettings(ColumnMatcher matcher, ColumnSetting... 
settings) { + for (ColumnSetting setting : settings) { + addColumnSetting(matcher, setting); + } + return this; + } + + public DataFrameSettings addColumnSetting(ColumnMatcher matcher, ColumnSetting setting) { + columnSettings.compute(setting, (s, matchers) -> { + if (matchers == null) { + matchers = new ArrayList<>(); + } + matchers.add(matcher); + return matchers; + }); + return this; + } + + public boolean remove(ColumnSetting setting) { + return columnSettings.remove(setting) != null; + } + + public static DataFrameSettingsBuilder create() { + return new DataFrameSettingsBuilder(); + } + + public static final class DataFrameSettingsBuilder { + private final Map> columnSettings = new HashMap<>(); + + private DataFrameSettingsBuilder() { + } + + public DataFrameSettingsBuilder addColumnSettings(ColumnMatcher matcher, ColumnSetting... settings) { + for (ColumnSetting setting : settings) { + addColumnSetting(matcher, setting); + } + return this; + } + + public DataFrameSettingsBuilder addColumnSetting(ColumnMatcher matcher, ColumnSetting setting) { + columnSettings.compute(setting, (s, matchers) -> { + if (matchers == null) { + matchers = new ArrayList<>(); + } + matchers.add(matcher); + return matchers; + }); + return this; + } + + public DataFrameSettings build() { + return new DataFrameSettings(columnSettings); + } + } +} diff --git a/src/main/java/de/unknownreality/dataframe/settings/EncodingSetting.java b/src/main/java/de/unknownreality/dataframe/settings/EncodingSetting.java new file mode 100644 index 0000000..23196e7 --- /dev/null +++ b/src/main/java/de/unknownreality/dataframe/settings/EncodingSetting.java @@ -0,0 +1,18 @@ +package de.unknownreality.dataframe.settings; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +public class EncodingSetting extends ColumnSetting { + public static EncodingSetting UTF8 = new EncodingSetting(StandardCharsets.UTF_8); + + private final Charset charset; + + public EncodingSetting(Charset 
charset) { + this.charset = charset; + } + + public Charset getCharset() { + return charset; + } +} diff --git a/src/main/java/de/unknownreality/dataframe/type/DataFrameTypeManager.java b/src/main/java/de/unknownreality/dataframe/type/DataFrameTypeManager.java index a0f2678..6bbe1cc 100644 --- a/src/main/java/de/unknownreality/dataframe/type/DataFrameTypeManager.java +++ b/src/main/java/de/unknownreality/dataframe/type/DataFrameTypeManager.java @@ -234,7 +234,7 @@ public ValueType getValueType(Class cl) throws ValueTypeNotFoundExcept if (col == null) { throw new ValueTypeNotFoundException(cl); } - return (ValueType) col.getValueType(); + return (ValueType) col.copyEmpty().getValueType(); } /** diff --git a/src/main/java/de/unknownreality/dataframe/type/ValueType.java b/src/main/java/de/unknownreality/dataframe/type/ValueType.java index 6e8709c..d4a64e9 100644 --- a/src/main/java/de/unknownreality/dataframe/type/ValueType.java +++ b/src/main/java/de/unknownreality/dataframe/type/ValueType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -9,6 +11,16 @@ import java.util.Comparator; public abstract class ValueType { + private ColumnSettings columnSettings; + + protected ValueType(ColumnSettings columnSettings) { + this.columnSettings = columnSettings; + } + + public ColumnSettings getColumnSettings() { + return columnSettings; + } + public abstract Class getType(); public abstract Comparator getComparator(); diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/BooleanType.java b/src/main/java/de/unknownreality/dataframe/type/impl/BooleanType.java index d2cf791..b7456df 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/BooleanType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/BooleanType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; 
+import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -8,6 +10,10 @@ import java.text.ParseException; public class BooleanType extends ComparableType { + public BooleanType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Boolean.class; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/ByteType.java b/src/main/java/de/unknownreality/dataframe/type/impl/ByteType.java index 557251c..42b177b 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/ByteType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/ByteType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class ByteType extends NumberType { + public ByteType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Byte.class; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/CharacterType.java b/src/main/java/de/unknownreality/dataframe/type/impl/CharacterType.java index 43d4e2f..4b1c239 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/CharacterType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/CharacterType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class CharacterType extends ComparableType { + public CharacterType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Character.class; diff --git 
a/src/main/java/de/unknownreality/dataframe/type/impl/ComparableType.java b/src/main/java/de/unknownreality/dataframe/type/impl/ComparableType.java index a99b3c1..22a1d7e 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/ComparableType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/ComparableType.java @@ -1,5 +1,6 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.ValueType; import java.util.Comparator; @@ -18,6 +19,10 @@ public abstract class ComparableType> extends ValueType< return o1.compareTo(o2); }; + public ComparableType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Comparator getComparator() { return defaultComparator; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/DoubleType.java b/src/main/java/de/unknownreality/dataframe/type/impl/DoubleType.java index e9e5d1e..5c75b91 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/DoubleType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/DoubleType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class DoubleType extends NumberType { + public DoubleType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Double.class; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/FloatType.java b/src/main/java/de/unknownreality/dataframe/type/impl/FloatType.java index 304cbeb..0ddba9f 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/FloatType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/FloatType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import 
de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class FloatType extends NumberType { + public FloatType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Float.class; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/IntegerType.java b/src/main/java/de/unknownreality/dataframe/type/impl/IntegerType.java index 992f390..6235fce 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/IntegerType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/IntegerType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class IntegerType extends NumberType { + public IntegerType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Integer.class; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/LongType.java b/src/main/java/de/unknownreality/dataframe/type/impl/LongType.java index 7827256..b5d7949 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/LongType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/LongType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class LongType extends NumberType { + public LongType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Long.class; diff --git 
a/src/main/java/de/unknownreality/dataframe/type/impl/NumberType.java b/src/main/java/de/unknownreality/dataframe/type/impl/NumberType.java index 6105586..5095f99 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/NumberType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/NumberType.java @@ -1,8 +1,13 @@ package de.unknownreality.dataframe.type.impl; import de.unknownreality.dataframe.common.NumberUtil; +import de.unknownreality.dataframe.settings.ColumnSettings; public abstract class NumberType> extends ComparableType { + public NumberType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public T convertRaw(Object o) { if (o instanceof Number) { diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/ShortType.java b/src/main/java/de/unknownreality/dataframe/type/impl/ShortType.java index 1510509..0413cf8 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/ShortType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/ShortType.java @@ -1,5 +1,7 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; + import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -7,6 +9,10 @@ import java.nio.ByteBuffer; public class ShortType extends NumberType { + public ShortType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Short.class; diff --git a/src/main/java/de/unknownreality/dataframe/type/impl/StringType.java b/src/main/java/de/unknownreality/dataframe/type/impl/StringType.java index f42e98e..0aca907 100644 --- a/src/main/java/de/unknownreality/dataframe/type/impl/StringType.java +++ b/src/main/java/de/unknownreality/dataframe/type/impl/StringType.java @@ -1,35 +1,44 @@ package de.unknownreality.dataframe.type.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; +import de.unknownreality.dataframe.settings.EncodingSetting; + 
import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.Writer; import java.nio.ByteBuffer; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; public class StringType extends ComparableType { - private final Charset charSet = StandardCharsets.UTF_8; + private final static EncodingSetting DEFAULT_ENCODING_SETTING = EncodingSetting.UTF8; + + public StringType(ColumnSettings columnSettings) { + super(columnSettings); + } @Override public Class getType() { return String.class; } + private Charset getEncoding() { + return getColumnSettings().getOrDefault(EncodingSetting.class, DEFAULT_ENCODING_SETTING).getCharset(); + } @Override public String read(DataInputStream dis) throws IOException { int length = dis.readInt(); byte[] data = new byte[length]; dis.read(data); - return new String(data, charSet); + return new String(data, getEncoding()); } @Override public String read(ByteBuffer buf) { int length = buf.getInt(); byte[] data = new byte[length]; - return new String(data, charSet); + return new String(data, getEncoding()); } @Override @@ -58,7 +67,7 @@ public String toString(String value) { @Override public int write(DataOutputStream dos, String value) throws IOException { assertNotNull(value); - byte[] data = value.getBytes(charSet); + byte[] data = value.getBytes(getEncoding()); dos.writeInt(data.length); dos.write(data); return Integer.BYTES + data.length; diff --git a/src/test/java/de/unknownreality/dataframe/group/DataFrameGroupingTest.java b/src/test/java/de/unknownreality/dataframe/group/DataFrameGroupingTest.java index f1b3442..e3c786d 100644 --- a/src/test/java/de/unknownreality/dataframe/group/DataFrameGroupingTest.java +++ b/src/test/java/de/unknownreality/dataframe/group/DataFrameGroupingTest.java @@ -68,9 +68,9 @@ public void testGroupUtil() throws IOException { .withHeader(true) .withHeaderPrefix("") .withSeparator(';') - .setColumnType("ID",Integer.class) - 
.setColumnType("NAME",String.class) - .setColumnType("VALUE",Integer.class) + .setColumnType("ID", Integer.class) + .setColumnType("NAME", String.class) + .setColumnType("VALUE", Integer.class) .build(); DataFrame dataFrame = DataFrameLoader.load("data_grouping.csv", DataFrameGroupingTest.class.getClassLoader(), csvReader); @@ -87,7 +87,7 @@ public void testGroupUtil() throws IOException { */ DataGrouping dataGroups = dataFrame.groupBy("ID", "NAME") .agg("MAX", Aggregate.max("VALUE")) - .agg("MIN",Aggregate.max("VALUE")); + .agg("MIN", Aggregate.max("VALUE")); Assert.assertEquals(6, dataGroups.size()); Assert.assertEquals(IntegerColumn.class, dataGroups.getColumn("MIN").getClass()); @@ -102,13 +102,13 @@ public void testGroupUtil() throws IOException { testGroup(dataGroups.findByGroupValues(4, "B"), 7); - dataGroups.agg("count2",(DataGroup::size)); - Assert.assertEquals((Integer)2,dataGroups.findByGroupValues(1, "A").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(1, "B").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(2, "A").getInteger("count2")); - Assert.assertEquals((Integer)2,dataGroups.findByGroupValues(3, "B").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(2, "C").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(4, "B").getInteger("count2")); + dataGroups.agg("count2", (DataGroup::size)); + Assert.assertEquals((Integer) 2, dataGroups.findByGroupValues(1, "A").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(1, "B").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(2, "A").getInteger("count2")); + Assert.assertEquals((Integer) 2, dataGroups.findByGroupValues(3, "B").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(2, "C").getInteger("count2")); + Assert.assertEquals((Integer) 1, 
dataGroups.findByGroupValues(4, "B").getInteger("count2")); DataFrame grouping2 = dataGroups.select(FilterPredicate.and(FilterPredicate.lt("ID", 4), FilterPredicate.in("NAME", new String[]{"A", "B"}))); Assert.assertEquals(4, grouping2.size()); @@ -134,13 +134,13 @@ public void testNewGroupUtil() throws IOException { .withHeader(true) .withHeaderPrefix("") .withSeparator(';') - .setColumnType("ID",Integer.class) - .setColumnType("NAME",String.class) - .setColumnType("VALUE",Integer.class) + .setColumnType("ID", Integer.class) + .setColumnType("NAME", String.class) + .setColumnType("VALUE", Integer.class) .build(); DataFrame dataFrame = DataFrameLoader.load("data_grouping.csv", DataFrameGroupingTest.class.getClassLoader(), csvReader); - ((DefaultDataFrame)dataFrame).setGroupUtil(new TreeGroupUtil()); + ((DefaultDataFrame) dataFrame).setGroupUtil(new TreeGroupUtil()); Assert.assertEquals(8, dataFrame.size()); /* @@ -154,7 +154,7 @@ public void testNewGroupUtil() throws IOException { */ DataGrouping dataGroups = dataFrame.groupBy("ID", "NAME") .agg("MAX", Aggregate.max("VALUE")) - .agg("MIN",Aggregate.max("VALUE")); + .agg("MIN", Aggregate.max("VALUE")); Assert.assertEquals(6, dataGroups.size()); Assert.assertEquals(IntegerColumn.class, dataGroups.getColumn("MIN").getClass()); @@ -169,13 +169,13 @@ public void testNewGroupUtil() throws IOException { testGroup(dataGroups.findByGroupValues(4, "B"), 7); - dataGroups.agg("count2",(DataGroup::size)); - Assert.assertEquals((Integer)2,dataGroups.findByGroupValues(1, "A").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(1, "B").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(2, "A").getInteger("count2")); - Assert.assertEquals((Integer)2,dataGroups.findByGroupValues(3, "B").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(2, "C").getInteger("count2")); - Assert.assertEquals((Integer)1,dataGroups.findByGroupValues(4, 
"B").getInteger("count2")); + dataGroups.agg("count2", (DataGroup::size)); + Assert.assertEquals((Integer) 2, dataGroups.findByGroupValues(1, "A").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(1, "B").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(2, "A").getInteger("count2")); + Assert.assertEquals((Integer) 2, dataGroups.findByGroupValues(3, "B").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(2, "C").getInteger("count2")); + Assert.assertEquals((Integer) 1, dataGroups.findByGroupValues(4, "B").getInteger("count2")); DataFrame grouping2 = dataGroups.select(FilterPredicate.and(FilterPredicate.lt("ID", 4), FilterPredicate.in("NAME", new String[]{"A", "B"}))); Assert.assertEquals(4, grouping2.size()); @@ -195,32 +195,31 @@ public void testAgg() throws IOException { dataFrame.addColumn(new IntegerColumn("n")); - dataFrame.append("a",1d,5,true,true,"abc123",1); - dataFrame.append("b",2d,4,true,false,"abc/123",null); - dataFrame.append("c",3d,3,false,true,"abc", Values.NA); - dataFrame.append("d",4d,2,false,false,"123",1); - dataFrame.append("a",2d,5,true,true,"abc123",1); - dataFrame.append("b",2d,4,true,false,"abc/123",null); - dataFrame.append("c",3d,3,false,true,"abc", Values.NA); - dataFrame.append("d",4d,2,false,false,"a123",1); - dataFrame.append("a",3d,5,true,true,"1bc123",1); - dataFrame.append("b",2d,4,true,false,"abc/123",null); - + dataFrame.append("a", 1d, 5, true, true, "abc123", 1); + dataFrame.append("b", 2d, 4, true, false, "abc/123", null); + dataFrame.append("c", 3d, 3, false, true, "abc", Values.NA); + dataFrame.append("d", 4d, 2, false, false, "123", 1); + dataFrame.append("a", 2d, 5, true, true, "abc123", 1); + dataFrame.append("b", 2d, 4, true, false, "abc/123", null); + dataFrame.append("c", 3d, 3, false, true, "abc", Values.NA); + dataFrame.append("d", 4d, 2, false, false, "a123", 1); + dataFrame.append("a", 3d, 5, true, true, 
"1bc123", 1); + dataFrame.append("b", 2d, 4, true, false, "abc/123", null); DataGrouping grouping = dataFrame .groupBy("name") - .agg("count",Aggregate.count()) + .agg("count", Aggregate.count()) .agg("mean", Aggregate.mean("x")) - .agg("max",Aggregate.max("x")) + .agg("max", Aggregate.max("x")) .agg("na_count", Aggregate.naCount("n")) - .agg("filter_count",Aggregate.filterCount("r ~= /[a-z].+/")) + .agg("filter_count", Aggregate.filterCount("r ~= /[a-z].+/")) .agg("first", Aggregate.first("x")) .agg("nfirst", Aggregate.first("n")) - .agg("x_25", Aggregate.quantile("x",0.25)) - .agg("desc",group -> group.getGroupDescription()); + .agg("x_25", Aggregate.quantile("x", 0.25)) + .agg("desc", group -> group.getGroupDescription()); - for(DataRow row : grouping){ + for (DataRow row : grouping) { DataGroup group = grouping.getGroup(row.getIndex()); System.out.println(group.getGroupDescription()); } @@ -234,8 +233,8 @@ public void testAgg() throws IOException { DataFrame df = grouping.select("na_count < 3"); df.print(); - df.getStringColumn("desc").map(value -> value+"::2"); - DataFrame joined = grouping.joinInner(df,"name"); + df.getStringColumn("desc").map(value -> value + "::2"); + DataFrame joined = grouping.joinInner(df, "name"); joined.print(); } diff --git a/src/test/java/de/unknownreality/dataframe/value/CustomColumn.java b/src/test/java/de/unknownreality/dataframe/value/CustomColumn.java index 785c87a..37c2716 100644 --- a/src/test/java/de/unknownreality/dataframe/value/CustomColumn.java +++ b/src/test/java/de/unknownreality/dataframe/value/CustomColumn.java @@ -1,6 +1,7 @@ package de.unknownreality.dataframe.value; import de.unknownreality.dataframe.column.BasicColumn; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.ValueType; import java.io.DataInputStream; @@ -15,7 +16,7 @@ import java.util.regex.Pattern; public class CustomColumn extends BasicColumn { - private CustomValueType type = new CustomValueType(); + 
private CustomValueType type = new CustomValueType(getSettings()); public CustomColumn() { super(Custom.class); @@ -76,6 +77,10 @@ public int hashCode() { public static class CustomValueType extends ValueType { private Pattern PATTERN = Pattern.compile("\\[([0-9+]+),\\s*([0-9+]+)]"); + protected CustomValueType(ColumnSettings columnSettings) { + super(columnSettings); + } + @Override public Class getType() { return Custom.class; diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/BooleanValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/BooleanValueTypeTest.java index 36bd1b1..18d38ff 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/BooleanValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/BooleanValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.BooleanType; public class BooleanValueTypeTest extends AbstractValueTypeTest { @Override public BooleanType getValueType() { - return new BooleanType(); + return new BooleanType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/ByteValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/ByteValueTypeTest.java index ac24e9a..7312439 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/ByteValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/ByteValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.ByteType; public class ByteValueTypeTest extends NumberValueTypeTest { @Override public ByteType getValueType() { - return new ByteType(); + return new ByteType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/CharacterValueTypeTest.java 
b/src/test/java/de/unknownreality/dataframe/value/impl/CharacterValueTypeTest.java index a5b4196..a497310 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/CharacterValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/CharacterValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.CharacterType; public class CharacterValueTypeTest extends AbstractValueTypeTest { @Override public CharacterType getValueType() { - return new CharacterType(); + return new CharacterType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/DoubleValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/DoubleValueTypeTest.java index 1218ee2..ddb4328 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/DoubleValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/DoubleValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.DoubleType; public class DoubleValueTypeTest extends NumberValueTypeTest { @Override public DoubleType getValueType() { - return new DoubleType(); + return new DoubleType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/FloatValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/FloatValueTypeTest.java index 218c506..6689468 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/FloatValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/FloatValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.FloatType; public class FloatValueTypeTest 
extends NumberValueTypeTest { @Override public FloatType getValueType() { - return new FloatType(); + return new FloatType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/IntegerValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/IntegerValueTypeTest.java index 9f8eeac..063437d 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/IntegerValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/IntegerValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.IntegerType; public class IntegerValueTypeTest extends NumberValueTypeTest { @Override public IntegerType getValueType() { - return new IntegerType(); + return new IntegerType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/LongValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/LongValueTypeTest.java index b7741ac..deaefa9 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/LongValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/LongValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.LongType; public class LongValueTypeTest extends AbstractValueTypeTest { @Override public LongType getValueType() { - return new LongType(); + return new LongType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/ShortValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/ShortValueTypeTest.java index bbabe18..6e56a87 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/ShortValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/ShortValueTypeTest.java @@ -1,10 
+1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.ShortType; public class ShortValueTypeTest extends AbstractValueTypeTest { @Override public ShortType getValueType() { - return new ShortType(); + return new ShortType(new ColumnSettings()); } } diff --git a/src/test/java/de/unknownreality/dataframe/value/impl/StringValueTypeTest.java b/src/test/java/de/unknownreality/dataframe/value/impl/StringValueTypeTest.java index e7822cc..182c860 100644 --- a/src/test/java/de/unknownreality/dataframe/value/impl/StringValueTypeTest.java +++ b/src/test/java/de/unknownreality/dataframe/value/impl/StringValueTypeTest.java @@ -1,10 +1,11 @@ package de.unknownreality.dataframe.value.impl; +import de.unknownreality.dataframe.settings.ColumnSettings; import de.unknownreality.dataframe.type.impl.StringType; public class StringValueTypeTest extends AbstractValueTypeTest { @Override public StringType getValueType() { - return new StringType(); + return new StringType(new ColumnSettings()); } }