Source code for opendp.smartnoise.core.components

"""
Warning, this file is autogenerated by code_generation.py.
Don't modify this file manually. (Generated: 2021-01-04 16:18:07.740468)
"""

from .base import Component
from .value import serialize_privacy_usage


def abs(data, **kwargs):
    """
    Abs Component

    Absolute value of data.

    :param data: Atomic types must be of type float or integer.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: An `Abs` component wrapping `data`.
    """
    component_arguments = {'data': Component.of(data)}
    return Component(
        "Abs",
        arguments=component_arguments,
        options={},
        constraints=kwargs)
def add(left, right, **kwargs):
    """
    Add Component

    Mathematical addition. The value types of the two arguments must match.

    :param left: Left value to add. Must be of type float or integer.
    :param right: Right value to add. Must be of type float or integer.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: An `Add` component built from `left` and `right`.
    """
    # Operands are wrapped left-to-right, matching the declared argument order.
    operands = {
        name: Component.of(value)
        for name, value in (('left', left), ('right', right))
    }
    return Component(
        "Add",
        arguments=operands,
        options={},
        constraints=kwargs)
def logical_and(left, right, **kwargs):
    """
    And Component

    :param left: Left argument for the logical AND.
    :param right: Right argument for the logical AND.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Logical AND of left and right.
    """
    # Operands are wrapped left-to-right, matching the declared argument order.
    operands = {
        name: Component.of(value)
        for name, value in (('left', left), ('right', right))
    }
    return Component(
        "And",
        arguments=operands,
        options={},
        constraints=kwargs)
def cast(data, atomic_type, true_label=None, lower=None, upper=None, **kwargs):
    """
    Cast Component

    Cast data to an atomic type.

    :param data: Data to be cast to another type.
    :param atomic_type: Type to which data should be cast.
        One of [`string`, `int`, `bool`, `float`]
    :param true_label: Positive class (class to be mapped to `true`) for each column.
        Used only if casting to `bool`.
    :param lower: Minimum allowable imputation value. Used only if casting to `i64`.
    :param upper: Maximum allowable imputation value. Used only if casting to `i64`.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: A `Cast` component wrapping `data`.
    """
    # The target type is passed as an option; everything else is an argument.
    component_arguments = {}
    component_arguments['data'] = Component.of(data)
    component_arguments['true_label'] = Component.of(true_label)
    component_arguments['lower'] = Component.of(lower)
    component_arguments['upper'] = Component.of(upper)

    component_options = {'atomic_type': atomic_type}

    return Component(
        "Cast",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def clamp(data, lower=None, upper=None, categories=None, null_value=None, **kwargs):
    """
    Clamp Component

    Clamps data to the provided bounds.

    If data are numeric, clamping maps elements outside of an interval `[lower, upper]`
    to the closer endpoint. If data are categorical, clamping maps elements outside of
    the `categories` argument to the associated `null`.

    Using clamp sets the `categories` property for the analysis with value `categories`
    plus `null_value` in the last position.

    :param data: Data to be clamped.
    :param lower: Desired lower bound for each column of the data.
        Used only if `categories` is `None`.
    :param upper: Desired upper bound for each column of the data.
        Used only if `categories` is `None`.
    :param categories: The set of categories you want to be represented for each column
        of the data, or `None`.
    :param null_value: The value to which elements not included in `categories` will be
        mapped for each column of the data. Used only if `categories` is not `None`.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Clamped data.
    """
    component_arguments = {
        'data': Component.of(data),
        'lower': Component.of(lower),
        'upper': Component.of(upper),
        # categories are wrapped with the "jagged" value format
        'categories': Component.of(categories, value_format="jagged"),
        'null_value': Component.of(null_value),
    }
    return Component(
        "Clamp",
        arguments=component_arguments,
        options={},
        constraints=kwargs)
def column_bind(arguments):
    """
    ColumnBind Component

    Bind arguments as columns of an array to produce a larger array.

    :param arguments: dictionary of arguments to supply to the function
    :return: A `ColumnBind` component over the supplied arguments.
    """
    # The dictionary is forwarded as-is; this component takes no options or constraints.
    no_options = {}
    return Component(
        "ColumnBind",
        arguments=arguments,
        options=no_options,
        constraints=None)
def count(data, distinct=False, **kwargs):
    """
    Count Component

    Returns the number of rows in the data.

    :param data: Data whose rows are counted.
    :param distinct: Set to true for the number of unique members in the data.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Row count.
    """
    component_arguments = {'data': Component.of(data)}
    component_options = {'distinct': distinct}
    return Component(
        "Count",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def covariance(data=None, left=None, right=None, finite_sample_correction=True, **kwargs):
    """
    Covariance Component

    Calculate covariance.

    If the `data` argument is provided as a 2D array, a covariance matrix is calculated.
    Otherwise, the `left` and `right` 1D arrays are used to calculate a cross-covariance
    matrix between elements of the two arrays.

    :param data: 2D data array used to construct covariance matrix.
    :param left: Left data array used to calculate cross-covariance matrix.
        Used only if `data` not provided.
    :param right: Right data array used to calculate cross-covariance matrix.
        Used only if `data` not provided.
    :param finite_sample_correction: Whether or not to use the finite sample correction
        (Bessel's correction).
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Flattened covariance or cross-covariance matrix.
    """
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('left', left), ('right', right))
    }
    component_options = {'finite_sample_correction': finite_sample_correction}
    return Component(
        "Covariance",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_count(data, lower=0, upper=None, distinct=False, mechanism="SimpleGeometric",
             privacy_usage=None, **kwargs):
    """
    DPCount Component

    Returns a differentially private row count.

    :param data: Data whose rows are counted.
    :param lower: Estimated minimum possible value of the statistic. Useful to help bound
        elapsed time when sampling for the geometric mechanism. Required for the snapping
        mechanism.
    :param upper: Estimated maximum possible value of the statistic. Useful to help bound
        elapsed time when sampling for the geometric mechanism. Required for the snapping
        mechanism.
    :param distinct: Set to true for the number of unique members in the data.
    :param mechanism: Privatizing mechanism to use. One of [`SimpleGeometric`, `Laplace`,
        `Snapping`, `Gaussian`, `AnalyticGaussian`]. Only `SimpleGeometric` is accepted if
        floating-point protections are enabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private row count.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('lower', lower), ('upper', upper))
    }
    component_options = {
        'distinct': distinct,
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPCount",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_covariance(left=None, right=None, data=None, lower=None, upper=None,
                  mechanism="Automatic", privacy_usage=None,
                  finite_sample_correction=True, **kwargs):
    """
    DPCovariance Component

    Calculate differentially private covariance.

    If the `data` argument is provided as a 2D array, a covariance matrix is calculated.
    Otherwise, the `left` and `right` 1D arrays are used to calculate a cross-covariance
    matrix between elements of the two arrays.

    :param left: Left data array used to calculate cross-covariance matrix.
        Used only if `data` not provided.
    :param right: Right data array used to calculate cross-covariance matrix.
        Used only if `data` not provided.
    :param data: 2D data array used to construct covariance matrix.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use.
        One of [`Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`]
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param finite_sample_correction: Whether or not to use the finite sample correction
        (Bessel's correction).
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Flattened covariance or cross-covariance matrix.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    argument_pairs = (
        ('left', left),
        ('right', right),
        ('data', data),
        ('lower', lower),
        ('upper', upper),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
        'finite_sample_correction': finite_sample_correction,
    }
    return Component(
        "DPCovariance",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_gumbel_median(data, lower, upper, enforce_constant_time=True, privacy_usage=None,
                     **kwargs):
    """
    DPGumbelMedian Component

    Returns differentially private estimates of the median of each column of the data.

    :param data: Data for which the median is estimated.
    :param lower: Min candidate
    :param upper: Max candidate
    :param enforce_constant_time: Enforce constant time for median
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimates of the median of each column of the data.
    """
    # `enforce_constant_time` is supplied as an argument, not as an option.
    argument_pairs = (
        ('data', data),
        ('lower', lower),
        ('upper', upper),
        ('enforce_constant_time', enforce_constant_time),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {'privacy_usage': serialize_privacy_usage(privacy_usage)}
    return Component(
        "DPGumbelMedian",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_histogram(data, edges=None, categories=None, null_value=None, lower=0, upper=None,
                 inclusive_left=True, mechanism="SimpleGeometric", privacy_usage=None,
                 **kwargs):
    """
    DPHistogram Component

    Returns a differentially private histogram over user-defined categories. The final
    cell contains the counts for null values (outside the set of categories).

    :param data: Atomic type must be numeric.
    :param edges: Set of edges to bin continuous-valued data.
        Used only if data are of `continuous` nature.
    :param categories: Set of categories in data.
        Used only if data are of `categorical` nature.
    :param null_value: The value to which elements not included in `categories` will be
        mapped for each column of the data. Used only if `categories` is not `None`.
        The null value is the final category; counts for the null category are at the end
        of the vector of counts.
    :param lower: Estimated minimum possible value of bin counts. Useful to help bound
        elapsed time when sampling for the geometric mechanism. Required for the snapping
        mechanism.
    :param upper: Estimated maximum possible value of bin counts. Useful to help bound
        elapsed time when sampling for the geometric mechanism. Required for the snapping
        mechanism.
    :param inclusive_left: Whether or not the left edge of the bin is inclusive. If `true`
        bins are of the form [lower, upper). Otherwise, bins are of the form
        (lower, upper]. Used only if data are of `continuous` nature.
    :param mechanism: Privatizing mechanism to use. One of [`SimpleGeometric`, `Laplace`,
        `Snapping`, `Gaussian`, `AnalyticGaussian`]. Only `SimpleGeometric` is accepted if
        floating-point protections are enabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private histogram.
    """
    component_arguments = {}
    component_arguments['data'] = Component.of(data)
    # edges and categories are wrapped with the "jagged" value format
    component_arguments['edges'] = Component.of(edges, value_format="jagged")
    component_arguments['categories'] = Component.of(categories, value_format="jagged")
    component_arguments['null_value'] = Component.of(null_value)
    component_arguments['lower'] = Component.of(lower)
    component_arguments['upper'] = Component.of(upper)
    component_arguments['inclusive_left'] = Component.of(inclusive_left)

    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPHistogram",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_linear_regression(data_x, data_y, k=None, lower_slope=None, upper_slope=None,
                         lower_intercept=None, upper_intercept=None,
                         implementation="theil-sen-k-match", privacy_usage=None, **kwargs):
    """
    DPLinearRegression Component

    Returns differentially private estimates of the slope and intercept.

    :param data_x: Predictor variable
    :param data_y: Target variable
    :param k: Number of matchings. Memory usage is quadratic in K.
    :param lower_slope: Estimated minimum possible value of the slope.
    :param upper_slope: Estimated maximum possible value of the slope.
    :param lower_intercept: Estimated minimum possible value of the intercept.
    :param upper_intercept: Estimated maximum possible value of the intercept.
    :param implementation: Theil-Sen implementation to use.
        One of [`theil-sen`, `theil-sen-k-match`]
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimate of the slope and intercept of the line fit to
        the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    argument_pairs = (
        ('data_x', data_x),
        ('data_y', data_y),
        ('k', k),
        ('lower_slope', lower_slope),
        ('upper_slope', upper_slope),
        ('lower_intercept', lower_intercept),
        ('upper_intercept', upper_intercept),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {
        'implementation': implementation,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPLinearRegression",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_maximum(data, candidates=None, lower=None, upper=None, mechanism="Automatic",
               privacy_usage=None, **kwargs):
    """
    DPMaximum Component

    Returns differentially private estimates of the maximum elements of each column of
    the data.

    :param data: Data for which the maximum is estimated.
    :param candidates: Set from which the Exponential mechanism will return an element.
        Type must match with atomic type of data. This value must be column-conformable
        with data. Only useful for Exponential mechanism.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use. Value must be one of
        [`Automatic`, `Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`]
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimates of the maximum elements of the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    argument_pairs = (
        ('data', data),
        ('candidates', candidates),
        ('lower', lower),
        ('upper', upper),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPMaximum",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_mean(data, lower=None, upper=None, implementation="resize", mechanism="Automatic",
            privacy_usage=None, **kwargs):
    """
    DPMean Component

    Returns differentially private estimates of the means of each column of the data.

    :param data: Atomic type must be numeric.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param implementation: Privatizing algorithm to use. One of [`resize`, `plug-in`]
    :param mechanism: Privatizing mechanism to use.
        One of [`Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`].
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimate of the mean of each column of the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('lower', lower), ('upper', upper))
    }
    component_options = {
        'implementation': implementation,
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPMean",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_median(data, candidates=None, lower=None, upper=None, mechanism="Automatic",
              privacy_usage=None, interpolation="midpoint", **kwargs):
    """
    DPMedian Component

    Returns differentially private estimates of the median of each column of the data.

    :param data: Atomic type must be numeric. For Gumbel mechanism, must be limited to a
        single column of data.
    :param candidates: Set from which the Exponential mechanism will return an element.
        Type must match with atomic type of data. This value must be column-conformable
        with data. Only useful for Exponential mechanism.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use. Value must be one of [`Exponential`,
        `Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`, `Gumbel`]. `Automatic`
        chooses `Exponential` if candidates provided, otherwise chooses `Laplace`.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. For Gumbel mechanism, must be limited to a single column of
        data. Atomic data type value must be float. Example value: {'epsilon': 0.5}
    :param interpolation: Interpolation strategy.
        One of [`lower`, `upper`, `midpoint`, `nearest`, `linear`]
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimates of the median of each column of the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    argument_pairs = (
        ('data', data),
        ('candidates', candidates),
        ('lower', lower),
        ('upper', upper),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
        'interpolation': interpolation,
    }
    return Component(
        "DPMedian",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_minimum(data, candidates=None, lower=None, upper=None, mechanism="Automatic",
               privacy_usage=None, **kwargs):
    """
    DPMinimum Component

    Returns differentially private estimates of the minimum elements of each column of
    the data.

    :param data: Data for which the minimum is estimated.
    :param candidates: Set from which the Exponential mechanism will return an element.
        Type must match with atomic type of data. This value must be column-conformable
        with data. Only useful for Exponential mechanism.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use. Value must be one of [`Automatic`,
        `Exponential`, `Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`]. `Automatic`
        chooses `Exponential` if candidates provided.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimates of the minimum elements of the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    argument_pairs = (
        ('data', data),
        ('candidates', candidates),
        ('lower', lower),
        ('upper', upper),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPMinimum",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_quantile(data, alpha, candidates=None, lower=None, upper=None, mechanism="Automatic",
                privacy_usage=None, interpolation="midpoint", **kwargs):
    """
    DPQuantile Component

    Returns differentially private estimates of specified quantiles for each column of
    the data.

    :param data: Atomic type must be numeric.
    :param alpha: Desired quantiles, defined on `[0,1]`.
    :param candidates: Set from which the Exponential mechanism will return an element.
        Type must match with atomic type of data. This value must be column-conformable
        with data. Only useful for Exponential mechanism.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use. Value must be one of [`Automatic`,
        `Exponential`, `Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`]. `Automatic`
        chooses `Exponential` if candidates provided.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param interpolation: Interpolation strategy.
        One of [`lower`, `upper`, `midpoint`, `nearest`, `linear`]
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private estimate of the quantile.
    """
    # `alpha` is passed as an option; arguments are wrapped before the privacy usage
    # is serialized.
    argument_pairs = (
        ('data', data),
        ('candidates', candidates),
        ('lower', lower),
        ('upper', upper),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {
        'alpha': alpha,
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
        'interpolation': interpolation,
    }
    return Component(
        "DPQuantile",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_raw_moment(data, order, lower=None, upper=None, mechanism="Automatic",
                  privacy_usage=None, **kwargs):
    """
    DPRawMoment Component

    Returns differentially private sample estimate of a raw moment for each column of
    the data.

    :param data: Data for which you would like the kth raw moments.
        Atomic data type must be float.
    :param order: Integer statistical moment indicator.
    :param lower: Estimated minimum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic.
        Only useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use. Value must be one of
        [`Automatic`, `Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`].
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private sample estimate of kth raw moment for each column of
        the data.
    """
    # `order` is passed as an option; arguments are wrapped before the privacy usage
    # is serialized.
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('lower', lower), ('upper', upper))
    }
    component_options = {
        'order': order,
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPRawMoment",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_sum(data, lower=None, upper=None, mechanism="Automatic", privacy_usage=None,
           **kwargs):
    """
    DPSum Component

    Returns differentially private estimates of the sums of each column of the data.

    :param data: Data to be summed.
    :param lower: Estimated minimum possible value of the statistic, on integral data.
        Useful to help bound elapsed time when sampling for the geometric mechanism.
        Useful for the snapping mechanism.
    :param upper: Estimated maximum possible value of the statistic, on integral data.
        Useful to help bound elapsed time when sampling for the geometric mechanism.
        Useful for the snapping mechanism.
    :param mechanism: Privatizing mechanism to use. Value must be one of [`Automatic`,
        `Laplace`, `Gaussian`, `AnalyticGaussian`, `SimpleGeometric`]. `Automatic` chooses
        based on the input data type.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private sum over elements for each column of the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('lower', lower), ('upper', upper))
    }
    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
    }
    return Component(
        "DPSum",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def dp_variance(data, lower=None, upper=None, mechanism="Automatic", privacy_usage=None,
                finite_sample_correction=True, **kwargs):
    """
    DPVariance Component

    Returns a differentially private estimate of the variance for each column of the data.

    :param data: Data for which the variance is estimated.
    :param lower: Estimated minimum possible value of the statistic. Only useful for the
        snapping mechanism. Atomic data type must be float.
    :param upper: Estimated maximum possible value of the statistic. Only useful for the
        snapping mechanism. Atomic data type must be float.
    :param mechanism: Privatizing mechanism to use. Value must be one of
        [`Laplace`, `Snapping`, `Gaussian`, `AnalyticGaussian`].
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Atomic data type value must be float.
        Example value: {'epsilon': 0.5}
    :param finite_sample_correction: Whether or not to use the finite sample correction
        (Bessel's correction).
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Differentially private sample variance for each column of the data.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('lower', lower), ('upper', upper))
    }
    component_options = {
        'mechanism': mechanism,
        'privacy_usage': serialize_privacy_usage(privacy_usage),
        'finite_sample_correction': finite_sample_correction,
    }
    return Component(
        "DPVariance",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def digitize(data, edges, null_value=None, inclusive_left=True, **kwargs):
    """
    Digitize Component

    Maps data to bins.

    Bins will be of the form [lower, upper) or (lower, upper]. The null value is the
    final category.

    :param data: Data to be binned.
    :param edges: Values representing the edges of bins. Edges must be sorted, and may not
        contain duplicates.
    :param null_value: Value to which to map if there is no valid bin (e.g. if the element
        falls outside the bin range). The null value is the final category.
    :param inclusive_left: Whether or not the left edge of the bin is inclusive, i.e. the
        bins are of the form [lower, upper).
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: A `Digitize` component wrapping `data`.
    """
    component_arguments = {
        'data': Component.of(data),
        # edges are wrapped with the "jagged" value format
        'edges': Component.of(edges, value_format="jagged"),
        'null_value': Component.of(null_value),
        'inclusive_left': Component.of(inclusive_left),
    }
    return Component(
        "Digitize",
        arguments=component_arguments,
        options={},
        constraints=kwargs)
def divide(left, right, **kwargs):
    """
    Divide Component

    :param left: Atomic type must match right
    :param right: Atomic type must match left
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: A `Divide` component built from `left` and `right`.
    """
    # Operands are wrapped left-to-right, matching the declared argument order.
    operands = {
        name: Component.of(value)
        for name, value in (('left', left), ('right', right))
    }
    return Component(
        "Divide",
        arguments=operands,
        options={},
        constraints=kwargs)
def equal(left, right, **kwargs):
    """
    Equal Component

    :param left: Atomic type must match right
    :param right: Atomic type must match left
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: An `Equal` component built from `left` and `right`.
    """
    # Operands are wrapped left-to-right, matching the declared argument order.
    operands = {
        name: Component.of(value)
        for name, value in (('left', left), ('right', right))
    }
    return Component(
        "Equal",
        arguments=operands,
        options={},
        constraints=kwargs)
def exponential_mechanism(utilities, candidates, sensitivity=None, privacy_usage=None,
                          **kwargs):
    """
    ExponentialMechanism Component

    Returns an element from a finite set with probability relative to its utility.

    :param utilities: Respective scores for each candidate. Total number of records must
        match candidates.
    :param candidates: Set from which the Exponential mechanism will return an element.
        Total number of records must match utilities.
    :param sensitivity: Override the sensitivity computed by the library. Rejected unless
        `protect_sensitivity` is disabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release. Length of privacy_usage must be exactly one.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Element from the candidate set selected via the Exponential mechanism.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    argument_pairs = (
        ('utilities', utilities),
        ('candidates', candidates),
        ('sensitivity', sensitivity),
    )
    component_arguments = {name: Component.of(value) for name, value in argument_pairs}
    component_options = {'privacy_usage': serialize_privacy_usage(privacy_usage)}
    return Component(
        "ExponentialMechanism",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def filter(data, mask, **kwargs):
    """
    Filter Component

    Filters data down into only the desired rows.

    :param data: Data to be filtered.
    :param mask: Boolean mask giving whether or not each row should be kept.
        Example value: data['age'] == '4'
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Data with only the desired rows.
    """
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('mask', mask))
    }
    return Component(
        "Filter",
        arguments=component_arguments,
        options={},
        constraints=kwargs)
def gaussian_mechanism(data, sensitivity=None, privacy_usage=None, analytic=True, **kwargs):
    """
    GaussianMechanism Component

    Privatizes a result by returning it perturbed with Gaussian noise.

    :param data: Result to be released privately via the Gaussian mechanism.
        Atomic type must be numeric.
    :param sensitivity: Override the sensitivity computed by the library. Rejected unless
        `protect_sensitivity` is disabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for
        the mechanism release.
    :param analytic: Set to enable use of the analytic gaussian mechanism.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Original data perturbed with Gaussian noise.
    """
    # Arguments are wrapped before the privacy usage is serialized.
    component_arguments = {
        name: Component.of(value)
        for name, value in (('data', data), ('sensitivity', sensitivity))
    }
    component_options = {
        'privacy_usage': serialize_privacy_usage(privacy_usage),
        'analytic': analytic,
    }
    return Component(
        "GaussianMechanism",
        arguments=component_arguments,
        options=component_options,
        constraints=kwargs)
def greater_than(left, right, **kwargs):
    """
    GreaterThan Component

    :param left: Atomic values must be numeric and of the same type. Type must match right.
    :param right: Atomic values must be numeric and of the same type. Type must match left.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: A `GreaterThan` component built from `left` and `right`.
    """
    # Operands are wrapped left-to-right, matching the declared argument order.
    operands = {
        name: Component.of(value)
        for name, value in (('left', left), ('right', right))
    }
    return Component(
        "GreaterThan",
        arguments=operands,
        options={},
        constraints=kwargs)
def histogram(data, edges=None, categories=None, null_value=None, inclusive_left=True,
              **kwargs):
    """
    Histogram Component

    :param data: Data to be counted into bins or categories.
    :param edges: Set of edges to bin continuous-valued data. Used only if data are of
        `continuous` nature. Must have a value if categories not specified.
    :param categories: Set of categories in data. Used only if data are of `categorical`
        nature. Must have a value if edges not specified.
    :param null_value: The value to which elements not included in `categories` will be
        mapped for each column of the data. Used only if `categories` is not `None`.
    :param inclusive_left: Whether or not the left edge of the bin is inclusive. If `true`
        bins are of the form [lower, upper). Otherwise, bins are of the form
        (lower, upper]. Used only if data are of `continuous` nature.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: A `Histogram` component wrapping `data`.
    """
    component_arguments = {
        'data': Component.of(data),
        # edges and categories are wrapped with the "jagged" value format
        'edges': Component.of(edges, value_format="jagged"),
        'categories': Component.of(categories, value_format="jagged"),
        'null_value': Component.of(null_value),
        'inclusive_left': Component.of(inclusive_left),
    }
    return Component(
        "Histogram",
        arguments=component_arguments,
        options={},
        constraints=kwargs)
def impute(data, lower=None, upper=None, categories=None, null_values=None, weights=None,
           distribution=None, shift=None, scale=None, **kwargs):
    """
    Impute Component

    Replaces null values with draws from a specified distribution.

    If the `categories` argument is provided, the data are considered to be categorical
    regardless of atomic type and the elements provided in `null_values` will be replaced
    with those in `categories` according to `weights`.

    If the `categories` argument is not provided, the data are considered to be numeric
    and elements that are `f64::NAN` will be replaced according to the specified
    distribution.

    :param data: The data for which null values will be imputed.
    :param lower: A lower bound on data elements for each column.
        Used only if `categories` is `None`.
    :param upper: An upper bound on data elements for each column.
        Used only if `categories` is `None`.
    :param categories: The set of categories you want to be represented for each column of
        the data, if the data is categorical. Atomic type must match atomic type of data.
    :param null_values: The set of values that are considered null for each column of the
        data, if the data is categorical. Atomic type must match atomic type of data.
    :param weights: Optional. The weight of each category when imputing. Uniform weights
        are used if not specified.
    :param distribution: The distribution to be used when imputing records.
        Used only if `categories` is `None`.
    :param shift: The expectation of the Gaussian distribution to be used for imputation.
        Used only if `distribution` is `Gaussian`.
    :param scale: The standard deviation of the Gaussian distribution to be used for
        imputation. Used only if `distribution` is `Gaussian`.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]
    :return: Data with null values replaced by imputed values.
    """
    component_arguments = {}
    component_arguments['data'] = Component.of(data)
    component_arguments['lower'] = Component.of(lower)
    component_arguments['upper'] = Component.of(upper)
    # categories, null_values and weights are wrapped with the "jagged" value format
    component_arguments['categories'] = Component.of(categories, value_format="jagged")
    component_arguments['null_values'] = Component.of(null_values, value_format="jagged")
    component_arguments['weights'] = Component.of(weights, value_format="jagged")
    component_arguments['distribution'] = Component.of(distribution)
    component_arguments['shift'] = Component.of(shift)
    component_arguments['scale'] = Component.of(scale)

    return Component(
        "Impute",
        arguments=component_arguments,
        options={},
        constraints=kwargs)
def index(data, names=None, indices=None, mask=None, **kwargs):
    """
    Index Component

    Index into data frames, partitions and arrays to retrieve homogeneously
    typed contiguous arrays.

    :param data: The object to index into; wrapped with ``Component.of``.
    :param names: Optional selector, passed through as the ``names`` argument.
    :param indices: Optional selector, passed through as the ``indices`` argument.
    :param mask: Optional selector, passed through as the ``mask`` argument.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Index`` Component.
    """
    inputs = {
        'data': Component.of(data),
        'names': Component.of(names),
        'indices': Component.of(indices),
        'mask': Component.of(mask),
    }
    return Component("Index", arguments=inputs, options={}, constraints=kwargs)
def laplace_mechanism(data, sensitivity=None, privacy_usage=None, **kwargs):
    """
    LaplaceMechanism Component

    Privatizes a result by returning it perturbed with Laplace noise.

    :param data: True value to be released privately via the Laplace mechanism.
    :param sensitivity: Override the sensitivity computed by the library. Rejected unless `protect_sensitivity` is disabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for the mechanism release. Serialized with ``serialize_privacy_usage`` and stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``LaplaceMechanism`` Component representing the original data perturbed with Laplace noise.
    """
    inputs = {
        'data': Component.of(data),
        'sensitivity': Component.of(sensitivity),
    }
    settings = {'privacy_usage': serialize_privacy_usage(privacy_usage)}
    return Component("LaplaceMechanism", arguments=inputs, options=settings, constraints=kwargs)
def less_than(left, right, **kwargs):
    """
    LessThan Component

    Build a ``LessThan`` component node from two operands.

    :param left: Left operand. Atomic type must be numeric, and match the atomic type of ``right``.
    :param right: Right operand. Atomic type must be numeric, and match the atomic type of ``left``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``LessThan`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("LessThan", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def literal(**kwargs):
    """
    Literal Component

    Build a ``Literal`` component node with no arguments and no options.

    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Literal`` Component.
    """
    no_inputs = {}
    no_settings = {}
    return Component("Literal", arguments=no_inputs, options=no_settings, constraints=kwargs)
def log(data, base=2.71828, **kwargs):
    """
    Log Component

    Build a ``Log`` component node for ``data`` with the given ``base``.

    :param data: Atomic type must be float.
    :param base: Base supplied to the component. Defaults to 2.71828 (a truncated decimal approximation of e).
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Log`` Component.
    """
    inputs = {
        'data': Component.of(data),
        'base': Component.of(base),
    }
    return Component("Log", arguments=inputs, options={}, constraints=kwargs)
def map(arguments, component):
    """
    Map Component

    Apply Component to each data partition.

    :param arguments: dictionary of arguments to supply to the function; passed to the node unchanged.
    :param component: Stored under the ``component`` option of the node.
    :return: The constructed ``Map`` Component (built with no constraints).
    """
    settings = {'component': component}
    return Component("Map", arguments=arguments, options=settings, constraints=None)
def materialize(column_names, file_path, public=False, skip_row=True, **kwargs):
    """
    Materialize Component

    Load a tabular frame from a data source.

    :param column_names: Wrapped with ``Component.of`` and supplied as the node's only argument.
    :param file_path: Path to the file on the system. File format must be CSV.
    :param public: Stored under the ``public`` option; defaults to ``False``.
    :param skip_row: When set, skip the first line (header) in a csv.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Materialize`` Component.
    """
    inputs = {'column_names': Component.of(column_names)}
    settings = {
        'public': public,
        'skip_row': skip_row,
        'file_path': file_path,
    }
    return Component("Materialize", arguments=inputs, options=settings, constraints=kwargs)
def maximum(data, candidates=None, **kwargs):
    """
    Maximum Component

    Find the maximum value of each column in the data.

    :param data: Data for which you want the maximum value in each column.
    :param candidates: Set from which the Exponential mechanism will return an element.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Maximum`` Component representing the maximum of each column in the data.
    """
    inputs = {
        'data': Component.of(data),
        'candidates': Component.of(candidates),
    }
    return Component("Maximum", arguments=inputs, options={}, constraints=kwargs)
def mean(data, **kwargs):
    """
    Mean Component

    Calculates the arithmetic mean of each column in the provided data.

    :param data: Data whose columns are averaged; wrapped with ``Component.of``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Mean`` Component representing the arithmetic mean of each column of the data.
    """
    inputs = {'data': Component.of(data)}
    return Component("Mean", arguments=inputs, options={}, constraints=kwargs)
def median(data, candidates=None, **kwargs):
    """
    Median Component

    Find the median value of each column in the data.

    :param data: Data for which you want the median value in each column.
    :param candidates: Set from which to compute scores for the Exponential mechanism.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Median`` Component representing the median of each column in the data.
    """
    inputs = {
        'data': Component.of(data),
        'candidates': Component.of(candidates),
    }
    return Component("Median", arguments=inputs, options={}, constraints=kwargs)
def minimum(data, candidates=None, **kwargs):
    """
    Minimum Component

    Find the minimum value of each column in the data.

    :param data: Data for which you want the minimum value in each column.
    :param candidates: Set from which the Exponential mechanism will return an element.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Minimum`` Component representing the minimum of each column in the data.
    """
    inputs = {
        'data': Component.of(data),
        'candidates': Component.of(candidates),
    }
    return Component("Minimum", arguments=inputs, options={}, constraints=kwargs)
def modulo(left, right, **kwargs):
    """
    Modulo Component

    Build a ``Modulo`` component node from two operands.

    :param left: Left operand. Atomic type must be numeric and must match ``right``.
    :param right: Right operand. Atomic type must be numeric and must match ``left``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Modulo`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("Modulo", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def multiply(left, right, **kwargs):
    """
    Multiply Component

    Build a ``Multiply`` component node from two operands.

    :param left: Left operand. Atomic type must be numeric and must match ``right``.
    :param right: Right operand. Atomic type must be numeric and must match ``left``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Multiply`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("Multiply", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def negate(data, **kwargs):
    """
    Negate Component

    Build a ``Negate`` component node for boolean data.

    :param data: Atomic type must be boolean.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Negate`` Component.
    """
    inputs = {'data': Component.of(data)}
    return Component("Negate", arguments=inputs, options={}, constraints=kwargs)
def negative(data, **kwargs):
    """
    Negative Component

    Build a ``Negative`` component node for numeric data.

    :param data: Atomic type must be numeric.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Negative`` Component.
    """
    inputs = {'data': Component.of(data)}
    return Component("Negative", arguments=inputs, options={}, constraints=kwargs)
def logical_or(left, right, **kwargs):
    """
    Or Component

    Build an ``Or`` component node. The left and right arguments must share
    the same data types.

    :param left: Left operand; wrapped with ``Component.of``.
    :param right: Right operand; wrapped with ``Component.of``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Or`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("Or", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def partition(data, num_partitions=None, by=None, **kwargs):
    """
    Partition Component

    Split the rows of data into either k equally sized partitions, or by the
    categories of a vector.

    :param data: Must be a dataframe or an array.
    :param num_partitions: Passed through as the ``num_partitions`` argument (presumably the k of the equal-size split -- confirm against the component specification).
    :param by: Passed through as the ``by`` argument (presumably the vector whose categories define the split -- confirm against the component specification).
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Partition`` Component.
    """
    inputs = {
        'data': Component.of(data),
        'num_partitions': Component.of(num_partitions),
        'by': Component.of(by),
    }
    return Component("Partition", arguments=inputs, options={}, constraints=kwargs)
def power(data, radical, **kwargs):
    """
    Power Component

    Build a ``Power`` component node from ``data`` and ``radical``.

    :param data: Atomic types must be numeric and homogenous.
    :param radical: Atomic values may not be negative.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Power`` Component.
    """
    inputs = {
        'data': Component.of(data),
        'radical': Component.of(radical),
    }
    return Component("Power", arguments=inputs, options={}, constraints=kwargs)
def quantile(data, alpha, candidates=None, interpolation="midpoint", **kwargs):
    """
    Quantile Component

    Get values corresponding to specified quantiles for each column of the data.

    :param data: Atomic type must be numeric.
    :param alpha: Desired quantiles, defined on `[0,1]`. Examples: 0: min, 0.5: median, 1: max. Stored as an option.
    :param candidates: Set from which the Exponential mechanism will return an element. Type must match the atomic type of data. This value must be column-conformable with data.
    :param interpolation: Interpolation strategy. One of [`lower`, `upper`, `midpoint`, `nearest`, `linear`]. Stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Quantile`` Component representing the quantile values for each column.
    """
    inputs = {
        'data': Component.of(data),
        'candidates': Component.of(candidates),
    }
    settings = {
        'alpha': alpha,
        'interpolation': interpolation,
    }
    return Component("Quantile", arguments=inputs, options=settings, constraints=kwargs)
def raw_moment(data, order, **kwargs):
    """
    RawMoment Component

    Returns sample estimate of kth raw moment for each column of the data.

    :param data: Data for which you would like the kth raw moments. Atomic data type must be float.
    :param order: Indicate the kth integer statistical moment. Stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``RawMoment`` Component representing the kth raw sample moment for each column.
    """
    inputs = {'data': Component.of(data)}
    settings = {'order': order}
    return Component("RawMoment", arguments=inputs, options=settings, constraints=kwargs)
def reshape(data, shape, symmetric=False, layout='row', **kwargs):
    """
    Reshape Component

    Reshapes a row vector into a matrix.

    :param data: Vector of data to stack into a matrix. An Indexmap of matrices will be emitted if multiple rows are provided.
    :param shape: The shape of the output matrix. Dimensionality may not be greater than 2.
    :param symmetric: Set if data are elements from the upper triangle of a symmetric matrix.
    :param layout: Consecutive elements of either the `row` or `column` reside next to each other. Note that multi-row inputs are reshaped to partitional outputs, having one matrix per partition.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Reshape`` Component representing the reshaped data.
    """
    inputs = {'data': Component.of(data)}
    settings = {
        'symmetric': symmetric,
        'layout': layout,
        'shape': shape,
    }
    return Component("Reshape", arguments=inputs, options=settings, constraints=kwargs)
def resize(data, number_rows=None, number_columns=None, lower=None, upper=None, categories=None, weights=None, distribution=None, shift=None, scale=None, sample_proportion=None, minimum_rows=None, **kwargs):
    """
    Resize Component

    Resizes the data in question to be consistent with a provided sample size, `n`.

    The library does not, in general, assume that the sample size of the data
    being analyzed is known. This introduces a number of problems around how
    to calculate statistics that are a function of the sample size. To
    address this problem, the library asks the user to provide `n`, an
    estimate of the true sample size based on their own beliefs about the
    data or a previous differentially private count of the number of rows in
    the data. This component then either subsamples or appends to the data in
    order to make it consistent with the provided `n`.

    Note that lower/upper/categorical arguments must be provided, or
    lower/upper/categorical properties must be known on data.

    Note that if using the categories constraint, data are treated as
    categorical regardless of atomic type.

    :param data: The data to be resized. Atomic type of data must match atomic type of categories. If categories are not populated, data are treated as numeric and any necessary imputation is done according to a continuous distribution.
    :param number_rows: An estimate of the number of rows in the data. This could be the guess of the user, or the result of a DP release. Cannot be set with ``minimum_rows``.
    :param number_columns: An estimate of the number of columns in the data. This must be the guess of the user, if not previously known (optional). A non-empty value must be positive. A non-empty value is incompatible with an attempt to resize the number of columns and results in an error.
    :param lower: A lower bound on data elements for each column. This value must be less than ``upper``.
    :param upper: An upper bound on data elements for each column. This value must be greater than ``lower``.
    :param categories: The set of categories to be represented for each column of the data, if the data is categorical. Atomic type of data must match atomic type of categories.
    :param weights: Optional. The weight of each category when imputing. Uniform weights are used if not specified.
    :param distribution: The distribution to be used when imputing records.
    :param shift: The expectation of the Gaussian distribution used for imputation (used only if `distribution = Gaussian`).
    :param scale: The standard deviation of the Gaussian distribution used for imputation (used only if `distribution = Gaussian`).
    :param sample_proportion: The proportion of underlying data that may be used to construct the new data. May be > 1.
    :param minimum_rows: Only add synthetic data if the actual row count is less than this number. No sampling is performed. Cannot be set with ``number_rows``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Resize`` Component representing a resized version of data consistent with the provided `n`.
    """
    inputs = {
        'data': Component.of(data),
        'number_rows': Component.of(number_rows),
        'number_columns': Component.of(number_columns),
        'lower': Component.of(lower),
        'upper': Component.of(upper),
        # Per-column collections are passed in "jagged" value format.
        'categories': Component.of(categories, value_format="jagged"),
        'weights': Component.of(weights, value_format="jagged"),
        'distribution': Component.of(distribution),
        'shift': Component.of(shift),
        'scale': Component.of(scale),
        'sample_proportion': Component.of(sample_proportion),
        'minimum_rows': Component.of(minimum_rows),
    }
    return Component("Resize", arguments=inputs, options={}, constraints=kwargs)
def row_max(left, right, **kwargs):
    """
    RowMax Component

    Returns the maximum of the left and right arguments, per row. Note that
    left and right arguments must share the same data types.

    :param left: Member data type must match that of ``right``.
    :param right: Member data type must match that of ``left``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``RowMax`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("RowMax", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def row_min(left, right, **kwargs):
    """
    RowMin Component

    Returns the minimum of the left and right arguments, per row. Note that
    left and right arguments must share the same data types.

    :param left: Member data type must match that of ``right``.
    :param right: Member data type must match that of ``left``.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``RowMin`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("RowMin", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def simple_geometric_mechanism(data, lower=None, upper=None, sensitivity=None, privacy_usage=None, **kwargs):
    """
    SimpleGeometricMechanism Component

    Privatizes a result by returning it perturbed with Geometric noise.

    :param data: Result to be released privately via the Geometric mechanism. Member data type must be integer.
    :param lower: Lower bound of the statistic to be privatized. Member data type must be integer.
    :param upper: Upper bound of the statistic to be privatized. Member data type must be integer.
    :param sensitivity: Override the sensitivity computed by the library. Rejected unless `protect_sensitivity` is disabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for the mechanism release. Values of zero or less, and values of greater than one, will result in warnings. Serialized with ``serialize_privacy_usage`` and stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``SimpleGeometricMechanism`` Component representing the original data perturbed with Geometric noise.
    """
    inputs = {
        'data': Component.of(data),
        'lower': Component.of(lower),
        'upper': Component.of(upper),
        'sensitivity': Component.of(sensitivity),
    }
    settings = {'privacy_usage': serialize_privacy_usage(privacy_usage)}
    return Component("SimpleGeometricMechanism", arguments=inputs, options=settings, constraints=kwargs)
def snapping_mechanism(data, lower=None, upper=None, binding_probability=None, sensitivity=None, privacy_usage=None, **kwargs):
    """
    SnappingMechanism Component

    Privatizes a result by returning it perturbed via the Snapping mechanism.
    This mechanism is generally intended for non-integer numerical data.
    Note that snapping may not operate on integers when floating-point
    protections are enabled. For this situation, use the geometric mechanism
    instead.

    :param data: Result to be released privately via the Snapping mechanism. Array members must be of type float or of type integer.
    :param lower: Estimated minimum possible value of the data. Only useful for the snapping mechanism. Documented as required, although the signature defaults it to ``None``.
    :param upper: Estimated maximum possible value of the statistic. Only useful for the snapping mechanism. Documented as required, although the signature defaults it to ``None``.
    :param binding_probability: Upper bound on probability that final clamp binds. Must be within [0, 1).
    :param sensitivity: Override the sensitivity computed by the library. Rejected unless `protect_sensitivity` is disabled.
    :param privacy_usage: Object describing the type and amount of privacy to be used for the mechanism release. Serialized with ``serialize_privacy_usage`` and stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``SnappingMechanism`` Component representing the original data perturbed via the Snapping mechanism.
    """
    inputs = {
        'data': Component.of(data),
        'lower': Component.of(lower),
        'upper': Component.of(upper),
        'binding_probability': Component.of(binding_probability),
        'sensitivity': Component.of(sensitivity),
    }
    settings = {'privacy_usage': serialize_privacy_usage(privacy_usage)}
    return Component("SnappingMechanism", arguments=inputs, options=settings, constraints=kwargs)
def subtract(left, right, **kwargs):
    """
    Subtract Component

    Mathematical subtraction. Value types of arguments must match.

    :param left: Value from which to subtract. Must be of type float or integer.
    :param right: Value which to subtract. Must be of type float or integer.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``Subtract`` Component.
    """
    lhs = Component.of(left)
    rhs = Component.of(right)
    return Component("Subtract", arguments={'left': lhs, 'right': rhs}, options={}, constraints=kwargs)
def sum(data, **kwargs):
    """
    Sum Component

    Calculates the sum of each column of the data. Data must be of type
    float or integer.

    :param data: Data for which you want the sum of each column.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Sum`` Component representing the sum of each column of the data.
    """
    inputs = {'data': Component.of(data)}
    return Component("Sum", arguments=inputs, options={}, constraints=kwargs)
def theil_sen(data_x, data_y, implementation="theil-sen-k-match", k=0, **kwargs):
    """
    TheilSen Component

    Returns slope and intercept estimates for point pairs.

    :param data_x: value(s) from the first coordinate axis
    :param data_y: value(s) from the second coordinate axis
    :param implementation: Theil-Sen implementation to use. One of [`theil-sen`, `theil-sen-k-match`]. Stored as an option.
    :param k: Number of trials to run for Theil-Sen K Match. Stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``TheilSen`` Component representing all slope and intercept estimates for point pairs.
    """
    inputs = {
        'data_x': Component.of(data_x),
        'data_y': Component.of(data_y),
    }
    settings = {
        'implementation': implementation,
        'k': k,
    }
    return Component("TheilSen", arguments=inputs, options=settings, constraints=kwargs)
def to_bool(data, true_label, **kwargs):
    """
    ToBool Component

    Cast data to a bool atomic type.

    :param data: Data to be cast to Boolean type.
    :param true_label: Positive class (class to be mapped to `true`) for each column.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: The constructed ``ToBool`` Component.
    """
    inputs = {
        'data': Component.of(data),
        'true_label': Component.of(true_label),
    }
    return Component("ToBool", arguments=inputs, options={}, constraints=kwargs)
def to_dataframe(data, names, **kwargs):
    """
    ToDataframe Component

    Name columns of an array to produce a Dataframe with the specified
    names. Typically used when partitioning a dataframe with preprocessed
    columns.

    :param data: ndarray (structured or homogeneous), Iterable, dict, or DataFrame
    :param names: Column labels to use for the resulting frame.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``ToDataframe`` Component representing a Dataframe in the target language, for example pandas.DataFrame (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html).
    """
    inputs = {
        'data': Component.of(data),
        'names': Component.of(names),
    }
    return Component("ToDataframe", arguments=inputs, options={}, constraints=kwargs)
def to_float(data, **kwargs):
    """
    ToFloat Component

    Cast data to a float atomic type.

    :param data: Data to be cast to float.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``ToFloat`` Component representing an array containing the converted float value(s).
    """
    inputs = {'data': Component.of(data)}
    return Component("ToFloat", arguments=inputs, options={}, constraints=kwargs)
def to_int(data, lower, upper, **kwargs):
    """
    ToInt Component

    Cast data to an int atomic type.

    :param data: Data to be cast to integer type.
    :param lower: Minimum allowable imputation value. Integers cannot represent null, so values that cannot be parsed are imputed.
    :param upper: Maximum allowable imputation value.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``ToInt`` Component representing an array containing the converted integer value(s).
    """
    inputs = {
        'data': Component.of(data),
        'lower': Component.of(lower),
        'upper': Component.of(upper),
    }
    return Component("ToInt", arguments=inputs, options={}, constraints=kwargs)
def to_string(data, **kwargs):
    """
    ToString Component

    Cast data to a string atomic type.

    :param data: Data to be cast to string type.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``ToString`` Component representing the result of the string cast on the arguments.
    """
    inputs = {'data': Component.of(data)}
    return Component("ToString", arguments=inputs, options={}, constraints=kwargs)
def union(arguments, flatten=True):
    """
    Union Component

    Union the arrays in the arguments into one array.

    :param arguments: dictionary of arguments to supply to the function; passed to the node unchanged.
    :param flatten: When set, the output is an array. When unset, the output is an indexmap of arrays.
    :return: ``Union`` Component (built with no constraints) representing an array, or indexmap of arrays, containing item(s) that are the concatenation of all partitions.
    """
    settings = {'flatten': flatten}
    return Component("Union", arguments=arguments, options=settings, constraints=None)
def variance(data, finite_sample_correction=True, **kwargs):
    """
    Variance Component

    Calculates the sample variance for each column of the data.

    :param data: Data whose per-column variance is requested; wrapped with ``Component.of``.
    :param finite_sample_correction: Whether or not to use the finite sample correction (Bessel's correction) to correct the bias in the estimation of the population variance. Stored as an option.
    :param kwargs: data bounds of the form [argument]_[bound]=[lower | upper | categories | ...]; forwarded as the node's constraints.
    :return: ``Variance`` Component representing the sample variance for each column of the data.
    """
    inputs = {'data': Component.of(data)}
    settings = {'finite_sample_correction': finite_sample_correction}
    return Component("Variance", arguments=inputs, options=settings, constraints=kwargs)