# python/dp_accounting/common.py

# Copyright 2020 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common classes and functions for the accounting library."""

import math
import typing

import dataclasses
import numpy as np
from scipy import fft
from scipy import signal


@dataclasses.dataclass
class DifferentialPrivacyParameters(object):
  """Representation of the differential privacy parameters of a mechanism.

  Attributes:
    epsilon: the epsilon in (epsilon, delta)-differential privacy. Must be
      non-negative; zero is accepted.
    delta: the delta in (epsilon, delta)-differential privacy. Must lie in the
      closed interval [0, 1]. Defaults to 0.

  Raises:
    ValueError: if epsilon is negative or NaN, or if delta is NaN or lies
      outside of [0, 1].
  """
  epsilon: float
  delta: float = 0

  def __post_init__(self):
    # The conditions are phrased as "not (valid)" rather than "(invalid)"
    # because NaN compares False against everything: `nan < 0` is False, so a
    # NaN parameter would otherwise pass validation unnoticed.
    if not self.epsilon >= 0:
      raise ValueError(f'epsilon should be positive: {self.epsilon}')
    if not 0 <= self.delta <= 1:
      raise ValueError(f'delta should be between 0 and 1: {self.delta}')


@dataclasses.dataclass
class BinarySearchParameters:
  """Configuration of a binary search over a range.

  Attributes:
    lower_bound: The left end of the range that is searched.
    upper_bound: The right end of the range that is searched.
    initial_guess: An optional starting point for the search. Must be positive.
      A guess that is near the true answer can speed the search up.
    tolerance: The error that is acceptable on the value found by the search.
    discrete: True when the search ranges over integers.
  """
  lower_bound: float
  upper_bound: float
  initial_guess: typing.Optional[float] = None
  tolerance: float = 1e-7
  discrete: bool = False


def inverse_monotone_function(
    func: typing.Callable[[float], float],
    value: float,
    search_parameters: BinarySearchParameters,
    increasing: bool = False) -> typing.Optional[float]:
  """Inverts a monotone function by binary search.

  Args:
    func: The monotone function to invert.
    value: The target that func(x) should not exceed.
    search_parameters: The range, optional initial guess, tolerance and
      discreteness of the binary search.
    increasing: True if func is monotonically increasing; False if it is
      monotonically decreasing.

  Returns:
    An x for which func(x) is at most value, when such an x exists. The
    returned x lies within search_parameters.tolerance of the smallest such x
    when func is monotonically decreasing, and of the largest such x when func
    is monotonically increasing. None is returned when the range contains no
    such x.
  """
  left = search_parameters.lower_bound
  right = search_parameters.upper_bound
  guess = search_parameters.initial_guess

  # A solution can only exist if the most favourable finite endpoint already
  # satisfies func(x) <= value: the lower bound for an increasing func, the
  # upper bound for a decreasing one. Infinite endpoints are not evaluated.
  favourable_end = left if increasing else right
  unbounded_end = -math.inf if increasing else math.inf
  if favourable_end != unbounded_end and func(favourable_end) > value:
    return None

  def belongs_left(x):
    """Tells whether x should become the new left end of the range."""
    func_value = func(x)
    if increasing:
      return func_value <= value
    return func_value > value

  if guess is not None:
    # Double the guess for as long as it can serve as a left end; the first
    # guess that cannot (or that passes the upper bound) caps the right end.
    while guess < right and belongs_left(guess):
      left = guess
      guess *= 2
    right = min(right, guess)

  tolerance = 1 if search_parameters.discrete else search_parameters.tolerance

  while right - left > tolerance:
    if search_parameters.discrete:
      middle = (right + left) // 2
    else:
      middle = (right + left) / 2

    if belongs_left(middle):
      left = middle
    else:
      right = middle

  return left if increasing else right


def dictionary_to_list(
    input_dictionary: typing.Mapping[int, float]
) -> typing.Tuple[int, typing.List[float]]:
  """Lays an integer-keyed dictionary out as a dense list.

  Args:
    input_dictionary: A dictionary whose keys are integers.

  Returns:
    A tuple (offset, result_list). The offset is the smallest key of the input
    dictionary. result_list has one entry for every integer from the smallest
    key to the largest key inclusive; result_list[i] is
    input_dictionary[offset + i] when offset + i is a key and 0 otherwise.
  """
  first_key = min(input_dictionary)
  last_key = max(input_dictionary)
  span = last_key - first_key + 1
  dense_values = [input_dictionary.get(first_key + i, 0) for i in range(span)]
  return first_key, dense_values


def list_to_dictionary(
    input_list: typing.List[float],
    offset: int,
    tail_mass_truncation: float = 0) -> typing.Mapping[int, float]:
  """Turns a list into an integer-keyed dictionary, shifting keys by offset.

  Args:
    input_list: An input list.
    offset: The amount added to each list index to obtain its dictionary key.
    tail_mass_truncation: an upper bound on the tails of the input list that
      might be truncated.

  Returns:
    A dictionary that maps key to input_list[key - offset]. Entries that are
    less than or equal to zero are left out. Also left out are the longest
    prefix and the longest suffix of input_list whose running sums do not
    exceed half of tail_mass_truncation.
  """
  list_length = len(input_list)

  # Scan from the left for the first index at which the running sum exceeds
  # half of the truncation budget; everything before it is dropped.
  first_kept = list_length
  running_mass = 0
  for index, entry in enumerate(input_list):
    running_mass += entry
    if running_mass > tail_mass_truncation / 2:
      first_kept = index
      break

  # Same scan from the right for the other tail.
  last_kept = -1
  running_mass = 0
  for index in range(list_length - 1, -1, -1):
    running_mass += input_list[index]
    if running_mass > tail_mass_truncation / 2:
      last_kept = index
      break

  return {
      index + offset: input_list[index]
      for index in range(first_kept, last_kept + 1)
      if input_list[index] > 0
  }


def convolve_dictionary(
    dictionary1: typing.Mapping[int, float],
    dictionary2: typing.Mapping[int, float],
    tail_mass_truncation: float = 0) -> typing.Mapping[int, float]:
  """Convolves two integer-keyed dictionaries.

  Args:
    dictionary1: The first dictionary whose keys are integers.
    dictionary2: The second dictionary whose keys are integers.
    tail_mass_truncation: an upper bound on the tails of the output that might
      be truncated.

  Returns:
    The dictionary whose value at each key is the sum of
    dictionary1[key1] * dictionary2[key2] over all pairs key1, key2 with
    key1 + key2 = key.
  """
  # Lay both dictionaries out densely, remembering where each one starts.
  offset1, dense1 = dictionary_to_list(dictionary1)
  offset2, dense2 = dictionary_to_list(dictionary2)

  # Index 0 of the convolved list corresponds to the sum of the two smallest
  # keys.
  convolved = signal.fftconvolve(dense1, dense2)
  combined_offset = offset1 + offset2

  return list_to_dictionary(
      convolved, combined_offset, tail_mass_truncation=tail_mass_truncation)


def compute_self_convolve_bounds(
    input_list: typing.List[float],
    num_times: int,
    tail_mass_truncation: float = 0,
    orders: typing.Optional[typing.List[float]] = None
) -> typing.Tuple[int, int]:
  """Uses the Chernoff bound to find truncation bounds for a self-convolution.

  Args:
    input_list: The input list to be convolved.
    num_times: The number of times the list is to be convolved with itself.
    tail_mass_truncation: an upper bound on the tails of the output that might
      be truncated.
    orders: a list of orders on which the Chernoff bound is applied. When
      omitted, the integers -20..-1 and 1..20, each divided by
      len(input_list), are used.

  Returns:
    A pair (lower bound, upper bound) of indices such that the mass of the
    result of the convolution outside of this range is at most
    tail_mass_truncation.
  """
  highest = (len(input_list) - 1) * num_times
  lowest = 0

  # Nothing may be truncated: the full support of the convolution is needed.
  if tail_mass_truncation == 0:
    return lowest, highest

  if orders is None:
    # Keep the absolute values of the orders small; otherwise, we may run into
    # numerical issues.
    negative_orders = np.arange(-20, 0)
    positive_orders = np.arange(1, 21)
    orders = (np.concatenate((negative_orders, positive_orders))
              / len(input_list))

  # Log of the moment generating function at each of the orders.
  support = np.arange(len(input_list))
  log_mgfs = np.log(
      [np.dot(np.exp(support * order), input_list) for order in orders])

  for order, log_mgf_value in zip(orders, log_mgfs):
    # Chernoff bound at this order. See equation (5) in the supplementary
    # material. Positive orders tighten the upper bound, negative orders
    # tighten the lower bound.
    bound = (num_times * log_mgf_value +
             math.log(2 / tail_mass_truncation)) / order
    if order > 0:
      highest = min(highest, math.ceil(bound))
    elif order < 0:
      lowest = max(lowest, math.floor(bound))

  return lowest, highest


def self_convolve(
    input_list: typing.List[float],
    num_times: int,
    tail_mass_truncation: float = 0) -> typing.Tuple[int, typing.List[float]]:
  """Computes a convolution of the input list with itself num_times times.

  Args:
    input_list: The input list to be convolved.
    num_times: The number of times the list is to be convolved with itself.
    tail_mass_truncation: an upper bound on the tails of the output that might
      be truncated.

  Returns:
    A pair of truncation_lower_bound, output_list, where the i-th entry of
    output_list is approximately the sum, over all i_1, i_2, ..., i_num_times
    such that i_1 + i_2 + ... + i_num_times = i + truncation_lower_bound,
    of input_list[i_1] * input_list[i_2] * ... * input_list[i_num_times].
  """
  truncation_lower_bound, truncation_upper_bound = compute_self_convolve_bounds(
      input_list, num_times, tail_mass_truncation)

  # Use FFT to compute the convolution. The transform only needs to be as long
  # as the truncated range, because everything outside of it is discarded.
  fast_len = fft.next_fast_len(truncation_upper_bound -
                               truncation_lower_bound + 1)

  # fft.fft(x, n) crops x when n < len(x), which would throw away input mass
  # rather than wrap it around. When the truncated range is shorter than the
  # input, fold the input modulo fast_len first: the length-fast_len DFT of the
  # folded input equals the spectrum of the full input sampled at the same
  # frequencies, so the inverse transform below is the full convolution wrapped
  # modulo fast_len, which is what the extraction step assumes.
  fft_input = input_list
  if 0 < fast_len < len(input_list):
    fft_input = np.bincount(
        np.arange(len(input_list)) % fast_len,
        weights=input_list,
        minlength=fast_len)

  truncated_convolution_output = np.real(
      fft.ifft(fft.fft(fft_input, fast_len)**num_times))

  # Discrete Fourier Transform wraps around modulo fast_len. Extract the output
  # values in the range of interest.
  output_list = [
      truncated_convolution_output[i % fast_len]
      for i in range(truncation_lower_bound, truncation_upper_bound + 1)
  ]

  return truncation_lower_bound, output_list


def self_convolve_dictionary(
    input_dictionary: typing.Mapping[int, float],
    num_times: int,
    tail_mass_truncation: float = 0) -> typing.Mapping[int, float]:
  """Convolves an integer-keyed dictionary with itself num_times times.

  Args:
    input_dictionary: The input dictionary whose keys are integers.
    num_times: The number of times the dictionary is to be convolved with
      itself.
    tail_mass_truncation: an upper bound on the tails of the output that might
      be truncated.

  Returns:
    The dictionary whose value at each key is the sum of
    input_dictionary[key1] * input_dictionary[key2] * ... *
    input_dictionary[key_num_times] over all key1, key2, ..., key_num_times
    with key1 + key2 + ... + key_num_times = key.
  """
  key_offset, dense_input = dictionary_to_list(input_dictionary)
  truncation_offset, dense_output = self_convolve(
      dense_input, num_times, tail_mass_truncation=tail_mass_truncation)

  # Each of the num_times summands contributes the smallest input key, and the
  # truncated output list itself starts at truncation_offset.
  output_offset = key_offset * num_times + truncation_offset
  return list_to_dictionary(dense_output, output_offset)