diff --git a/.gitignore b/.gitignore
index 3c70e2d..dafb06e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,8 @@ fawkes.egg-info
 .coverage
 .cache
 .pytest_cache
+fawkes_dev/api_key.txt
+fawkes_dev/protect_personId.txt
 
 # developer environments
 .idea
diff --git a/build/lib/fawkes/__init__.py b/build/lib/fawkes/__init__.py
deleted file mode 100644
index 1c4f29b..0000000
--- a/build/lib/fawkes/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Date    : 2020-07-01
-# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
-# @Link    : https://www.shawnshan.com/
-
-
-__version__ = '0.0.2'
-
-from .differentiator import FawkesMaskGeneration
-from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
-    Faces
-from .protection import main
-import logging
-import sys
-import os
-logging.getLogger('tensorflow').disabled = True
-
-
-__all__ = (
-    '__version__',
-    'FawkesMaskGeneration', 'load_extractor',
-    'init_gpu',
-    'select_target_label', 'dump_image', 'reverse_process_cloaked', 'Faces', 'main'
-)
\ No newline at end of file
diff --git a/build/lib/fawkes/differentiator.py b/build/lib/fawkes/differentiator.py
deleted file mode 100644
index 98a46e0..0000000
--- a/build/lib/fawkes/differentiator.py
+++ /dev/null
@@ -1,449 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Date    : 2020-05-17
-# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
-# @Link    : https://www.shawnshan.com/
-
-import datetime
-import time
-from decimal import Decimal
-
-import numpy as np
-import tensorflow as tf
-from .utils import preprocess, reverse_preprocess
-
-
-class FawkesMaskGeneration:
-    # if the attack is trying to mimic a target image or a neuron vector
-    MIMIC_IMG = True
-    # number of iterations to perform gradient descent
-    MAX_ITERATIONS = 10000
-    # larger values converge faster to less accurate results
-    LEARNING_RATE = 1e-2
-    # the initial constant c to pick as a first guess
-    INITIAL_CONST = 1
-    # pixel intensity range
-    INTENSITY_RANGE = 'imagenet'
-    # threshold for distance
-    L_THRESHOLD = 0.03
-    # whether keep the final result or the best result
-    KEEP_FINAL = False
-    # max_val of image
-    MAX_VAL = 255
-    # The following variables are used by DSSIM, should keep as default
-    # filter size in SSIM
-    FILTER_SIZE = 11
-    # filter sigma in SSIM
-    FILTER_SIGMA = 1.5
-    # weights used in MS-SSIM
-    SCALE_WEIGHTS = None
-    MAXIMIZE = False
-    IMAGE_SHAPE = (224, 224, 3)
-    RATIO = 1.0
-    LIMIT_DIST = False
-
-    def __init__(self, sess, bottleneck_model_ls, mimic_img=MIMIC_IMG,
-                 batch_size=1, learning_rate=LEARNING_RATE,
-                 max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
-                 intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
-                 max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
-                 verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST, faces=None):
-
-        assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}
-
-        # constant used for tanh transformation to avoid corner cases
-        self.tanh_constant = 2 - 1e-6
-        self.sess = sess
-        self.MIMIC_IMG = mimic_img
-        self.LEARNING_RATE = learning_rate
-        self.MAX_ITERATIONS = max_iterations
-        self.initial_const = initial_const
-        self.batch_size = batch_size
-        self.intensity_range = intensity_range
-        self.l_threshold = l_threshold
-        self.max_val = max_val
-        self.keep_final = keep_final
-        self.verbose = verbose
-        self.maximize = maximize
-        self.learning_rate = learning_rate
-        self.ratio = ratio
-        self.limit_dist = limit_dist
-        self.single_shape = list(image_shape)
-        self.faces = faces
-
-        self.input_shape = tuple([self.batch_size] + self.single_shape)
-
-        self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
-        # self.bottleneck_shape = tuple([self.batch_size, bottleneck_model_ls[0].output_shape[-1]])
-
-        # the variable we're going to optimize over
-        self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
-
-        # target image in tanh space
-        if self.MIMIC_IMG:
-            self.timg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
-        else:
-            self.bottleneck_t_raw = tf.Variable(np.zeros(self.bottleneck_shape), dtype=np.float32)
-        # source image in tanh space
-        self.simg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
-
-        self.const = tf.Variable(np.ones(batch_size), dtype=np.float32)
-        self.mask = tf.Variable(np.ones((batch_size), dtype=np.bool))
-        self.weights = tf.Variable(np.ones(self.bottleneck_shape,
-                                           dtype=np.float32))
-
-        # and here's what we use to assign them
-        self.assign_modifier = tf.placeholder(tf.float32, self.input_shape)
-        if self.MIMIC_IMG:
-            self.assign_timg_tanh = tf.placeholder(
-                tf.float32, self.input_shape)
-        else:
-            self.assign_bottleneck_t_raw = tf.placeholder(
-                tf.float32, self.bottleneck_shape)
-        self.assign_simg_tanh = tf.placeholder(tf.float32, self.input_shape)
-        self.assign_const = tf.placeholder(tf.float32, (batch_size))
-        self.assign_mask = tf.placeholder(tf.bool, (batch_size))
-        self.assign_weights = tf.placeholder(tf.float32, self.bottleneck_shape)
-
-        # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
-        # adversarial image in raw space
-        self.aimg_raw = (tf.tanh(self.modifier + self.simg_tanh) /
-                         self.tanh_constant +
-                         0.5) * 255.0
-        # source image in raw space
-        self.simg_raw = (tf.tanh(self.simg_tanh) /
-                         self.tanh_constant +
-                         0.5) * 255.0
-        if self.MIMIC_IMG:
-            # target image in raw space
-            self.timg_raw = (tf.tanh(self.timg_tanh) /
-                             self.tanh_constant +
-                             0.5) * 255.0
-
-        # convert source and adversarial image into input space
-        if self.intensity_range == 'imagenet':
-            mean = tf.constant(np.repeat([[[[103.939, 116.779, 123.68]]]], self.batch_size, axis=0), dtype=tf.float32,
-                               name='img_mean')
-            self.aimg_input = (self.aimg_raw[..., ::-1] - mean)
-            self.simg_input = (self.simg_raw[..., ::-1] - mean)
-            if self.MIMIC_IMG:
-                self.timg_input = (self.timg_raw[..., ::-1] - mean)
-
-        elif self.intensity_range == 'raw':
-            self.aimg_input = self.aimg_raw
-            self.simg_input = self.simg_raw
-            if self.MIMIC_IMG:
-                self.timg_input = self.timg_raw
-
-        def batch_gen_DSSIM(aimg_raw_split, simg_raw_split):
-            msssim_split = tf.image.ssim(aimg_raw_split, simg_raw_split, max_val=255.0)
-            dist = (1.0 - tf.stack(msssim_split)) / 2.0
-            # dist = tf.square(aimg_raw_split - simg_raw_split)
-            return dist
-
-        # raw value of DSSIM distance
-        self.dist_raw = batch_gen_DSSIM(self.aimg_raw, self.simg_raw)
-        # distance value after applying threshold
-        self.dist = tf.maximum(self.dist_raw - self.l_threshold, 0.0)
-        # self.dist = self.dist_raw
-        self.dist_raw_sum = tf.reduce_sum(
-            tf.where(self.mask,
-                     self.dist_raw,
-                     tf.zeros_like(self.dist_raw)))
-        self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
-
-        def resize_tensor(input_tensor, model_input_shape):
-            if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
-                return input_tensor
-            resized_tensor = tf.image.resize(input_tensor, model_input_shape[:2])
-            return resized_tensor
-
-        def calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input):
-            target_features = bottleneck_model(cur_timg_input)
-            return target_features
-
-        self.bottlesim = 0.0
-        self.bottlesim_sum = 0.0
-        self.bottlesim_push = 0.0
-        for bottleneck_model in bottleneck_model_ls:
-            model_input_shape = bottleneck_model.input_shape[1:]
-            cur_aimg_input = resize_tensor(self.aimg_input, model_input_shape)
-
-            self.bottleneck_a = bottleneck_model(cur_aimg_input)
-            if self.MIMIC_IMG:
-                cur_timg_input = self.timg_input
-                cur_simg_input = self.simg_input
-                self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
-            else:
-                self.bottleneck_t = self.bottleneck_t_raw
-
-            bottleneck_diff = self.bottleneck_t - self.bottleneck_a
-
-            scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))
-
-            cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
-            cur_bottlesim = cur_bottlesim / scale_factor
-            cur_bottlesim_sum = tf.reduce_sum(cur_bottlesim)
-
-            self.bottlesim += cur_bottlesim
-
-            self.bottlesim_sum += cur_bottlesim_sum
-
-        # sum up the losses
-        if self.maximize:
-            self.loss = self.const * tf.square(self.dist) - self.bottlesim
-        else:
-            self.loss = self.const * tf.square(self.dist) + self.bottlesim
-
-        self.loss_sum = tf.reduce_sum(tf.where(self.mask,
-                                               self.loss,
-                                               tf.zeros_like(self.loss)))
-
-        start_vars = set(x.name for x in tf.global_variables())
-        self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
-
-        optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
-        # optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)
-
-        self.train = optimizer.minimize(self.loss_sum, var_list=[self.modifier])
-        end_vars = tf.global_variables()
-        new_vars = [x for x in end_vars if x.name not in start_vars]
-
-        # these are the variables to initialize when we run
-        self.setup = []
-        self.setup.append(self.modifier.assign(self.assign_modifier))
-        if self.MIMIC_IMG:
-            self.setup.append(self.timg_tanh.assign(self.assign_timg_tanh))
-        else:
-            self.setup.append(self.bottleneck_t_raw.assign(
-                self.assign_bottleneck_t_raw))
-        self.setup.append(self.simg_tanh.assign(self.assign_simg_tanh))
-        self.setup.append(self.const.assign(self.assign_const))
-        self.setup.append(self.mask.assign(self.assign_mask))
-        self.setup.append(self.weights.assign(self.assign_weights))
-
-        self.init = tf.variables_initializer(var_list=[self.modifier] + new_vars)
-
-        print('Attacker loaded')
-
-    def preprocess_arctanh(self, imgs):
-
-        imgs = reverse_preprocess(imgs, self.intensity_range)
-        imgs /= 255.0
-        imgs -= 0.5
-        imgs *= self.tanh_constant
-        tanh_imgs = np.arctanh(imgs)
-
-        return tanh_imgs
-
-    def clipping(self, imgs):
-
-        imgs = reverse_preprocess(imgs, self.intensity_range)
-        imgs = np.clip(imgs, 0, self.max_val)
-        imgs = preprocess(imgs, self.intensity_range)
-
-        return imgs
-
-    def attack(self, source_imgs, target_imgs, weights=None):
-
-        if weights is None:
-            weights = np.ones([source_imgs.shape[0]] +
-                              list(self.bottleneck_shape[1:]))
-
-        assert weights.shape[1:] == self.bottleneck_shape[1:]
-        assert source_imgs.shape[1:] == self.input_shape[1:]
-        assert source_imgs.shape[0] == weights.shape[0]
-        if self.MIMIC_IMG:
-            assert target_imgs.shape[1:] == self.input_shape[1:]
-            assert source_imgs.shape[0] == target_imgs.shape[0]
-        else:
-            assert target_imgs.shape[1:] == self.bottleneck_shape[1:]
-            assert source_imgs.shape[0] == target_imgs.shape[0]
-
-        start_time = time.time()
-
-        adv_imgs = []
-        print('%d batches in total'
-              % int(np.ceil(len(source_imgs) / self.batch_size)))
-
-        for idx in range(0, len(source_imgs), self.batch_size):
-            print('processing batch %d at %s' % (idx, datetime.datetime.now()))
-            adv_img = self.attack_batch(source_imgs[idx:idx + self.batch_size],
-                                        target_imgs[idx:idx + self.batch_size],
-                                        weights[idx:idx + self.batch_size])
-            adv_imgs.extend(adv_img)
-
-        elapsed_time = time.time() - start_time
-        print('attack cost %f s' % (elapsed_time))
-
-        return np.array(adv_imgs)
-
-    def attack_batch(self, source_imgs, target_imgs, weights):
-
-        """
-        Run the attack on a batch of images and labels.
-        """
-
-        LR = self.learning_rate
-        nb_imgs = source_imgs.shape[0]
-        mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
-        # mask = [True] * self.batch_size
-        mask = np.array(mask, dtype=np.bool)
-
-        source_imgs = np.array(source_imgs)
-        target_imgs = np.array(target_imgs)
-
-        # convert to tanh-space
-        simg_tanh = self.preprocess_arctanh(source_imgs)
-        if self.MIMIC_IMG:
-            timg_tanh = self.preprocess_arctanh(target_imgs)
-        else:
-            timg_tanh = target_imgs
-
-        CONST = np.ones(self.batch_size) * self.initial_const
-
-        self.sess.run(self.init)
-        simg_tanh_batch = np.zeros(self.input_shape)
-        if self.MIMIC_IMG:
-            timg_tanh_batch = np.zeros(self.input_shape)
-        else:
-            timg_tanh_batch = np.zeros(self.bottleneck_shape)
-
-        weights_batch = np.zeros(self.bottleneck_shape)
-        simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
-        timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
-        weights_batch[:nb_imgs] = weights[:nb_imgs]
-        modifier_batch = np.ones(self.input_shape) * 1e-6
-
-        temp_images = []
-
-        # set the variables so that we don't have to send them over again
-        if self.MIMIC_IMG:
-            self.sess.run(self.setup,
-                          {self.assign_timg_tanh: timg_tanh_batch,
-                           self.assign_simg_tanh: simg_tanh_batch,
-                           self.assign_const: CONST,
-                           self.assign_mask: mask,
-                           self.assign_weights: weights_batch,
-                           self.assign_modifier: modifier_batch})
-        else:
-            # if directly mimicking a vector, use assign_bottleneck_t_raw
-            # in setup
-            self.sess.run(self.setup,
-                          {self.assign_bottleneck_t_raw: timg_tanh_batch,
-                           self.assign_simg_tanh: simg_tanh_batch,
-                           self.assign_const: CONST,
-                           self.assign_mask: mask,
-                           self.assign_weights: weights_batch,
-                           self.assign_modifier: modifier_batch})
-
-        best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
-        best_adv = np.zeros_like(source_imgs)
-
-        if self.verbose == 1:
-            loss_sum = float(self.sess.run(self.loss_sum))
-            dist_sum = float(self.sess.run(self.dist_sum))
-            thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
-            dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
-            bottlesim_sum = self.sess.run(self.bottlesim_sum)
-            print('START: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
-                  % (Decimal(loss_sum),
-                     dist_sum,
-                     thresh_over,
-                     dist_raw_sum,
-                     bottlesim_sum / nb_imgs))
-
-        finished_idx = set()
-        try:
-            total_distance = [0] * nb_imgs
-
-            if self.limit_dist:
-                dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
-                    [self.dist_raw,
-                     self.bottlesim,
-                     self.aimg_input])
-                for e, (dist_raw, bottlesim, aimg_input) in enumerate(
-                        zip(dist_raw_list, bottlesim_list, aimg_input_list)):
-                    if e >= nb_imgs:
-                        break
-                    total_distance[e] = bottlesim
-
-            for iteration in range(self.MAX_ITERATIONS):
-
-                self.sess.run([self.train], feed_dict={self.learning_rate_holder: LR})
-
-                dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
-                    [self.dist_raw,
-                     self.bottlesim,
-                     self.aimg_input])
-
-                all_clear = True
-                for e, (dist_raw, bottlesim, aimg_input) in enumerate(
-                        zip(dist_raw_list, bottlesim_list, aimg_input_list)):
-
-                    if e in finished_idx:
-                        continue
-
-                    if e >= nb_imgs:
-                        break
-                    if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
-                            not self.maximize)) or (
-                            bottlesim > best_bottlesim[e] and self.maximize):
-                        best_bottlesim[e] = bottlesim
-                        best_adv[e] = aimg_input
-
-                    # if iteration > 20 and (dist_raw >= self.l_threshold or iteration == self.MAX_ITERATIONS - 1):
-                    #     finished_idx.add(e)
-                    #     print("{} finished at dist {}".format(e, dist_raw))
-                    #     best_bottlesim[e] = bottlesim
-                    #     best_adv[e] = aimg_input
-                    #
-                    all_clear = False
-
-                if all_clear:
-                    break
-
-                if iteration != 0 and iteration % (self.MAX_ITERATIONS // 2) == 0:
-                    LR = LR / 2
-                    print("Learning Rate: ", LR)
-
-                if iteration % (self.MAX_ITERATIONS // 5) == 0:
-                    if self.verbose == 1:
-                        dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
-                        bottlesim_sum = self.sess.run(self.bottlesim_sum)
-                        print('ITER %4d perturb: %.5f; sim: %f'
-                              % (iteration, dist_raw_sum / nb_imgs, bottlesim_sum / nb_imgs))
-
-                # protected_images = aimg_input_list
-                #
-                # orginal_images = np.copy(self.faces.cropped_faces)
-                # cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(
-                #     orginal_images)
-                # final_images = self.faces.merge_faces(cloak_perturbation)
-                #
-                # for p_img, img in zip(protected_images, final_images):
-                #     dump_image(reverse_process_cloaked(p_img),
-                #                "/home/shansixioing/fawkes/data/emily/emily_cloaked_cropped{}.png".format(iteration),
-                #                format='png')
-                #
-                #     dump_image(img,
-                #                "/home/shansixioing/fawkes/data/emily/emily_cloaked_{}.png".format(iteration),
-                #                format='png')
-
-        except KeyboardInterrupt:
-            pass
-
-        if self.verbose == 1:
-            loss_sum = float(self.sess.run(self.loss_sum))
-            dist_sum = float(self.sess.run(self.dist_sum))
-            dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
-            bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
-            print('END: Total loss: %.4E; perturb: %.6f (raw: %.6f); sim: %f'
-                  % (Decimal(loss_sum),
-                     dist_sum,
-                     dist_raw_sum,
-                     bottlesim_sum / nb_imgs))
-
-        best_adv = self.clipping(best_adv[:nb_imgs])
-
-        return best_adv
diff --git a/build/lib/fawkes/protection.py b/build/lib/fawkes/protection.py
deleted file mode 100644
index da04387..0000000
--- a/build/lib/fawkes/protection.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# from __future__ import absolute_import
-# from __future__ import division
-# from __future__ import print_function
-
-import argparse
-import glob
-import os
-import random
-import sys
-
-import numpy as np
-
-from .differentiator import FawkesMaskGeneration
-from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
-    Faces
-
-random.seed(12243)
-np.random.seed(122412)
-
-BATCH_SIZE = 32
-
-
-def generate_cloak_images(sess, feature_extractors, image_X, target_emb=None, th=0.01, faces=None, sd=1e9, lr=2,
-                          max_step=500):
-    batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X)
-
-    differentiator = FawkesMaskGeneration(sess, feature_extractors,
-                                          batch_size=batch_size,
-                                          mimic_img=True,
-                                          intensity_range='imagenet',
-                                          initial_const=sd,
-                                          learning_rate=lr,
-                                          max_iterations=max_step,
-                                          l_threshold=th,
-                                          verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:],
-                                          faces=faces)
-
-    cloaked_image_X = differentiator.attack(image_X, target_emb)
-    return cloaked_image_X
-
-
-def check_imgs(imgs):
-    if np.max(imgs) <= 1 and np.min(imgs) >= 0:
-        imgs = imgs * 255.0
-    elif np.max(imgs) <= 255 and np.min(imgs) >= 0:
-        pass
-    else:
-        raise Exception("Image values ")
-    return imgs
-
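# Note on the optimization that FawkesMaskGeneration (differentiator.py,
# deleted above) builds in TF1 graph mode: the cloaked image is parameterized
# in tanh space, DSSIM above the l_threshold budget is penalized
# quadratically, and the normalized feature-space distance to a target
# embedding is minimized. A minimal TF2-style sketch of the same loop,
# assuming `extractor` is a stand-in feature extractor and all constants are
# illustrative; this is not the API the deleted code uses:

import numpy as np
import tensorflow as tf

def cloak_sketch(source, target, extractor, l_threshold=0.03, const=1e9, steps=100):
    """source/target: float32 arrays of shape (1, 224, 224, 3) in [0, 255]."""
    tanh_c = 2 - 1e-6
    # tanh-space copy of the source; keeps pixels in range by construction
    simg_tanh = np.arctanh((source / 255.0 - 0.5) * tanh_c)
    modifier = tf.Variable(tf.zeros_like(source))
    target_feat = extractor(target)
    opt = tf.keras.optimizers.Adadelta(learning_rate=2.0)
    for _ in range(steps):
        with tf.GradientTape() as tape:
            # adversarial image mapped back to raw [0, 255] space
            aimg = (tf.tanh(modifier + simg_tanh) / tanh_c + 0.5) * 255.0
            # only DSSIM above the perceptual budget contributes to the loss
            dssim = (1.0 - tf.image.ssim(aimg, source, max_val=255.0)) / 2.0
            dist = tf.maximum(dssim - l_threshold, 0.0)
            # normalized gap between current and target embeddings
            gap = tf.norm(target_feat - extractor(aimg), axis=1) / tf.norm(target_feat, axis=1)
            loss = tf.reduce_sum(const * tf.square(dist) + gap)
        grads = tape.gradient(loss, [modifier])
        opt.apply_gradients(zip(grads, [modifier]))
    return ((tf.tanh(modifier + simg_tanh) / tanh_c + 0.5) * 255.0).numpy()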
-
-def main(*argv):
-    if not argv:
-        argv = list(sys.argv)
-
-    # attach SIGPIPE handler to properly handle broken pipe
-    try:  # sigpipe not available under windows. just ignore in this case
-        import signal
-        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
-    except Exception as e:
-        pass
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--directory', '-d', type=str,
-                        help='directory that contain images for cloaking', default='imgs/')
-
-    parser.add_argument('--gpu', type=str,
-                        help='GPU id', default='0')
-
-    parser.add_argument('--mode', type=str,
-                        help='cloak generation mode', default='high')
-    parser.add_argument('--feature-extractor', type=str,
-                        help="name of the feature extractor used for optimization",
-                        default="high_extract")
-
-    parser.add_argument('--th', type=float, default=0.01)
-    parser.add_argument('--max-step', type=int, default=500)
-    parser.add_argument('--sd', type=int, default=1e9)
-    parser.add_argument('--lr', type=float, default=2)
-
-    parser.add_argument('--separate_target', action='store_true')
-
-    parser.add_argument('--format', type=str,
-                        help="final image format",
-                        default="jpg")
-    args = parser.parse_args(argv[1:])
-
-    if args.mode == 'low':
-        args.feature_extractor = "high_extract"
-        args.th = 0.003
-    elif args.mode == 'mid':
-        args.feature_extractor = "high_extract"
-        args.th = 0.005
-    elif args.mode == 'high':
-        args.feature_extractor = "high_extract"
-        args.th = 0.007
-    elif args.mode == 'ultra':
-        args.feature_extractor = "high_extract"
-        args.th = 0.01
-    elif args.mode == 'custom':
-        pass
-    else:
-        raise Exception("mode must be one of 'low', 'mid', 'high', 'ultra', 'custom'")
-
-    assert args.format in ['png', 'jpg', 'jpeg']
-    if args.format == 'jpg':
-        args.format = 'jpeg'
-
-    sess = init_gpu(args.gpu)
-    fs_names = [args.feature_extractor]
-    feature_extractors_ls = [load_extractor(name) for name in fs_names]
-
-    image_paths = glob.glob(os.path.join(args.directory, "*"))
-    image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]]
-    if not image_paths:
-        print("No images in the directory")
-        exit(1)
-
-    faces = Faces(image_paths, sess)
-
-    orginal_images = faces.cropped_faces
-    orginal_images = np.array(orginal_images)
-
-    if args.separate_target:
-        target_embedding = []
-        for org_img in orginal_images:
-            org_img = org_img.reshape([1] + list(org_img.shape))
-            tar_emb = select_target_label(org_img, feature_extractors_ls, fs_names)
-            target_embedding.append(tar_emb)
-        target_embedding = np.concatenate(target_embedding)
-    else:
-        target_embedding = select_target_label(orginal_images, feature_extractors_ls, fs_names)
-
-    protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images,
-                                             target_emb=target_embedding, th=args.th, faces=faces, sd=args.sd,
-                                             lr=args.lr, max_step=args.max_step)
-
-    faces.cloaked_cropped_faces = protected_images
-
-    cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(orginal_images)
-    final_images = faces.merge_faces(cloak_perturbation)
-
-    for p_img, cloaked_img, path in zip(final_images, protected_images, image_paths):
-        file_name = "{}_{}_{}_cloaked.{}".format(".".join(path.split(".")[:-1]), args.mode, args.th, args.format)
-        dump_image(p_img, file_name, format=args.format)
-
-
-if __name__ == '__main__':
-    main(*sys.argv)
diff --git a/build/lib/fawkes/utils.py b/build/lib/fawkes/utils.py
deleted file mode 100644
index 6f8f590..0000000
--- a/build/lib/fawkes/utils.py
+++ /dev/null
@@ -1,574 +0,0 @@
-import glob
-import gzip
-import json
-import os
-import pickle
-import random
-import sys
-
-stderr = sys.stderr
-sys.stderr = open(os.devnull, 'w')
-import keras
-
-sys.stderr = stderr
-import keras.backend as K
-import numpy as np
-import tensorflow as tf
-from PIL import Image, ExifTags
-# from keras.applications.vgg16 import preprocess_input
-from keras.layers import Dense, Activation
-from keras.models import Model
-from keras.preprocessing import image
-from keras.utils import get_file
-from skimage.transform import resize
-from sklearn.metrics import pairwise_distances
-
-from .align_face import align, aligner
-
-
-def clip_img(X, preprocessing='raw'):
-    X = reverse_preprocess(X, preprocessing)
-    X = np.clip(X, 0.0, 255.0)
-    X = preprocess(X, preprocessing)
-    return X
-
-
-def load_image(path):
-    img = Image.open(path)
-    if img._getexif() is not None:
-        for orientation in ExifTags.TAGS.keys():
-            if ExifTags.TAGS[orientation] == 'Orientation':
-                break
-
-        exif = dict(img._getexif().items())
-        if orientation in exif.keys():
-            if exif[orientation] == 3:
-                img = img.rotate(180, expand=True)
-            elif exif[orientation] == 6:
-                img = img.rotate(270, expand=True)
-            elif exif[orientation] == 8:
-                img = img.rotate(90, expand=True)
-            else:
-                pass
-    img = img.convert('RGB')
-    image_array = image.img_to_array(img)
-
-    return image_array
-
-
-class Faces(object):
-    def __init__(self, image_paths, sess):
-        self.aligner = aligner(sess)
-        self.org_faces = []
-        self.cropped_faces = []
-        self.cropped_faces_shape = []
-        self.cropped_index = []
-        self.callback_idx = []
-        for i, p in enumerate(image_paths):
-            cur_img = load_image(p)
-            self.org_faces.append(cur_img)
-            align_img = align(cur_img, self.aligner, margin=0.7)
-            cur_faces = align_img[0]
-
-            cur_shapes = [f.shape[:-1] for f in cur_faces]
-
-            cur_faces_square = []
-            for img in cur_faces:
-                long_size = max([img.shape[1], img.shape[0]])
-                base = np.zeros((long_size, long_size, 3))
-                base[0:img.shape[0], 0:img.shape[1], :] = img
-                cur_faces_square.append(base)
-
-            cur_index = align_img[1]
-            cur_faces_square = [resize(f, (224, 224)) for f in cur_faces_square]
-            self.cropped_faces_shape.extend(cur_shapes)
-            self.cropped_faces.extend(cur_faces_square)
-            self.cropped_index.extend(cur_index)
-            self.callback_idx.extend([i] * len(cur_faces_square))
-
-        if not self.cropped_faces:
-            print("No faces detected")
-            exit(1)
-
-        self.cropped_faces = np.array(self.cropped_faces)
-
-        self.cropped_faces = preprocess(self.cropped_faces, 'imagenet')
-
-        self.cloaked_cropped_faces = None
-        self.cloaked_faces = np.copy(self.org_faces)
-
-    def get_faces(self):
-        return self.cropped_faces
-
-    def merge_faces(self, cloaks):
-
-        self.cloaked_faces = np.copy(self.org_faces)
-
-        for i in range(len(self.cropped_faces)):
-            cur_cloak = cloaks[i]
-            org_shape = self.cropped_faces_shape[i]
-            old_square_shape = max([org_shape[0], org_shape[1]])
-            reshape_cloak = resize(cur_cloak, (old_square_shape, old_square_shape))
-            reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]
-
-            callback_id = self.callback_idx[i]
-            bb = self.cropped_index[i]
-            self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak
-
-        return self.cloaked_faces
-
-
-def dump_dictionary_as_json(dict, outfile):
-    j = json.dumps(dict)
-    with open(outfile, "wb") as f:
-        f.write(j.encode())
-
-
-def fix_gpu_memory(mem_fraction=1):
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-    tf_config = None
-    if tf.test.is_gpu_available():
-        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
-        tf_config = tf.ConfigProto(gpu_options=gpu_options)
-        tf_config.gpu_options.allow_growth = True
-        tf_config.log_device_placement = False
-    init_op = tf.global_variables_initializer()
-    sess = tf.Session(config=tf_config)
-    sess.run(init_op)
-    K.set_session(sess)
-    return sess
-
-
-def load_victim_model(number_classes, teacher_model=None, end2end=False):
-    for l in teacher_model.layers:
-        l.trainable = end2end
-    x = teacher_model.layers[-1].output
-    x = Dense(number_classes)(x)
-    x = Activation('softmax', name="act")(x)
-    model = Model(teacher_model.input, x)
-    opt = keras.optimizers.Adadelta()
-    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
-    return model
-
-
-def init_gpu(gpu_index, force=False):
-    if isinstance(gpu_index, list):
-        gpu_num = ','.join([str(i) for i in gpu_index])
-    else:
-        gpu_num = str(gpu_index)
-    if "CUDA_VISIBLE_DEVICES" in os.environ and os.environ["CUDA_VISIBLE_DEVICES"] and not force:
-        print('GPU already initiated')
-        return
-    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
-    sess = fix_gpu_memory()
-    return sess
-
-
-def preprocess(X, method):
-    assert method in {'raw', 'imagenet', 'inception', 'mnist'}
-
-    if method is 'raw':
-        pass
-    elif method is 'imagenet':
-        X = imagenet_preprocessing(X)
-    else:
-        raise Exception('unknown method %s' % method)
-
-    return X
-
-
-def reverse_preprocess(X, method):
-    assert method in {'raw', 'imagenet', 'inception', 'mnist'}
-
-    if method is 'raw':
-        pass
-    elif method is 'imagenet':
-        X = imagenet_reverse_preprocessing(X)
-    else:
-        raise Exception('unknown method %s' % method)
-
-    return X
-
-
-def imagenet_preprocessing(x, data_format=None):
-    if data_format is None:
-        data_format = K.image_data_format()
-    assert data_format in ('channels_last', 'channels_first')
-
-    x = np.array(x)
-    if data_format == 'channels_first':
-        # 'RGB'->'BGR'
-        if x.ndim == 3:
-            x = x[::-1, ...]
-        else:
-            x = x[:, ::-1, ...]
-    else:
-        # 'RGB'->'BGR'
-        x = x[..., ::-1]
-
-    mean = [103.939, 116.779, 123.68]
-    std = None
-
-    # Zero-center by mean pixel
-    if data_format == 'channels_first':
-        if x.ndim == 3:
-            x[0, :, :] -= mean[0]
-            x[1, :, :] -= mean[1]
-            x[2, :, :] -= mean[2]
-            if std is not None:
-                x[0, :, :] /= std[0]
-                x[1, :, :] /= std[1]
-                x[2, :, :] /= std[2]
-        else:
-            x[:, 0, :, :] -= mean[0]
-            x[:, 1, :, :] -= mean[1]
-            x[:, 2, :, :] -= mean[2]
-            if std is not None:
-                x[:, 0, :, :] /= std[0]
-                x[:, 1, :, :] /= std[1]
-                x[:, 2, :, :] /= std[2]
-    else:
-        x[..., 0] -= mean[0]
-        x[..., 1] -= mean[1]
-        x[..., 2] -= mean[2]
-        if std is not None:
-            x[..., 0] /= std[0]
-            x[..., 1] /= std[1]
-            x[..., 2] /= std[2]
-
-    return x
-
-
-def imagenet_reverse_preprocessing(x, data_format=None):
-    import keras.backend as K
-    x = np.array(x)
-    if data_format is None:
-        data_format = K.image_data_format()
-    assert data_format in ('channels_last', 'channels_first')
-
-    if data_format == 'channels_first':
-        if x.ndim == 3:
-            # Zero-center by mean pixel
-            x[0, :, :] += 103.939
-            x[1, :, :] += 116.779
-            x[2, :, :] += 123.68
-            # 'BGR'->'RGB'
-            x = x[::-1, :, :]
-        else:
-            x[:, 0, :, :] += 103.939
-            x[:, 1, :, :] += 116.779
-            x[:, 2, :, :] += 123.68
-            x = x[:, ::-1, :, :]
-    else:
-        # Zero-center by mean pixel
-        x[..., 0] += 103.939
-        x[..., 1] += 116.779
-        x[..., 2] += 123.68
-        # 'BGR'->'RGB'
-        x = x[..., ::-1]
-    return x
-
-
-def reverse_process_cloaked(x, preprocess='imagenet'):
-    x = clip_img(x, preprocess)
-    return reverse_preprocess(x, preprocess)
-
-
-def build_bottleneck_model(model, cut_off):
-    bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
-    bottleneck_model.compile(loss='categorical_crossentropy',
-                             optimizer='adam',
-                             metrics=['accuracy'])
-    return bottleneck_model
-
-
-def load_extractor(name):
-    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
-    os.makedirs(model_dir, exist_ok=True)
-    model_file = os.path.join(model_dir, "{}.h5".format(name))
-    if os.path.exists(model_file):
-        model = keras.models.load_model(model_file)
-    else:
-        get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
-                 cache_dir=model_dir, cache_subdir='')
-
-        get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
-                 cache_dir=model_dir, cache_subdir='')
-
-        model = keras.models.load_model(model_file)
-
-    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
-        raise Exception(
-            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
-    # if "extract" in name.split("/")[-1]:
-    #     pass
-    # else:
-    #     print("Convert a model to a feature extractor")
-    #     model = build_bottleneck_model(model, model.layers[layer_idx].name)
-    #     model.save(name + "extract")
-    #     model = keras.models.load_model(name + "extract")
-    return model
-
-
-def get_dataset_path(dataset):
-    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
-    if not os.path.exists(os.path.join(model_dir, "config.json")):
-        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
-
-    config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
-    if dataset not in config:
-        raise Exception(
-            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
Abort".format( - dataset)) - return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][ - 'num_images'] - - -def normalize(x): - return x / np.linalg.norm(x, axis=1, keepdims=True) - - -def dump_image(x, filename, format="png", scale=False): - # img = image.array_to_img(x, scale=scale) - img = image.array_to_img(x) - img.save(filename, format) - return - - -def load_dir(path): - assert os.path.exists(path) - x_ls = [] - for file in os.listdir(path): - cur_path = os.path.join(path, file) - im = image.load_img(cur_path, target_size=(224, 224)) - im = image.img_to_array(im) - x_ls.append(im) - raw_x = np.array(x_ls) - return preprocess(raw_x, 'imagenet') - - -def load_embeddings(feature_extractors_names): - model_dir = os.path.join(os.path.expanduser('~'), '.fawkes') - dictionaries = [] - for extractor_name in feature_extractors_names: - fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb') - path2emb = pickle.load(fp) - fp.close() - - dictionaries.append(path2emb) - - merge_dict = {} - for k in dictionaries[0].keys(): - cur_emb = [dic[k] for dic in dictionaries] - merge_dict[k] = np.concatenate(cur_emb) - return merge_dict - - -def extractor_ls_predict(feature_extractors_ls, X): - feature_ls = [] - for extractor in feature_extractors_ls: - cur_features = extractor.predict(X) - feature_ls.append(cur_features) - concated_feature_ls = np.concatenate(feature_ls, axis=1) - concated_feature_ls = normalize(concated_feature_ls) - return concated_feature_ls - - -def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'): - features1 = extractor_ls_predict(feature_extractors_ls, a) - features2 = extractor_ls_predict(feature_extractors_ls, b) - - pair_cos = pairwise_distances(features1, features2, metric) - max_sum = np.min(pair_cos, axis=0) - max_sum_arg = np.argsort(max_sum)[::-1] - max_sum_arg = max_sum_arg[:len(a)] - max_sum = [max_sum[i] for i in max_sum_arg] - paired_target_X = [b[j] for j in max_sum_arg] - paired_target_X = np.array(paired_target_X) - return np.min(max_sum), paired_target_X - - -def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'): - model_dir = os.path.join(os.path.expanduser('~'), '.fawkes') - - original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs) - - path2emb = load_embeddings(feature_extractors_names) - items = list(path2emb.items()) - paths = [p[0] for p in items] - embs = [p[1] for p in items] - embs = np.array(embs) - - pair_dist = pairwise_distances(original_feature_x, embs, metric) - max_sum = np.min(pair_dist, axis=0) - max_id = np.argmax(max_sum) - - target_data_id = paths[int(max_id)] - image_dir = os.path.join(model_dir, "target_data/{}/*".format(target_data_id)) - if not os.path.exists(image_dir): - get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/target_images".format(name), - cache_dir=model_dir, cache_subdir='') - - image_paths = glob.glob(image_dir) - - target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in - image_paths] - - target_images = np.array([resize(x, (224, 224)) for x in target_images]) - target_images = preprocess(target_images, 'imagenet') - - target_images = list(target_images) - while len(target_images) < len(imgs): - target_images += target_images - - target_images = random.sample(target_images, len(imgs)) - return np.array(target_images) - -# class CloakData(object): -# def __init__(self, protect_directory=None, img_shape=(224, 224)): -# -# 
-#         self.img_shape = img_shape
-#         # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
-#         # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
-#         self.protect_directory = protect_directory
-#
-#         self.protect_X = self.load_label_data(self.protect_directory)
-#
-#         self.cloaked_protect_train_X = None
-#
-#         self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
-#         self.all_training_path = self.get_all_data_path(self.label2path_train)
-#         self.all_test_path = self.get_all_data_path(self.label2path_test)
-#         self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
-#
-#     def get_class_image_files(self, path):
-#         return [os.path.join(path, f) for f in os.listdir(path)]
-#
-#     def extractor_ls_predict(self, feature_extractors_ls, X):
-#         feature_ls = []
-#         for extractor in feature_extractors_ls:
-#             cur_features = extractor.predict(X)
-#             feature_ls.append(cur_features)
-#         concated_feature_ls = np.concatenate(feature_ls, axis=1)
-#         concated_feature_ls = normalize(concated_feature_ls)
-#         return concated_feature_ls
-#
-#     def load_embeddings(self, feature_extractors_names):
-#         dictionaries = []
-#         for extractor_name in feature_extractors_names:
-#             path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
-#             dictionaries.append(path2emb)
-#
-#         merge_dict = {}
-#         for k in dictionaries[0].keys():
-#             cur_emb = [dic[k] for dic in dictionaries]
-#             merge_dict[k] = np.concatenate(cur_emb)
-#         return merge_dict
-#
-#     def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
-#         original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
-#
-#         path2emb = self.load_embeddings(feature_extractors_names)
-#         items = list(path2emb.items())
-#         paths = [p[0] for p in items]
-#         embs = [p[1] for p in items]
-#         embs = np.array(embs)
-#
-#         pair_dist = pairwise_distances(original_feature_x, embs, metric)
-#         max_sum = np.min(pair_dist, axis=0)
-#         sorted_idx = np.argsort(max_sum)[::-1]
-#
-#         highest_num = 0
-#         paired_target_X = None
-#         final_target_class_path = None
-#         for idx in sorted_idx[:5]:
-#             target_class_path = paths[idx]
-#             cur_target_X = self.load_dir(target_class_path)
-#             cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
-#             cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
-#                                                                          feature_extractors_ls,
-#                                                                          metric=metric)
-#             if cur_tot_sum > highest_num:
-#                 highest_num = cur_tot_sum
-#                 paired_target_X = cur_paired_target_X
-#                 final_target_class_path = target_class_path
-#
-#         np.random.shuffle(paired_target_X)
-#         return final_target_class_path, paired_target_X
-#
-#     def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
-#         features1 = self.extractor_ls_predict(feature_extractors_ls, a)
-#         features2 = self.extractor_ls_predict(feature_extractors_ls, b)
-#
-#         pair_cos = pairwise_distances(features1, features2, metric)
-#         max_sum = np.min(pair_cos, axis=0)
-#         max_sum_arg = np.argsort(max_sum)[::-1]
-#         max_sum_arg = max_sum_arg[:len(a)]
-#         max_sum = [max_sum[i] for i in max_sum_arg]
-#         paired_target_X = [b[j] for j in max_sum_arg]
-#         paired_target_X = np.array(paired_target_X)
-#         return np.min(max_sum), paired_target_X
-#
-#     def get_all_data_path(self, label2path):
-#         all_paths = []
-#         for k, v in label2path.items():
-#             cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
-#             all_paths.extend(cur_all_paths)
-#         return all_paths
-#
-#     def load_label_data(self, label):
-#         train_label_path = os.path.join(self.train_data_dir, label)
-#         test_label_path = os.path.join(self.test_data_dir, label)
-#         train_X = self.load_dir(train_label_path)
-#         test_X = self.load_dir(test_label_path)
-#         return train_X, test_X
-#
-#     def load_dir(self, path):
-#         assert os.path.exists(path)
-#         x_ls = []
-#         for file in os.listdir(path):
-#             cur_path = os.path.join(path, file)
-#             im = image.load_img(cur_path, target_size=self.img_shape)
-#             im = image.img_to_array(im)
-#             x_ls.append(im)
-#         raw_x = np.array(x_ls)
-#         return preprocess_input(raw_x)
-#
-#     def build_data_mapping(self):
-#         label2path_train = {}
-#         label2path_test = {}
-#         idx = 0
-#         path2idx = {}
-#         for label_name in self.all_labels:
-#             full_path_train = os.path.join(self.train_data_dir, label_name)
-#             full_path_test = os.path.join(self.test_data_dir, label_name)
-#             label2path_train[full_path_train] = list(os.listdir(full_path_train))
-#             label2path_test[full_path_test] = list(os.listdir(full_path_test))
-#             for img_file in os.listdir(full_path_train):
-#                 path2idx[os.path.join(full_path_train, img_file)] = idx
-#             for img_file in os.listdir(full_path_test):
-#                 path2idx[os.path.join(full_path_test, img_file)] = idx
-#             idx += 1
-#         return label2path_train, label2path_test, path2idx
-#
-#     def generate_data_post_cloak(self, sybil=False):
-#         assert self.cloaked_protect_train_X is not None
-#         while True:
-#             batch_X = []
-#             batch_Y = []
-#             cur_batch_path = random.sample(self.all_training_path, 32)
-#             for p in cur_batch_path:
-#                 cur_y = self.path2idx[p]
-#                 if p in self.protect_class_path:
-#                     cur_x = random.choice(self.cloaked_protect_train_X)
-#                 elif sybil and (p in self.sybil_class):
-#                     cur_x = random.choice(self.cloaked_sybil_train_X)
-#                 else:
-#                     im = image.load_img(p, target_size=self.img_shape)
-#                     im = image.img_to_array(im)
-#                     cur_x = preprocess_input(im)
-#                 batch_X.append(cur_x)
-#                 batch_Y.append(cur_y)
-#             batch_X = np.array(batch_X)
-#             batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
-#             yield batch_X, batch_Y
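# The deleted build/lib/fawkes modules above are setuptools build output that
# mirrors the installable fawkes package, so the protection pipeline they
# duplicate remains available through fawkes.protection.main(). A hedged usage
# sketch, assuming the package is installed; the directory, mode, and format
# values here are illustrative placeholders:

from fawkes.protection import main

if __name__ == '__main__':
    # roughly equivalent to: python -m fawkes.protection -d imgs/ --mode high
    # main() treats its first argument as the program name and parses the rest
    main('fawkes', '--directory', 'imgs/', '--mode', 'high', '--format', 'png')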