diff --git a/build/lib/fawkes/__init__.py b/build/lib/fawkes/__init__.py index e69de29..1c4f29b 100644 --- a/build/lib/fawkes/__init__.py +++ b/build/lib/fawkes/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# @Date : 2020-07-01 +# @Author : Shawn Shan (shansixiong@cs.uchicago.edu) +# @Link : https://www.shawnshan.com/ + + +__version__ = '0.0.2' + +from .differentiator import FawkesMaskGeneration +from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \ + Faces +from .protection import main +import logging +import sys +import os +logging.getLogger('tensorflow').disabled = True + + +__all__ = ( + '__version__', + 'FawkesMaskGeneration', 'load_extractor', + 'init_gpu', + 'select_target_label', 'dump_image', 'reverse_process_cloaked', 'Faces', 'main' +) \ No newline at end of file diff --git a/build/lib/fawkes/differentiator.py b/build/lib/fawkes/differentiator.py index 98b85d6..98a46e0 100644 --- a/build/lib/fawkes/differentiator.py +++ b/build/lib/fawkes/differentiator.py @@ -10,7 +10,7 @@ from decimal import Decimal import numpy as np import tensorflow as tf -from utils import preprocess, reverse_preprocess +from .utils import preprocess, reverse_preprocess class FawkesMaskGeneration: @@ -47,7 +47,7 @@ class FawkesMaskGeneration: max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST, intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD, max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE, - verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST): + verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST, faces=None): assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'} @@ -69,10 +69,12 @@ class FawkesMaskGeneration: self.ratio = ratio self.limit_dist = limit_dist self.single_shape = list(image_shape) + self.faces = faces self.input_shape = tuple([self.batch_size] + self.single_shape) self.bottleneck_shape = tuple([self.batch_size] + self.single_shape) + # self.bottleneck_shape = tuple([self.batch_size, bottleneck_model_ls[0].output_shape[-1]]) # the variable we're going to optimize over self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32)) @@ -149,8 +151,6 @@ class FawkesMaskGeneration: self.dist_raw, tf.zeros_like(self.dist_raw))) self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist))) - # self.dist_sum = 1e-5 * tf.reduce_sum(self.dist) - # self.dist_raw_sum = self.dist_sum def resize_tensor(input_tensor, model_input_shape): if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None: @@ -171,16 +171,14 @@ class FawkesMaskGeneration: self.bottleneck_a = bottleneck_model(cur_aimg_input) if self.MIMIC_IMG: - # cur_timg_input = resize_tensor(self.timg_input, model_input_shape) - # cur_simg_input = resize_tensor(self.simg_input, model_input_shape) cur_timg_input = self.timg_input cur_simg_input = self.simg_input self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input) - # self.bottleneck_t = bottleneck_model(cur_timg_input) else: self.bottleneck_t = self.bottleneck_t_raw bottleneck_diff = self.bottleneck_t - self.bottleneck_a + scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1)) cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1)) @@ -189,7 +187,6 @@ class FawkesMaskGeneration: self.bottlesim += cur_bottlesim - # self.bottlesim_push += cur_bottlesim_push_sum self.bottlesim_sum += cur_bottlesim_sum # sum up the losses @@ -202,20 +199,13 @@ class FawkesMaskGeneration: self.loss, tf.zeros_like(self.loss))) - # self.loss_sum = self.dist_sum + tf.reduce_sum(self.bottlesim) - # import pdb - # pdb.set_trace() - # self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss))) - - # Setup the Adadelta optimizer and keep track of variables - # we're creating start_vars = set(x.name for x in tf.global_variables()) self.learning_rate_holder = tf.placeholder(tf.float32, shape=[]) + optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder) # optimizer = tf.train.AdamOptimizer(self.learning_rate_holder) - self.train = optimizer.minimize(self.loss_sum, - var_list=[self.modifier]) + self.train = optimizer.minimize(self.loss_sum, var_list=[self.modifier]) end_vars = tf.global_variables() new_vars = [x for x in end_vars if x.name not in start_vars] @@ -297,6 +287,7 @@ class FawkesMaskGeneration: LR = self.learning_rate nb_imgs = source_imgs.shape[0] mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs) + # mask = [True] * self.batch_size mask = np.array(mask, dtype=np.bool) source_imgs = np.array(source_imgs) @@ -317,19 +308,34 @@ class FawkesMaskGeneration: timg_tanh_batch = np.zeros(self.input_shape) else: timg_tanh_batch = np.zeros(self.bottleneck_shape) + weights_batch = np.zeros(self.bottleneck_shape) simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs] timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs] weights_batch[:nb_imgs] = weights[:nb_imgs] modifier_batch = np.ones(self.input_shape) * 1e-6 - self.sess.run(self.setup, - {self.assign_timg_tanh: timg_tanh_batch, - self.assign_simg_tanh: simg_tanh_batch, - self.assign_const: CONST, - self.assign_mask: mask, - self.assign_weights: weights_batch, - self.assign_modifier: modifier_batch}) + temp_images = [] + + # set the variables so that we don't have to send them over again + if self.MIMIC_IMG: + self.sess.run(self.setup, + {self.assign_timg_tanh: timg_tanh_batch, + self.assign_simg_tanh: simg_tanh_batch, + self.assign_const: CONST, + self.assign_mask: mask, + self.assign_weights: weights_batch, + self.assign_modifier: modifier_batch}) + else: + # if directly mimicking a vector, use assign_bottleneck_t_raw + # in setup + self.sess.run(self.setup, + {self.assign_bottleneck_t_raw: timg_tanh_batch, + self.assign_simg_tanh: simg_tanh_batch, + self.assign_const: CONST, + self.assign_mask: mask, + self.assign_weights: weights_batch, + self.assign_modifier: modifier_batch}) best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs best_adv = np.zeros_like(source_imgs) @@ -347,6 +353,7 @@ class FawkesMaskGeneration: dist_raw_sum, bottlesim_sum / nb_imgs)) + finished_idx = set() try: total_distance = [0] * nb_imgs @@ -369,8 +376,14 @@ class FawkesMaskGeneration: [self.dist_raw, self.bottlesim, self.aimg_input]) + + all_clear = True for e, (dist_raw, bottlesim, aimg_input) in enumerate( zip(dist_raw_list, bottlesim_list, aimg_input_list)): + + if e in finished_idx: + continue + if e >= nb_imgs: break if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and ( @@ -379,40 +392,55 @@ class FawkesMaskGeneration: best_bottlesim[e] = bottlesim best_adv[e] = aimg_input - if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0: - # LR = LR / 2 + # if iteration > 20 and (dist_raw >= self.l_threshold or iteration == self.MAX_ITERATIONS - 1): + # finished_idx.add(e) + # print("{} finished at dist {}".format(e, dist_raw)) + # best_bottlesim[e] = bottlesim + # best_adv[e] = aimg_input + # + all_clear = False + + if all_clear: + break + + if iteration != 0 and iteration % (self.MAX_ITERATIONS // 2) == 0: + LR = LR / 2 print("Learning Rate: ", LR) - if iteration % (self.MAX_ITERATIONS // 10) == 0: + if iteration % (self.MAX_ITERATIONS // 5) == 0: if self.verbose == 1: - loss_sum = float(self.sess.run(self.loss_sum)) - dist_sum = float(self.sess.run(self.dist_sum)) - thresh_over = (dist_sum / - self.batch_size / - self.l_threshold * - 100) dist_raw_sum = float(self.sess.run(self.dist_raw_sum)) bottlesim_sum = self.sess.run(self.bottlesim_sum) - print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f' - % (iteration, - Decimal(loss_sum), - dist_sum, - thresh_over, - dist_raw_sum, - bottlesim_sum / nb_imgs)) + print('ITER %4d perturb: %.5f; sim: %f' + % (iteration, dist_raw_sum / nb_imgs, bottlesim_sum / nb_imgs)) + + # protected_images = aimg_input_list + # + # orginal_images = np.copy(self.faces.cropped_faces) + # cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked( + # orginal_images) + # final_images = self.faces.merge_faces(cloak_perturbation) + # + # for p_img, img in zip(protected_images, final_images): + # dump_image(reverse_process_cloaked(p_img), + # "/home/shansixioing/fawkes/data/emily/emily_cloaked_cropped{}.png".format(iteration), + # format='png') + # + # dump_image(img, + # "/home/shansixioing/fawkes/data/emily/emily_cloaked_{}.png".format(iteration), + # format='png') + except KeyboardInterrupt: pass if self.verbose == 1: loss_sum = float(self.sess.run(self.loss_sum)) dist_sum = float(self.sess.run(self.dist_sum)) - thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100) dist_raw_sum = float(self.sess.run(self.dist_raw_sum)) bottlesim_sum = float(self.sess.run(self.bottlesim_sum)) - print('END: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f' + print('END: Total loss: %.4E; perturb: %.6f (raw: %.6f); sim: %f' % (Decimal(loss_sum), dist_sum, - thresh_over, dist_raw_sum, bottlesim_sum / nb_imgs)) diff --git a/build/lib/fawkes/protection.py b/build/lib/fawkes/protection.py index a7afd55..da04387 100644 --- a/build/lib/fawkes/protection.py +++ b/build/lib/fawkes/protection.py @@ -1,3 +1,7 @@ +# from __future__ import absolute_import +# from __future__ import division +# from __future__ import print_function + import argparse import glob import os @@ -5,106 +9,141 @@ import random import sys import numpy as np -from differentiator import FawkesMaskGeneration -from keras.applications.vgg16 import preprocess_input -from keras.preprocessing import image -from skimage.transform import resize -from tensorflow import set_random_seed -from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked + +from .differentiator import FawkesMaskGeneration +from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \ + Faces random.seed(12243) np.random.seed(122412) -set_random_seed(12242) -BATCH_SIZE = 1 -MAX_ITER = 1000 +BATCH_SIZE = 32 -def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01): +def generate_cloak_images(sess, feature_extractors, image_X, target_emb=None, th=0.01, faces=None, sd=1e9, lr=2, + max_step=500): batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X) differentiator = FawkesMaskGeneration(sess, feature_extractors, batch_size=batch_size, mimic_img=True, intensity_range='imagenet', - initial_const=args.sd, - learning_rate=args.lr, - max_iterations=MAX_ITER, + initial_const=sd, + learning_rate=lr, + max_iterations=max_step, l_threshold=th, - verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:]) + verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:], + faces=faces) - cloaked_image_X = differentiator.attack(image_X, target_X) + cloaked_image_X = differentiator.attack(image_X, target_emb) return cloaked_image_X -def extract_faces(img): - # foo - return preprocess_input(resize(img, (224, 224))) +def check_imgs(imgs): + if np.max(imgs) <= 1 and np.min(imgs) >= 0: + imgs = imgs * 255.0 + elif np.max(imgs) <= 255 and np.min(imgs) >= 0: + pass + else: + raise Exception("Image values ") + return imgs -def fawkes(): - assert os.path.exists(args.directory) - assert os.path.isdir(args.directory) +def main(*argv): + if not argv: + argv = list(sys.argv) + + # attach SIGPIPE handler to properly handle broken pipe + try: # sigpipe not available under windows. just ignore in this case + import signal + signal.signal(signal.SIGPIPE, signal.SIG_DFL) + except Exception as e: + pass + + parser = argparse.ArgumentParser() + parser.add_argument('--directory', '-d', type=str, + help='directory that contain images for cloaking', default='imgs/') + + parser.add_argument('--gpu', type=str, + help='GPU id', default='0') + + parser.add_argument('--mode', type=str, + help='cloak generation mode', default='high') + parser.add_argument('--feature-extractor', type=str, + help="name of the feature extractor used for optimization", + default="high_extract") + + parser.add_argument('--th', type=float, default=0.01) + parser.add_argument('--max-step', type=int, default=500) + parser.add_argument('--sd', type=int, default=1e9) + parser.add_argument('--lr', type=float, default=2) + + parser.add_argument('--separate_target', action='store_true') + + parser.add_argument('--format', type=str, + help="final image format", + default="jpg") + args = parser.parse_args(argv[1:]) + + if args.mode == 'low': + args.feature_extractor = "high_extract" + args.th = 0.003 + elif args.mode == 'mid': + args.feature_extractor = "high_extract" + args.th = 0.005 + elif args.mode == 'high': + args.feature_extractor = "high_extract" + args.th = 0.007 + elif args.mode == 'ultra': + args.feature_extractor = "high_extract" + args.th = 0.01 + elif args.mode == 'custom': + pass + else: + raise Exception("mode must be one of 'low', 'mid', 'high', 'ultra', 'custom'") + + assert args.format in ['png', 'jpg', 'jpeg'] + if args.format == 'jpg': + args.format = 'jpeg' sess = init_gpu(args.gpu) - - print("Loading {} for optimization".format(args.feature_extractor)) - - feature_extractors_ls = [load_extractor(args.feature_extractor)] + fs_names = [args.feature_extractor] + feature_extractors_ls = [load_extractor(name) for name in fs_names] image_paths = glob.glob(os.path.join(args.directory, "*")) image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]] + if not image_paths: + print("No images in the directory") + exit(1) - orginal_images = [extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in - image_paths] + faces = Faces(image_paths, sess) + orginal_images = faces.cropped_faces orginal_images = np.array(orginal_images) - if args.seperate_target: - target_images = [] + if args.separate_target: + target_embedding = [] for org_img in orginal_images: org_img = org_img.reshape([1] + list(org_img.shape)) - tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor]) - target_images.append(tar_img) - target_images = np.concatenate(target_images) + tar_emb = select_target_label(org_img, feature_extractors_ls, fs_names) + target_embedding.append(tar_emb) + target_embedding = np.concatenate(target_embedding) else: - target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor]) - - # file_name = args.directory.split("/")[-1] - # os.makedirs(args.result_directory, exist_ok=True) - # os.makedirs(os.path.join(args.result_directory, file_name), exist_ok=True) + target_embedding = select_target_label(orginal_images, feature_extractors_ls, fs_names) protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images, - target_X=target_images, th=args.th) + target_emb=target_embedding, th=args.th, faces=faces, sd=args.sd, + lr=args.lr, max_step=args.max_step) - for p_img, path in zip(protected_images, image_paths): - p_img = reverse_process_cloaked(p_img) - file_name = "{}_cloaked.jpeg".format(".".join(path.split(".")[:-1])) - dump_image(p_img, file_name, format="JPEG") + faces.cloaked_cropped_faces = protected_images + cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(orginal_images) + final_images = faces.merge_faces(cloak_perturbation) -def parse_arguments(argv): - parser = argparse.ArgumentParser() - parser.add_argument('--gpu', type=str, - help='GPU id', default='0') - parser.add_argument('--directory', type=str, - help='directory that contain images for cloaking', default='imgs/') - - parser.add_argument('--feature-extractor', type=str, - help="name of the feature extractor used for optimization", - default="webface_dense_robust_extract") - - parser.add_argument('--th', type=float, default=0.005) - parser.add_argument('--sd', type=int, default=1e9) - parser.add_argument('--protect_class', type=str, default=None) - parser.add_argument('--lr', type=float, default=1) - - parser.add_argument('--result_directory', type=str, default="../results") - parser.add_argument('--seperate_target', action='store_true') - - return parser.parse_args(argv) + for p_img, cloaked_img, path in zip(final_images, protected_images, image_paths): + file_name = "{}_{}_{}_cloaked.{}".format(".".join(path.split(".")[:-1]), args.mode, args.th, args.format) + dump_image(p_img, file_name, format=args.format) if __name__ == '__main__': - args = parse_arguments(sys.argv[1:]) - fawkes() + main(*sys.argv) diff --git a/build/lib/fawkes/utils.py b/build/lib/fawkes/utils.py index c0c4577..6f8f590 100644 --- a/build/lib/fawkes/utils.py +++ b/build/lib/fawkes/utils.py @@ -1,19 +1,30 @@ +import glob +import gzip import json import os import pickle import random +import sys +stderr = sys.stderr +sys.stderr = open(os.devnull, 'w') import keras + +sys.stderr = stderr import keras.backend as K import numpy as np import tensorflow as tf -from keras.applications.vgg16 import preprocess_input +from PIL import Image, ExifTags +# from keras.applications.vgg16 import preprocess_input from keras.layers import Dense, Activation from keras.models import Model from keras.preprocessing import image -from keras.utils import to_categorical +from keras.utils import get_file +from skimage.transform import resize from sklearn.metrics import pairwise_distances +from .align_face import align, aligner + def clip_img(X, preprocessing='raw'): X = reverse_preprocess(X, preprocessing) @@ -22,6 +33,91 @@ def clip_img(X, preprocessing='raw'): return X +def load_image(path): + img = Image.open(path) + if img._getexif() is not None: + for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == 'Orientation': + break + + exif = dict(img._getexif().items()) + if orientation in exif.keys(): + if exif[orientation] == 3: + img = img.rotate(180, expand=True) + elif exif[orientation] == 6: + img = img.rotate(270, expand=True) + elif exif[orientation] == 8: + img = img.rotate(90, expand=True) + else: + pass + img = img.convert('RGB') + image_array = image.img_to_array(img) + + return image_array + + +class Faces(object): + def __init__(self, image_paths, sess): + self.aligner = aligner(sess) + self.org_faces = [] + self.cropped_faces = [] + self.cropped_faces_shape = [] + self.cropped_index = [] + self.callback_idx = [] + for i, p in enumerate(image_paths): + cur_img = load_image(p) + self.org_faces.append(cur_img) + align_img = align(cur_img, self.aligner, margin=0.7) + cur_faces = align_img[0] + + cur_shapes = [f.shape[:-1] for f in cur_faces] + + cur_faces_square = [] + for img in cur_faces: + long_size = max([img.shape[1], img.shape[0]]) + base = np.zeros((long_size, long_size, 3)) + base[0:img.shape[0], 0:img.shape[1], :] = img + cur_faces_square.append(base) + + cur_index = align_img[1] + cur_faces_square = [resize(f, (224, 224)) for f in cur_faces_square] + self.cropped_faces_shape.extend(cur_shapes) + self.cropped_faces.extend(cur_faces_square) + self.cropped_index.extend(cur_index) + self.callback_idx.extend([i] * len(cur_faces_square)) + + if not self.cropped_faces: + print("No faces detected") + exit(1) + + self.cropped_faces = np.array(self.cropped_faces) + + self.cropped_faces = preprocess(self.cropped_faces, 'imagenet') + + self.cloaked_cropped_faces = None + self.cloaked_faces = np.copy(self.org_faces) + + def get_faces(self): + return self.cropped_faces + + def merge_faces(self, cloaks): + + self.cloaked_faces = np.copy(self.org_faces) + + for i in range(len(self.cropped_faces)): + cur_cloak = cloaks[i] + org_shape = self.cropped_faces_shape[i] + old_square_shape = max([org_shape[0], org_shape[1]]) + reshape_cloak = resize(cur_cloak, (old_square_shape, old_square_shape)) + reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :] + + callback_id = self.callback_idx[i] + bb = self.cropped_index[i] + self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak + + return self.cloaked_faces + + def dump_dictionary_as_json(dict, outfile): j = json.dumps(dict) with open(outfile, "wb") as f: @@ -30,10 +126,12 @@ def dump_dictionary_as_json(dict, outfile): def fix_gpu_memory(mem_fraction=1): os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction) - tf_config = tf.ConfigProto(gpu_options=gpu_options) - tf_config.gpu_options.allow_growth = True - tf_config.log_device_placement = False + tf_config = None + if tf.test.is_gpu_available(): + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction) + tf_config = tf.ConfigProto(gpu_options=gpu_options) + tf_config.gpu_options.allow_growth = True + tf_config.log_device_placement = False init_op = tf.global_variables_initializer() sess = tf.Session(config=tf_config) sess.run(init_op) @@ -45,7 +143,6 @@ def load_victim_model(number_classes, teacher_model=None, end2end=False): for l in teacher_model.layers: l.trainable = end2end x = teacher_model.layers[-1].output - x = Dense(number_classes)(x) x = Activation('softmax', name="act")(x) model = Model(teacher_model.input, x) @@ -141,6 +238,7 @@ def imagenet_preprocessing(x, data_format=None): return x + def imagenet_reverse_preprocessing(x, data_format=None): import keras.backend as K x = np.array(x) @@ -185,7 +283,20 @@ def build_bottleneck_model(model, cut_off): def load_extractor(name): - model = keras.models.load_model("../feature_extractors/{}.h5".format(name)) + model_dir = os.path.join(os.path.expanduser('~'), '.fawkes') + os.makedirs(model_dir, exist_ok=True) + model_file = os.path.join(model_dir, "{}.h5".format(name)) + if os.path.exists(model_file): + model = keras.models.load_model(model_file) + else: + get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name), + cache_dir=model_dir, cache_subdir='') + + get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name), + cache_dir=model_dir, cache_subdir='') + + model = keras.models.load_model(model_file) + if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax": raise Exception( "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor") @@ -200,10 +311,11 @@ def load_extractor(name): def get_dataset_path(dataset): - if not os.path.exists("config.json"): + model_dir = os.path.join(os.path.expanduser('~'), '.fawkes') + if not os.path.exists(os.path.join(model_dir, "config.json")): raise Exception("Please config the datasets before running protection code. See more in README and config.py.") - config = json.load(open("config.json", 'r')) + config = json.load(open(os.path.join(model_dir, "config.json"), 'r')) if dataset not in config: raise Exception( "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format( @@ -217,7 +329,8 @@ def normalize(x): def dump_image(x, filename, format="png", scale=False): - img = image.array_to_img(x, scale=scale) + # img = image.array_to_img(x, scale=scale) + img = image.array_to_img(x) img.save(filename, format) return @@ -231,13 +344,17 @@ def load_dir(path): im = image.img_to_array(im) x_ls.append(im) raw_x = np.array(x_ls) - return preprocess_input(raw_x) + return preprocess(raw_x, 'imagenet') def load_embeddings(feature_extractors_names): + model_dir = os.path.join(os.path.expanduser('~'), '.fawkes') dictionaries = [] for extractor_name in feature_extractors_names: - path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb")) + fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb') + path2emb = pickle.load(fp) + fp.close() + dictionaries.append(path2emb) merge_dict = {} @@ -272,6 +389,8 @@ def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'): def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'): + model_dir = os.path.join(os.path.expanduser('~'), '.fawkes') + original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs) path2emb = load_embeddings(feature_extractors_names) @@ -282,178 +401,174 @@ def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, m pair_dist = pairwise_distances(original_feature_x, embs, metric) max_sum = np.min(pair_dist, axis=0) - sorted_idx = np.argsort(max_sum)[::-1] + max_id = np.argmax(max_sum) - highest_num = 0 - paired_target_X = None - final_target_class_path = None - for idx in sorted_idx[:1]: - target_class_path = paths[idx] - cur_target_X = load_dir(target_class_path) - cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X]) - cur_tot_sum, cur_paired_target_X = calculate_dist_score(imgs, cur_target_X, - feature_extractors_ls, - metric=metric) - if cur_tot_sum > highest_num: - highest_num = cur_tot_sum - paired_target_X = cur_paired_target_X + target_data_id = paths[int(max_id)] + image_dir = os.path.join(model_dir, "target_data/{}/*".format(target_data_id)) + if not os.path.exists(image_dir): + get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/target_images".format(name), + cache_dir=model_dir, cache_subdir='') - np.random.shuffle(paired_target_X) - paired_target_X = list(paired_target_X) - while len(paired_target_X) < len(imgs): - paired_target_X += paired_target_X + image_paths = glob.glob(image_dir) - paired_target_X = paired_target_X[:len(imgs)] - return np.array(paired_target_X) + target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in + image_paths] + target_images = np.array([resize(x, (224, 224)) for x in target_images]) + target_images = preprocess(target_images, 'imagenet') + target_images = list(target_images) + while len(target_images) < len(imgs): + target_images += target_images -class CloakData(object): - def __init__(self, protect_directory=None, img_shape=(224, 224)): + target_images = random.sample(target_images, len(imgs)) + return np.array(target_images) - self.img_shape = img_shape - - # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset) - # self.all_labels = sorted(list(os.listdir(self.train_data_dir))) - self.protect_directory = protect_directory - - self.protect_X = self.load_label_data(self.protect_directory) - - self.cloaked_protect_train_X = None - - self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping() - self.all_training_path = self.get_all_data_path(self.label2path_train) - self.all_test_path = self.get_all_data_path(self.label2path_test) - self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class)) - - def get_class_image_files(self, path): - return [os.path.join(path, f) for f in os.listdir(path)] - - def extractor_ls_predict(self, feature_extractors_ls, X): - feature_ls = [] - for extractor in feature_extractors_ls: - cur_features = extractor.predict(X) - feature_ls.append(cur_features) - concated_feature_ls = np.concatenate(feature_ls, axis=1) - concated_feature_ls = normalize(concated_feature_ls) - return concated_feature_ls - - def load_embeddings(self, feature_extractors_names): - dictionaries = [] - for extractor_name in feature_extractors_names: - path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb")) - dictionaries.append(path2emb) - - merge_dict = {} - for k in dictionaries[0].keys(): - cur_emb = [dic[k] for dic in dictionaries] - merge_dict[k] = np.concatenate(cur_emb) - return merge_dict - - def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'): - original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X) - - path2emb = self.load_embeddings(feature_extractors_names) - items = list(path2emb.items()) - paths = [p[0] for p in items] - embs = [p[1] for p in items] - embs = np.array(embs) - - pair_dist = pairwise_distances(original_feature_x, embs, metric) - max_sum = np.min(pair_dist, axis=0) - sorted_idx = np.argsort(max_sum)[::-1] - - highest_num = 0 - paired_target_X = None - final_target_class_path = None - for idx in sorted_idx[:5]: - target_class_path = paths[idx] - cur_target_X = self.load_dir(target_class_path) - cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X]) - cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X, - feature_extractors_ls, - metric=metric) - if cur_tot_sum > highest_num: - highest_num = cur_tot_sum - paired_target_X = cur_paired_target_X - final_target_class_path = target_class_path - - np.random.shuffle(paired_target_X) - return final_target_class_path, paired_target_X - - def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'): - features1 = self.extractor_ls_predict(feature_extractors_ls, a) - features2 = self.extractor_ls_predict(feature_extractors_ls, b) - - pair_cos = pairwise_distances(features1, features2, metric) - max_sum = np.min(pair_cos, axis=0) - max_sum_arg = np.argsort(max_sum)[::-1] - max_sum_arg = max_sum_arg[:len(a)] - max_sum = [max_sum[i] for i in max_sum_arg] - paired_target_X = [b[j] for j in max_sum_arg] - paired_target_X = np.array(paired_target_X) - return np.min(max_sum), paired_target_X - - def get_all_data_path(self, label2path): - all_paths = [] - for k, v in label2path.items(): - cur_all_paths = [os.path.join(k, cur_p) for cur_p in v] - all_paths.extend(cur_all_paths) - return all_paths - - def load_label_data(self, label): - train_label_path = os.path.join(self.train_data_dir, label) - test_label_path = os.path.join(self.test_data_dir, label) - train_X = self.load_dir(train_label_path) - test_X = self.load_dir(test_label_path) - return train_X, test_X - - def load_dir(self, path): - assert os.path.exists(path) - x_ls = [] - for file in os.listdir(path): - cur_path = os.path.join(path, file) - im = image.load_img(cur_path, target_size=self.img_shape) - im = image.img_to_array(im) - x_ls.append(im) - raw_x = np.array(x_ls) - return preprocess_input(raw_x) - - def build_data_mapping(self): - label2path_train = {} - label2path_test = {} - idx = 0 - path2idx = {} - for label_name in self.all_labels: - full_path_train = os.path.join(self.train_data_dir, label_name) - full_path_test = os.path.join(self.test_data_dir, label_name) - label2path_train[full_path_train] = list(os.listdir(full_path_train)) - label2path_test[full_path_test] = list(os.listdir(full_path_test)) - for img_file in os.listdir(full_path_train): - path2idx[os.path.join(full_path_train, img_file)] = idx - for img_file in os.listdir(full_path_test): - path2idx[os.path.join(full_path_test, img_file)] = idx - idx += 1 - return label2path_train, label2path_test, path2idx - - def generate_data_post_cloak(self, sybil=False): - assert self.cloaked_protect_train_X is not None - while True: - batch_X = [] - batch_Y = [] - cur_batch_path = random.sample(self.all_training_path, 32) - for p in cur_batch_path: - cur_y = self.path2idx[p] - if p in self.protect_class_path: - cur_x = random.choice(self.cloaked_protect_train_X) - elif sybil and (p in self.sybil_class): - cur_x = random.choice(self.cloaked_sybil_train_X) - else: - im = image.load_img(p, target_size=self.img_shape) - im = image.img_to_array(im) - cur_x = preprocess_input(im) - batch_X.append(cur_x) - batch_Y.append(cur_y) - batch_X = np.array(batch_X) - batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes) - yield batch_X, batch_Y +# class CloakData(object): +# def __init__(self, protect_directory=None, img_shape=(224, 224)): +# +# self.img_shape = img_shape +# # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset) +# # self.all_labels = sorted(list(os.listdir(self.train_data_dir))) +# self.protect_directory = protect_directory +# +# self.protect_X = self.load_label_data(self.protect_directory) +# +# self.cloaked_protect_train_X = None +# +# self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping() +# self.all_training_path = self.get_all_data_path(self.label2path_train) +# self.all_test_path = self.get_all_data_path(self.label2path_test) +# self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class)) +# +# def get_class_image_files(self, path): +# return [os.path.join(path, f) for f in os.listdir(path)] +# +# def extractor_ls_predict(self, feature_extractors_ls, X): +# feature_ls = [] +# for extractor in feature_extractors_ls: +# cur_features = extractor.predict(X) +# feature_ls.append(cur_features) +# concated_feature_ls = np.concatenate(feature_ls, axis=1) +# concated_feature_ls = normalize(concated_feature_ls) +# return concated_feature_ls +# +# def load_embeddings(self, feature_extractors_names): +# dictionaries = [] +# for extractor_name in feature_extractors_names: +# path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb")) +# dictionaries.append(path2emb) +# +# merge_dict = {} +# for k in dictionaries[0].keys(): +# cur_emb = [dic[k] for dic in dictionaries] +# merge_dict[k] = np.concatenate(cur_emb) +# return merge_dict +# +# def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'): +# original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X) +# +# path2emb = self.load_embeddings(feature_extractors_names) +# items = list(path2emb.items()) +# paths = [p[0] for p in items] +# embs = [p[1] for p in items] +# embs = np.array(embs) +# +# pair_dist = pairwise_distances(original_feature_x, embs, metric) +# max_sum = np.min(pair_dist, axis=0) +# sorted_idx = np.argsort(max_sum)[::-1] +# +# highest_num = 0 +# paired_target_X = None +# final_target_class_path = None +# for idx in sorted_idx[:5]: +# target_class_path = paths[idx] +# cur_target_X = self.load_dir(target_class_path) +# cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X]) +# cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X, +# feature_extractors_ls, +# metric=metric) +# if cur_tot_sum > highest_num: +# highest_num = cur_tot_sum +# paired_target_X = cur_paired_target_X +# final_target_class_path = target_class_path +# +# np.random.shuffle(paired_target_X) +# return final_target_class_path, paired_target_X +# +# def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'): +# features1 = self.extractor_ls_predict(feature_extractors_ls, a) +# features2 = self.extractor_ls_predict(feature_extractors_ls, b) +# +# pair_cos = pairwise_distances(features1, features2, metric) +# max_sum = np.min(pair_cos, axis=0) +# max_sum_arg = np.argsort(max_sum)[::-1] +# max_sum_arg = max_sum_arg[:len(a)] +# max_sum = [max_sum[i] for i in max_sum_arg] +# paired_target_X = [b[j] for j in max_sum_arg] +# paired_target_X = np.array(paired_target_X) +# return np.min(max_sum), paired_target_X +# +# def get_all_data_path(self, label2path): +# all_paths = [] +# for k, v in label2path.items(): +# cur_all_paths = [os.path.join(k, cur_p) for cur_p in v] +# all_paths.extend(cur_all_paths) +# return all_paths +# +# def load_label_data(self, label): +# train_label_path = os.path.join(self.train_data_dir, label) +# test_label_path = os.path.join(self.test_data_dir, label) +# train_X = self.load_dir(train_label_path) +# test_X = self.load_dir(test_label_path) +# return train_X, test_X +# +# def load_dir(self, path): +# assert os.path.exists(path) +# x_ls = [] +# for file in os.listdir(path): +# cur_path = os.path.join(path, file) +# im = image.load_img(cur_path, target_size=self.img_shape) +# im = image.img_to_array(im) +# x_ls.append(im) +# raw_x = np.array(x_ls) +# return preprocess_input(raw_x) +# +# def build_data_mapping(self): +# label2path_train = {} +# label2path_test = {} +# idx = 0 +# path2idx = {} +# for label_name in self.all_labels: +# full_path_train = os.path.join(self.train_data_dir, label_name) +# full_path_test = os.path.join(self.test_data_dir, label_name) +# label2path_train[full_path_train] = list(os.listdir(full_path_train)) +# label2path_test[full_path_test] = list(os.listdir(full_path_test)) +# for img_file in os.listdir(full_path_train): +# path2idx[os.path.join(full_path_train, img_file)] = idx +# for img_file in os.listdir(full_path_test): +# path2idx[os.path.join(full_path_test, img_file)] = idx +# idx += 1 +# return label2path_train, label2path_test, path2idx +# +# def generate_data_post_cloak(self, sybil=False): +# assert self.cloaked_protect_train_X is not None +# while True: +# batch_X = [] +# batch_Y = [] +# cur_batch_path = random.sample(self.all_training_path, 32) +# for p in cur_batch_path: +# cur_y = self.path2idx[p] +# if p in self.protect_class_path: +# cur_x = random.choice(self.cloaked_protect_train_X) +# elif sybil and (p in self.sybil_class): +# cur_x = random.choice(self.cloaked_sybil_train_X) +# else: +# im = image.load_img(p, target_size=self.img_shape) +# im = image.img_to_array(im) +# cur_x = preprocess_input(im) +# batch_X.append(cur_x) +# batch_Y.append(cur_y) +# batch_X = np.array(batch_X) +# batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes) +# yield batch_X, batch_Y diff --git a/setup.py b/setup.py index 6defe08..d0532b2 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,117 @@ -import setuptools +import os +import re +import sys +from setuptools import setup, Command + +__PATH__ = os.path.abspath(os.path.dirname(__file__)) with open("README.md", "r") as fh: long_description = fh.read() -setuptools.setup( - name="fawkes", - version="0.0.1", - author="Shawn Shan", - author_email="shansixiong@cs.uchicago.edu", - description="Fawkes protect user privacy", + +def read_version(): + __PATH__ = os.path.abspath(os.path.dirname(__file__)) + with open(os.path.join(__PATH__, 'fawkes/__init__.py')) as f: + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", + f.read(), re.M) + if version_match: + return version_match.group(1) + raise RuntimeError("Unable to find __version__ string") + + +__version__ = read_version() + + +# brought from https://github.com/kennethreitz/setup.py +class DeployCommand(Command): + description = 'Build and deploy the package to PyPI.' + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + @staticmethod + def status(s): + print(s) + + def run(self): + + assert 'dev' not in __version__, ( + "Only non-devel versions are allowed. " + "__version__ == {}".format(__version__)) + + with os.popen("git status --short") as fp: + git_status = fp.read().strip() + if git_status: + print("Error: git repository is not clean.\n") + os.system("git status --short") + sys.exit(1) + + try: + from shutil import rmtree + self.status('Removing previous builds ...') + rmtree(os.path.join(__PATH__, 'dist')) + except OSError: + pass + + self.status('Building Source and Wheel (universal) distribution ...') + os.system('{0} setup.py sdist'.format(sys.executable)) + + self.status('Uploading the package to PyPI via Twine ...') + ret = os.system('twine upload dist/*') + if ret != 0: + sys.exit(ret) + + self.status('Creating git tags ...') + os.system('git tag v{0}'.format(__version__)) + os.system('git tag --list') + sys.exit() + + +setup_requires = [] + +install_requires = [ + 'numpy>=1.16.4', + 'tensorflow>=1.13.1', + 'argparse', + 'keras==2.2.5', + 'scikit-image', + 'pillow>=7.0.0', + 'opencv-python>=4.2.0.34', +] + +setup( + name='fawkes', + version=__version__, + license='MIT', + description='An utility to protect user privacy', long_description=long_description, - long_description_content_type="text/markdown", + long_description_content_type='text/markdown', url="https://github.com/Shawn-Shan/fawkes", - packages=setuptools.find_packages(), + author='Shawn Shan', + author_email='shansixiong@cs.uchicago.edu', + keywords='fawkes privacy clearview', classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", + 'Development Status :: 3 - Alpha', + 'License :: OSI Approved :: MIT License', "Operating System :: OS Independent", + 'Programming Language :: Python :: 3', + 'Topic :: System :: Monitoring', ], + packages=['fawkes'], + install_requires=install_requires, + setup_requires=setup_requires, + entry_points={ + 'console_scripts': ['fawkes=fawkes:main'], + }, + cmdclass={ + 'deploy': DeployCommand, + }, + include_package_data=True, + zip_safe=False, python_requires='>=3.5', -) \ No newline at end of file +)