import glob
import gzip
import json
import os
import pickle
import random
import sys

# stderr is pointed at the null device while keras is imported and restored
# right after, so anything the import writes to stderr is discarded.
stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')
import keras

sys.stderr = stderr
import keras.backend as K
import numpy as np
import tensorflow as tf
from PIL import Image, ExifTags
# from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image
from keras.utils import get_file
from skimage.transform import resize
from sklearn.metrics import pairwise_distances

from .align_face import align, aligner

# Per-channel means (in B, G, R order) removed by imagenet_preprocessing and
# added back by imagenet_reverse_preprocessing.
_IMAGENET_BGR_MEAN = (103.939, 116.779, 123.68)

# EXIF orientation value -> degrees passed to Image.rotate in load_image.
_EXIF_ROTATIONS = {3: 180, 6: 270, 8: 90}


def clip_img(X, preprocessing='raw'):
    """Clip X to [0, 255] in un-preprocessed space.

    X is mapped back with reverse_preprocess, clipped, and then run through
    preprocess again with the same ``preprocessing`` method.
    """
    X = reverse_preprocess(X, preprocessing)
    X = np.clip(X, 0.0, 255.0)
    X = preprocess(X, preprocessing)
    return X


def load_image(path):
    """Open the image at ``path`` and return it as an RGB array.

    When the file carries an EXIF 'Orientation' value of 3, 6 or 8 the image
    is rotated by 180, 270 or 90 degrees respectively before conversion.
    """
    img = Image.open(path)

    # Not every PIL image class exposes _getexif, so look it up defensively
    # and read the EXIF data only once.
    read_exif = getattr(img, '_getexif', None)
    exif = read_exif() if read_exif is not None else None

    if exif is not None:
        orientation_tag = None
        for tag, tag_name in ExifTags.TAGS.items():
            if tag_name == 'Orientation':
                orientation_tag = tag
                break

        degrees = _EXIF_ROTATIONS.get(exif.get(orientation_tag))
        if degrees is not None:
            img = img.rotate(degrees, expand=True)

    img = img.convert('RGB')
    return image.img_to_array(img)


class Faces(object):
    """Face crops extracted from a list of image files.

    For every path the image is loaded, passed to ``align`` and each returned
    crop is zero-padded to a square, resized to 224x224 and preprocessed with
    the 'imagenet' method. ``merge_faces`` adds per-crop cloaks back onto
    copies of the original images.
    """

    def __init__(self, image_paths, sess):
        """Load ``image_paths`` and collect their face crops.

        Prints a message and exits the process with status 1 when no crop
        was collected from any image.
        """
        self.aligner = aligner(sess)
        self.org_faces = []
        self.cropped_faces = []
        self.cropped_faces_shape = []
        self.cropped_index = []
        # callback_idx[k] is the index into org_faces that crop k came from.
        self.callback_idx = []

        for i, p in enumerate(image_paths):
            cur_img = load_image(p)
            self.org_faces.append(cur_img)

            # presumably align() returns (crops, bounding boxes) - the first
            # item is used as the crops, the second as their indices.
            align_img = align(cur_img, self.aligner, margin=0.7)
            cur_faces = align_img[0]
            cur_index = align_img[1]

            # Height/width of every crop before padding, needed to undo the
            # padding in merge_faces.
            cur_shapes = [f.shape[:-1] for f in cur_faces]

            cur_faces_square = []
            for img in cur_faces:
                long_size = max([img.shape[1], img.shape[0]])
                base = np.zeros((long_size, long_size, 3))
                base[0:img.shape[0], 0:img.shape[1], :] = img
                cur_faces_square.append(base)

            cur_faces_square = [resize(f, (224, 224)) for f in cur_faces_square]

            self.cropped_faces_shape.extend(cur_shapes)
            self.cropped_faces.extend(cur_faces_square)
            self.cropped_index.extend(cur_index)
            self.callback_idx.extend([i] * len(cur_faces_square))

        if not self.cropped_faces:
            print("No faces detected")
            # sys.exit instead of the site-provided exit(), which is not
            # guaranteed to exist in every interpreter setup.
            sys.exit(1)

        self.cropped_faces = np.array(self.cropped_faces)
        self.cropped_faces = preprocess(self.cropped_faces, 'imagenet')

        self.cloaked_cropped_faces = None
        self.cloaked_faces = np.copy(self.org_faces)

    def get_faces(self):
        """Return the preprocessed face crops."""
        return self.cropped_faces

    def merge_faces(self, cloaks):
        """Add ``cloaks[i]`` onto the region of the original image crop i came from.

        Each cloak is resized to the crop's padded square size, trimmed to the
        crop's original height/width and added in place to a fresh copy of
        the original images, which is stored and returned.
        """
        self.cloaked_faces = np.copy(self.org_faces)

        for i, org_shape in enumerate(self.cropped_faces_shape):
            cur_cloak = cloaks[i]
            old_square_shape = max([org_shape[0], org_shape[1]])
            reshape_cloak = resize(cur_cloak, (old_square_shape, old_square_shape))
            reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]

            callback_id = self.callback_idx[i]
            bb = self.cropped_index[i]
            # bb is indexed as [bb[1]:bb[3]] on rows and [bb[0]:bb[2]] on columns.
            self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak

        return self.cloaked_faces


def dump_dictionary_as_json(dict, outfile):
    """Serialize ``dict`` with json.dumps and write the encoded bytes to ``outfile``."""
    j = json.dumps(dict)
    with open(outfile, "wb") as f:
        f.write(j.encode())


def fix_gpu_memory(mem_fraction=1):
    """Create a TF session, register it with Keras and return it.

    When a GPU is available the session is configured with
    ``per_process_gpu_memory_fraction=mem_fraction`` and ``allow_growth``;
    otherwise the default configuration is used.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf_config = None
    if tf.test.is_gpu_available():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
        tf_config = tf.ConfigProto(gpu_options=gpu_options)
        tf_config.gpu_options.allow_growth = True
        tf_config.log_device_placement = False

    init_op = tf.global_variables_initializer()
    sess = tf.Session(config=tf_config)
    sess.run(init_op)
    K.set_session(sess)
    return sess


def load_victim_model(number_classes, teacher_model=None, end2end=False):
    """Build a softmax classifier on top of ``teacher_model``.

    Every teacher layer has ``trainable`` set to ``end2end``; a Dense layer
    with ``number_classes`` units followed by a softmax activation is stacked
    on the teacher's last layer. The model is compiled with Adadelta and
    categorical cross-entropy and returned.
    """
    for layer in teacher_model.layers:
        layer.trainable = end2end

    x = teacher_model.layers[-1].output
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)
    model = Model(teacher_model.input, x)

    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


def init_gpu(gpu_index, force=False):
    """Set CUDA_VISIBLE_DEVICES to ``gpu_index`` and open a session.

    ``gpu_index`` may be a single index or a list of indices. If
    CUDA_VISIBLE_DEVICES is already set to a non-empty value and ``force`` is
    false, nothing is changed and None is returned; otherwise the session
    created by fix_gpu_memory is returned.
    """
    if isinstance(gpu_index, list):
        gpu_num = ','.join([str(i) for i in gpu_index])
    else:
        gpu_num = str(gpu_index)

    if os.environ.get("CUDA_VISIBLE_DEVICES") and not force:
        print('GPU already initiated')
        return

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    sess = fix_gpu_memory()
    return sess


def preprocess(X, method):
    """Apply the named preprocessing to X.

    'raw' returns X untouched and 'imagenet' applies imagenet_preprocessing.
    'inception' and 'mnist' pass the assertion but are not implemented and
    raise Exception.
    """
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    # Compare by value: identity checks against string literals depend on
    # interning and are a SyntaxWarning on Python 3.8+.
    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X


def reverse_preprocess(X, method):
    """Undo the named preprocessing on X (see preprocess for the accepted names)."""
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    # Compare by value, not identity (see preprocess).
    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_reverse_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X


def imagenet_preprocessing(x, data_format=None):
    """Return a copy of x with channels reversed (RGB->BGR) and the mean pixel removed.

    ``data_format`` is 'channels_last' or 'channels_first'; when None the
    Keras backend setting is used. For 'channels_first' a 3-d input is
    treated as one image and anything else as a batch.
    """
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    # np.array copies, so the in-place arithmetic below never touches the
    # caller's data.
    x = np.array(x)
    channels_first = data_format == 'channels_first'

    # 'RGB'->'BGR'
    if channels_first:
        if x.ndim == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
    else:
        x = x[..., ::-1]

    # Zero-center by mean pixel
    for channel, offset in enumerate(_IMAGENET_BGR_MEAN):
        if not channels_first:
            x[..., channel] -= offset
        elif x.ndim == 3:
            x[channel, :, :] -= offset
        else:
            x[:, channel, :, :] -= offset

    return x


def imagenet_reverse_preprocessing(x, data_format=None):
    """Return a copy of x with the mean pixel added back and channels reversed (BGR->RGB).

    Inverse of imagenet_preprocessing; ``data_format`` is handled the same way.
    """
    x = np.array(x)
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    channels_first = data_format == 'channels_first'

    # Undo the zero-centering first, while the channels are still in BGR order.
    for channel, offset in enumerate(_IMAGENET_BGR_MEAN):
        if not channels_first:
            x[..., channel] += offset
        elif x.ndim == 3:
            x[channel, :, :] += offset
        else:
            x[:, channel, :, :] += offset

    # 'BGR'->'RGB'
    if channels_first:
        if x.ndim == 3:
            x = x[::-1, :, :]
        else:
            x = x[:, ::-1, :, :]
    else:
        x = x[..., ::-1]

    return x


def reverse_process_cloaked(x, preprocess='imagenet'):
    """Clip x with clip_img and return it with the named preprocessing undone."""
    x = clip_img(x, preprocess)
    return reverse_preprocess(x, preprocess)


def build_bottleneck_model(model, cut_off):
    """Return a compiled model mapping ``model``'s input to the output of layer ``cut_off``."""
    bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
    bottleneck_model.compile(loss='categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])
    return bottleneck_model


def load_extractor(name):
    """Load the Keras model ``~/.fawkes/<name>.h5``, downloading it if missing.

    When the model file is absent, both ``<name>.h5`` and ``<name>_emb.p.gz``
    are fetched into ``~/.fawkes`` before loading.

    Raises:
        Exception: if the last layer's ``activation`` equals "softmax".
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    os.makedirs(model_dir, exist_ok=True)
    model_file = os.path.join(model_dir, "{}.h5".format(name))

    if not os.path.exists(model_file):
        get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
                 cache_dir=model_dir, cache_subdir='')
        get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
                 cache_dir=model_dir, cache_subdir='')

    model = keras.models.load_model(model_file)

    # NOTE(review): this compares the layer's activation attribute with a
    # string; if Keras stores a callable there the check can never fire -
    # confirm before relying on it.
    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
        raise Exception(
            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
    # if "extract" in name.split("/")[-1]:
    #     pass
    # else:
    #     print("Convert a model to a feature extractor")
    #     model = build_bottleneck_model(model, model.layers[layer_idx].name)
    #     model.save(name + "extract")
    #     model = keras.models.load_model(name + "extract")
    return model


def get_dataset_path(dataset):
    """Look up ``dataset`` in ``~/.fawkes/config.json``.

    Returns:
        The tuple (train_dir, test_dir, num_classes, num_images) stored for
        the dataset.

    Raises:
        Exception: if the config file is missing or has no entry for ``dataset``.
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    config_path = os.path.join(model_dir, "config.json")
    if not os.path.exists(config_path):
        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")

    # Context manager so the config file handle is closed deterministically.
    with open(config_path, 'r') as config_file:
        config = json.load(config_file)

    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
                dataset))

    entry = config[dataset]
    return entry['train_dir'], entry['test_dir'], entry['num_classes'], entry['num_images']


def normalize(x):
    """Divide every row of x by its norm taken along axis 1."""
    return x / np.linalg.norm(x, axis=1, keepdims=True)


def dump_image(x, filename, format="png", scale=False):
    """Convert array x to an image and save it to ``filename``.

    ``scale`` is accepted but currently unused.
    """
    # img = image.array_to_img(x, scale=scale)
    img = image.array_to_img(x)
    img.save(filename, format)
    return


def load_dir(path):
    """Load every file in ``path`` as a 224x224 image and return the 'imagenet'-preprocessed batch."""
    assert os.path.exists(path)
    x_ls = []
    for file in os.listdir(path):
        cur_path = os.path.join(path, file)
        im = image.load_img(cur_path, target_size=(224, 224))
        im = image.img_to_array(im)
        x_ls.append(im)
    raw_x = np.array(x_ls)
    return preprocess(raw_x, 'imagenet')


def load_embeddings(feature_extractors_names):
    """Load ``~/.fawkes/<name>_emb.p.gz`` for every extractor and merge them.

    Returns a dict whose keys are those of the first extractor's dictionary
    and whose values are the per-extractor embeddings concatenated in order.
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    dictionaries = []
    for extractor_name in feature_extractors_names:
        emb_path = os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name))
        # NOTE(review): load_extractor fetches these files over plain HTTP and
        # pickle.load executes whatever the file encodes - only use with
        # trusted files.
        with gzip.open(emb_path, 'rb') as fp:
            path2emb = pickle.load(fp)
        dictionaries.append(path2emb)

    merge_dict = {}
    for k in dictionaries[0].keys():
        cur_emb = [dic[k] for dic in dictionaries]
        merge_dict[k] = np.concatenate(cur_emb)
    return merge_dict


def extractor_ls_predict(feature_extractors_ls, X):
    """Run X through every extractor, concatenate the outputs along axis 1 and row-normalize."""
    feature_ls = [extractor.predict(X) for extractor in feature_extractors_ls]
    concated_feature_ls = np.concatenate(feature_ls, axis=1)
    concated_feature_ls = normalize(concated_feature_ls)
    return concated_feature_ls


def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):
    """Pick the ``len(a)`` items of b whose nearest item of a is farthest away.

    Returns:
        (smallest selected distance, array of the selected items of b).
    """
    features1 = extractor_ls_predict(feature_extractors_ls, a)
    features2 = extractor_ls_predict(feature_extractors_ls, b)

    pair_cos = pairwise_distances(features1, features2, metric=metric)
    # For every item of b: distance to its closest item of a.
    max_sum = np.min(pair_cos, axis=0)
    # Indices of b ordered by that distance, largest first.
    max_sum_arg = np.argsort(max_sum)[::-1]
    max_sum_arg = max_sum_arg[:len(a)]
    max_sum = [max_sum[i] for i in max_sum_arg]
    paired_target_X = np.array([b[j] for j in max_sum_arg])
    return np.min(max_sum), paired_target_X


def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
    """Choose a target identity for ``imgs`` and return one target image per input.

    The stored embedding whose minimum distance to the features of ``imgs``
    is largest is selected; images are read from
    ``~/.fawkes/target_data/<id>/``, resized to 224x224, preprocessed with
    'imagenet' and randomly sampled to ``len(imgs)`` items.

    Raises:
        FileNotFoundError: if no image exists for the selected target.
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')

    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)

    path2emb = load_embeddings(feature_extractors_names)
    items = list(path2emb.items())
    paths = [p[0] for p in items]
    embs = np.array([p[1] for p in items])

    pair_dist = pairwise_distances(original_feature_x, embs, metric=metric)
    max_sum = np.min(pair_dist, axis=0)
    max_id = np.argmax(max_sum)

    target_data_id = paths[int(max_id)]
    image_dir = os.path.join(model_dir, "target_data/{}/*".format(target_data_id))

    # The previous code tested os.path.exists() on this glob pattern and then
    # attempted a download using an undefined variable, which always ended in
    # a NameError. Automatic download is not implemented here; fail with an
    # explicit message instead. This also keeps the doubling loop below from
    # spinning forever on an empty list.
    image_paths = glob.glob(image_dir)
    if not image_paths:
        raise FileNotFoundError(
            "No target images found for target {} (looked for {})".format(target_data_id, image_dir))

    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in image_paths]
    target_images = np.array([resize(x, (224, 224)) for x in target_images])
    target_images = preprocess(target_images, 'imagenet')

    target_images = list(target_images)
    # Repeat the pool until there are enough images to sample from.
    while len(target_images) < len(imgs):
        target_images += target_images

    target_images = random.sample(target_images, len(imgs))
    return np.array(target_images)

# NOTE: everything below is commented out and is not executed.
#
# class CloakData(object):
#     def __init__(self, protect_directory=None, img_shape=(224, 224)):
#
#         self.img_shape = img_shape
#         # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
#         # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
#         self.protect_directory = protect_directory
#
#         self.protect_X = self.load_label_data(self.protect_directory)
#
#         self.cloaked_protect_train_X = None
#
#         self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
#         self.all_training_path = self.get_all_data_path(self.label2path_train)
#         self.all_test_path = self.get_all_data_path(self.label2path_test)
#         self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
#
#     def get_class_image_files(self, path):
#         return [os.path.join(path, f) for f in os.listdir(path)]
#
#     def extractor_ls_predict(self, feature_extractors_ls, X):
#         feature_ls = []
#         for extractor in feature_extractors_ls:
#             cur_features = extractor.predict(X)
#             feature_ls.append(cur_features)
#         concated_feature_ls = np.concatenate(feature_ls, axis=1)
#         concated_feature_ls = normalize(concated_feature_ls)
#         return concated_feature_ls
#
#     def load_embeddings(self, feature_extractors_names):
#         dictionaries = []
#         for extractor_name in feature_extractors_names:
#             path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
#             dictionaries.append(path2emb)
#
#         merge_dict = {}
#         for k in dictionaries[0].keys():
#             cur_emb = [dic[k] for dic in dictionaries]
#             merge_dict[k] = np.concatenate(cur_emb)
#         return merge_dict
#
#     def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
#         original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
#
#         path2emb = self.load_embeddings(feature_extractors_names)
#         items = list(path2emb.items())
#         paths = [p[0] for p in items]
#         embs = [p[1] for p in items]
#         embs = np.array(embs)
#
#         pair_dist = pairwise_distances(original_feature_x, embs, metric)
#         max_sum = np.min(pair_dist, axis=0)
#         sorted_idx = np.argsort(max_sum)[::-1]
#
#         highest_num = 0
#         paired_target_X = None
#         final_target_class_path = None
#         for idx in sorted_idx[:5]:
#             target_class_path = paths[idx]
#             cur_target_X = self.load_dir(target_class_path)
#             cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
#             cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
#                                                                          feature_extractors_ls,
#                                                                          metric=metric)
#             if cur_tot_sum > highest_num:
#                 highest_num = cur_tot_sum
#                 paired_target_X = cur_paired_target_X
#                 final_target_class_path = target_class_path
#
#         np.random.shuffle(paired_target_X)
#         return final_target_class_path, paired_target_X
#
#     def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
#         features1 = self.extractor_ls_predict(feature_extractors_ls, a)
#         features2 = self.extractor_ls_predict(feature_extractors_ls, b)
#
#         pair_cos = pairwise_distances(features1, features2, metric)
#         max_sum = np.min(pair_cos, axis=0)
#         max_sum_arg = np.argsort(max_sum)[::-1]
#         max_sum_arg = max_sum_arg[:len(a)]
#         max_sum = [max_sum[i] for i in max_sum_arg]
#         paired_target_X = [b[j] for j in max_sum_arg]
#         paired_target_X = np.array(paired_target_X)
#         return np.min(max_sum), paired_target_X
#
#     def get_all_data_path(self, label2path):
#         all_paths = []
#         for k, v in label2path.items():
#             cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
#             all_paths.extend(cur_all_paths)
#         return all_paths
#
#     def load_label_data(self, label):
#         train_label_path = os.path.join(self.train_data_dir, label)
#         test_label_path = os.path.join(self.test_data_dir, label)
#         train_X = self.load_dir(train_label_path)
#         test_X = self.load_dir(test_label_path)
#         return train_X, test_X
#
#     def load_dir(self, path):
#         assert os.path.exists(path)
#         x_ls = []
#         for file in os.listdir(path):
#             cur_path = os.path.join(path, file)
#             im = image.load_img(cur_path, target_size=self.img_shape)
#             im = image.img_to_array(im)
#             x_ls.append(im)
#         raw_x = np.array(x_ls)
#         return preprocess_input(raw_x)
#
#     def build_data_mapping(self):
#         label2path_train = {}
#         label2path_test = {}
#         idx = 0
#         path2idx = {}
#         for label_name in self.all_labels:
#             full_path_train = os.path.join(self.train_data_dir, label_name)
#             full_path_test = os.path.join(self.test_data_dir, label_name)
#             label2path_train[full_path_train] = list(os.listdir(full_path_train))
#             label2path_test[full_path_test] = list(os.listdir(full_path_test))
#             for img_file in os.listdir(full_path_train):
#                 path2idx[os.path.join(full_path_train, img_file)] = idx
#             for img_file in os.listdir(full_path_test):
#                 path2idx[os.path.join(full_path_test, img_file)] = idx
#             idx += 1
#         return label2path_train, label2path_test, path2idx
#
#     def generate_data_post_cloak(self, sybil=False):
#         assert self.cloaked_protect_train_X is not None
#         while True:
#             batch_X = []
#             batch_Y = []
#             cur_batch_path = random.sample(self.all_training_path, 32)
#             for p in cur_batch_path:
#                 cur_y = self.path2idx[p]
#                 if p in self.protect_class_path:
#                     cur_x = random.choice(self.cloaked_protect_train_X)
#                 elif sybil and (p in self.sybil_class):
#                     cur_x = random.choice(self.cloaked_sybil_train_X)
#                 else:
#                     im = image.load_img(p, target_size=self.img_shape)
#                     im = image.img_to_array(im)
#                     cur_x = preprocess_input(im)
#                 batch_X.append(cur_x)
#                 batch_Y.append(cur_y)
#             batch_X = np.array(batch_X)
#             batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
#             yield batch_X, batch_Y