upload to pypi

2020-06-28 23:34:48 -05:00 · 2020-06-28 23:34:48 -05:00 · 95a0d5efc0
commit 95a0d5efc0
--- a/fawkes/LICENSE
+++ b/fawkes/LICENSE
@ -0,0 +1,19 @@
+Copyright (c) 2018 The Python Packaging Authority
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/fawkes/README.md
+++ b/fawkes/README.md
@ -0,0 +1,57 @@
+# Fawkes
+Code implementation of the paper "[Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models](https://arxiv.org/pdf/2002.08327.pdf)", at *USENIX Security 2020*. 
+
+### BEFORE YOU RUN OUR CODE
+We appreciate your interest in our work and thank you for trying out our code. We have noticed several cases where an incorrect configuration leads to poor protection performance. If your results differ substantially from those presented in the paper, please feel free to open an issue in this repo or contact any of the authors directly. We are more than happy to help you debug your experiment and find the correct configuration.
+
+### ABOUT
+
+This repository contains code implementation of the paper "[Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models](https://arxiv.org/pdf/2002.08327.pdf)", at *USENIX Security 2020*. 
+
+### DEPENDENCIES
+
+Our code is implemented and tested on Keras with a TensorFlow backend. The following packages are used by our code.
+
+- `keras==2.3.1`
+- `numpy==1.18.4`
+- `tensorflow-gpu==1.13.1`
+
+Our code is tested on `Python 3.6.8`
+
+### HOWTO
+
+#### Download and Config Datasets
+The first step is to download several datasets for protection and target selection.
+1. Download the following datasets to your local machine. After downloading, restructure each dataset so that its directory layout matches that of the downloaded FaceScrub dataset.
+    - FaceScrub -- used for protection evaluation (link)
+    - VGGFace1 -- used for target selection (link)
+    - VGGFace2 -- used for target selection (link)
+    - WebFace -- used for target selection (link)
+
+2. Configure the datasets.
+Open `fawkes/config.py` and update the `DATASETS` dictionary with the path to each dataset. Then run `python fawkes/config.py`. Every time the datasets are updated or moved, remember to rerun the command with the updated path.
+
+3. Calculate embeddings using the feature extractor.
+Run `python3 fawkes/prepare_feature_extractor.py --candidate-datasets scrub vggface1 vggface2 webface`. This will calculate and cache the embeddings using the default feature extractor we provide. To use a customized feature extractor, please see the Advanced section at the end.
+
+#### Generate Cloak for Images
+To generate a cloak, run
+`python3 fawkes/protection.py --gpu 0 --dataset scrub --feature-extractor webface_dense_robust_extract`
+For more information about the detailed parameters, please read `fawkes/protection.py`. 
+The code will output a directory in `results/` with `cloak_data.p` inside. You can check the cloaked images or inspect the changes in `this notebook`. 
+
+#### Evaluate Cloak Effectiveness
+To evaluate the cloak, run `python3 fawkes/eval_cloak.py --gpu 0 --cloak_data PATH-TO-RESULT-DIRECTORY --transfer_model vggface2_inception_extract`. 
+
+The code will print out the tracker model accuracy on uncloaked/original test images of the protected user, which should be close to 0. 
+
+
+### Citation
+```
+@inproceedings{shan2020fawkes,
+  title={Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models},
+  author={Shan, Shawn and Wenger, Emily and Zhang, Jiayun and Li, Huiying and Zheng, Haitao and Zhao, Ben Y},
+  booktitle="Proc. of USENIX Security",
+  year={2020}
+}
+```
--- a/fawkes/build/lib/fawkes/init.py
+++ b/fawkes/build/lib/fawkes/init.py
--- a/fawkes/build/lib/fawkes/differentiator.py
+++ b/fawkes/build/lib/fawkes/differentiator.py
@ -211,8 +211,8 @@ class FawkesMaskGeneration:
        # we're creating
        start_vars = set(x.name for x in tf.global_variables())
        self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
-        # optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
-        optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)
+        optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
+        # optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)

        self.train = optimizer.minimize(self.loss_sum,
                                        var_list=[self.modifier])
@ -250,8 +250,6 @@ class FawkesMaskGeneration:

        imgs = reverse_preprocess(imgs, self.intensity_range)
        imgs = np.clip(imgs, 0, self.max_val)
-        imgs = np.rint(imgs)
-
        imgs = preprocess(imgs, self.intensity_range)

        return imgs
--- a/fawkes/build/lib/fawkes/protection.py
+++ b/fawkes/build/lib/fawkes/protection.py
@ -16,12 +16,13 @@ random.seed(12243)
 np.random.seed(122412)
 set_random_seed(12242)

-BATCH_SIZE = 32
+BATCH_SIZE = 1
 MAX_ITER = 1000


 def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01):
    batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X)
+
    differentiator = FawkesMaskGeneration(sess, feature_extractors,
                                          batch_size=batch_size,
                                          mimic_img=True,
@ -66,8 +67,6 @@ def fawkes():
            tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor])
            target_images.append(tar_img)
        target_images = np.concatenate(target_images)
-        # import pdb
-        # pdb.set_trace()
    else:
        target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor])

@ -80,7 +79,6 @@ def fawkes():

    for p_img, path in zip(protected_images, image_paths):
        p_img = reverse_process_cloaked(p_img)
-        # img_type = path.split(".")[-1]
        file_name = "{}_cloaked.jpeg".format(".".join(path.split(".")[:-1]))
        dump_image(p_img, file_name, format="JPEG")

@ -97,9 +95,9 @@ def parse_arguments(argv):
                        default="webface_dense_robust_extract")

    parser.add_argument('--th', type=float, default=0.005)
-    parser.add_argument('--sd', type=int, default=1e10)
+    parser.add_argument('--sd', type=int, default=1e9)
    parser.add_argument('--protect_class', type=str, default=None)
-    parser.add_argument('--lr', type=float, default=0.1)
+    parser.add_argument('--lr', type=float, default=1)

    parser.add_argument('--result_directory', type=str, default="../results")
    parser.add_argument('--seperate_target', action='store_true')
--- a/fawkes/build/lib/fawkes/utils.py
+++ b/fawkes/build/lib/fawkes/utils.py
@ -297,7 +297,6 @@ def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, m
        if cur_tot_sum > highest_num:
            highest_num = cur_tot_sum
            paired_target_X = cur_paired_target_X
-            final_target_class_path = target_class_path

    np.random.shuffle(paired_target_X)
    paired_target_X = list(paired_target_X)
--- a/fawkes/dist/fawkes-0.0.1-py3-none-any.whl
+++ b/fawkes/dist/fawkes-0.0.1-py3-none-any.whl
--- a/fawkes/dist/fawkes-0.0.1.tar.gz
+++ b/fawkes/dist/fawkes-0.0.1.tar.gz
--- a/fawkes/fawkes.egg-info/PKG-INFO
+++ b/fawkes/fawkes.egg-info/PKG-INFO
@ -0,0 +1,71 @@
+Metadata-Version: 2.1
+Name: fawkes
+Version: 0.0.1
+Summary: Fawkes protect user privacy
+Home-page: https://github.com/Shawn-Shan/fawkes
+Author: Shawn Shan
+Author-email: shansixiong@cs.uchicago.edu
+License: UNKNOWN
+Description: # Fawkes
+        Code implementation of the paper "[Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models](https://arxiv.org/pdf/2002.08327.pdf)", at *USENIX Security 2020*. 
+        
+        ### BEFORE YOU RUN OUR CODE
+        We appreciate your interest in our work and for trying out our code. We've noticed several cases where incorrect configuration leads to poor performances of protection. If you also observe low detection performance far away from what we presented in the paper, please feel free to open an issue in this repo or contact any of the authors directly. We are more than happy to help you debug your experiment and find out the correct configuration. 
+        
+        ### ABOUT
+        
+        This repository contains code implementation of the paper "[Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models](https://arxiv.org/pdf/2002.08327.pdf)", at *USENIX Security 2020*. 
+        
+        ### DEPENDENCIES
+        
+        Our code is implemented and tested on Keras with TensorFlow backend. Following packages are used by our code.
+        
+        - `keras==2.3.1`
+        - `numpy==1.18.4`
+        - `tensorflow-gpu==1.13.1`
+        
+        Our code is tested on `Python 3.6.8`
+        
+        ### HOWTO
+        
+        #### Download and Config Datasets
+        The first step is to download several datasets for protection and target selection. 
+        1. Download the following dataset to your local machine. After downloading the datasets, restructure it the same way as the FaceScrub dataset downloaded. 
+            - FaceScrub -- used for protection evaluation (link)
+            - VGGFace1 -- used for target select (link)
+            - VGGFace2 -- used for target select (link)
+            - WebFace -- used for target select (link)
+        
+        2. Config datasets
+        open `fawkes/config.py` and update the `DATASETS` dictionary with the path to each dataset. Then run `python fawkes/config.py`. Every time the datasets are updated or moved, remember to rerun the command with the updated path. 
+        
+        3. Calculate embeddings using feature extractor. 
+        Run `python3 fawkes/prepare_feature_extractor.py --candidate-datasets scrub vggface1 vggface2 webface`. This will calculate and cache the embeddings using the default feature extractor we provide. To use a customized feature extractor, please look at the Advance section at the end. 
+        
+        #### Generate Cloak for Images
+        To generate cloak, run 
+        `python3 fawkes/protection.py --gpu 0 --dataset scrub --feature-extractor webface_dense_robust_extract`
+        For more information about the detailed parameters, please read `fawkes/protection.py`. 
+        The code will output a directory in `results/` with `cloak_data.p` inside. You can check the cloaked images or inspect the changes in `this notebook`. 
+        
+        #### Evaluate Cloak Effectiveness
+        To evaluate the cloak, run `python3 fawkes/eval_cloak.py --gpu 0 --cloak_data PATH-TO-RESULT-DIRECTORY --transfer_model vggface2_inception_extract`. 
+        
+        The code will print out the tracker model accuracy on uncloaked/original test images of the protected user, which should be close to 0. 
+        
+        
+        ### Citation
+        ```
+        @inproceedings{shan2020fawkes,
+          title={Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models},
+          author={Shan, Shawn and Wenger, Emily and Zhang, Jiayun and Li, Huiying and Zheng, Haitao and Zhao, Ben Y},
+          booktitle="Proc. of USENIX Security",
+          year={2020}
+        }
+        ```
+Platform: UNKNOWN
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.5
+Description-Content-Type: text/markdown
--- a/fawkes/fawkes.egg-info/SOURCES.txt
+++ b/fawkes/fawkes.egg-info/SOURCES.txt
@ -0,0 +1,10 @@
+README.md
+setup.py
+fawkes/__init__.py
+fawkes/differentiator.py
+fawkes/protection.py
+fawkes/utils.py
+fawkes.egg-info/PKG-INFO
+fawkes.egg-info/SOURCES.txt
+fawkes.egg-info/dependency_links.txt
+fawkes.egg-info/top_level.txt
--- a/fawkes/fawkes.egg-info/dependency_links.txt
+++ b/fawkes/fawkes.egg-info/dependency_links.txt
@ -0,0 +1 @@
+
--- a/fawkes/fawkes.egg-info/top_level.txt
+++ b/fawkes/fawkes.egg-info/top_level.txt
@ -0,0 +1 @@
+fawkes
--- a/fawkes/fawkes/init.py
+++ b/fawkes/fawkes/init.py
--- a/fawkes/fawkes/differentiator.py
+++ b/fawkes/fawkes/differentiator.py
@ -0,0 +1,421 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Date    : 2020-05-17
+# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
+# @Link    : https://www.shawnshan.com/
+
+import datetime
+import time
+from decimal import Decimal
+
+import numpy as np
+import tensorflow as tf
+from utils import preprocess, reverse_preprocess
+
+
+class FawkesMaskGeneration:
+    """Optimise a small perturbation ("cloak") for a batch of images.
+
+    The constructor builds a TensorFlow graph whose loss combines a DSSIM
+    penalty between the cloaked and the source image with the feature-space
+    distance between the cloaked image and a target; ``attack`` then runs the
+    optimisation batch by batch.
+
+    The class attributes below are only the default values of the
+    corresponding constructor arguments.
+    """
+    # if the attack is trying to mimic a target image or a neuron vector
+    MIMIC_IMG = True
+    # number of iterations to perform gradient descent
+    MAX_ITERATIONS = 10000
+    # larger values converge faster to less accurate results
+    LEARNING_RATE = 1e-2
+    # the initial constant c to pick as a first guess
+    INITIAL_CONST = 1
+    # pixel intensity range
+    INTENSITY_RANGE = 'imagenet'
+    # threshold for distance
+    L_THRESHOLD = 0.03
+    # whether keep the final result or the best result
+    # NOTE(review): stored by the constructor but not read anywhere in the visible code
+    KEEP_FINAL = False
+    # max_val of image
+    MAX_VAL = 255
+    # The following variables are used by DSSIM, should keep as default
+    # NOTE(review): none of the three is referenced in the visible code;
+    # tf.image.ssim is called without them
+    # filter size in SSIM
+    FILTER_SIZE = 11
+    # filter sigma in SSIM
+    FILTER_SIGMA = 1.5
+    # weights used in MS-SSIM
+    SCALE_WEIGHTS = None
+    # when True the feature-space distance is subtracted from the loss
+    # (pushed up) instead of added (pulled down)
+    MAXIMIZE = False
+    # shape of a single input image
+    IMAGE_SHAPE = (224, 224, 3)
+    # NOTE(review): stored by the constructor but not read anywhere in the visible code
+    RATIO = 1.0
+    # when True, a candidate is only kept while its feature distance stays
+    # above 10% of the distance measured before optimisation starts
+    LIMIT_DIST = False
+
+    def __init__(self, sess, bottleneck_model_ls, mimic_img=MIMIC_IMG,
+                 batch_size=1, learning_rate=LEARNING_RATE,
+                 max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
+                 intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
+                 max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
+                 verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST):
+        """Build the optimisation graph for a fixed batch size.
+
+        :param sess: session used to run every op created here.
+        :param bottleneck_model_ls: iterable of feature extractors; each one is
+            called on a tensor and must expose ``input_shape``.
+        :param mimic_img: if True the target is an image that is pushed through
+            the extractor, otherwise the target is fed as a feature tensor.
+        :param batch_size: number of images the graph processes at once.
+        :param learning_rate: value fed to the optimizer on every step.
+        :param max_iterations: optimisation steps performed per batch.
+        :param initial_const: weight of the squared distance penalty.
+        :param intensity_range: name of the pixel preprocessing scheme.
+        :param l_threshold: DSSIM value below which the perturbation is not
+            penalised.
+        :param max_val: upper bound used by ``clipping``.
+        :param keep_final: stored only; not read in the visible code.
+        :param maximize: subtract instead of add the feature distance.
+        :param image_shape: shape of a single image.
+        :param verbose: 1 enables progress printing in ``attack_batch``.
+        :param ratio: stored only; not read in the visible code.
+        :param limit_dist: see the ``LIMIT_DIST`` class attribute.
+        """
+
+        # NOTE(review): 'inception' and 'mnist' pass this check, but only
+        # 'imagenet' and 'raw' define self.aimg_input further down
+        assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}
+
+        # constant used for tanh transformation to avoid corner cases
+        self.tanh_constant = 2 - 1e-6
+        self.sess = sess
+        self.MIMIC_IMG = mimic_img
+        self.LEARNING_RATE = learning_rate
+        self.MAX_ITERATIONS = max_iterations
+        self.initial_const = initial_const
+        self.batch_size = batch_size
+        self.intensity_range = intensity_range
+        self.l_threshold = l_threshold
+        self.max_val = max_val
+        self.keep_final = keep_final
+        self.verbose = verbose
+        self.maximize = maximize
+        self.learning_rate = learning_rate
+        self.ratio = ratio
+        self.limit_dist = limit_dist
+        self.single_shape = list(image_shape)
+
+        self.input_shape = tuple([self.batch_size] + self.single_shape)
+
+        # NOTE(review): identical to input_shape, so in non-mimic mode the
+        # target tensor must have the shape of an image batch - confirm intent
+        self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
+
+        # the variable we're going to optimize over
+        self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
+
+        # target image in tanh space
+        if self.MIMIC_IMG:
+            self.timg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
+        else:
+            self.bottleneck_t_raw = tf.Variable(np.zeros(self.bottleneck_shape), dtype=np.float32)
+        # source image in tanh space
+        self.simg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
+
+        # per-sample penalty weight and per-sample "slot is in use" flag
+        self.const = tf.Variable(np.ones(batch_size), dtype=np.float32)
+        # NOTE(review): the np.bool alias was removed in NumPy 1.24; this line
+        # needs plain `bool` on newer NumPy versions
+        self.mask = tf.Variable(np.ones((batch_size), dtype=np.bool))
+        # NOTE(review): weights are assigned in setup but never enter the loss below
+        self.weights = tf.Variable(np.ones(self.bottleneck_shape,
+                                           dtype=np.float32))
+
+        # and here's what we use to assign them
+        self.assign_modifier = tf.placeholder(tf.float32, self.input_shape)
+        if self.MIMIC_IMG:
+            self.assign_timg_tanh = tf.placeholder(
+                tf.float32, self.input_shape)
+        else:
+            self.assign_bottleneck_t_raw = tf.placeholder(
+                tf.float32, self.bottleneck_shape)
+        self.assign_simg_tanh = tf.placeholder(tf.float32, self.input_shape)
+        self.assign_const = tf.placeholder(tf.float32, (batch_size))
+        self.assign_mask = tf.placeholder(tf.bool, (batch_size))
+        self.assign_weights = tf.placeholder(tf.float32, self.bottleneck_shape)
+
+        # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
+        # (then shifted by 0.5 and scaled by 255)
+        # adversarial image in raw space
+        self.aimg_raw = (tf.tanh(self.modifier + self.simg_tanh) /
+                         self.tanh_constant +
+                         0.5) * 255.0
+        # source image in raw space
+        self.simg_raw = (tf.tanh(self.simg_tanh) /
+                         self.tanh_constant +
+                         0.5) * 255.0
+        if self.MIMIC_IMG:
+            # target image in raw space
+            self.timg_raw = (tf.tanh(self.timg_tanh) /
+                             self.tanh_constant +
+                             0.5) * 255.0
+
+        # convert source and adversarial image into input space
+        if self.intensity_range == 'imagenet':
+            # reverse the last axis and subtract a per-channel mean
+            # (presumably RGB -> BGR Caffe-style ImageNet preprocessing - confirm)
+            mean = tf.constant(np.repeat([[[[103.939, 116.779, 123.68]]]], self.batch_size, axis=0), dtype=tf.float32,
+                               name='img_mean')
+            self.aimg_input = (self.aimg_raw[..., ::-1] - mean)
+            self.simg_input = (self.simg_raw[..., ::-1] - mean)
+            if self.MIMIC_IMG:
+                self.timg_input = (self.timg_raw[..., ::-1] - mean)
+
+        elif self.intensity_range == 'raw':
+            self.aimg_input = self.aimg_raw
+            self.simg_input = self.simg_raw
+            if self.MIMIC_IMG:
+                self.timg_input = self.timg_raw
+
+        # structural dissimilarity per image: (1 - SSIM) / 2
+        def batch_gen_DSSIM(aimg_raw_split, simg_raw_split):
+            msssim_split = tf.image.ssim(aimg_raw_split, simg_raw_split, max_val=255.0)
+            dist = (1.0 - tf.stack(msssim_split)) / 2.0
+            # dist = tf.square(aimg_raw_split - simg_raw_split)
+            return dist
+
+        # raw value of DSSIM distance
+        self.dist_raw = batch_gen_DSSIM(self.aimg_raw, self.simg_raw)
+        # distance value after applying threshold
+        self.dist = tf.maximum(self.dist_raw - self.l_threshold, 0.0)
+        # self.dist = self.dist_raw
+        # sums below only count the batch slots whose mask entry is True
+        self.dist_raw_sum = tf.reduce_sum(
+            tf.where(self.mask,
+                     self.dist_raw,
+                     tf.zeros_like(self.dist_raw)))
+        self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
+        # self.dist_sum = 1e-5 * tf.reduce_sum(self.dist)
+        # self.dist_raw_sum = self.dist_sum
+
+        # return the tensor unchanged when it already matches the model input
+        # shape or that shape's second entry is unspecified; otherwise resize
+        # it to the first two entries of the model input shape
+        def resize_tensor(input_tensor, model_input_shape):
+            if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
+                return input_tensor
+            resized_tensor = tf.image.resize(input_tensor, model_input_shape[:2])
+            return resized_tensor
+
+        # features of the target image; cur_simg_input is accepted but unused
+        def calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input):
+            target_features = bottleneck_model(cur_timg_input)
+            return target_features
+
+        self.bottlesim = 0.0
+        self.bottlesim_sum = 0.0
+        self.bottlesim_push = 0.0
+        # accumulate the normalised feature distance over every extractor;
+        # self.bottleneck_a / self.bottleneck_t end up holding the tensors of
+        # the last extractor only
+        for bottleneck_model in bottleneck_model_ls:
+            model_input_shape = bottleneck_model.input_shape[1:]
+            cur_aimg_input = resize_tensor(self.aimg_input, model_input_shape)
+
+            self.bottleneck_a = bottleneck_model(cur_aimg_input)
+            if self.MIMIC_IMG:
+                # NOTE(review): unlike the adversarial input, the target input
+                # is passed to the model without resizing
+                # cur_timg_input = resize_tensor(self.timg_input, model_input_shape)
+                # cur_simg_input = resize_tensor(self.simg_input, model_input_shape)
+                cur_timg_input = self.timg_input
+                cur_simg_input = self.simg_input
+                self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
+                # self.bottleneck_t = bottleneck_model(cur_timg_input)
+            else:
+                self.bottleneck_t = self.bottleneck_t_raw
+
+            # L2 distance between target and adversarial features, divided by
+            # the L2 norm of the target features
+            bottleneck_diff = self.bottleneck_t - self.bottleneck_a
+            scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))
+
+            cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
+            cur_bottlesim = cur_bottlesim / scale_factor
+            cur_bottlesim_sum = tf.reduce_sum(cur_bottlesim)
+
+            self.bottlesim += cur_bottlesim
+
+            # self.bottlesim_push += cur_bottlesim_push_sum
+            self.bottlesim_sum += cur_bottlesim_sum
+
+        # sum up the losses
+        if self.maximize:
+            self.loss = self.const * tf.square(self.dist) - self.bottlesim
+        else:
+            self.loss = self.const * tf.square(self.dist) + self.bottlesim
+
+        self.loss_sum = tf.reduce_sum(tf.where(self.mask,
+                                               self.loss,
+                                               tf.zeros_like(self.loss)))
+
+        # self.loss_sum = self.dist_sum + tf.reduce_sum(self.bottlesim)
+        # import pdb
+        # pdb.set_trace()
+        # self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
+
+        # Setup the Adadelta optimizer and keep track of variables
+        # we're creating
+        # (the variable names are snapshotted before and after minimize() so
+        # that the optimizer's own variables can be re-initialised per batch)
+        start_vars = set(x.name for x in tf.global_variables())
+        self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
+        optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
+        # optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)
+
+        # only the modifier is trained; the feature extractors stay untouched
+        self.train = optimizer.minimize(self.loss_sum,
+                                        var_list=[self.modifier])
+        end_vars = tf.global_variables()
+        new_vars = [x for x in end_vars if x.name not in start_vars]
+
+        # these are the variables to initialize when we run
+        self.setup = []
+        self.setup.append(self.modifier.assign(self.assign_modifier))
+        if self.MIMIC_IMG:
+            self.setup.append(self.timg_tanh.assign(self.assign_timg_tanh))
+        else:
+            self.setup.append(self.bottleneck_t_raw.assign(
+                self.assign_bottleneck_t_raw))
+        self.setup.append(self.simg_tanh.assign(self.assign_simg_tanh))
+        self.setup.append(self.const.assign(self.assign_const))
+        self.setup.append(self.mask.assign(self.assign_mask))
+        self.setup.append(self.weights.assign(self.assign_weights))
+
+        # resets the modifier and the optimizer state at the start of a batch
+        self.init = tf.variables_initializer(var_list=[self.modifier] + new_vars)
+
+        print('Attacker loaded')
+
+    def preprocess_arctanh(self, imgs):
+
+        imgs = reverse_preprocess(imgs, self.intensity_range)
+        imgs /= 255.0
+        imgs -= 0.5
+        imgs *= self.tanh_constant
+        tanh_imgs = np.arctanh(imgs)
+
+        return tanh_imgs
+
+    def clipping(self, imgs):
+
+        imgs = reverse_preprocess(imgs, self.intensity_range)
+        imgs = np.clip(imgs, 0, self.max_val)
+        imgs = preprocess(imgs, self.intensity_range)
+
+        return imgs
+
+    def attack(self, source_imgs, target_imgs, weights=None):
+
+        if weights is None:
+            weights = np.ones([source_imgs.shape[0]] +
+                              list(self.bottleneck_shape[1:]))
+
+        assert weights.shape[1:] == self.bottleneck_shape[1:]
+        assert source_imgs.shape[1:] == self.input_shape[1:]
+        assert source_imgs.shape[0] == weights.shape[0]
+        if self.MIMIC_IMG:
+            assert target_imgs.shape[1:] == self.input_shape[1:]
+            assert source_imgs.shape[0] == target_imgs.shape[0]
+        else:
+            assert target_imgs.shape[1:] == self.bottleneck_shape[1:]
+            assert source_imgs.shape[0] == target_imgs.shape[0]
+
+        start_time = time.time()
+
+        adv_imgs = []
+        print('%d batches in total'
+              % int(np.ceil(len(source_imgs) / self.batch_size)))
+
+        for idx in range(0, len(source_imgs), self.batch_size):
+            print('processing batch %d at %s' % (idx, datetime.datetime.now()))
+            adv_img = self.attack_batch(source_imgs[idx:idx + self.batch_size],
+                                        target_imgs[idx:idx + self.batch_size],
+                                        weights[idx:idx + self.batch_size])
+            adv_imgs.extend(adv_img)
+
+        elapsed_time = time.time() - start_time
+        print('attack cost %f s' % (elapsed_time))
+
+        return np.array(adv_imgs)
+
+    def attack_batch(self, source_imgs, target_imgs, weights):
+
+        """
+        Run the attack on a batch of images and labels.
+        """
+
+        LR = self.learning_rate
+        nb_imgs = source_imgs.shape[0]
+        mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
+        mask = np.array(mask, dtype=np.bool)
+
+        source_imgs = np.array(source_imgs)
+        target_imgs = np.array(target_imgs)
+
+        # convert to tanh-space
+        simg_tanh = self.preprocess_arctanh(source_imgs)
+        if self.MIMIC_IMG:
+            timg_tanh = self.preprocess_arctanh(target_imgs)
+        else:
+            timg_tanh = target_imgs
+
+        CONST = np.ones(self.batch_size) * self.initial_const
+
+        self.sess.run(self.init)
+        simg_tanh_batch = np.zeros(self.input_shape)
+        if self.MIMIC_IMG:
+            timg_tanh_batch = np.zeros(self.input_shape)
+        else:
+            timg_tanh_batch = np.zeros(self.bottleneck_shape)
+        weights_batch = np.zeros(self.bottleneck_shape)
+        simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
+        timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
+        weights_batch[:nb_imgs] = weights[:nb_imgs]
+        modifier_batch = np.ones(self.input_shape) * 1e-6
+
+        self.sess.run(self.setup,
+                      {self.assign_timg_tanh: timg_tanh_batch,
+                       self.assign_simg_tanh: simg_tanh_batch,
+                       self.assign_const: CONST,
+                       self.assign_mask: mask,
+                       self.assign_weights: weights_batch,
+                       self.assign_modifier: modifier_batch})
+
+        best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
+        best_adv = np.zeros_like(source_imgs)
+
+        if self.verbose == 1:
+            loss_sum = float(self.sess.run(self.loss_sum))
+            dist_sum = float(self.sess.run(self.dist_sum))
+            thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
+            dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
+            bottlesim_sum = self.sess.run(self.bottlesim_sum)
+            print('START: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
+                  % (Decimal(loss_sum),
+                     dist_sum,
+                     thresh_over,
+                     dist_raw_sum,
+                     bottlesim_sum / nb_imgs))
+
+        try:
+            total_distance = [0] * nb_imgs
+
+            if self.limit_dist:
+                dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
+                    [self.dist_raw,
+                     self.bottlesim,
+                     self.aimg_input])
+                for e, (dist_raw, bottlesim, aimg_input) in enumerate(
+                        zip(dist_raw_list, bottlesim_list, aimg_input_list)):
+                    if e >= nb_imgs:
+                        break
+                    total_distance[e] = bottlesim
+
+            for iteration in range(self.MAX_ITERATIONS):
+
+                self.sess.run([self.train], feed_dict={self.learning_rate_holder: LR})
+
+                dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
+                    [self.dist_raw,
+                     self.bottlesim,
+                     self.aimg_input])
+                for e, (dist_raw, bottlesim, aimg_input) in enumerate(
+                        zip(dist_raw_list, bottlesim_list, aimg_input_list)):
+                    if e >= nb_imgs:
+                        break
+                    if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
+                            not self.maximize)) or (
+                            bottlesim > best_bottlesim[e] and self.maximize):
+                        best_bottlesim[e] = bottlesim
+                        best_adv[e] = aimg_input
+
+                if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
+                    # LR = LR / 2
+                    print("Learning Rate: ", LR)
+
+                if iteration % (self.MAX_ITERATIONS // 10) == 0:
+                    if self.verbose == 1:
+                        loss_sum = float(self.sess.run(self.loss_sum))
+                        dist_sum = float(self.sess.run(self.dist_sum))
+                        thresh_over = (dist_sum /
+                                       self.batch_size /
+                                       self.l_threshold *
+                                       100)
+                        dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
+                        bottlesim_sum = self.sess.run(self.bottlesim_sum)
+                        print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
+                              % (iteration,
+                                 Decimal(loss_sum),
+                                 dist_sum,
+                                 thresh_over,
+                                 dist_raw_sum,
+                                 bottlesim_sum / nb_imgs))
+        except KeyboardInterrupt:
+            pass
+
+        if self.verbose == 1:
+            loss_sum = float(self.sess.run(self.loss_sum))
+            dist_sum = float(self.sess.run(self.dist_sum))
+            thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
+            dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
+            bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
+            print('END:       Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
+                  % (Decimal(loss_sum),
+                     dist_sum,
+                     thresh_over,
+                     dist_raw_sum,
+                     bottlesim_sum / nb_imgs))
+
+        best_adv = self.clipping(best_adv[:nb_imgs])
+
+        return best_adv
--- a/fawkes/fawkes/protection.py
+++ b/fawkes/fawkes/protection.py
@ -0,0 +1,110 @@
+import argparse
+import glob
+import os
+import random
+import sys
+
+import numpy as np
+from differentiator import FawkesMaskGeneration
+from keras.applications.vgg16 import preprocess_input
+from keras.preprocessing import image
+from skimage.transform import resize
+from tensorflow import set_random_seed
+from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked
+
+# Seed the stdlib, NumPy and TensorFlow random number generators with fixed values.
+random.seed(12243)
+np.random.seed(122412)
+set_random_seed(12242)
+
+# Upper bound on the optimizer batch size; generate_cloak_images() lowers it
+# when fewer images than this are supplied.
+BATCH_SIZE = 1
+# Passed to FawkesMaskGeneration as ``max_iterations``.
+MAX_ITER = 1000
+
+
+def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01):
+    """Run the cloak optimization on ``image_X`` and return the cloaked images.
+
+    A ``FawkesMaskGeneration`` optimizer is configured from the module
+    constants, the ``th`` threshold and the command-line options, then its
+    ``attack`` method is called with the source and target images.
+
+    NOTE: this reads the module-level ``args`` (``args.sd`` and ``args.lr``),
+    which is only assigned when the file is executed as a script.
+    """
+    # Never request a batch larger than the number of images supplied.
+    effective_batch = min(BATCH_SIZE, len(image_X))
+
+    optimizer_options = dict(
+        batch_size=effective_batch,
+        mimic_img=True,
+        intensity_range='imagenet',
+        initial_const=args.sd,
+        learning_rate=args.lr,
+        max_iterations=MAX_ITER,
+        l_threshold=th,
+        verbose=1,
+        maximize=False,
+        keep_final=False,
+        image_shape=image_X.shape[1:],
+    )
+    differentiator = FawkesMaskGeneration(sess, feature_extractors, **optimizer_options)
+
+    return differentiator.attack(image_X, target_X)
+
+
+def extract_faces(img):
+    """Resize ``img`` to 224x224 and apply the VGG16 ``preprocess_input`` transform.
+
+    NOTE(review): despite the name, no face detection or cropping is done here.
+    """
+    resized = resize(img, (224, 224))
+    return preprocess_input(resized)
+
+
+def fawkes():
+    """Cloak every image found in ``args.directory``.
+
+    Each input image is resized/preprocessed, paired with a target image,
+    optimized into a cloaked version, and written next to the original as
+    ``<original name without extension>_cloaked.jpeg``. Files whose name
+    already contains ``_cloaked`` are skipped.
+
+    Reads the module-level ``args`` produced by ``parse_arguments``.
+
+    Raises:
+        NotADirectoryError: if ``args.directory`` is not an existing directory.
+    """
+    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
+    if not os.path.isdir(args.directory):
+        raise NotADirectoryError("{} is not an existing directory".format(args.directory))
+
+    sess = init_gpu(args.gpu)
+
+    print("Loading {} for optimization".format(args.feature_extractor))
+
+    feature_extractors_ls = [load_extractor(args.feature_extractor)]
+
+    # ``os.path.basename`` keeps the "_cloaked" filter correct on platforms
+    # whose path separator is not "/".
+    image_paths = [path for path in glob.glob(os.path.join(args.directory, "*"))
+                   if "_cloaked" not in os.path.basename(path)]
+    if not image_paths:
+        print("No images to cloak in {}".format(args.directory))
+        return
+
+    orginal_images = np.array([extract_faces(image.img_to_array(image.load_img(cur_path)))
+                               for cur_path in image_paths])
+
+    if args.seperate_target:
+        # Pick a target independently for every image (one-image batches).
+        target_images = []
+        for org_img in orginal_images:
+            org_img = org_img.reshape([1] + list(org_img.shape))
+            tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor])
+            target_images.append(tar_img)
+        target_images = np.concatenate(target_images)
+    else:
+        target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor])
+
+    protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images,
+                                             target_X=target_images, th=args.th)
+
+    for p_img, path in zip(protected_images, image_paths):
+        p_img = reverse_process_cloaked(p_img)
+        # ``splitext`` only strips a real extension of the last path component,
+        # so extension-less files and dotted directory names keep their full stem.
+        file_name = "{}_cloaked.jpeg".format(os.path.splitext(path)[0])
+        dump_image(p_img, file_name, format="JPEG")
+
+
+def parse_arguments(argv):
+    """Parse the command-line options of the protection script.
+
+    Args:
+        argv: list of argument strings, without the program name.
+
+    Returns:
+        The ``argparse.Namespace`` holding the parsed options.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=str,
+                        help='GPU id', default='0')
+    parser.add_argument('--directory', type=str,
+                        help='directory that contain images for cloaking', default='imgs/')
+
+    parser.add_argument('--feature-extractor', type=str,
+                        help="name of the feature extractor used for optimization",
+                        default="webface_dense_robust_extract")
+
+    parser.add_argument('--th', type=float, default=0.005)
+    # The default is the float 1e9, so the option is parsed as float as well;
+    # with ``type=int`` a value such as ``--sd 1e9`` was rejected by argparse.
+    parser.add_argument('--sd', type=float, default=1e9)
+    parser.add_argument('--protect_class', type=str, default=None)
+    parser.add_argument('--lr', type=float, default=1)
+
+    parser.add_argument('--result_directory', type=str, default="../results")
+    parser.add_argument('--seperate_target', action='store_true')
+
+    return parser.parse_args(argv)
+
+
+if __name__ == '__main__':
+    # ``args`` becomes a module-level global here; fawkes() and
+    # generate_cloak_images() read it directly rather than taking parameters.
+    args = parse_arguments(sys.argv[1:])
+    fawkes()
--- a/fawkes/fawkes/utils.py
+++ b/fawkes/fawkes/utils.py
@ -0,0 +1,459 @@
+import json
+import os
+import pickle
+import random
+
+import keras
+import keras.backend as K
+import numpy as np
+import tensorflow as tf
+from keras.applications.vgg16 import preprocess_input
+from keras.layers import Dense, Activation
+from keras.models import Model
+from keras.preprocessing import image
+from keras.utils import to_categorical
+from sklearn.metrics import pairwise_distances
+
+
+def clip_img(X, preprocessing='raw'):
+    """Clamp ``X`` to valid pixel values while keeping it in ``preprocessing`` space.
+
+    The preprocessing is undone, the result is clipped to [0, 255], and the
+    preprocessing is applied again.
+    """
+    raw_pixels = reverse_preprocess(X, preprocessing)
+    clipped = np.clip(raw_pixels, 0.0, 255.0)
+    return preprocess(clipped, preprocessing)
+
+
+def dump_dictionary_as_json(dict, outfile):
+    """Serialize ``dict`` as JSON and write the encoded bytes to ``outfile``."""
+    payload = json.dumps(dict).encode()
+    with open(outfile, "wb") as handle:
+        handle.write(payload)
+
+
+def fix_gpu_memory(mem_fraction=1):
+    """Create a TensorFlow session, register it with Keras and return it.
+
+    Sets ``TF_CPP_MIN_LOG_LEVEL`` to ``'3'``, configures the session with the
+    requested per-process GPU memory fraction and ``allow_growth`` enabled,
+    and runs the global variables initializer in the new session.
+    """
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+    session_config = tf.ConfigProto(
+        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction))
+    session_config.gpu_options.allow_growth = True
+    session_config.log_device_placement = False
+
+    initializer = tf.global_variables_initializer()
+    session = tf.Session(config=session_config)
+    session.run(initializer)
+    K.set_session(session)
+    return session
+
+
+def load_victim_model(number_classes, teacher_model=None, end2end=False):
+    """Build a classifier by adding a softmax head on top of ``teacher_model``.
+
+    Every teacher layer has ``trainable`` set to ``end2end``. A ``Dense`` layer
+    with ``number_classes`` units followed by a softmax activation is attached
+    to the teacher's last layer, and the model is compiled with Adadelta and
+    categorical cross-entropy.
+
+    NOTE: ``teacher_model`` defaults to ``None`` but is dereferenced
+    immediately, so a model must always be supplied.
+    """
+    for layer in teacher_model.layers:
+        layer.trainable = end2end
+
+    logits = Dense(number_classes)(teacher_model.layers[-1].output)
+    probabilities = Activation('softmax', name="act")(logits)
+
+    model = Model(teacher_model.input, probabilities)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=keras.optimizers.Adadelta(),
+                  metrics=['accuracy'])
+    return model
+
+
+def init_gpu(gpu_index, force=False):
+    """Select the visible GPU(s) and return a session to run on them.
+
+    Args:
+        gpu_index: a single GPU id, or a list of ids that is joined with commas.
+        force: when true, overwrite an already-set ``CUDA_VISIBLE_DEVICES``.
+
+    Returns:
+        The session created by ``fix_gpu_memory()``; or, when
+        ``CUDA_VISIBLE_DEVICES`` is already set and ``force`` is false, the
+        current Keras session.
+    """
+    if isinstance(gpu_index, list):
+        gpu_num = ','.join(str(i) for i in gpu_index)
+    else:
+        gpu_num = str(gpu_index)
+    if os.environ.get("CUDA_VISIBLE_DEVICES") and not force:
+        print('GPU already initiated')
+        # This branch used to return None, which broke callers that pass the
+        # returned session on. Hand back Keras' current session instead: the
+        # one registered by an earlier fix_gpu_memory() call if there was one.
+        return K.get_session()
+    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
+    return fix_gpu_memory()
+
+
+def preprocess(X, method):
+    """Apply the named preprocessing to ``X``.
+
+    Args:
+        X: image data.
+        method: ``'raw'`` returns ``X`` unchanged; ``'imagenet'`` applies
+            ``imagenet_preprocessing``.
+
+    Raises:
+        AssertionError: if ``method`` is not one of the recognised names.
+        Exception: for ``'inception'`` and ``'mnist'``, which are recognised
+            but not implemented.
+    """
+    assert method in {'raw', 'imagenet', 'inception', 'mnist'}
+
+    # Compare by value: ``is`` only worked when both strings happened to be
+    # the same interned object.
+    if method == 'raw':
+        pass
+    elif method == 'imagenet':
+        X = imagenet_preprocessing(X)
+    else:
+        raise Exception('unknown method %s' % method)
+
+    return X
+
+
+def reverse_preprocess(X, method):
+    """Undo the named preprocessing on ``X``.
+
+    Args:
+        X: preprocessed image data.
+        method: ``'raw'`` returns ``X`` unchanged; ``'imagenet'`` applies
+            ``imagenet_reverse_preprocessing``.
+
+    Raises:
+        AssertionError: if ``method`` is not one of the recognised names.
+        Exception: for ``'inception'`` and ``'mnist'``, which are recognised
+            but not implemented.
+    """
+    assert method in {'raw', 'imagenet', 'inception', 'mnist'}
+
+    # Compare by value: ``is`` only worked when both strings happened to be
+    # the same interned object.
+    if method == 'raw':
+        pass
+    elif method == 'imagenet':
+        X = imagenet_reverse_preprocessing(X)
+    else:
+        raise Exception('unknown method %s' % method)
+
+    return X
+
+
+def imagenet_preprocessing(x, data_format=None):
+    """Convert RGB image data to BGR and subtract the per-channel ImageNet means.
+
+    Args:
+        x: a single image or a batch of images; it is copied, never modified.
+        data_format: ``'channels_last'`` or ``'channels_first'``; defaults to
+            ``K.image_data_format()``.
+
+    Returns:
+        A new array with the channel order reversed and the BGR means
+        ``[103.939, 116.779, 123.68]`` subtracted. Non-float input is returned
+        as float32.
+    """
+    if data_format is None:
+        data_format = K.image_data_format()
+    assert data_format in ('channels_last', 'channels_first')
+
+    x = np.array(x)
+    if not np.issubdtype(x.dtype, np.floating):
+        # The in-place subtraction below cannot write floats into an integer
+        # array, so integer images are promoted first.
+        x = x.astype(np.float32)
+
+    if data_format == 'channels_last':
+        channel_axis = x.ndim - 1
+    else:
+        # A single channels-first image has channels on axis 0, a batch on axis 1.
+        channel_axis = 0 if x.ndim == 3 else 1
+
+    # 'RGB'->'BGR'
+    x = np.flip(x, axis=channel_axis)
+
+    # Zero-center by mean pixel (means are given in BGR order)
+    mean = [103.939, 116.779, 123.68]
+    for channel, channel_mean in enumerate(mean):
+        selector = [slice(None)] * x.ndim
+        selector[channel_axis] = channel
+        x[tuple(selector)] -= channel_mean
+
+    return x
+
+def imagenet_reverse_preprocessing(x, data_format=None):
+    """Undo ``imagenet_preprocessing``: add the BGR means back and return RGB.
+
+    Args:
+        x: a single image or a batch of zero-centered BGR images; it is
+            copied, never modified.
+        data_format: ``'channels_last'`` or ``'channels_first'``; defaults to
+            ``K.image_data_format()``.
+
+    Returns:
+        A new array with ``[103.939, 116.779, 123.68]`` added to the three
+        channels and the channel order reversed. Non-float input is returned
+        as float32.
+    """
+    x = np.array(x)
+    if not np.issubdtype(x.dtype, np.floating):
+        # The in-place addition below cannot write floats into an integer
+        # array, so integer data is promoted first.
+        x = x.astype(np.float32)
+
+    if data_format is None:
+        data_format = K.image_data_format()
+    assert data_format in ('channels_last', 'channels_first')
+
+    if data_format == 'channels_last':
+        channel_axis = x.ndim - 1
+    else:
+        # A single channels-first image has channels on axis 0, a batch on axis 1.
+        channel_axis = 0 if x.ndim == 3 else 1
+
+    # Add the mean pixel back while the data is still in BGR order
+    for channel, channel_mean in enumerate([103.939, 116.779, 123.68]):
+        selector = [slice(None)] * x.ndim
+        selector[channel_axis] = channel
+        x[tuple(selector)] += channel_mean
+
+    # 'BGR'->'RGB'
+    return np.flip(x, axis=channel_axis)
+
+
+def reverse_process_cloaked(x, preprocess='imagenet'):
+    """Map a cloaked image from ``preprocess`` space back to pixel values in [0, 255].
+
+    Args:
+        x: cloaked image data in the space named by ``preprocess``.
+        preprocess: preprocessing method name understood by ``reverse_preprocess``.
+
+    Returns:
+        The image with the preprocessing undone and values clipped to [0, 255].
+    """
+    # Previously: clip_img() (undo, clip, re-apply) followed by a second undo.
+    # The re-apply/undo pair cancelled out apart from float rounding, which
+    # could nudge values back outside the clip bounds; undo once and clip last.
+    return np.clip(reverse_preprocess(x, preprocess), 0.0, 255.0)
+
+
+def build_bottleneck_model(model, cut_off):
+    """Return a compiled model that maps ``model``'s input to the output of layer ``cut_off``.
+
+    ``cut_off`` is the layer name passed to ``model.get_layer``. The new model
+    is compiled with Adam and categorical cross-entropy.
+    """
+    model_input = model.input
+    cut_output = model.get_layer(cut_off).output
+    truncated = Model(model_input, cut_output)
+    truncated.compile(loss='categorical_crossentropy',
+                      optimizer='adam',
+                      metrics=['accuracy'])
+    return truncated
+
+
+def load_extractor(name):
+    """Load the feature extractor saved as ``../feature_extractors/<name>.h5``.
+
+    The path is relative to the current working directory.
+
+    Raises:
+        Exception: if the loaded model's last layer uses a softmax activation,
+            i.e. it is still a classifier rather than a feature extractor.
+    """
+    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
+    last_activation = getattr(model.layers[-1], "activation", None)
+    # Keras stores a layer's activation as a callable, so the former comparison
+    # against the string "softmax" could never match; compare its name instead.
+    if getattr(last_activation, "__name__", None) == "softmax":
+        raise Exception(
+            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
+    return model
+
+
+def get_dataset_path(dataset):
+    """Look up ``dataset`` in ``config.json`` (read from the working directory).
+
+    Returns:
+        A tuple ``(train_dir, test_dir, num_classes, num_images)`` taken from
+        the dataset's entry.
+
+    Raises:
+        Exception: if ``config.json`` does not exist or has no entry for
+            ``dataset``.
+    """
+    if not os.path.exists("config.json"):
+        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
+
+    # Context manager so the file handle is closed as soon as it is parsed.
+    with open("config.json", 'r') as config_file:
+        config = json.load(config_file)
+    if dataset not in config:
+        raise Exception(
+            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
+                dataset))
+    entry = config[dataset]
+    return entry['train_dir'], entry['test_dir'], entry['num_classes'], entry['num_images']
+
+
+def normalize(x):
+    """Divide every row of ``x`` by its norm, computed along axis 1."""
+    row_norms = np.linalg.norm(x, axis=1, keepdims=True)
+    return x / row_norms
+
+
+def dump_image(x, filename, format="png", scale=False):
+    """Convert array ``x`` to an image with ``array_to_img`` and save it to ``filename``.
+
+    ``format`` is passed to the image's ``save`` call and ``scale`` to
+    ``array_to_img``.
+    """
+    rendered = image.array_to_img(x, scale=scale)
+    rendered.save(filename, format)
+
+
+def load_dir(path):
+    """Load every entry of directory ``path`` as a 224x224 image.
+
+    Returns the stacked images after the VGG16 ``preprocess_input`` transform.
+    Every directory entry is passed to ``load_img``, so ``path`` is expected
+    to contain image files only.
+    """
+    assert os.path.exists(path)
+    arrays = [
+        image.img_to_array(image.load_img(os.path.join(path, entry), target_size=(224, 224)))
+        for entry in os.listdir(path)
+    ]
+    return preprocess_input(np.array(arrays))
+
+
+def load_embeddings(feature_extractors_names):
+    """Load and merge the pickled embedding dictionaries of the named extractors.
+
+    For each name, ``../feature_extractors/embeddings/<name>_emb_norm.p``
+    (relative to the working directory) is unpickled. The result maps every
+    key of the first dictionary to the concatenation of that key's embeddings
+    across all dictionaries, in the order the names were given.
+
+    NOTE: ``pickle`` can execute arbitrary code while loading; only use
+    embedding files from a trusted source.
+    """
+    dictionaries = []
+    for extractor_name in feature_extractors_names:
+        emb_path = "../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name)
+        # Context manager so each file handle is closed right after loading.
+        with open(emb_path, "rb") as emb_file:
+            dictionaries.append(pickle.load(emb_file))
+
+    merge_dict = {}
+    for k in dictionaries[0].keys():
+        merge_dict[k] = np.concatenate([dic[k] for dic in dictionaries])
+    return merge_dict
+
+
+def extractor_ls_predict(feature_extractors_ls, X):
+    """Return the normalized, concatenated features of ``X`` from every extractor.
+
+    Each extractor's ``predict(X)`` output is concatenated along axis 1 and the
+    result is passed through ``normalize``.
+    """
+    per_extractor = [extractor.predict(X) for extractor in feature_extractors_ls]
+    stacked = np.concatenate(per_extractor, axis=1)
+    return normalize(stacked)
+
+
+def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):
+    """Score candidate images ``b`` by their feature distance from images ``a``.
+
+    For every sample of ``b`` the distance to its closest sample of ``a`` is
+    computed. The ``len(a)`` samples of ``b`` with the largest such distance
+    are kept.
+
+    Returns:
+        ``(score, selected)`` where ``score`` is the smallest kept distance and
+        ``selected`` is the array of kept ``b`` samples, most distant first.
+    """
+    feats_a = extractor_ls_predict(feature_extractors_ls, a)
+    feats_b = extractor_ls_predict(feature_extractors_ls, b)
+
+    distances = pairwise_distances(feats_a, feats_b, metric)
+    # Per ``b`` sample: distance to the nearest ``a`` sample.
+    nearest = np.min(distances, axis=0)
+    farthest_first = np.argsort(nearest)[::-1][:len(a)]
+
+    kept_distances = [nearest[i] for i in farthest_first]
+    selected = np.array([b[j] for j in farthest_first])
+    return np.min(kept_distances), selected
+
+
+def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
+    """Pick target images for ``imgs`` from the most dissimilar stored class.
+
+    The stored class embeddings are ranked by their distance to the closest
+    input image, largest first. Images of the top-ranked class are loaded and
+    scored with ``calculate_dist_score``, shuffled, and repeated or truncated
+    to exactly ``len(imgs)`` entries.
+
+    Returns:
+        Array of ``len(imgs)`` target images.
+
+    Raises:
+        Exception: if no target image could be selected.
+    """
+    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)
+
+    path2emb = load_embeddings(feature_extractors_names)
+    items = list(path2emb.items())
+    paths = [p[0] for p in items]
+    embs = np.array([p[1] for p in items])
+
+    pair_dist = pairwise_distances(original_feature_x, embs, metric)
+    # Per stored class: distance to the closest input image.
+    max_sum = np.min(pair_dist, axis=0)
+    sorted_idx = np.argsort(max_sum)[::-1]
+
+    # Start below any possible score: with the former 0, a candidate scoring
+    # exactly 0 was rejected and ``paired_target_X`` stayed None.
+    highest_num = -np.inf
+    paired_target_X = None
+    for idx in sorted_idx[:1]:
+        target_class_path = paths[idx]
+        cur_target_X = load_dir(target_class_path)
+        cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
+        cur_tot_sum, cur_paired_target_X = calculate_dist_score(imgs, cur_target_X,
+                                                                feature_extractors_ls,
+                                                                metric=metric)
+        if cur_tot_sum > highest_num:
+            highest_num = cur_tot_sum
+            paired_target_X = cur_paired_target_X
+
+    # Fail loudly: None would crash in shuffle() and an empty selection would
+    # make the padding loop below spin forever.
+    if paired_target_X is None or len(paired_target_X) == 0:
+        raise Exception("No target images could be selected")
+
+    np.random.shuffle(paired_target_X)
+    paired_target_X = list(paired_target_X)
+    # Repeat the selection until there is at least one target per input image.
+    while len(paired_target_X) < len(imgs):
+        paired_target_X += paired_target_X
+
+    return np.array(paired_target_X[:len(imgs)])
+
+
+
+class CloakData(object):
+    """Images of a protected identity plus helpers for target selection and training batches.
+
+    NOTE(review): as written the class cannot be instantiated. ``__init__`` and
+    several methods read ``self.train_data_dir``, ``self.test_data_dir``,
+    ``self.all_labels``, ``self.protect_class``, ``self.number_classes``,
+    ``self.protect_train_X``, ``self.sybil_class`` and
+    ``self.cloaked_sybil_train_X``, none of which is assigned in this class.
+    The assignments appear to have gone away with the commented-out
+    ``get_dataset_path`` lookup in ``__init__``; they must be restored before
+    the class is used.
+    """
+
+    def __init__(self, protect_directory=None, img_shape=(224, 224)):
+        """Load the protected images and build the dataset path mappings."""
+        self.img_shape = img_shape
+
+        # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
+        # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
+        self.protect_directory = protect_directory
+
+        # NOTE(review): load_label_data() reads self.train_data_dir and
+        # self.test_data_dir, which are not set at this point.
+        self.protect_X = self.load_label_data(self.protect_directory)
+
+        self.cloaked_protect_train_X = None
+
+        self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
+        self.all_training_path = self.get_all_data_path(self.label2path_train)
+        self.all_test_path = self.get_all_data_path(self.label2path_test)
+        self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
+
+    def get_class_image_files(self, path):
+        """Return the entries of directory ``path``, each joined with ``path``."""
+        return [os.path.join(path, f) for f in os.listdir(path)]
+
+    def extractor_ls_predict(self, feature_extractors_ls, X):
+        """Return the normalized, axis-1-concatenated features of ``X`` from every extractor."""
+        feature_ls = [extractor.predict(X) for extractor in feature_extractors_ls]
+        concated_feature_ls = np.concatenate(feature_ls, axis=1)
+        return normalize(concated_feature_ls)
+
+    def load_embeddings(self, feature_extractors_names):
+        """Load and merge the pickled embedding dictionaries of the named extractors.
+
+        Each key of the first dictionary is mapped to the concatenation of that
+        key's embeddings across all dictionaries.
+
+        NOTE: ``pickle`` can execute arbitrary code while loading; only use
+        embedding files from a trusted source.
+        """
+        dictionaries = []
+        for extractor_name in feature_extractors_names:
+            emb_path = "../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name)
+            # Context manager so each file handle is closed right after loading.
+            with open(emb_path, "rb") as emb_file:
+                dictionaries.append(pickle.load(emb_file))
+
+        merge_dict = {}
+        for k in dictionaries[0].keys():
+            merge_dict[k] = np.concatenate([dic[k] for dic in dictionaries])
+        return merge_dict
+
+    def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
+        """Choose a target class for the protected images.
+
+        The five stored classes whose embeddings are farthest from the
+        protected images are scored with ``calculate_dist_score`` and the best
+        scoring one is kept.
+
+        Returns:
+            ``(target_class_path, paired_target_X)`` with the target images
+            shuffled.
+
+        Raises:
+            Exception: if no candidate class could be selected.
+        """
+        original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
+
+        path2emb = self.load_embeddings(feature_extractors_names)
+        items = list(path2emb.items())
+        paths = [p[0] for p in items]
+        embs = np.array([p[1] for p in items])
+
+        pair_dist = pairwise_distances(original_feature_x, embs, metric)
+        # Per stored class: distance to the closest protected image.
+        max_sum = np.min(pair_dist, axis=0)
+        sorted_idx = np.argsort(max_sum)[::-1]
+
+        # Start below any possible score: with the former 0, candidates scoring
+        # exactly 0 were rejected and ``paired_target_X`` could stay None.
+        highest_num = -np.inf
+        paired_target_X = None
+        final_target_class_path = None
+        for idx in sorted_idx[:5]:
+            target_class_path = paths[idx]
+            cur_target_X = self.load_dir(target_class_path)
+            cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
+            cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
+                                                                         feature_extractors_ls,
+                                                                         metric=metric)
+            if cur_tot_sum > highest_num:
+                highest_num = cur_tot_sum
+                paired_target_X = cur_paired_target_X
+                final_target_class_path = target_class_path
+
+        # Fail loudly instead of letting shuffle() crash on None.
+        if paired_target_X is None:
+            raise Exception("No target class could be selected")
+
+        np.random.shuffle(paired_target_X)
+        return final_target_class_path, paired_target_X
+
+    def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
+        """Keep the ``len(a)`` samples of ``b`` farthest from ``a`` in feature space.
+
+        Returns ``(score, selected)`` where ``score`` is the smallest kept
+        distance and ``selected`` the kept ``b`` samples, most distant first.
+        """
+        features1 = self.extractor_ls_predict(feature_extractors_ls, a)
+        features2 = self.extractor_ls_predict(feature_extractors_ls, b)
+
+        pair_cos = pairwise_distances(features1, features2, metric)
+        # Per ``b`` sample: distance to the nearest ``a`` sample.
+        max_sum = np.min(pair_cos, axis=0)
+        max_sum_arg = np.argsort(max_sum)[::-1]
+        max_sum_arg = max_sum_arg[:len(a)]
+        max_sum = [max_sum[i] for i in max_sum_arg]
+        paired_target_X = np.array([b[j] for j in max_sum_arg])
+        return np.min(max_sum), paired_target_X
+
+    def get_all_data_path(self, label2path):
+        """Flatten a ``{directory: [file names]}`` mapping into a list of joined paths."""
+        all_paths = []
+        for k, v in label2path.items():
+            all_paths.extend(os.path.join(k, cur_p) for cur_p in v)
+        return all_paths
+
+    def load_label_data(self, label):
+        """Load the train and test images stored under ``label`` and return ``(train_X, test_X)``."""
+        train_label_path = os.path.join(self.train_data_dir, label)
+        test_label_path = os.path.join(self.test_data_dir, label)
+        train_X = self.load_dir(train_label_path)
+        test_X = self.load_dir(test_label_path)
+        return train_X, test_X
+
+    def load_dir(self, path):
+        """Load every entry of ``path`` as an image of size ``self.img_shape``.
+
+        Returns the stacked images after the VGG16 ``preprocess_input`` transform.
+        """
+        assert os.path.exists(path)
+        x_ls = []
+        for file in os.listdir(path):
+            cur_path = os.path.join(path, file)
+            im = image.load_img(cur_path, target_size=self.img_shape)
+            im = image.img_to_array(im)
+            x_ls.append(im)
+        raw_x = np.array(x_ls)
+        return preprocess_input(raw_x)
+
+    def build_data_mapping(self):
+        """Index the train and test directories of every label in ``self.all_labels``.
+
+        Returns:
+            ``(label2path_train, label2path_test, path2idx)``: the first two
+            map a label directory to its file names, the last maps each image
+            path to the index of its label.
+        """
+        label2path_train = {}
+        label2path_test = {}
+        idx = 0
+        path2idx = {}
+        for label_name in self.all_labels:
+            full_path_train = os.path.join(self.train_data_dir, label_name)
+            full_path_test = os.path.join(self.test_data_dir, label_name)
+            label2path_train[full_path_train] = list(os.listdir(full_path_train))
+            label2path_test[full_path_test] = list(os.listdir(full_path_test))
+            for img_file in os.listdir(full_path_train):
+                path2idx[os.path.join(full_path_train, img_file)] = idx
+            for img_file in os.listdir(full_path_test):
+                path2idx[os.path.join(full_path_test, img_file)] = idx
+            idx += 1
+        return label2path_train, label2path_test, path2idx
+
+    def generate_data_post_cloak(self, sybil=False):
+        """Yield endless ``(batch_X, batch_Y)`` training batches of 32 samples.
+
+        Paths belonging to the protected class are replaced by a random cloaked
+        image; with ``sybil`` set, paths in ``self.sybil_class`` are replaced by
+        a random cloaked sybil image. All other paths are loaded from disk.
+        Labels are one-hot encoded over ``self.number_classes`` classes.
+        """
+        assert self.cloaked_protect_train_X is not None
+        while True:
+            batch_X = []
+            batch_Y = []
+            cur_batch_path = random.sample(self.all_training_path, 32)
+            for p in cur_batch_path:
+                cur_y = self.path2idx[p]
+                if p in self.protect_class_path:
+                    cur_x = random.choice(self.cloaked_protect_train_X)
+                elif sybil and (p in self.sybil_class):
+                    cur_x = random.choice(self.cloaked_sybil_train_X)
+                else:
+                    im = image.load_img(p, target_size=self.img_shape)
+                    im = image.img_to_array(im)
+                    cur_x = preprocess_input(im)
+                batch_X.append(cur_x)
+                batch_Y.append(cur_y)
+            batch_X = np.array(batch_X)
+            batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
+            yield batch_X, batch_Y
--- a/fawkes/setup.py
+++ b/fawkes/setup.py
@ -0,0 +1,23 @@
+import setuptools
+
+
+# The README is used as the long description. Read it explicitly as UTF-8 so
+# the build does not depend on the platform's default encoding.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="fawkes",
+    version="0.0.1",
+    author="Shawn Shan",
+    author_email="shansixiong@cs.uchicago.edu",
+    description="Fawkes protect user privacy",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/Shawn-Shan/fawkes",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.5',
+)
--- a/fawkes_dev/config.py
+++ b/fawkes_dev/config.py
@ -3,11 +3,11 @@ import json
 import os

 DATASETS = {
-    "scrub": "../data/scrub/",
-    "vggface1": "/mnt/data/sixiongshan/data/vggface/",
-    # "vggface2": "/mnt/data/sixiongshan/data/vggface2/",
+    "scrub": "/home/shansixioing/cloak/fawkes/data/scrub/",
+    # "vggface1": "/mnt/data/sixiongshan/data/vggface/",
+    "vggface2": "/mnt/data/sixiongshan/data/vggface2/",
    "webface": "/mnt/data/sixiongshan/data/webface/",
-    # "youtubeface": "/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/",
+    "youtubeface": "/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/",
 }


--- a/fawkes_dev/protection.py
+++ b/fawkes_dev/protection.py
@ -13,7 +13,7 @@ random.seed(12243)
 np.random.seed(122412)
 set_random_seed(12242)

-NUM_IMG_PROTECTED = 32  # Number of images used to optimize the target class
+NUM_IMG_PROTECTED = 400  # Number of images used to optimize the target class
 BATCH_SIZE = 32

 MAX_ITER = 1000