diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bdbb526eca7b201fbd9927f14883b179ee03143a..2762c55aeb6692d5adb746b92beb0c160c19f37c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,6 +109,12 @@ struct tap_filter {
 	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
+/* 1024 is probably a high enough limit: modern hypervisors seem to support
+ * on the order of 100-200 CPUs, so this leaves us some breathing room if we
+ * want to match one queue per guest CPU.
+ */
+#define MAX_TAP_QUEUES 1024
+
 /* A tun_file connects an open character device to a tuntap netdevice. It
  * also contains all socket-related structures (except sock_fprog and tap_filter)
  * to serve as one transmit queue for tuntap device. The sock_fprog and
@@ -129,6 +135,7 @@ struct tun_file {
 	struct fasync_struct *fasync;
 	/* only used for fasync */
 	unsigned int flags;
+	u16 queue_index;
 };
 
 /* Since the socket were moved to tun_file, to preserve the behavior of persist
@@ -136,7 +143,8 @@ struct tun_file {
  * file were attached to a persist device.
  */
 struct tun_struct {
-	struct tun_file	__rcu	*tfile;
+	struct tun_file __rcu	*tfiles[MAX_TAP_QUEUES];
+	unsigned int            numqueues;
 	unsigned int 		flags;
 	kuid_t			owner;
 	kgid_t			group;
@@ -157,56 +165,157 @@ struct tun_struct {
 #endif
 };
 
+/* We try to identify a flow through its rxhash first. The reason we do
+ * not check the rxq no. is that some cards (e.g. the 82599) choose the
+ * rxq based on the txq on which the last packet of the flow was sent.
+ * As the userspace application moves between processors, we may get a
+ * different rxq no. here. If we cannot get an rxhash, we fall back on
+ * the rxq no. and hope it helps.
+ */
+static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	u32 txq = 0;
+	u32 numqueues = 0;
+
+	rcu_read_lock();
+	numqueues = tun->numqueues;
+
+	txq = skb_get_rxhash(skb);
+	if (txq) {
+		/* use multiply and shift instead of expensive divide */
+		txq = ((u64)txq * numqueues) >> 32;
+	} else if (likely(skb_rx_queue_recorded(skb))) {
+		txq = skb_get_rx_queue(skb);
+		while (unlikely(txq >= numqueues))
+			txq -= numqueues;
+	}
+
+	rcu_read_unlock();
+	return txq;
+}
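
The multiply-and-shift above maps a uniformly distributed 32-bit rxhash onto [0, numqueues) without a divide: the hash is treated as a fraction of 2^32 and scaled by the queue count. A minimal standalone sketch of the same mapping, with hypothetical names and outside any kernel context:

```c
#include <stdint.h>
#include <stdio.h>

/* Scale a 32-bit hash into [0, n): equivalent to (hash / 2^32) * n,
 * computed with one multiply and one shift instead of a divide.
 */
static uint16_t hash_to_queue(uint32_t hash, uint32_t n)
{
	return (uint16_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
	const uint32_t hashes[] = { 0x00000000u, 0x3fffffffu,
				    0x80000000u, 0xffffffffu };
	unsigned int i;

	for (i = 0; i < 4; i++)
		printf("hash 0x%08x -> queue %u of 4\n",
		       (unsigned int)hashes[i],
		       (unsigned int)hash_to_queue(hashes[i], 4));
	return 0;
}
```
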
+
+static void tun_set_real_num_queues(struct tun_struct *tun)
+{
+	netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
+	netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
+}
+
+static void __tun_detach(struct tun_file *tfile, bool clean)
+{
+	struct tun_file *ntfile;
+	struct tun_struct *tun;
+	struct net_device *dev;
+
+	tun = rcu_dereference_protected(tfile->tun,
+					lockdep_rtnl_is_held());
+	if (tun) {
+		u16 index = tfile->queue_index;
+		BUG_ON(index >= tun->numqueues);
+		dev = tun->dev;
+
+		rcu_assign_pointer(tun->tfiles[index],
+				   tun->tfiles[tun->numqueues - 1]);
+		rcu_assign_pointer(tfile->tun, NULL);
+		ntfile = rcu_dereference_protected(tun->tfiles[index],
+						   lockdep_rtnl_is_held());
+		ntfile->queue_index = index;
+
+		--tun->numqueues;
+		sock_put(&tfile->sk);
+
+		synchronize_net();
+		/* Drop read queue */
+		skb_queue_purge(&tfile->sk.sk_receive_queue);
+		tun_set_real_num_queues(tun);
+
+		if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST))
+			if (dev->reg_state == NETREG_REGISTERED)
+				unregister_netdevice(dev);
+	}
+
+	if (clean) {
+		BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
+				 &tfile->socket.flags));
+		sk_release_kernel(&tfile->sk);
+	}
+}
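
__tun_detach() keeps tun->tfiles[] densely packed by moving the last queue into the vacated slot and fixing up its queue_index, rather than shifting the rest of the array. A self-contained sketch of that O(1) removal idiom, with hypothetical names and none of the RCU or locking details above:

```c
#include <stddef.h>
#include <stdio.h>

struct queue {
	size_t index;	/* plays the role of tfile->queue_index */
};

struct queue_table {
	struct queue *slots[1024];
	size_t count;
};

/* Remove slots[index] in O(1): move the last entry into the hole, update
 * its recorded index, and shrink the array. Ordering is not preserved,
 * which is fine because every entry carries its own index.
 */
static struct queue *table_remove(struct queue_table *t, size_t index)
{
	struct queue *victim;

	if (index >= t->count)
		return NULL;

	victim = t->slots[index];
	t->slots[index] = t->slots[t->count - 1];
	t->slots[index]->index = index;
	t->count--;
	return victim;
}

int main(void)
{
	struct queue q[4] = { {0}, {1}, {2}, {3} };
	struct queue_table t = { { &q[0], &q[1], &q[2], &q[3] }, 4 };

	table_remove(&t, 1);	/* the former last entry takes slot 1 */
	printf("count=%zu, slot 1 now has index %zu\n",
	       t.count, t.slots[1]->index);
	return 0;
}
```
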
+
+static void tun_detach(struct tun_file *tfile, bool clean)
+{
+	rtnl_lock();
+	__tun_detach(tfile, clean);
+	rtnl_unlock();
+}
+
+static void tun_detach_all(struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile;
+	int i, n = tun->numqueues;
+
+	for (i = 0; i < n; i++) {
+		tfile = rcu_dereference_protected(tun->tfiles[i],
+						  lockdep_rtnl_is_held());
+		BUG_ON(!tfile);
+		wake_up_all(&tfile->wq.wait);
+		rcu_assign_pointer(tfile->tun, NULL);
+		--tun->numqueues;
+	}
+	BUG_ON(tun->numqueues != 0);
+
+	synchronize_net();
+	for (i = 0; i < n; i++) {
+		tfile = rcu_dereference_protected(tun->tfiles[i],
+						  lockdep_rtnl_is_held());
+		/* Drop read queue */
+		skb_queue_purge(&tfile->sk.sk_receive_queue);
+		sock_put(&tfile->sk);
+	}
+}
+
 static int tun_attach(struct tun_struct *tun, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
 	int err;
 
-	ASSERT_RTNL();
-
-	netif_tx_lock_bh(tun->dev);
-
 	err = -EINVAL;
-	if (tfile->tun)
+	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
 		goto out;
 
 	err = -EBUSY;
-	if (tun->tfile)
+	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
+		goto out;
+
+	err = -E2BIG;
+	if (tun->numqueues == MAX_TAP_QUEUES)
 		goto out;
 
 	err = 0;
 
-	/* Re-attach filter when attaching to a persist device */
+	/* Re-attach the filter to a persistent device */
 	if (tun->filter_attached == true) {
 		err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
 		if (!err)
 			goto out;
 	}
+	tfile->queue_index = tun->numqueues;
 	rcu_assign_pointer(tfile->tun, tun);
-	tfile->socket.sk->sk_sndbuf = tun->sndbuf;
-	rcu_assign_pointer(tun->tfile, tfile);
-	netif_carrier_on(tun->dev);
+	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
 	sock_hold(&tfile->sk);
+	tun->numqueues++;
 
-out:
-	netif_tx_unlock_bh(tun->dev);
-	return err;
-}
+	tun_set_real_num_queues(tun);
 
-static void __tun_detach(struct tun_struct *tun)
-{
-	struct tun_file *tfile = rcu_dereference_protected(tun->tfile,
-							lockdep_rtnl_is_held());
-	/* Detach from net device */
-	netif_carrier_off(tun->dev);
-	rcu_assign_pointer(tun->tfile, NULL);
-	if (tfile) {
-		rcu_assign_pointer(tfile->tun, NULL);
+	if (tun->numqueues == 1)
+		netif_carrier_on(tun->dev);
 
-		synchronize_net();
-		/* Drop read queue */
-		skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
-	}
+	/* The device is allowed to go away first, so there is no need to
+	 * hold an extra refcnt on it.
+	 */
+
+out:
+	return err;
 }
 
 static struct tun_struct *__tun_get(struct tun_file *tfile)
@@ -349,30 +458,20 @@ static const struct ethtool_ops tun_ethtool_ops;
 /* Net device detach from fd. */
 static void tun_net_uninit(struct net_device *dev)
 {
-	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_file *tfile = rcu_dereference_protected(tun->tfile,
-							lockdep_rtnl_is_held());
-
-	/* Inform the methods they need to stop using the dev.
-	 */
-	if (tfile) {
-		wake_up_all(&tfile->wq.wait);
-		__tun_detach(tun);
-		synchronize_net();
-	}
+	tun_detach_all(dev);
 }
 
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
-	netif_start_queue(dev);
+	netif_tx_start_all_queues(dev);
 	return 0;
 }
 
 /* Net device close. */
 static int tun_net_close(struct net_device *dev)
 {
-	netif_stop_queue(dev);
+	netif_tx_stop_all_queues(dev);
 	return 0;
 }
 
@@ -380,16 +479,20 @@ static int tun_net_close(struct net_device *dev)
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+	int txq = skb->queue_mapping;
 	struct tun_file *tfile;
 
 	rcu_read_lock();
-	tfile = rcu_dereference(tun->tfile);
+	tfile = rcu_dereference(tun->tfiles[txq]);
+
 	/* Drop packet if interface is not attached */
-	if (!tfile)
+	if (txq >= tun->numqueues)
 		goto drop;
 
 	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
 
+	BUG_ON(!tfile);
+
 	/* Drop if the filter does not like it.
 	 * This is a noop if the filter is disabled.
 	 * Filter can be enabled only for the TAP devices. */
@@ -400,12 +503,15 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	    sk_filter(tfile->socket.sk, skb))
 		goto drop;
 
+	/* Limit the number of packets queued by dividing the device's TX
+	 * queue length by the number of queues.
+	 */
 	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
-	    >= dev->tx_queue_len) {
+			  >= dev->tx_queue_len / tun->numqueues) {
 		if (!(tun->flags & TUN_ONE_QUEUE)) {
 			/* Normal queueing mode. */
 			/* Packet scheduler handles dropping of further packets. */
-			netif_stop_queue(dev);
+			netif_stop_subqueue(dev, txq);
 
 			/* We won't see all dropped packets individually, so overrun
 			 * error is more appropriate. */
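
The check above splits the device's single tx_queue_len budget evenly across queues and stops only the overloaded subqueue instead of the whole device. A toy illustration of the resulting per-queue cap (the 500 is an assumed tx_queue_len, not taken from this patch):

```c
#include <stdio.h>

int main(void)
{
	const unsigned int tx_queue_len = 500;	/* assumed device setting */
	unsigned int numqueues;

	/* A subqueue is stopped once its socket backlog reaches its share
	 * of the device budget, i.e. tx_queue_len / numqueues.
	 */
	for (numqueues = 1; numqueues <= 8; numqueues *= 2)
		printf("%u queue(s): stop a subqueue at %u packets\n",
		       numqueues, tx_queue_len / numqueues);
	return 0;
}
```
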
@@ -494,6 +600,7 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_start_xmit		= tun_net_xmit,
 	.ndo_change_mtu		= tun_net_change_mtu,
 	.ndo_fix_features	= tun_net_fix_features,
+	.ndo_select_queue	= tun_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tun_poll_controller,
 #endif
@@ -509,6 +616,7 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_set_rx_mode	= tun_net_mclist,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_select_queue	= tun_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tun_poll_controller,
 #endif
@@ -550,7 +658,7 @@ static void tun_net_init(struct net_device *dev)
 /* Character device part */
 
 /* Poll */
-static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
+static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 {
 	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun = __tun_get(tfile);
@@ -995,7 +1103,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			schedule();
 			continue;
 		}
-		netif_wake_queue(tun->dev);
+		netif_wake_subqueue(tun->dev, tfile->queue_index);
 
 		ret = tun_put_user(tun, tfile, skb, iv, len);
 		kfree_skb(skb);
@@ -1156,6 +1264,9 @@ static int tun_flags(struct tun_struct *tun)
 	if (tun->flags & TUN_VNET_HDR)
 		flags |= IFF_VNET_HDR;
 
+	if (tun->flags & TUN_TAP_MQ)
+		flags |= IFF_MULTI_QUEUE;
+
 	return flags;
 }
 
@@ -1247,8 +1358,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		if (*ifr->ifr_name)
 			name = ifr->ifr_name;
 
-		dev = alloc_netdev(sizeof(struct tun_struct), name,
-				   tun_setup);
+		dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
+				       tun_setup,
+				       MAX_TAP_QUEUES, MAX_TAP_QUEUES);
 		if (!dev)
 			return -ENOMEM;
 
@@ -1283,7 +1395,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		err = tun_attach(tun, file);
 		if (err < 0)
-			goto failed;
+			goto err_free_dev;
 	}
 
 	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
@@ -1303,18 +1415,22 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	else
 		tun->flags &= ~TUN_VNET_HDR;
 
+	if (ifr->ifr_flags & IFF_MULTI_QUEUE)
+		tun->flags |= TUN_TAP_MQ;
+	else
+		tun->flags &= ~TUN_TAP_MQ;
+
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
 	 */
 	if (netif_running(tun->dev))
-		netif_wake_queue(tun->dev);
+		netif_tx_wake_all_queues(tun->dev);
 
 	strcpy(ifr->ifr_name, tun->dev->name);
 	return 0;
 
  err_free_dev:
 	free_netdev(dev);
- failed:
 	return err;
 }
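
With TUN_TAP_MQ in place, userspace creates additional queues by opening /dev/net/tun once per queue and issuing TUNSETIFF with the same interface name and IFF_MULTI_QUEUE set each time; each file descriptor then serves one entry of tun->tfiles[]. A minimal userspace sketch of that sequence, assuming IFF_MULTI_QUEUE is exported via linux/if_tun.h and with error handling kept to a bare minimum:

```c
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Open one queue (one fd) of the multiqueue tap device "name". */
static int tap_open_queue(const char *name)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	/* TAP frames without the packet-info header, one queue per fd */
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	int q0 = tap_open_queue("tap0");
	int q1 = tap_open_queue("tap0");	/* second queue, same device */

	/* read()/write() on q0 and q1 now serve separate queues */
	if (q1 >= 0)
		close(q1);
	if (q0 >= 0)
		close(q0);
	return 0;
}
```

Creating the device on the first TUNSETIFF typically requires CAP_NET_ADMIN; subsequent opens attach further queues to the same netdevice.
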
 
@@ -1369,6 +1485,51 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
 	return 0;
 }
 
+static void tun_detach_filter(struct tun_struct *tun, int n)
+{
+	int i;
+	struct tun_file *tfile;
+
+	for (i = 0; i < n; i++) {
+		tfile = rcu_dereference_protected(tun->tfiles[i],
+						  lockdep_rtnl_is_held());
+		sk_detach_filter(tfile->socket.sk);
+	}
+
+	tun->filter_attached = false;
+}
+
+static int tun_attach_filter(struct tun_struct *tun)
+{
+	int i, ret = 0;
+	struct tun_file *tfile;
+
+	for (i = 0; i < tun->numqueues; i++) {
+		tfile = rcu_dereference_protected(tun->tfiles[i],
+						  lockdep_rtnl_is_held());
+		ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+		if (ret) {
+			tun_detach_filter(tun, i);
+			return ret;
+		}
+	}
+
+	tun->filter_attached = true;
+	return ret;
+}
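
tun_attach_filter() makes the filter update all-or-nothing: it attaches to each queue in turn and, on the first failure, detaches from the i queues already done via tun_detach_filter(tun, i). A self-contained sketch of that rollback pattern, with hypothetical item_attach()/item_detach() stand-ins:

```c
#include <stdio.h>

#define NITEMS	4
#define FAIL_AT	2	/* simulate a failure on the third item */

static int attached[NITEMS];

static int item_attach(int i)
{
	if (i == FAIL_AT)
		return -1;
	attached[i] = 1;
	return 0;
}

static void item_detach(int i)
{
	attached[i] = 0;
}

/* Attach to all items or to none: on the first failure, unwind the i
 * items already attached and report the error.
 */
static int attach_all(void)
{
	int i, ret;

	for (i = 0; i < NITEMS; i++) {
		ret = item_attach(i);
		if (ret) {
			while (i--)
				item_detach(i);
			return ret;
		}
	}
	return 0;
}

int main(void)
{
	int i;

	printf("attach_all() -> %d\n", attach_all());
	for (i = 0; i < NITEMS; i++)
		printf("item %d attached: %d\n", i, attached[i]);
	return 0;
}
```
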
+
+static void tun_set_sndbuf(struct tun_struct *tun)
+{
+	struct tun_file *tfile;
+	int i;
+
+	for (i = 0; i < tun->numqueues; i++) {
+		tfile = rcu_dereference_protected(tun->tfiles[i],
+						lockdep_rtnl_is_held());
+		tfile->socket.sk->sk_sndbuf = tun->sndbuf;
+	}
+}
+
 static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg, int ifreq_len)
 {
@@ -1397,6 +1558,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 				(unsigned int __user*)argp);
 	}
 
+	ret = 0;
 	rtnl_lock();
 
 	tun = __tun_get(tfile);
@@ -1537,7 +1699,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			break;
 		}
 
-		tun->sndbuf = tfile->socket.sk->sk_sndbuf = sndbuf;
+		tun->sndbuf = sndbuf;
+		tun_set_sndbuf(tun);
 		break;
 
 	case TUNGETVNETHDRSZ:
@@ -1568,9 +1731,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
 			break;
 
-		ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
-		if (!ret)
-			tun->filter_attached = true;
+		ret = tun_attach_filter(tun);
 		break;
 
 	case TUNDETACHFILTER:
@@ -1578,9 +1739,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = -EINVAL;
 		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
 			break;
-		ret = sk_detach_filter(tfile->socket.sk);
-		if (!ret)
-			tun->filter_attached = false;
+		ret = 0;
+		tun_detach_filter(tun, tun->numqueues);
 		break;
 
 	default:
@@ -1685,37 +1845,9 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 static int tun_chr_close(struct inode *inode, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
-	struct tun_struct *tun;
 	struct net *net = tfile->net;
 
-	rtnl_lock();
-
-	tun = rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held());
-	if (tun) {
-		struct net_device *dev = tun->dev;
-
-		tun_debug(KERN_INFO, tun, "tun_chr_close\n");
-
-		__tun_detach(tun);
-
-		synchronize_net();
-
-		/* If desirable, unregister the netdevice. */
-		if (!(tun->flags & TUN_PERSIST)) {
-			if (dev->reg_state == NETREG_REGISTERED)
-				unregister_netdevice(dev);
-		}
-
-		/* drop the reference that netdevice holds */
-		sock_put(&tfile->sk);
-	}
-
-	rtnl_unlock();
-
-	/* drop the reference that file holds */
-	BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
-			 &tfile->socket.flags));
-	sk_release_kernel(&tfile->sk);
+	tun_detach(tfile, true);
 	put_net(net);
 
 	return 0;