diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 6555d178113221412b1c0f3dbd0ff09c63bb99a6..941e3e2adf41292a6382a98a8a507c641e2c9df1 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -29,15 +29,20 @@ config CSKY
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
 	select HAVE_PERF_EVENTS
-	select HAVE_C_RECORDMCOUNT
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
+	select HAVE_SYSCALL_TRACEPOINTS
 	select MAY_HAVE_SPARSE_IRQ
 	select MODULES_USE_ELF_RELA if MODULES
 	select OF
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 3607a6e8f66cbd7883caf995589d8d51d9245738..6b87f6c22ad68e3879ccffd466e05699ef99c922 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -36,7 +36,7 @@ endif
 
 ifneq ($(CSKYABI),)
 MCPU_STR = $(CPUTYPE)$(FPUEXT)$(VDSPEXT)$(TEEEXT)
-KBUILD_CFLAGS += -mcpu=$(MCPU_STR)
+KBUILD_CFLAGS += -mcpu=$(CPUTYPE) -Wa,-mcpu=$(MCPU_STR)
 KBUILD_CFLAGS += -DCSKYCPU_DEF_NAME=\"$(MCPU_STR)\"
 KBUILD_CFLAGS += -msoft-float -mdiv
 KBUILD_CFLAGS += -fno-tree-vectorize
diff --git a/arch/csky/abiv1/inc/abi/ckmmu.h b/arch/csky/abiv1/inc/abi/ckmmu.h
index 3a002017bebe09b002bdd14a2e1d45cf94fad3f2..81f37715c0d26d3d9ffa73e7f4ba4f970f37b601 100644
--- a/arch/csky/abiv1/inc/abi/ckmmu.h
+++ b/arch/csky/abiv1/inc/abi/ckmmu.h
@@ -40,6 +40,26 @@ static inline void write_mmu_entryhi(int value)
 	cpwcr("cpcr4", value);
 }
 
+static inline unsigned long read_mmu_msa0(void)
+{
+	return cprcr("cpcr30");
+}
+
+static inline void write_mmu_msa0(unsigned long value)
+{
+	cpwcr("cpcr30", value);
+}
+
+static inline unsigned long read_mmu_msa1(void)
+{
+	return cprcr("cpcr31");
+}
+
+static inline void write_mmu_msa1(unsigned long value)
+{
+	cpwcr("cpcr31", value);
+}
+
 /*
  * TLB operations.
  */
@@ -65,11 +85,11 @@ static inline void tlb_invalid_indexed(void)
 
 static inline void setup_pgd(unsigned long pgd, bool kernel)
 {
-	cpwcr("cpcr29", pgd);
+	cpwcr("cpcr29", pgd | BIT(0));
 }
 
 static inline unsigned long get_pgd(void)
 {
-	return cprcr("cpcr29");
+	return cprcr("cpcr29") & ~BIT(0);
 }
 #endif /* __ASM_CSKY_CKMMUV1_H */
diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h
index 3f3faab3d747bdf05d35254b174c6f3eebe229c1..7ab78bd0f3b13f7e52be13445314ef956566fa73 100644
--- a/arch/csky/abiv1/inc/abi/entry.h
+++ b/arch/csky/abiv1/inc/abi/entry.h
@@ -16,9 +16,6 @@
 #define LSAVE_A4	40
 #define LSAVE_A5	44
 
-#define EPC_INCREASE	2
-#define EPC_KEEP	0
-
 .macro USPTOKSP
 	mtcr	sp, ss1
 	mfcr	sp, ss0
@@ -29,10 +26,6 @@
 	mfcr	sp, ss1
 .endm
 
-.macro INCTRAP	rx
-	addi	\rx, EPC_INCREASE
-.endm
-
 .macro	SAVE_ALL epc_inc
 	mtcr    r13, ss2
 	mfcr    r13, epsr
@@ -150,11 +143,35 @@
 	cpwcr   \rx, cpcr8
 .endm
 
-.macro SETUP_MMU rx
-	lrw	\rx, PHYS_OFFSET | 0xe
-	cpwcr	\rx, cpcr30
-	lrw	\rx, (PHYS_OFFSET + 0x20000000) | 0xe
-	cpwcr	\rx, cpcr31
+.macro SETUP_MMU
+	/* Init psr and enable ee */
+	lrw	r6, DEFAULT_PSR_VALUE
+	mtcr    r6, psr
+	psrset  ee
+
+	/* Select MMU as co-processor */
+	cpseti	cp15
+
+	/*
+	 * cpcr30 format:
+	 * 31 - 29 | 28 - 4 | 3 | 2 | 1 | 0
+	 *   BA     Reserved  C   D   V
+	 */
+	cprcr	r6, cpcr30
+	lsri	r6, 28
+	lsli	r6, 28
+	addi	r6, 0xe
+	cpwcr	r6, cpcr30
+
+	lsri	r6, 28
+	addi	r6, 2
+	lsli	r6, 28
+	addi	r6, 0xe
+	cpwcr	r6, cpcr31
 .endm
 
+.macro ANDI_R3 rx, imm
+	lsri	\rx, 3
+	andi	\rx, (\imm >> 3)
+.endm
 #endif /* __ASM_CSKY_ENTRY_H */
diff --git a/arch/csky/abiv1/inc/abi/regdef.h b/arch/csky/abiv1/inc/abi/regdef.h
index 876689291b716802f3a38f29af12ac69ad807c66..104707fbdcc1ceaa389659714d3e376038a5351c 100644
--- a/arch/csky/abiv1/inc/abi/regdef.h
+++ b/arch/csky/abiv1/inc/abi/regdef.h
@@ -5,9 +5,8 @@
 #define __ASM_CSKY_REGDEF_H
 
 #define syscallid	r1
-#define r11_sig		r11
-
 #define regs_syscallid(regs) regs->regs[9]
+#define regs_fp(regs) regs->regs[2]
 
 /*
  * PSR format:
@@ -23,4 +22,6 @@
 
 #define SYSTRACE_SAVENUM	2
 
+#define TRAP0_SIZE		2
+
 #endif /* __ASM_CSKY_REGDEF_H */
diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c
index d22c95ffc74dca40d8bab07a918b77752dda1b34..5bb887b275e1213e4e6e9e77b894a7ef490d06c7 100644
--- a/arch/csky/abiv2/cacheflush.c
+++ b/arch/csky/abiv2/cacheflush.c
@@ -34,10 +34,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 {
 	unsigned long addr, pfn;
 	struct page *page;
-	void *va;
-
-	if (!(vma->vm_flags & VM_EXEC))
-		return;
 
 	pfn = pte_pfn(*pte);
 	if (unlikely(!pfn_valid(pfn)))
@@ -47,14 +43,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	if (page == ZERO_PAGE(0))
 		return;
 
-	va = page_address(page);
-	addr = (unsigned long) va;
-
-	if (va == NULL && PageHighMem(page))
-		addr = (unsigned long) kmap_atomic(page);
+	addr = (unsigned long) kmap_atomic(page);
 
 	cache_wbinv_range(addr, addr + PAGE_SIZE);
 
-	if (va == NULL && PageHighMem(page))
-		kunmap_atomic((void *) addr);
+	kunmap_atomic((void *) addr);
 }
diff --git a/arch/csky/abiv2/inc/abi/ckmmu.h b/arch/csky/abiv2/inc/abi/ckmmu.h
index 97230ad9427c3021dbde527b824bec53a9b50cff..e4480e6bc3b3d3bf2d6902fcfcb4e0c7d04ca1b4 100644
--- a/arch/csky/abiv2/inc/abi/ckmmu.h
+++ b/arch/csky/abiv2/inc/abi/ckmmu.h
@@ -42,6 +42,26 @@ static inline void write_mmu_entryhi(int value)
 	mtcr("cr<4, 15>", value);
 }
 
+static inline unsigned long read_mmu_msa0(void)
+{
+	return mfcr("cr<30, 15>");
+}
+
+static inline void write_mmu_msa0(unsigned long value)
+{
+	mtcr("cr<30, 15>", value);
+}
+
+static inline unsigned long read_mmu_msa1(void)
+{
+	return mfcr("cr<31, 15>");
+}
+
+static inline void write_mmu_msa1(unsigned long value)
+{
+	mtcr("cr<31, 15>", value);
+}
+
 /*
  * TLB operations.
  */
@@ -70,18 +90,16 @@ static inline void tlb_invalid_indexed(void)
 	mtcr("cr<8, 15>", 0x02000000);
 }
 
-/* setup hardrefil pgd */
-static inline unsigned long get_pgd(void)
-{
-	return mfcr("cr<29, 15>");
-}
-
 static inline void setup_pgd(unsigned long pgd, bool kernel)
 {
 	if (kernel)
-		mtcr("cr<28, 15>", pgd);
+		mtcr("cr<28, 15>", pgd | BIT(0));
 	else
-		mtcr("cr<29, 15>", pgd);
+		mtcr("cr<29, 15>", pgd | BIT(0));
 }
 
+static inline unsigned long get_pgd(void)
+{
+	return mfcr("cr<29, 15>") & ~BIT(0);
+}
 #endif /* __ASM_CSKY_CKMMUV2_H */
diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h
index edc5cc04c4de59621121c4031ba68d9ebcb92af9..9897a16b45e5dcc75b4e6638a8ecd1153be661e9 100644
--- a/arch/csky/abiv2/inc/abi/entry.h
+++ b/arch/csky/abiv2/inc/abi/entry.h
@@ -14,18 +14,11 @@
 #define LSAVE_A2	32
 #define LSAVE_A3	36
 
-#define EPC_INCREASE	4
-#define EPC_KEEP	0
-
 #define KSPTOUSP
 #define USPTOKSP
 
 #define usp cr<14, 1>
 
-.macro INCTRAP	rx
-	addi	\rx, EPC_INCREASE
-.endm
-
 .macro SAVE_ALL epc_inc
 	subi    sp, 152
 	stw	tls, (sp, 0)
@@ -169,10 +162,80 @@
 	mtcr	\rx, cr<8, 15>
 .endm
 
-.macro SETUP_MMU rx
-	lrw	\rx, PHYS_OFFSET | 0xe
-	mtcr	\rx, cr<30, 15>
-	lrw	\rx, (PHYS_OFFSET + 0x20000000) | 0xe
-	mtcr	\rx, cr<31, 15>
+.macro SETUP_MMU
+	/* Init psr and enable ee */
+	lrw	r6, DEFAULT_PSR_VALUE
+	mtcr    r6, psr
+	psrset  ee
+
+	/* Invalidate I/D-cache, BTB and BHT */
+	movi	r6, 7
+	lsli	r6, 16
+	addi	r6, (1<<4) | 3
+	mtcr	r6, cr17
+
+	/* Invalidate all TLB entries */
+	bgeni   r6, 26
+	mtcr	r6, cr<8, 15> /* Set MCIR */
+
+	/* Check MMU on/off */
+	mfcr	r6, cr18
+	btsti	r6, 0
+	bt	1f
+
+	/* MMU off: set up a mapping TLB entry */
+	movi	r6, 0
+	mtcr	r6, cr<6, 15> /* Set MPR with 4K page size */
+
+	grs	r6, 1f /* Get current pa by PC */
+	bmaski  r7, (PAGE_SHIFT + 1) /* r7 = 0x1fff */
+	andn    r6, r7
+	mtcr	r6, cr<4, 15> /* Set MEH */
+
+	mov	r8, r6
+	movi    r7, 0x00000006
+	or      r8, r7
+	mtcr	r8, cr<2, 15> /* Set MEL0 */
+	movi    r7, 0x00001006
+	or      r8, r7
+	mtcr	r8, cr<3, 15> /* Set MEL1 */
+
+	bgeni   r8, 28
+	mtcr	r8, cr<8, 15> /* Set MCIR to write TLB */
+
+	br	2f
+1:
+	/*
+	 * MMU on: use the original MSA value from the bootloader
+	 *
+	 * cr<30/31, 15> MSA register format:
+	 * 31 - 29 | 28 - 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
+	 *   BA     Reserved  SH  WA  B   SO SEC  C   D   V
+	 */
+	mfcr	r6, cr<30, 15> /* Get MSA0 */
+2:
+	lsri	r6, 28
+	lsli	r6, 28
+	addi	r6, 0x1ce
+	mtcr	r6, cr<30, 15> /* Set MSA0 */
+
+	lsri	r6, 28
+	addi	r6, 2
+	lsli	r6, 28
+	addi	r6, 0x1ce
+	mtcr	r6, cr<31, 15> /* Set MSA1 */
+
+	/* enable MMU */
+	mfcr    r6, cr18
+	bseti	r6, 0
+	mtcr    r6, cr18
+
+	jmpi	3f /* jump to va */
+3:
+.endm
+
+.macro ANDI_R3 rx, imm
+	lsri	\rx, 3
+	andi	\rx, (\imm >> 3)
 .endm
 #endif /* __ASM_CSKY_ENTRY_H */
diff --git a/arch/csky/abiv2/inc/abi/regdef.h b/arch/csky/abiv2/inc/abi/regdef.h
index c72abb781bdc4c1e43a20c2611e13df4f6b9ca53..d7328bbc1ce7ad7c44a6d4934a24f619d744a422 100644
--- a/arch/csky/abiv2/inc/abi/regdef.h
+++ b/arch/csky/abiv2/inc/abi/regdef.h
@@ -5,9 +5,8 @@
 #define __ASM_CSKY_REGDEF_H
 
 #define syscallid	r7
-#define r11_sig		r11
-
 #define regs_syscallid(regs) regs->regs[3]
+#define regs_fp(regs) regs->regs[4]
 
 /*
  * PSR format:
@@ -23,4 +22,6 @@
 
 #define SYSTRACE_SAVENUM	5
 
+#define TRAP0_SIZE		4
+
 #endif /* __ASM_CSKY_REGDEF_H */
diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S
index c633379956f5c3671333ab4f4b4d080becfa6bde..326402e65f9e0bc610215eb63a78fd4dbaa54e55 100644
--- a/arch/csky/abiv2/mcount.S
+++ b/arch/csky/abiv2/mcount.S
@@ -61,10 +61,17 @@
 	addi	sp, 16
 .endm
 
+.macro nop32_stub
+	nop32
+	nop32
+	nop32
+.endm
+
 ENTRY(ftrace_stub)
 	jmp	lr
 END(ftrace_stub)
 
+#ifndef CONFIG_DYNAMIC_FTRACE
 ENTRY(_mcount)
 	mcount_enter
 
@@ -76,7 +83,7 @@ ENTRY(_mcount)
 	bf	skip_ftrace
 
 	mov	a0, lr
-	subi	a0, MCOUNT_INSN_SIZE
+	subi	a0, 4
 	ldw	a1, (sp, 24)
 
 	jsr	r26
@@ -101,13 +108,41 @@ skip_ftrace:
 	mcount_exit
 #endif
 END(_mcount)
+#else /* CONFIG_DYNAMIC_FTRACE */
+ENTRY(_mcount)
+	mov	t1, lr
+	ldw	lr, (sp, 0)
+	addi	sp, 4
+	jmp	t1
+ENDPROC(_mcount)
+
+ENTRY(ftrace_caller)
+	mcount_enter
+
+	ldw	a0, (sp, 16)
+	subi	a0, 4
+	ldw	a1, (sp, 24)
+
+	nop
+GLOBAL(ftrace_call)
+	nop32_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	nop
+GLOBAL(ftrace_graph_call)
+	nop32_stub
+#endif
+
+	mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 ENTRY(ftrace_graph_caller)
 	mov	a0, sp
 	addi	a0, 24
 	ldw	a1, (sp, 16)
-	subi	a1, MCOUNT_INSN_SIZE
+	subi	a1, 4
 	mov	a2, r8
 	lrw	r26, prepare_ftrace_return
 	jsr	r26
diff --git a/arch/csky/abiv2/memmove.S b/arch/csky/abiv2/memmove.S
index b0c42ecf18896525238d5f0062f9614c4ce2ccb5..5721e73ad3d8f4185059952a903337be80698d36 100644
--- a/arch/csky/abiv2/memmove.S
+++ b/arch/csky/abiv2/memmove.S
@@ -35,11 +35,7 @@ ENTRY(memmove)
 .L_len_larger_16bytes:
 	subi	r1, 16
 	subi	r0, 16
-#if defined(__CSKY_VDSPV2__)
-	vldx.8	vr0, (r1), r19
-	PRE_BNEZAD (r18)
-	vstx.8	vr0, (r0), r19
-#elif defined(__CK860__)
+#if defined(__CK860__)
 	ldw	r3, (r1, 12)
 	stw	r3, (r0, 12)
 	ldw	r3, (r1, 8)
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 95f4e550db8a162e896410be3d3ab411857687c6..a9b63efef4162f598255cd796eeebbd3bcf6b6d3 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += fb.h
-generic-y += ftrace.h
 generic-y += futex.h
 generic-y += gpio.h
 generic-y += hardirq.h
diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h
index 7547c45312a8e60b144a8e95db73dcee03116791..ba35d93ecda2b7f8a58171c0ba0030fbe11d2ae1 100644
--- a/arch/csky/include/asm/ftrace.h
+++ b/arch/csky/include/asm/ftrace.h
@@ -4,10 +4,26 @@
 #ifndef __ASM_CSKY_FTRACE_H
 #define __ASM_CSKY_FTRACE_H
 
-#define MCOUNT_INSN_SIZE 4
+#define MCOUNT_INSN_SIZE	14
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
+#define MCOUNT_ADDR	((unsigned long)_mcount)
+
+#ifndef __ASSEMBLY__
+
+extern void _mcount(unsigned long);
+
+extern void ftrace_graph_call(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* !__ASSEMBLY__ */
 #endif /* __ASM_CSKY_FTRACE_H */
diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h
index b2905c0485a72a177c00c69fceea5be092624cdb..734db3a122e1e72012f206f2de764b010d5b727a 100644
--- a/arch/csky/include/asm/mmu_context.h
+++ b/arch/csky/include/asm/mmu_context.h
@@ -14,23 +14,10 @@
 #include <linux/sched.h>
 #include <abi/ckmmu.h>
 
-static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel)
-{
-	pgd -= PAGE_OFFSET;
-	pgd += PHYS_OFFSET;
-	pgd |= 1;
-	setup_pgd(pgd, kernel);
-}
-
 #define TLBMISS_HANDLER_SETUP_PGD(pgd) \
-	tlbmiss_handler_setup_pgd((unsigned long)pgd, 0)
+	setup_pgd(__pa(pgd), false)
 #define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \
-	tlbmiss_handler_setup_pgd((unsigned long)pgd, 1)
-
-static inline unsigned long tlb_get_pgd(void)
-{
-	return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET;
-}
+	setup_pgd(__pa(pgd), true)
 
 #define cpu_context(cpu, mm)	((mm)->context.asid[cpu])
 #define cpu_asid(cpu, mm)	(cpu_context((cpu), (mm)) & ASID_MASK)
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index 73cf2bd66a138c8b576571bbc1cc6900fa7c9f06..9738eacefdc7e4b940b7ca0fe4a9f3f2d146002d 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -8,7 +8,7 @@
 #include <linux/const.h>
 
 /*
- * PAGE_SHIFT determines the page size
+ * PAGE_SHIFT determines the page size: 4KB
  */
 #define PAGE_SHIFT	12
 #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
@@ -17,12 +17,18 @@
 #define THREAD_MASK	(~(THREAD_SIZE - 1))
 #define THREAD_SHIFT	(PAGE_SHIFT + 1)
 
+
 /*
- * NOTE: virtual isn't really correct, actually it should be the offset into the
- * memory node, but we have no highmem, so that works for now.
- * TODO: implement (fast) pfn<->pgdat_idx conversion functions, this makes lots
- * of the shifts unnecessary.
+ * For C-SKY "User-space:Kernel-space" is "2GB:2GB" fixed by hardware and there
+ * are two segment registers (MSA0 + MSA1) that each map a 512MB physical
+ * address region. We use them to map the kernel's 1GB direct-map area; for
+ * more than 1GB of memory we use highmem.
  */
+#define PAGE_OFFSET	0x80000000
+#define SSEG_SIZE	0x20000000
+#define LOWMEM_LIMIT	(SSEG_SIZE * 2)
+
+#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1))
 
 #ifndef __ASSEMBLY__
 
@@ -50,9 +56,6 @@ struct page;
 
 struct vm_area_struct;
 
-/*
- * These are used to make use of C type-checking..
- */
 typedef struct { unsigned long pte_low; } pte_t;
 #define pte_val(x)	((x).pte_low)
 
@@ -69,18 +72,13 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) })
 #define __pgprot(x)	((pgprot_t) { (x) })
 
-#endif /* !__ASSEMBLY__ */
+extern unsigned long va_pa_offset;
 
-#define PHYS_OFFSET		(CONFIG_RAM_BASE & ~(LOWMEM_LIMIT - 1))
-#define PHYS_OFFSET_OFFSET	(CONFIG_RAM_BASE & (LOWMEM_LIMIT - 1))
-#define ARCH_PFN_OFFSET		PFN_DOWN(CONFIG_RAM_BASE)
+#define ARCH_PFN_OFFSET	PFN_DOWN(va_pa_offset + PHYS_OFFSET_OFFSET)
 
-#define	PAGE_OFFSET	0x80000000
-#define LOWMEM_LIMIT	0x40000000
+#define __pa(x)		 ((unsigned long)(x) - PAGE_OFFSET + va_pa_offset)
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - va_pa_offset))
 
-#define __pa(x)		((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET)
-#define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - \
-				  PHYS_OFFSET))
 #define __pa_symbol(x)	__pa(RELOC_HIDE((unsigned long)(x), 0))
 
 #define MAP_NR(x)	PFN_DOWN((unsigned long)(x) - PAGE_OFFSET - \
@@ -90,15 +88,10 @@ typedef struct page *pgtable_t;
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-/*
- * main RAM and kernel working space are coincident at 0x80000000, but to make
- * life more interesting, there's also an uncached virtual shadow at 0xb0000000
- * - these mappings are fixed in the MMU
- */
-
 #define pfn_to_kaddr(x)	__va(PFN_PHYS(x))
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
+#endif /* !__ASSEMBLY__ */
 #endif /* __ASM_CSKY_PAGE_H */
diff --git a/arch/csky/include/asm/perf_event.h b/arch/csky/include/asm/perf_event.h
index ea819312229404bb9d6af29a1bd03201904596d2..572093e110017156b7052cd2a5e6c80615f2b79b 100644
--- a/arch/csky/include/asm/perf_event.h
+++ b/arch/csky/include/asm/perf_event.h
@@ -4,4 +4,12 @@
 #ifndef __ASM_CSKY_PERF_EVENT_H
 #define __ASM_CSKY_PERF_EVENT_H
 
+#include <abi/regdef.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+	(regs)->pc = (__ip); \
+	regs_fp(regs) = (unsigned long) __builtin_frame_address(0); \
+	asm volatile("mov %0, sp\n":"=r"((regs)->usp)); \
+}
+
 #endif /* __ASM_PERF_EVENT_ELF_H */
diff --git a/arch/csky/include/asm/ptrace.h b/arch/csky/include/asm/ptrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..d0aba7b32417a021bff332992d02d8b587f3fa41
--- /dev/null
+++ b/arch/csky/include/asm/ptrace.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef __ASM_CSKY_PTRACE_H
+#define __ASM_CSKY_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+#include <asm/traps.h>
+#include <linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#define PS_S	0x80000000 /* Supervisor Mode */
+
+#define arch_has_single_step() (1)
+#define current_pt_regs() \
+({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; })
+
+#define user_stack_pointer(regs) ((regs)->usp)
+
+#define user_mode(regs) (!((regs)->sr & PS_S))
+#define instruction_pointer(regs) ((regs)->pc)
+#define profile_pc(regs) instruction_pointer(regs)
+
+static inline bool in_syscall(struct pt_regs const *regs)
+{
+	return ((regs->sr >> 16) & 0xff) == VEC_TRAP0;
+}
+
+static inline void forget_syscall(struct pt_regs *regs)
+{
+	regs->sr &= ~(0xff << 16);
+}
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+	return regs->a0;
+}
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __ASM_CSKY_PTRACE_H */
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index bc0d8717d28b3b0a89615fe99e229eb7f2e5fe07..f624fa3bbc22fef3e3d0fe4ecbebe8b4d8c5c6fb 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -8,12 +8,21 @@
 #include <abi/regdef.h>
 #include <uapi/linux/audit.h>
 
+extern void *sys_call_table[];
+
 static inline int
 syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 {
 	return regs_syscallid(regs);
 }
 
+static inline void
+syscall_set_nr(struct task_struct *task, struct pt_regs *regs,
+	       int sysno)
+{
+	regs_syscallid(regs) = sysno;
+}
+
 static inline void
 syscall_rollback(struct task_struct *task, struct pt_regs *regs)
 {
diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h
index 0e9d035d712b675a96353408a426bd118fb14528..0b546a55a8bfcdbb477345ee21ef8e826ea15d7f 100644
--- a/arch/csky/include/asm/thread_info.h
+++ b/arch/csky/include/asm/thread_info.h
@@ -51,29 +51,26 @@ static inline struct thread_info *current_thread_info(void)
 
 #endif /* !__ASSEMBLY__ */
 
-/* entry.S relies on these definitions!
- * bits 0-5 are tested at every exception exit
- */
 #define TIF_SIGPENDING		0	/* signal pending */
 #define TIF_NOTIFY_RESUME	1       /* callback before returning to user */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SYSCALL_TRACE	5	/* syscall trace active */
-#define TIF_DELAYED_TRACE	14	/* single step a syscall */
+#define TIF_SYSCALL_TRACE	3	/* syscall trace active */
+#define TIF_SYSCALL_TRACEPOINT	4       /* syscall tracepoint instrumentation */
+#define TIF_SYSCALL_AUDIT	5	/* syscall auditing */
 #define TIF_POLLING_NRFLAG	16	/* poll_idle() is TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18      /* is terminating due to OOM killer */
-#define TIF_FREEZE		19	/* thread is freezing for suspend */
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
 #define TIF_SECCOMP		21	/* secure computing */
 
-#define _TIF_SIGPENDING         (1 << TIF_SIGPENDING)
-#define _TIF_NOTIFY_RESUME      (1 << TIF_NOTIFY_RESUME)
-#define _TIF_NEED_RESCHED       (1 << TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACE      (1 << TIF_SYSCALL_TRACE)
-#define _TIF_DELAYED_TRACE	(1 << TIF_DELAYED_TRACE)
-#define _TIF_POLLING_NRFLAG     (1 << TIF_POLLING_NRFLAG)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
-#define _TIF_FREEZE             (1 << TIF_FREEZE)
-#define _TIF_RESTORE_SIGMASK    (1 << TIF_RESTORE_SIGMASK)
-#define _TIF_SECCOMP            (1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 
 #endif	/* _ASM_CSKY_THREAD_INFO_H */
diff --git a/arch/csky/include/asm/unistd.h b/arch/csky/include/asm/unistd.h
index 284487477a617c7890c7dd752c01989fbd2e37db..da7a182956157860626d6426a8f4c8497e33b1be 100644
--- a/arch/csky/include/asm/unistd.h
+++ b/arch/csky/include/asm/unistd.h
@@ -2,3 +2,5 @@
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..ee323d818592c6ce559603f0da15d715b236880c
--- /dev/null
+++ b/arch/csky/include/uapi/asm/perf_regs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef _ASM_CSKY_PERF_REGS_H
+#define _ASM_CSKY_PERF_REGS_H
+
+/* Index of struct pt_regs */
+enum perf_event_csky_regs {
+	PERF_REG_CSKY_TLS,
+	PERF_REG_CSKY_LR,
+	PERF_REG_CSKY_PC,
+	PERF_REG_CSKY_SR,
+	PERF_REG_CSKY_SP,
+	PERF_REG_CSKY_ORIG_A0,
+	PERF_REG_CSKY_A0,
+	PERF_REG_CSKY_A1,
+	PERF_REG_CSKY_A2,
+	PERF_REG_CSKY_A3,
+	PERF_REG_CSKY_REGS0,
+	PERF_REG_CSKY_REGS1,
+	PERF_REG_CSKY_REGS2,
+	PERF_REG_CSKY_REGS3,
+	PERF_REG_CSKY_REGS4,
+	PERF_REG_CSKY_REGS5,
+	PERF_REG_CSKY_REGS6,
+	PERF_REG_CSKY_REGS7,
+	PERF_REG_CSKY_REGS8,
+	PERF_REG_CSKY_REGS9,
+#if defined(__CSKYABIV2__)
+	PERF_REG_CSKY_EXREGS0,
+	PERF_REG_CSKY_EXREGS1,
+	PERF_REG_CSKY_EXREGS2,
+	PERF_REG_CSKY_EXREGS3,
+	PERF_REG_CSKY_EXREGS4,
+	PERF_REG_CSKY_EXREGS5,
+	PERF_REG_CSKY_EXREGS6,
+	PERF_REG_CSKY_EXREGS7,
+	PERF_REG_CSKY_EXREGS8,
+	PERF_REG_CSKY_EXREGS9,
+	PERF_REG_CSKY_EXREGS10,
+	PERF_REG_CSKY_EXREGS11,
+	PERF_REG_CSKY_EXREGS12,
+	PERF_REG_CSKY_EXREGS13,
+	PERF_REG_CSKY_EXREGS14,
+	PERF_REG_CSKY_HI,
+	PERF_REG_CSKY_LO,
+	PERF_REG_CSKY_DCSR,
+#endif
+	PERF_REG_CSKY_MAX,
+};
+#endif /* _ASM_CSKY_PERF_REGS_H */
diff --git a/arch/csky/include/uapi/asm/ptrace.h b/arch/csky/include/uapi/asm/ptrace.h
index a4eaa8ddf0b1d070e58548a4a0c40d2c931942aa..4e248d5b86efa42327bb2aa58a287ad3eab564a4 100644
--- a/arch/csky/include/uapi/asm/ptrace.h
+++ b/arch/csky/include/uapi/asm/ptrace.h
@@ -48,20 +48,5 @@ struct user_fp {
 	unsigned long	reserved;
 };
 
-#ifdef __KERNEL__
-
-#define PS_S	0x80000000 /* Supervisor Mode */
-
-#define arch_has_single_step() (1)
-#define current_pt_regs() \
-({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; })
-
-#define user_stack_pointer(regs) ((regs)->usp)
-
-#define user_mode(regs) (!((regs)->sr & PS_S))
-#define instruction_pointer(regs) ((regs)->pc)
-#define profile_pc(regs) instruction_pointer(regs)
-
-#endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _CSKY_PTRACE_H */
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 484e6d3a364719b521c6586a4a06df9d57d12b82..1624b04bffb52946a1fa88f13977dc587f1ff376 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -9,6 +9,8 @@ obj-$(CONFIG_SMP)			+= smp.o
 obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o
 obj-$(CONFIG_STACKTRACE)		+= stacktrace.o
 obj-$(CONFIG_CSKY_PMU_V1)		+= perf_event.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_callchain.o
+obj-$(CONFIG_HAVE_PERF_REGS)            += perf_regs.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S
index d2357c8f85bdfd1d9bb2dfdb7e62642c8ba358c4..5b84f11485aeb8c6794699c54250cf8c54f5a91d 100644
--- a/arch/csky/kernel/atomic.S
+++ b/arch/csky/kernel/atomic.S
@@ -12,11 +12,10 @@
  * If *ptr != oldval && return 1,
  * else *ptr = newval return 0.
  */
-#ifdef CONFIG_CPU_HAS_LDSTEX
 ENTRY(csky_cmpxchg)
 	USPTOKSP
 	mfcr	a3, epc
-	INCTRAP	a3
+	addi	a3, TRAP0_SIZE
 
 	subi    sp, 8
 	stw     a3, (sp, 0)
@@ -24,6 +23,7 @@ ENTRY(csky_cmpxchg)
 	stw     a3, (sp, 4)
 
 	psrset	ee
+#ifdef CONFIG_CPU_HAS_LDSTEX
 1:
 	ldex	a3, (a2)
 	cmpne	a0, a3
@@ -33,27 +33,7 @@ ENTRY(csky_cmpxchg)
 	bez	a3, 1b
 2:
 	sync.is
-	mvc	a0
-	ldw	a3, (sp, 0)
-	mtcr	a3, epc
-	ldw     a3, (sp, 4)
-	mtcr	a3, epsr
-	addi	sp, 8
-	KSPTOUSP
-	rte
-END(csky_cmpxchg)
 #else
-ENTRY(csky_cmpxchg)
-	USPTOKSP
-	mfcr	a3, epc
-	INCTRAP	a3
-
-	subi    sp, 8
-	stw     a3, (sp, 0)
-	mfcr    a3, epsr
-	stw     a3, (sp, 4)
-
-	psrset	ee
 1:
 	ldw	a3, (a2)
 	cmpne	a0, a3
@@ -61,6 +41,7 @@ ENTRY(csky_cmpxchg)
 2:
 	stw	a1, (a2)
 3:
+#endif
 	mvc	a0
 	ldw	a3, (sp, 0)
 	mtcr	a3, epc
@@ -71,6 +52,7 @@ ENTRY(csky_cmpxchg)
 	rte
 END(csky_cmpxchg)
 
+#ifndef CONFIG_CPU_HAS_LDSTEX
 /*
  * Called from tlbmodified exception
  */
diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index 5137ed9062bdc1448e0ccb491ef67f3abc2d734d..a7e84ccccbd8ee47ff0cdba63b4f5a6f4c647fed 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -40,7 +40,8 @@ ENTRY(csky_\name)
 	WR_MCIR	a2
 #endif
 	bclri   r6, 0
-	lrw	a2, PHYS_OFFSET
+	lrw	a2, va_pa_offset
+	ld.w	a2, (a2, 0)
 	subu	r6, a2
 	bseti	r6, 31
 
@@ -50,7 +51,8 @@ ENTRY(csky_\name)
 	addu    r6, a2
 	ldw     r6, (r6)
 
-	lrw	a2, PHYS_OFFSET
+	lrw	a2, va_pa_offset
+	ld.w	a2, (a2, 0)
 	subu	r6, a2
 	bseti	r6, 31
 
@@ -91,7 +93,7 @@ ENTRY(csky_\name)
 	mfcr    a3, ss2
 	mfcr    r6, ss3
 	mfcr    a2, ss4
-	SAVE_ALL EPC_KEEP
+	SAVE_ALL 0
 .endm
 .macro tlbop_end is_write
 	RD_MEH	a2
@@ -99,7 +101,6 @@ ENTRY(csky_\name)
 	mov     a0, sp
 	movi    a1, \is_write
 	jbsr    do_page_fault
-	movi    r11_sig, 0             /* r11 = 0, Not a syscall. */
 	jmpi    ret_from_exception
 .endm
 
@@ -118,7 +119,7 @@ jbsr csky_cmpxchg_fixup
 tlbop_end 1
 
 ENTRY(csky_systemcall)
-	SAVE_ALL EPC_INCREASE
+	SAVE_ALL TRAP0_SIZE
 
 	psrset  ee, ie
 
@@ -136,8 +137,9 @@ ENTRY(csky_systemcall)
 	bmaski  r10, THREAD_SHIFT
 	andn    r9, r10
 	ldw     r8, (r9, TINFO_FLAGS)
-	btsti   r8, TIF_SYSCALL_TRACE
-	bt      1f
+	ANDI_R3	r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+	cmpnei	r8, 0
+	bt      csky_syscall_trace
 #if defined(__CSKYABIV2__)
 	subi    sp, 8
 	stw  	r5, (sp, 0x4)
@@ -150,10 +152,9 @@ ENTRY(csky_systemcall)
 	stw     a0, (sp, LSAVE_A0)      /* Save return value */
 	jmpi    ret_from_exception
 
-1:
-	movi	a0, 0                   /* enter system call */
-	mov	a1, sp                  /* sp = pt_regs pointer */
-	jbsr	syscall_trace
+csky_syscall_trace:
+	mov	a0, sp                  /* sp = pt_regs pointer */
+	jbsr	syscall_trace_enter
 	/* Prepare args before do system call */
 	ldw	a0, (sp, LSAVE_A0)
 	ldw	a1, (sp, LSAVE_A1)
@@ -173,9 +174,8 @@ ENTRY(csky_systemcall)
 #endif
 	stw	a0, (sp, LSAVE_A0)	/* Save return value */
 
-	movi    a0, 1                   /* leave system call */
-	mov     a1, sp                  /* right now, sp --> pt_regs */
-	jbsr    syscall_trace
+	mov     a0, sp                  /* right now, sp --> pt_regs */
+	jbsr    syscall_trace_exit
 	br	ret_from_exception
 
 ENTRY(ret_from_kernel_thread)
@@ -190,14 +190,11 @@ ENTRY(ret_from_fork)
 	bmaski	r10, THREAD_SHIFT
 	andn	r9, r10
 	ldw	r8, (r9, TINFO_FLAGS)
-	movi	r11_sig, 1
-	btsti	r8, TIF_SYSCALL_TRACE
-	bf	3f
-	movi	a0, 1
-	mov	a1, sp			/* sp = pt_regs pointer */
-	jbsr	syscall_trace
-3:
-	jbsr	ret_from_exception
+	ANDI_R3	r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+	cmpnei	r8, 0
+	bf	ret_from_exception
+	mov	a0, sp			/* sp = pt_regs pointer */
+	jbsr	syscall_trace_exit
 
 ret_from_exception:
 	ld	syscallid, (sp, LSAVE_PSR)
@@ -212,41 +209,30 @@ ret_from_exception:
 	bmaski	r10, THREAD_SHIFT
 	andn	r9, r10
 
-resume_userspace:
 	ldw	r8, (r9, TINFO_FLAGS)
 	andi	r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
 	cmpnei	r8, 0
 	bt	exit_work
-1:  RESTORE_ALL
+1:
+	RESTORE_ALL
 
 exit_work:
+	lrw	syscallid, ret_from_exception
+	mov	lr, syscallid
+
 	btsti	r8, TIF_NEED_RESCHED
 	bt	work_resched
-	/* If thread_info->flag is empty, RESTORE_ALL */
-	cmpnei	r8, 0
-	bf	1b
-	mov	a1, sp
-	mov	a0, r8
-	mov	a2, r11_sig		/* syscall? */
-	btsti	r8, TIF_SIGPENDING	/* delivering a signal? */
-	/* prevent further restarts(set r11 = 0) */
-	clrt	r11_sig
-	jbsr	do_notify_resume	/* do signals */
-	br	resume_userspace
+
+	mov	a0, sp
+	mov	a1, r8
+	jmpi	do_notify_resume
 
 work_resched:
-	lrw	syscallid, ret_from_exception
-	mov	r15, syscallid		/* Return address in link */
 	jmpi	schedule
 
-ENTRY(sys_rt_sigreturn)
-	movi	r11_sig, 0
-	jmpi	do_rt_sigreturn
-
 ENTRY(csky_trap)
-	SAVE_ALL EPC_KEEP
+	SAVE_ALL 0
 	psrset	ee
-	movi	r11_sig, 0             /* r11 = 0, Not a syscall. */
 	mov	a0, sp                 /* Push Stack pointer arg */
 	jbsr	trap_c                 /* Call C-level trap handler */
 	jmpi	ret_from_exception
@@ -261,7 +247,7 @@ ENTRY(csky_get_tls)
 
 	/* increase epc for continue */
 	mfcr	a0, epc
-	INCTRAP	a0
+	addi	a0, TRAP0_SIZE
 	mtcr	a0, epc
 
 	/* get current task thread_info with kernel 8K stack */
@@ -278,9 +264,8 @@ ENTRY(csky_get_tls)
 	rte
 
 ENTRY(csky_irq)
-	SAVE_ALL EPC_KEEP
+	SAVE_ALL 0
 	psrset	ee
-	movi	r11_sig, 0		/* r11 = 0, Not a syscall. */
 
 #ifdef CONFIG_PREEMPT
 	mov	r9, sp			/* Get current stack  pointer */
diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
index 274c431f18103971996b97a8bf7f704bc0c0736c..44f4880179b7d1b403521530d3dfd0655e1cd341 100644
--- a/arch/csky/kernel/ftrace.c
+++ b/arch/csky/kernel/ftrace.c
@@ -3,6 +3,137 @@
 
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define NOP		0x4000
+#define NOP32_HI	0xc400
+#define NOP32_LO	0x4820
+#define PUSH_LR		0x14d0
+#define MOVIH_LINK	0xea3a
+#define ORI_LINK	0xef5a
+#define JSR_LINK	0xe8fa
+#define BSR_LINK	0xe000
+
+/*
+ * Gcc-csky with -pg will insert stub in function prologue:
+ *	push	lr
+ *	jbsr	_mcount
+ *	nop32
+ *	nop32
+ *
+ * If the (callee - current_pc) is less than 64MB, we'll use bsr:
+ *	push	lr
+ *	bsr	_mcount
+ *	nop32
+ *	nop32
+ * else we'll use (movih + ori + jsr):
+ *	push	lr
+ *	movih	r26, ...
+ *	ori	r26, ...
+ *	jsr	r26
+ *
+ * (r26 is our reserved link-reg)
+ *
+ */
+static inline void make_jbsr(unsigned long callee, unsigned long pc,
+			     uint16_t *call, bool nolr)
+{
+	/* bsr stores (callee - pc) >> 1 as a two's-complement 26-bit field */
+	long offset = (long) callee - (long) pc;
+
+	call[0]	= nolr ? NOP : PUSH_LR;
+
+	/* +64MB itself overflows the bsr field and would encode as -64MB */
+	if (unlikely(offset < -67108864 || offset >= 67108864)) {
+		call[1] = MOVIH_LINK;
+		call[2] = callee >> 16;
+		call[3] = ORI_LINK;
+		call[4] = callee & 0xffff;
+		call[5] = JSR_LINK;
+		call[6] = 0;
+	} else {
+		offset = offset >> 1;
+
+		call[1] = BSR_LINK |
+			 ((uint16_t)((unsigned long) offset >> 16) & 0x3ff);
+		call[2] = (uint16_t)((unsigned long) offset & 0xffff);
+		call[3] = call[5] = NOP32_HI;
+		call[4] = call[6] = NOP32_LO;
+	}
+}
+
+static uint16_t nops[7] = {NOP, NOP32_HI, NOP32_LO, NOP32_HI, NOP32_LO,
+				NOP32_HI, NOP32_LO};
+/*
+ * Check that the 7-halfword stub starting 2 bytes before @hook equals nops[].
+ * Returns 0 on match, -EFAULT if unreadable, -EINVAL (after logging) if not.
+ */
+static int ftrace_check_current_nop(unsigned long hook)
+{
+	unsigned long stub = hook - 2;
+	uint16_t cur[7];
+
+	if (probe_kernel_read((void *)cur, (void *)stub, sizeof(nops)))
+		return -EFAULT;
+	if (!memcmp((void *)nops, (void *)cur, sizeof(nops)))
+		return 0;
+
+	pr_err("%p: nop but get (%04x %04x %04x %04x %04x %04x %04x)\n",
+		(void *)stub, cur[0], cur[1], cur[2], cur[3], cur[4], cur[5],
+		cur[6]);
+	return -EINVAL;
+}
+
+/*
+ * Rewrite the stub that starts 2 bytes before @hook: a call to @target when
+ * @enable is set, nops[] otherwise. Returns 0, or -EPERM if the write fails.
+ */
+static int ftrace_modify_code(unsigned long hook, unsigned long target,
+			      bool enable, bool nolr)
+{
+	unsigned long stub = hook - 2;
+	uint16_t insns[7];
+
+	make_jbsr(target, hook, insns, nolr);
+
+	if (probe_kernel_write((void *)stub, enable ? insns : nops,
+			       sizeof(nops)))
+		return -EPERM;
+
+	flush_icache_range(stub, stub + MCOUNT_INSN_SIZE);
+	return 0;
+}
+
+/* Patch in a call to @addr, but only where the stub still holds nops. */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	return ftrace_check_current_nop(rec->ip) ?:
+	       ftrace_modify_code(rec->ip, addr, true, false);
+}
+
+/*
+ * Put nops[] back over the call site at rec->ip; @mod is not used here.
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	return ftrace_modify_code(rec->ip, addr, false, false);
+}
+
+/* Retarget the ftrace_call site at @func, written without the "push lr". */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	return ftrace_modify_code((unsigned long)&ftrace_call,
+				  (unsigned long)func, true, true);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
@@ -43,8 +174,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			*(unsigned long *)frame_pointer = return_hooker;
 	}
 }
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_code((unsigned long)&ftrace_graph_call,
+			(unsigned long)&ftrace_graph_caller, true, true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_code((unsigned long)&ftrace_graph_call,
+			(unsigned long)&ftrace_graph_caller, false, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 /* _mcount is defined in abi's mcount.S */
-extern void _mcount(void);
 EXPORT_SYMBOL(_mcount);
diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S
index 9c4ec473b76ba0792822e6fd968b83e22efc7a89..61989f9241c021b8e4d12b554aed9247fd14641d 100644
--- a/arch/csky/kernel/head.S
+++ b/arch/csky/kernel/head.S
@@ -7,16 +7,11 @@
 
 __HEAD
 ENTRY(_start)
-	/* set super user mode */
-	lrw	a3, DEFAULT_PSR_VALUE
-	mtcr    a3, psr
-	psrset  ee
-
-	SETUP_MMU a3
+	SETUP_MMU
 
 	/* set stack point */
-	lrw     a3, init_thread_union + THREAD_SIZE
-	mov	sp, a3
+	lrw     r6, init_thread_union + THREAD_SIZE
+	mov	sp, r6
 
 	jmpi	csky_start
 END(_start)
@@ -24,53 +19,12 @@ END(_start)
 #ifdef CONFIG_SMP
 .align 10
 ENTRY(_start_smp_secondary)
-	/* Invalid I/Dcache BTB BHT */
-	movi	a3, 7
-	lsli	a3, 16
-	addi	a3, (1<<4) | 3
-	mtcr	a3, cr17
-
-	tlbi.alls
-
-	/* setup PAGEMASK */
-	movi	a3, 0
-	mtcr	a3, cr<6, 15>
-
-	/* setup MEL0/MEL1 */
-	grs	a0, _start_smp_pc
-_start_smp_pc:
-	bmaski  a1, 13
-	andn    a0, a1
-	movi    a1, 0x00000006
-	movi    a2, 0x00001006
-	or      a1, a0
-	or      a2, a0
-	mtcr	a1, cr<2, 15>
-	mtcr	a2, cr<3, 15>
-
-	/* setup MEH */
-	mtcr	a0, cr<4, 15>
-
-	/* write TLB */
-	bgeni   a3, 28
-	mtcr	a3, cr<8, 15>
-
-	SETUP_MMU a3
-
-	/* enable MMU */
-	movi	a3, 1
-	mtcr    a3, cr18
-
-	jmpi	_goto_mmu_on
-_goto_mmu_on:
-	lrw	a3, DEFAULT_PSR_VALUE
-	mtcr    a3, psr
-	psrset  ee
+	SETUP_MMU
 
 	/* set stack point */
-	lrw     a3, secondary_stack
-	ld.w	a3, (a3, 0)
-	mov	sp, a3
+	lrw     r6, secondary_stack
+	ld.w	r6, (r6, 0)
+	mov	sp, r6
 
 	jmpi	csky_start_secondary
 END(_start_smp_secondary)
diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c
new file mode 100644
index 0000000000000000000000000000000000000000..e68ff375c8f88fe71a5006bf465df25ab3873655
--- /dev/null
+++ b/arch/csky/kernel/perf_callchain.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+/* Frame record as read from the stack at fp: saved fp, then saved lr. */
+struct stackframe {
+	unsigned long fp;
+	unsigned long lr;
+};
+
+/* Step @frame to the record stored at frame->fp; -EPERM ends the walk. */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+	unsigned long fp = frame->fp;
+	int graph = 0;
+
+	/* Stop at the stack end, on an unaligned fp, or below TASK_SIZE. */
+	if (kstack_end((void *)fp) || (fp & 0x3) || fp < TASK_SIZE)
+		return -EPERM;
+
+	*frame = *(struct stackframe *)fp;
+	if (__kernel_text_address(frame->lr))
+		frame->lr = ftrace_graph_ret_addr(NULL, &graph, frame->lr,
+						  NULL);
+	return 0;
+}
+
+static void notrace walk_stackframe(struct stackframe *fr,
+			struct perf_callchain_entry_ctx *entry)
+{
+	/* Store the starting lr, then one lr per successfully unwound frame. */
+	for (perf_callchain_store(entry, fr->lr); unwind_frame_kernel(fr) >= 0;)
+		perf_callchain_store(entry, fr->lr);
+}
+
+/*
+ * Record one user-space frame and return the frame pointer of its caller.
+ *
+ * The record at @fp is copied in with __copy_from_user_inatomic(). The
+ * address stored in @entry is @reg_lr when that is non-zero, otherwise the
+ * lr field of the copied record.
+ *
+ * Returns 0 when the record at @fp cannot be accessed or copied.
+ */
+static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+			unsigned long fp, unsigned long reg_lr)
+{
+	unsigned long *src = (unsigned long *)fp;
+	struct stackframe rec;
+	unsigned long ret_addr;
+
+	/* The whole record must pass access_ok() before it is copied. */
+	if (!access_ok(src, sizeof(rec)))
+		return 0;
+	if (__copy_from_user_inatomic(&rec, src, sizeof(rec)))
+		return 0;
+
+	/* A caller-supplied lr takes precedence over the saved one. */
+	ret_addr = reg_lr ? reg_lr : rec.lr;
+	perf_callchain_store(entry, ret_addr);
+
+	return rec.fp;
+}
+
+/*
+ * Record the user-space callchain for the sample described by @regs.
+ *
+ * This is only called when "PERF_SAMPLE_CALLCHAIN" is used, see
+ * kernel/events/core.c:perf_prepare_sample().
+ *
+ * How to trigger perf_callchain_[user/kernel]:
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ *
+ * On the C-SKY platform, the program being sampled and the C library
+ * both need to be compiled with -mbacktrace, otherwise the user stack
+ * will not contain function frames.
+ */
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+			 struct pt_regs *regs)
+{
+	unsigned long fp;
+
+	/* C-SKY does not support virtualization. */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return;
+
+	perf_callchain_store(entry, regs->pc);
+
+	/*
+	 * A leaf function normally does not save lr in its frame on
+	 * C-SKY, so the first step takes lr from pt_regs at the sample
+	 * point. That value can still be wrong if lr was in use as a
+	 * temporary register.
+	 */
+	fp = user_backtrace(entry, regs->regs[4], regs->lr);
+
+	/* Keep going while fp is non-zero, aligned and the entry has room. */
+	while (fp && !(fp & 0x3) && entry->nr < entry->max_stack)
+		fp = user_backtrace(entry, fp, 0);
+}
+
+/* Record the kernel-mode callchain for the sample described by @regs. */
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	struct stackframe fr = { .fp = regs->regs[4], .lr = regs->lr };
+
+	/* C-SKY does not support virtualization. */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		pr_warn("C-SKY does not support perf in guest mode!\n");
+		return;
+	}
+
+	/* Walk from the sampled frame pointer (regs[4]) and link register. */
+	walk_stackframe(&fr, entry);
+}
diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c
new file mode 100644
index 0000000000000000000000000000000000000000..eb32838b8210f8a43bf4b2a335c43760d46f91ef
--- /dev/null
+++ b/arch/csky/kernel/perf_regs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+/* Return 32-bit word @idx of @regs; out-of-range @idx warns once, gives 0. */
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	const u32 *words = (const u32 *)regs;
+
+	return WARN_ON_ONCE((u32)idx >= PERF_REG_CSKY_MAX) ? 0 : words[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_CSKY_MAX) - 1))
+
+/* A valid @mask is non-empty and sets no bit covered by REG_RESERVED. */
+int perf_reg_validate(u64 mask)
+{
+	bool valid = mask && !(mask & REG_RESERVED);
+
+	return valid ? 0 : -EINVAL;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+
+/* Report current's saved user registers and ABI; both regs args are unused. */
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs, struct pt_regs *regs_user_copy)
+{
+	regs_user->abi = perf_reg_abi(current);
+	regs_user->regs = task_pt_regs(current);
+}
diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c
index f2f12fff36f70c90d260c8052e6ba5feb69b306c..313623a19ecbf6bd3c0e05ca870af9672c6039ea 100644
--- a/arch/csky/kernel/ptrace.c
+++ b/arch/csky/kernel/ptrace.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
+#include <linux/audit.h>
 #include <linux/elf.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -11,6 +12,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/signal.h>
 #include <linux/smp.h>
+#include <linux/tracehook.h>
 #include <linux/uaccess.h>
 #include <linux/user.h>
 
@@ -22,6 +24,9 @@
 
 #include <abi/regdef.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
 /* sets the trace bits. */
 #define TRACE_MODE_SI      (1 << 14)
 #define TRACE_MODE_RUN     0
@@ -207,35 +212,27 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-/*
- * If process's system calls is traces, do some corresponding handles in this
- * function before entering system call function and after exiting system call
- * function.
- */
-asmlinkage void syscall_trace(int why, struct pt_regs *regs)
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 {
-	long saved_why;
-	/*
-	 * Save saved_why, why is used to denote syscall entry/exit;
-	 * why = 0:entry, why = 1: exit
-	 */
-	saved_why = regs->regs[SYSTRACE_SAVENUM];
-	regs->regs[SYSTRACE_SAVENUM] = why;
-
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-					? 0x80 : 0));
-
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		if (tracehook_report_syscall_entry(regs))
+			syscall_set_nr(current, regs, -1);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_enter(regs, syscall_get_nr(current, regs));
+
+	audit_syscall_entry(regs_syscallid(regs), regs->a0, regs->a1, regs->a2, regs->a3);
+}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	audit_syscall_exit(regs);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(regs, 0);
 
-	regs->regs[SYSTRACE_SAVENUM] = saved_why;
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		trace_sys_exit(regs, syscall_get_return_value(current, regs));
 }
 
 extern void show_stack(struct task_struct *task, unsigned long *stack);
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index dff8b89444ec5571a6eeb82d36ac2f7f0329b8d7..23ee604aafdb620fca5ba01f8c7474f261349258 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -142,18 +142,24 @@ void __init setup_arch(char **cmdline_p)
 #endif
 }
 
-asmlinkage __visible void __init csky_start(unsigned int unused, void *param)
+unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
+
+asmlinkage __visible void __init csky_start(unsigned int unused,
+					    void *dtb_start)
 {
 	/* Clean up bss section */
 	memset(__bss_start, 0, __bss_stop - __bss_start);
 
+	va_pa_offset = read_mmu_msa0() & ~(SSEG_SIZE - 1);
+
 	pre_trap_init();
 	pre_mmu_init();
 
-	if (param == NULL)
+	if (dtb_start == NULL)
 		early_init_dt_scan(__dtb_start);
 	else
-		early_init_dt_scan(param);
+		early_init_dt_scan(dtb_start);
 
 	start_kernel();
 
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index 207a891479d26e63100ea88c7665a673a6574f62..04a43cfd4e09f3babe35685e3dcabcb1085aa6e2 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -1,26 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
 #include <linux/signal.h>
+#include <linux/uaccess.h>
 #include <linux/syscalls.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/highuid.h>
-#include <linux/personality.h>
-#include <linux/tty.h>
-#include <linux/binfmts.h>
 #include <linux/tracehook.h>
-#include <linux/freezer.h>
-#include <linux/uaccess.h>
 
-#include <asm/setup.h>
-#include <asm/pgtable.h>
 #include <asm/traps.h>
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
@@ -29,110 +13,117 @@
 
 #ifdef CONFIG_CPU_HAS_FPU
 #include <abi/fpu.h>
-
-static int restore_fpu_state(struct sigcontext *sc)
+static int restore_fpu_state(struct sigcontext __user *sc)
 {
 	int err = 0;
 	struct user_fp user_fp;
 
-	err = copy_from_user(&user_fp, &sc->sc_user_fp, sizeof(user_fp));
+	err = __copy_from_user(&user_fp, &sc->sc_user_fp, sizeof(user_fp));
 
 	restore_from_user_fp(&user_fp);
 
 	return err;
 }
 
-static int save_fpu_state(struct sigcontext *sc)
+static int save_fpu_state(struct sigcontext __user *sc)
 {
 	struct user_fp user_fp;
 
 	save_to_user_fp(&user_fp);
 
-	return copy_to_user(&sc->sc_user_fp, &user_fp, sizeof(user_fp));
+	return __copy_to_user(&sc->sc_user_fp, &user_fp, sizeof(user_fp));
 }
 #else
-static inline int restore_fpu_state(struct sigcontext *sc) { return 0; }
-static inline int save_fpu_state(struct sigcontext *sc) { return 0; }
+#define restore_fpu_state(sigcontext)	(0)
+#define save_fpu_state(sigcontext)	(0)
 #endif
 
 struct rt_sigframe {
-	int sig;
-	struct siginfo *pinfo;
-	void *puc;
 	struct siginfo info;
 	struct ucontext uc;
 };
 
-static int
-restore_sigframe(struct pt_regs *regs,
-		 struct sigcontext *sc, int *pr2)
+static long restore_sigcontext(struct pt_regs *regs,
+	struct sigcontext __user *sc)
 {
 	int err = 0;
 
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->task->restart_block.fn = do_no_restart_syscall;
-
-	err |= copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs));
+	/* sc_pt_regs is structured the same as the start of pt_regs */
+	err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs));
 
+	/* Restore the floating-point state. */
 	err |= restore_fpu_state(sc);
 
-	*pr2 = regs->a0;
 	return err;
 }
 
-asmlinkage int
-do_rt_sigreturn(void)
+SYSCALL_DEFINE0(rt_sigreturn)
 {
-	sigset_t set;
-	int a0;
 	struct pt_regs *regs = current_pt_regs();
-	struct rt_sigframe *frame = (struct rt_sigframe *)(regs->usp);
+	struct rt_sigframe __user *frame;
+	struct task_struct *task;
+	sigset_t set;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	frame = (struct rt_sigframe __user *)regs->usp;
 
 	if (!access_ok(frame, sizeof(*frame)))
 		goto badframe;
+
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, (sigmask(SIGKILL) | sigmask(SIGSTOP)));
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
-	if (restore_sigframe(regs, &frame->uc.uc_mcontext, &a0))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
-	return a0;
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+
+	return regs->a0;
 
 badframe:
-	force_sig(SIGSEGV, current);
+	task = current;
+	force_sig(SIGSEGV, task);
 	return 0;
 }
 
-static int setup_sigframe(struct sigcontext *sc, struct pt_regs *regs)
+static int setup_sigcontext(struct rt_sigframe __user *frame,
+	struct pt_regs *regs)
 {
+	struct sigcontext __user *sc = &frame->uc.uc_mcontext;
 	int err = 0;
 
-	err |= copy_to_user(&sc->sc_pt_regs, regs, sizeof(struct pt_regs));
+	err |= __copy_to_user(&sc->sc_pt_regs, regs, sizeof(struct pt_regs));
 	err |= save_fpu_state(sc);
 
 	return err;
 }
 
-static inline void *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+static inline void __user *get_sigframe(struct ksignal *ksig,
+	struct pt_regs *regs, size_t framesize)
 {
-	unsigned long usp;
+	unsigned long sp;
+	/* Default to using normal stack */
+	sp = regs->usp;
+
+	/*
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+		return (void __user __force *)(-1UL);
 
-	/* Default to using normal stack.  */
-	usp = regs->usp;
+	/* This is the X/Open sanctioned signal stack switching. */
+	sp = sigsp(sp, ksig) - framesize;
 
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(usp)) {
-		if (!on_sig_stack(usp))
-			usp = current->sas_ss_sp + current->sas_ss_size;
-	}
-	return (void *)((usp - frame_size) & -8UL);
+	/* Align the stack frame. */
+	sp &= -8UL;
+
+	return (void __user *)sp;
 }
 
 static int
@@ -140,205 +131,128 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe *frame;
 	int err = 0;
-
 	struct csky_vdso *vdso = current->mm->context.vdso;
 
-	frame = get_sigframe(&ksig->ka, regs, sizeof(*frame));
-	if (!frame)
-		return 1;
+	frame = get_sigframe(ksig, regs, sizeof(*frame));
+	if (!access_ok(frame, sizeof(*frame)))
+		return -EFAULT;
 
-	err |= __put_user(ksig->sig, &frame->sig);
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 
-	/* Create the ucontext.  */
+	/* Create the ucontext. */
 	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user((void *)current->sas_ss_sp,
-			&frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(regs->usp),
-			&frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigframe(&frame->uc.uc_mcontext, regs);
-	err |= copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
+	err |= __put_user(NULL, &frame->uc.uc_link);
+	err |= __save_altstack(&frame->uc.uc_stack, regs->usp);
+	err |= setup_sigcontext(frame, regs);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
-	/* Set up registers for signal handler */
-	regs->usp = (unsigned long)frame;
-	regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
-	regs->lr = (unsigned long)vdso->rt_signal_retcode;
+	/* Set up to return from userspace. */
+	regs->lr = (unsigned long)(vdso->rt_signal_retcode);
 
-adjust_stack:
-	regs->a0 = ksig->sig; /* first arg is signo */
-	regs->a1 = (unsigned long)(&(frame->info));
-	regs->a2 = (unsigned long)(&(frame->uc));
-	return err;
+	/*
+	 * Set up registers for signal handler.
+	 * Registers that we don't modify keep the value they had from
+	 * user-space at the time we took the signal.
+	 * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
+	 * since some things rely on this (e.g. glibc's debug/segfault.c).
+	 */
+	regs->pc  = (unsigned long)ksig->ka.sa.sa_handler;
+	regs->usp = (unsigned long)frame;
+	regs->a0  = ksig->sig;				/* a0: signal number */
+	regs->a1  = (unsigned long)(&(frame->info));	/* a1: siginfo pointer */
+	regs->a2  = (unsigned long)(&(frame->uc));	/* a2: ucontext pointer */
 
-give_sigsegv:
-	if (ksig->sig == SIGSEGV)
-		ksig->ka.sa.sa_handler = SIG_DFL;
-	force_sig(SIGSEGV, current);
-	goto adjust_stack;
+	return 0;
 }
 
-/*
- * OK, we're invoking a handler
- */
-static int
-handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
-	int ret;
 	sigset_t *oldset = sigmask_to_save();
+	int ret;
 
-	/*
-	 * set up the stack frame, regardless of SA_SIGINFO,
-	 * and pass info anyway.
-	 */
-	ret = setup_rt_frame(ksig, oldset, regs);
+	/* Are we from a system call? */
+	if (in_syscall(regs)) {
+		/* Avoid additional syscall restarting via ret_from_exception */
+		forget_syscall(regs);
+
+		/* If so, check system call restarting.. */
+		switch (regs->a0) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->a0 = -EINTR;
+			break;
 
-	if (ret != 0) {
-		force_sigsegv(ksig->sig, current);
-		return ret;
+		case -ERESTARTSYS:
+			if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
+				regs->a0 = -EINTR;
+				break;
+			}
+			/* fallthrough */
+		case -ERESTARTNOINTR:
+			regs->a0 = regs->orig_a0;
+			regs->pc -= TRAP0_SIZE;
+			break;
+		}
 	}
 
-	/* Block the signal if we were successful. */
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ksig->ka.sa.sa_mask);
-	if (!(ksig->ka.sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, ksig->sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	/* Set up the stack frame */
+	ret = setup_rt_frame(ksig, oldset, regs);
 
-	return 0;
+	signal_setup_done(ret, ksig, 0);
 }
 
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
- * Note that we go through the signals twice: once to check the signals
- * that the kernel can handle, and then we build all the user-level signal
- * handling stack-frames in one go after that.
- */
-static void do_signal(struct pt_regs *regs, int syscall)
+static void do_signal(struct pt_regs *regs)
 {
-	unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
 	struct ksignal ksig;
 
-	/*
-	 * We want the common case to go fast, which
-	 * is why we may in certain cases get here from
-	 * kernel mode. Just return without doing anything
-	 * if so.
-	 */
-	if (!user_mode(regs))
+	if (get_signal(&ksig)) {
+		/* Actually deliver the signal */
+		handle_signal(&ksig, regs);
 		return;
+	}
 
-	/*
-	 * If we were from a system call, check for system call restarting...
-	 */
-	if (syscall) {
-		continue_addr = regs->pc;
-#if defined(__CSKYABIV2__)
-		restart_addr = continue_addr - 4;
-#else
-		restart_addr = continue_addr - 2;
-#endif
-		retval = regs->a0;
+	/* Did we come from a system call? */
+	if (in_syscall(regs)) {
+		/* Avoid additional syscall restarting via ret_from_exception */
+		forget_syscall(regs);
 
-		/*
-		 * Prepare for system call restart.  We do this here so that a
-		 * debugger will see the already changed.
-		 */
-		switch (retval) {
+		/* Restart the system call - no handlers present */
+		switch (regs->a0) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
 			regs->a0 = regs->orig_a0;
-			regs->pc = restart_addr;
+			regs->pc -= TRAP0_SIZE;
 			break;
 		case -ERESTART_RESTARTBLOCK:
-			regs->a0 = -EINTR;
+			regs->a0 = regs->orig_a0;
+			regs_syscallid(regs) = __NR_restart_syscall;
+			regs->pc -= TRAP0_SIZE;
 			break;
 		}
 	}
 
-	if (try_to_freeze())
-		goto no_signal;
-
 	/*
-	 * Get the signal to deliver.  When running under ptrace, at this
-	 * point the debugger may change all our registers ...
+	 * If there is no signal to deliver, we just put the saved
+	 * sigmask back.
 	 */
-	if (get_signal(&ksig)) {
-		/*
-		 * Depending on the signal settings we may need to revert the
-		 * decision to restart the system call.  But skip this if a
-		 * debugger has chosen to restart at a different PC.
-		 */
-		if (regs->pc == restart_addr) {
-			if (retval == -ERESTARTNOHAND ||
-			    (retval == -ERESTARTSYS &&
-			     !(ksig.ka.sa.sa_flags & SA_RESTART))) {
-				regs->a0 = -EINTR;
-				regs->pc = continue_addr;
-			}
-		}
-
-		/* Whee!  Actually deliver the signal.  */
-		if (handle_signal(&ksig, regs) == 0) {
-			/*
-			 * A signal was successfully delivered; the saved
-			 * sigmask will have been stored in the signal frame,
-			 * and will be restored by sigreturn, so we can simply
-			 * clear the TIF_RESTORE_SIGMASK flag.
-			 */
-			if (test_thread_flag(TIF_RESTORE_SIGMASK))
-				clear_thread_flag(TIF_RESTORE_SIGMASK);
-		}
-		return;
-	}
-
-no_signal:
-	if (syscall) {
-		/*
-		 * Handle restarting a different system call.  As above,
-		 * if a debugger has chosen to restart at a different PC,
-		 * ignore the restart.
-		 */
-		if (retval == -ERESTART_RESTARTBLOCK
-				&& regs->pc == continue_addr) {
-#if defined(__CSKYABIV2__)
-			regs->regs[3] = __NR_restart_syscall;
-			regs->pc -= 4;
-#else
-			regs->regs[9] = __NR_restart_syscall;
-			regs->pc -= 2;
-#endif
-		}
-
-		/*
-		 * If there's no signal to deliver, we just put the saved
-		 * sigmask back.
-		 */
-		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-			clear_thread_flag(TIF_RESTORE_SIGMASK);
-			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-		}
-	}
+	restore_saved_sigmask();
 }
 
-asmlinkage void
-do_notify_resume(unsigned int thread_flags, struct pt_regs *regs, int syscall)
+/*
+ * notification of userspace execution resumption
+ * - triggered by the _TIF_WORK_MASK flags
+ */
+asmlinkage void do_notify_resume(struct pt_regs *regs,
+	unsigned long thread_info_flags)
 {
-	if (thread_flags & _TIF_SIGPENDING)
-		do_signal(regs, syscall);
+	/* Handle pending signal delivery */
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs);
 
-	if (thread_flags & _TIF_NOTIFY_RESUME) {
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
 	}
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index d6f4b66b93e21c8ede70e2cc4070861d6df7730d..18041f46ded11df26ad5dec41c4bc68363b3bafc 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -15,9 +15,9 @@
 #include <linux/smp.h>
 #include <linux/version.h>
 #include <linux/vt_kern.h>
-#include <linux/kernel.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 #include <asm/hardirq.h>
 #include <asm/mmu_context.h>
@@ -82,7 +82,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 
 		unsigned long pgd_base;
 
-		pgd_base = tlb_get_pgd();
+		pgd_base = (unsigned long)__va(get_pgd());
 		pgd = (pgd_t *)pgd_base + offset;
 		pgd_k = init_mm.pgd + offset;
 
@@ -107,6 +107,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 		return;
 	}
 #endif
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	/*
 	 * If we're in an interrupt or have no user
 	 * context, we must not take the fault..
@@ -154,10 +156,15 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
 			goto bad_area;
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
+			      address);
+	} else {
 		tsk->min_flt++;
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
+			      address);
+	}
 
 	up_read(&mm->mmap_sem);
 	return;
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 68841d01162cff667d3e33fec14120b355f23413..f716668992458c1493fad7837c21996c1507ec73 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -397,6 +397,9 @@ if ($arch eq "x86_64") {
 } elsif ($arch eq "nds32") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_NDS32_HI20_RELA\\s+_mcount\$";
     $alignment = 2;
+} elsif ($arch eq "csky") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_CKCORE_PCREL_JSR_IMM26BY2\\s+_mcount\$";
+    $alignment = 2;
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }