Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Linux: Kernel

[PATCH] show per-process swap usage via procfs v3

 

 

Linux kernel RSS feed   Index | Next | Previous | View Threaded


kamezawa.hiroyu at jp

Nov 10, 2009, 6:25 PM

Post #1 of 4 (325 views)
Permalink
[PATCH] show per-process swap usage via procfs v3

Updated Documentation/filesystems/proc.txt

==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu [at] jp>

Currently, anon_rss and file_rss are counted as RSS and exported via /proc.
RSS usage is important information, but another piece of information that
users often ask for is "usage of swap" (according to our user support team).

This patch counts swap entry usage per process and shows it via
/proc/<pid>/status. I think the status file is robust against new entries,
so it is the first candidate.

After this, /proc/<pid>/status includes the following line:
<snip>
VmPeak: 315360 kB
VmSize: 315360 kB
VmLck: 0 kB
VmHWM: 180452 kB
VmRSS: 180452 kB
VmData: 311624 kB
VmStk: 84 kB
VmExe: 4 kB
VmLib: 1568 kB
VmPTE: 640 kB
VmSwap: 131240 kB <=== new information

Note:
Because this patch counts swap ptes found in the page table, it will
not catch shmem's swapout. That is already accounted for per shmem
inode, so we don't need to do more.

Changelog: 2009/11/11
- added an update for Documentation/filesystems/proc.txt
Changelog: 2009/11/06
- fixed bad use of is_migration_entry. Now, non_swap_entry() is used.
Changelog: 2009/11/03
- clean up.
- fixed initialization bug at fork (init_mm())

Reviewed-by: Minchan Kim <minchan.kim [at] gmail>
Acked-by: David Rientjes <rientjes [at] google>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu [at] jp>
---
Documentation/filesystems/proc.txt | 2 ++
fs/proc/task_mmu.c | 9 ++++++---
include/linux/mm_types.h | 1 +
kernel/fork.c | 1 +
mm/memory.c | 30 +++++++++++++++++++++---------
mm/rmap.c | 1 +
mm/swapfile.c | 1 +
7 files changed, 33 insertions(+), 12 deletions(-)

Index: mm-test-kernel/include/linux/mm_types.h
===================================================================
--- mm-test-kernel.orig/include/linux/mm_types.h
+++ mm-test-kernel/include/linux/mm_types.h
@@ -228,6 +228,7 @@ struct mm_struct {
*/
mm_counter_t _file_rss;
mm_counter_t _anon_rss;
+ mm_counter_t _swap_usage;

unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
Index: mm-test-kernel/mm/memory.c
===================================================================
--- mm-test-kernel.orig/mm/memory.c
+++ mm-test-kernel/mm/memory.c
@@ -376,12 +376,15 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
return 0;
}

-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+static inline void
+add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss, int swap_usage)
{
if (file_rss)
add_mm_counter(mm, file_rss, file_rss);
if (anon_rss)
add_mm_counter(mm, anon_rss, anon_rss);
+ if (swap_usage)
+ add_mm_counter(mm, swap_usage, swap_usage);
}

/*
@@ -597,7 +600,9 @@ copy_one_pte(struct mm_struct *dst_mm, s
&src_mm->mmlist);
spin_unlock(&mmlist_lock);
}
- if (is_write_migration_entry(entry) &&
+ if (!non_swap_entry(entry))
+ rss[2]++;
+ else if (is_write_migration_entry(entry) &&
is_cow_mapping(vm_flags)) {
/*
* COW mappings require pages in both parent
@@ -648,11 +653,11 @@ static int copy_pte_range(struct mm_stru
pte_t *src_pte, *dst_pte;
spinlock_t *src_ptl, *dst_ptl;
int progress = 0;
- int rss[2];
+ int rss[3];
swp_entry_t entry = (swp_entry_t){0};

again:
- rss[1] = rss[0] = 0;
+ rss[2] = rss[1] = rss[0] = 0;
dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
if (!dst_pte)
return -ENOMEM;
@@ -688,7 +693,7 @@ again:
arch_leave_lazy_mmu_mode();
spin_unlock(src_ptl);
pte_unmap_nested(orig_src_pte);
- add_mm_rss(dst_mm, rss[0], rss[1]);
+ add_mm_rss(dst_mm, rss[0], rss[1], rss[2]);
pte_unmap_unlock(orig_dst_pte, dst_ptl);
cond_resched();

@@ -818,6 +823,7 @@ static unsigned long zap_pte_range(struc
spinlock_t *ptl;
int file_rss = 0;
int anon_rss = 0;
+ int swap_usage = 0;

pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
arch_enter_lazy_mmu_mode();
@@ -887,13 +893,18 @@ static unsigned long zap_pte_range(struc
if (pte_file(ptent)) {
if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
print_bad_pte(vma, addr, ptent, NULL);
- } else if
- (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
- print_bad_pte(vma, addr, ptent, NULL);
+ } else {
+ swp_entry_t ent = pte_to_swp_entry(ptent);
+
+ if (!non_swap_entry(ent))
+ swap_usage--;
+ if (unlikely(!free_swap_and_cache(ent)))
+ print_bad_pte(vma, addr, ptent, NULL);
+ }
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));

- add_mm_rss(mm, file_rss, anon_rss);
+ add_mm_rss(mm, file_rss, anon_rss, swap_usage);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);

@@ -2595,6 +2606,7 @@ static int do_swap_page(struct mm_struct
*/

inc_mm_counter(mm, anon_rss);
+ dec_mm_counter(mm, swap_usage);
pte = mk_pte(page, vma->vm_page_prot);
if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
Index: mm-test-kernel/mm/swapfile.c
===================================================================
--- mm-test-kernel.orig/mm/swapfile.c
+++ mm-test-kernel/mm/swapfile.c
@@ -837,6 +837,7 @@ static int unuse_pte(struct vm_area_stru
}

inc_mm_counter(vma->vm_mm, anon_rss);
+ dec_mm_counter(vma->vm_mm, swap_usage);
get_page(page);
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
Index: mm-test-kernel/fs/proc/task_mmu.c
===================================================================
--- mm-test-kernel.orig/fs/proc/task_mmu.c
+++ mm-test-kernel/fs/proc/task_mmu.c
@@ -17,7 +17,7 @@
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
unsigned long data, text, lib;
- unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
+ unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss, swap;

/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -36,6 +36,7 @@ void task_mem(struct seq_file *m, struct
data = mm->total_vm - mm->shared_vm - mm->stack_vm;
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
+ swap = get_mm_counter(mm, swap_usage);
seq_printf(m,
"VmPeak:\t%8lu kB\n"
"VmSize:\t%8lu kB\n"
@@ -46,7 +47,8 @@ void task_mem(struct seq_file *m, struct
"VmStk:\t%8lu kB\n"
"VmExe:\t%8lu kB\n"
"VmLib:\t%8lu kB\n"
- "VmPTE:\t%8lu kB\n",
+ "VmPTE:\t%8lu kB\n"
+ "VmSwap:\t%8lu kB\n",
hiwater_vm << (PAGE_SHIFT-10),
(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
mm->locked_vm << (PAGE_SHIFT-10),
@@ -54,7 +56,8 @@ void task_mem(struct seq_file *m, struct
total_rss << (PAGE_SHIFT-10),
data << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
- (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
+ (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
+ swap << (PAGE_SHIFT - 10));
}

unsigned long task_vsize(struct mm_struct *mm)
Index: mm-test-kernel/mm/rmap.c
===================================================================
--- mm-test-kernel.orig/mm/rmap.c
+++ mm-test-kernel/mm/rmap.c
@@ -834,6 +834,7 @@ static int try_to_unmap_one(struct page
spin_unlock(&mmlist_lock);
}
dec_mm_counter(mm, anon_rss);
+ inc_mm_counter(mm, swap_usage);
} else if (PAGE_MIGRATION) {
/*
* Store the pfn of the page in a special migration
Index: mm-test-kernel/kernel/fork.c
===================================================================
--- mm-test-kernel.orig/kernel/fork.c
+++ mm-test-kernel/kernel/fork.c
@@ -454,6 +454,7 @@ static struct mm_struct * mm_init(struct
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
+ set_mm_counter(mm, swap_usage, 0);
spin_lock_init(&mm->page_table_lock);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
Index: mm-test-kernel/Documentation/filesystems/proc.txt
===================================================================
--- mm-test-kernel.orig/Documentation/filesystems/proc.txt
+++ mm-test-kernel/Documentation/filesystems/proc.txt
@@ -163,6 +163,7 @@ read the file /proc/PID/status:
VmExe: 68 kB
VmLib: 1412 kB
VmPTE: 20 kb
+ VmSwap: 0 kb
Threads: 1
SigQ: 0/28578
SigPnd: 0000000000000000
@@ -213,6 +214,7 @@ Table 1-2: Contents of the statm files (
VmExe size of text segment
VmLib size of shared library code
VmPTE size of page table entries
+ VmSwap size of swapped out private rss.
Threads number of threads
SigQ number of signals queued/max. number for queue
SigPnd bitmap of pending signals for the thread

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


cl at linux-foundation

Nov 12, 2009, 7:20 AM

Post #2 of 4 (279 views)
Permalink
Re: [PATCH] show per-process swap usage via procfs v3 [In reply to]

On Wed, 11 Nov 2009, KAMEZAWA Hiroyuki wrote:

>
> Index: mm-test-kernel/include/linux/mm_types.h
> ===================================================================
> --- mm-test-kernel.orig/include/linux/mm_types.h
> +++ mm-test-kernel/include/linux/mm_types.h
> @@ -228,6 +228,7 @@ struct mm_struct {
> */
> mm_counter_t _file_rss;
> mm_counter_t _anon_rss;
> + mm_counter_t _swap_usage;

This is going to be another hit on vm performance if we go down this
road.

At least put

#ifdef CONFIG_SWAP ?

around this so that we can switch it off?

> @@ -597,7 +600,9 @@ copy_one_pte(struct mm_struct *dst_mm, s
> &src_mm->mmlist);
> spin_unlock(&mmlist_lock);
> }
> - if (is_write_migration_entry(entry) &&
> + if (!non_swap_entry(entry))
> + rss[2]++;
> + else if (is_write_migration_entry(entry) &&
> is_cow_mapping(vm_flags)) {
> /*

What are the implications for fork performance?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kamezawa.hiroyu at jp

Nov 12, 2009, 5:51 PM

Post #3 of 4 (278 views)
Permalink
Re: [PATCH] show per-process swap usage via procfs v3 [In reply to]

On Thu, 12 Nov 2009 10:20:29 -0500 (EST)
Christoph Lameter <cl [at] linux-foundation> wrote:

> On Wed, 11 Nov 2009, KAMEZAWA Hiroyuki wrote:
>
> >
> > Index: mm-test-kernel/include/linux/mm_types.h
> > ===================================================================
> > --- mm-test-kernel.orig/include/linux/mm_types.h
> > +++ mm-test-kernel/include/linux/mm_types.h
> > @@ -228,6 +228,7 @@ struct mm_struct {
> > */
> > mm_counter_t _file_rss;
> > mm_counter_t _anon_rss;
> > + mm_counter_t _swap_usage;
>
> This is going to be another hit on vm performance if we go down this
> road.
>
> At least put
>
> #ifdef CONFIG_SWAP ?
>
> around this so that we can switch it off?
>
Hmm, okay. But I'm not sure I can do it in a clean way.
(Either I'll wait for your updates to mm_counters, or I'll do it myself.)

> > @@ -597,7 +600,9 @@ copy_one_pte(struct mm_struct *dst_mm, s
> > &src_mm->mmlist);
> > spin_unlock(&mmlist_lock);
> > }
> > - if (is_write_migration_entry(entry) &&
> > + if (!non_swap_entry(entry))
> > + rss[2]++;
> > + else if (is_write_migration_entry(entry) &&
> > is_cow_mapping(vm_flags)) {
> > /*
>
> What are the implications for fork performance?

This path is executed only when the page table entry is
!pte_none() && !pte_present().

There is not a very big chance of reaching here (this path is under unlikely()).

Thanks,
-Kame

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kamezawa.hiroyu at jp

Nov 12, 2009, 6:35 PM

Post #4 of 4 (277 views)
Permalink
Re: [PATCH] show per-process swap usage via procfs v3 [In reply to]

On Fri, 13 Nov 2009 10:51:12 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu [at] jp> wrote:
> > > @@ -597,7 +600,9 @@ copy_one_pte(struct mm_struct *dst_mm, s
> > > &src_mm->mmlist);
> > > spin_unlock(&mmlist_lock);
> > > }
> > > - if (is_write_migration_entry(entry) &&
> > > + if (!non_swap_entry(entry))
> > > + rss[2]++;
> > > + else if (is_write_migration_entry(entry) &&
> > > is_cow_mapping(vm_flags)) {
> > > /*
> >
> > What are the implications for fork performance?
>
> This path is executed only when the page table entry is
> !pte_none() && !pte_present().
>
> There is not a very big chance of reaching here (this path is under unlikely()).
>

[before]
text data bss dec hex filename
6649003 3221828 10232816 20103647 132c1df vmlinux
[after]
text data bss dec hex filename
6649243 3221828 10232816 20103887 132c2cf vmlinux

So, the text size grows by 240 bytes... Hmm.

Thanks,
-Kame

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Linux kernel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.