 arch/arm/configs/rx51_defconfig         |    2
 drivers/block/Kconfig                   |    2
 drivers/block/Makefile                  |    1
 drivers/block/ramzswap/Kconfig          |   22
 drivers/block/ramzswap/Makefile         |    3
 drivers/block/ramzswap/compat.h         |   13
 drivers/block/ramzswap/ramzswap.txt     |   51 +
 drivers/block/ramzswap/ramzswap_drv.c   | 1557 ++++++++++++++++++++++++++++++++
 drivers/block/ramzswap/ramzswap_drv.h   |  210 ++++
 drivers/block/ramzswap/ramzswap_ioctl.h |   50 +
 drivers/block/ramzswap/xvmalloc.c       |  507 ++++++++++
 drivers/block/ramzswap/xvmalloc.h       |   30
 drivers/block/ramzswap/xvmalloc_int.h   |   86 +
 13 files changed, 2534 insertions(+)
Index: kernel-power-2.6.28/arch/arm/configs/rx51_defconfig
===================================================================
--- kernel-power-2.6.28.orig/arch/arm/configs/rx51_defconfig
+++ kernel-power-2.6.28/arch/arm/configs/rx51_defconfig
 # CONFIG_BLK_DEV_XIP is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
+# CONFIG_RAMZSWAP_STATS is not set
 # CONFIG_EEPROM_93CX6 is not set
 CONFIG_NOKIA_AV_DETECT=m
Index: kernel-power-2.6.28/drivers/block/Kconfig
===================================================================
--- kernel-power-2.6.28.orig/drivers/block/Kconfig
+++ kernel-power-2.6.28/drivers/block/Kconfig
+source "drivers/block/ramzswap/Kconfig"
Index: kernel-power-2.6.28/drivers/block/Makefile
===================================================================
--- kernel-power-2.6.28.orig/drivers/block/Makefile
+++ kernel-power-2.6.28/drivers/block/Makefile
 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
 obj-$(CONFIG_BLK_DEV_UB) += ub.o
 obj-$(CONFIG_BLK_DEV_HD) += hd.o
+obj-$(CONFIG_RAMZSWAP) += ramzswap/
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
Index: kernel-power-2.6.28/drivers/block/ramzswap/Kconfig
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/Kconfig
+ tristate "Compressed in-memory swap device (ramzswap)"
+ select LZO_DECOMPRESS
+ Creates virtual block devices which can (only) be used as swap
+ disks. Pages swapped to these disks are compressed and stored in
+ See ramzswap.txt for more information.
+ Project home: http://compcache.googlecode.com/
+config RAMZSWAP_STATS
+ bool "Enable ramzswap stats"
+ Enable statistics collection for ramzswap. This adds only a minimal
+ overhead. If unsure, say Y.
Index: kernel-power-2.6.28/drivers/block/ramzswap/Makefile
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/Makefile
+ramzswap-objs := ramzswap_drv.o xvmalloc.o
+obj-$(CONFIG_RAMZSWAP) += ramzswap.o
Index: kernel-power-2.6.28/drivers/block/ramzswap/compat.h
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/compat.h
+#ifndef _RAMZSWAP_COMPAT_H_
+#define _RAMZSWAP_COMPAT_H_
+/* Uncomment this if you are using the swap free notify patch */
+#define CONFIG_SWAP_FREE_NOTIFY
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31))
+#define blk_queue_physical_block_size(q, size) \
+ blk_queue_hardsect_size(q, size)
+#define blk_queue_logical_block_size(q, size)
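A minimal sketch of what these shims do on a pre-2.6.31 kernel such as this 2.6.28 tree (mirroring the call site in create_device() further below): the physical variant is rewritten to the old blk_queue_hardsect_size(), and the logical variant compiles to nothing.

	/* Sketch, not part of the patch: */
	blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
	/* -> blk_queue_hardsect_size(rzs->disk->queue, PAGE_SIZE) on 2.6.28 */
	blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
	/* -> expands to nothing on 2.6.28 */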
Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap.txt
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap.txt
+ramzswap: Compressed RAM based swap device
+------------------------------------------
+Project home: http://compcache.googlecode.com/
+The ramzswap module creates RAM based block devices which can (only) be used as
+swap disks. Pages swapped to these devices are compressed and stored in memory
+itself. See the project home for use cases, performance numbers and a lot more.
+Individual ramzswap devices are configured and initialized using the rzscontrol
+userspace utility as shown in the examples below. See the rzscontrol man page for more
+The following shows a typical sequence of steps for using ramzswap.
+ modprobe ramzswap num_devices=4
+ This creates 4 (uninitialized) devices: /dev/ramzswap{0,1,2,3}
+ (num_devices parameter is optional. Default: 1)
+ Use the rzscontrol utility to configure and initialize individual
+ ramzswap devices. Example:
+ rzscontrol /dev/ramzswap2 --init # uses default value of disksize_kb
+ *See the rzscontrol man page for more details and examples*
+ swapon /dev/ramzswap2 # or any other initialized ramzswap device
+ rzscontrol /dev/ramzswap2 --stats
+ swapoff /dev/ramzswap2
+ rzscontrol /dev/ramzswap2 --reset
+ (This frees all the memory allocated for this device.)
+Please report any problems at:
+ - Mailing list: linux-mm-cc at laptop dot org
+ - Issue tracker: http://code.google.com/p/compcache/issues/list
Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.c
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.c
+ * Compressed RAM based swap device
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ * Project home: http://compcache.googlecode.com
+#define KMSG_COMPONENT "ramzswap"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/highmem.h>
+#include <linux/lzo.h>
+#include <linux/string.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include "ramzswap_drv.h"
+/* Module params (documentation at end) */
+static unsigned int num_devices;
+static unsigned long disksize_kb;
+static unsigned long memlimit_kb;
+static char backing_swap[MAX_SWAP_NAME_LEN];
+static int ramzswap_major;
+static struct ramzswap *devices;
+ * Pages that compress to larger than this size are
+ * forwarded to backing swap if present, or stored
+ * uncompressed in memory otherwise.
+static unsigned int max_zpage_size;
+static int rzs_test_flag(struct ramzswap *rzs, u32 index,
+ enum rzs_pageflags flag)
+ return rzs->table[index].flags & BIT(flag);
+static void rzs_set_flag(struct ramzswap *rzs, u32 index,
+ enum rzs_pageflags flag)
+ rzs->table[index].flags |= BIT(flag);
+static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
+ enum rzs_pageflags flag)
+ rzs->table[index].flags &= ~BIT(flag);
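A small usage sketch of the helpers above (illustrative only; slot 42 is an arbitrary index). Each flag occupies one bit in the per-slot flags field of the table entry declared in ramzswap_drv.h:

	/* Sketch: mark slot 42 zero-filled, test it, then clear it */
	rzs_set_flag(rzs, 42, RZS_ZERO);
	if (rzs_test_flag(rzs, 42, RZS_ZERO))
		rzs_clear_flag(rzs, 42, RZS_ZERO);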
+static int page_zero_filled(void *ptr)
+ unsigned long *page;
+ page = (unsigned long *)ptr;
+ for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
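The loop body is truncated in this excerpt; a self-contained sketch of the complete check, assuming the elided body simply tests each word, is:

	/* Sketch: returns 1 iff the page contains only zero words */
	static int page_zero_filled_sketch(void *ptr)
	{
		unsigned int pos;
		unsigned long *page = ptr;

		for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
			if (page[pos])
				return 0;
		}
		return 1;
	}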
+ * memlimit cannot be greater than backing disk size.
+static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
+ int memlimit_valid = 1;
+ if (!rzs->memlimit) {
+ pr_info("Memory limit not set.\n");
+ memlimit_valid = 0;
+ if (rzs->memlimit > rzs->disksize) {
+ pr_info("Memory limit cannot be greater than "
+ "disksize: limit=%zu, disksize=%zu\n",
+ rzs->memlimit, rzs->disksize);
+ memlimit_valid = 0;
+ if (!memlimit_valid) {
+ size_t mempart, disksize;
+ pr_info("Using default: smaller of (%u%% of RAM) and "
+ "(backing disk size).\n",
+ default_memlimit_perc_ram);
+ mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
+ disksize = rzs->disksize;
+ rzs->memlimit = mempart > disksize ? disksize : mempart;
+ if (rzs->memlimit > totalram_bytes / 2) {
+ "It's not advisable to set the limit to more than half the "
+ "size of memory, since we expect a 2:1 compression ratio. "
+ "The limit represents the amount of *compressed* data we can keep "
+ "\tMemory Size: %zu kB\n"
+ "\tLimit you selected: %zu kB\n"
+ "Continuing anyway ...\n",
+ totalram_bytes >> 10, rzs->memlimit >> 10
+ rzs->memlimit &= PAGE_MASK;
+ BUG_ON(!rzs->memlimit);
+static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
+ if (!rzs->disksize) {
+ "disk size not provided. You can use disksize_kb module "
+ "param to specify size.\nUsing default: (%u%% of RAM).\n",
+ default_disksize_perc_ram
+ rzs->disksize = default_disksize_perc_ram *
+ (totalram_bytes / 100);
+ if (rzs->disksize > 2 * (totalram_bytes)) {
+ "There is little point creating a ramzswap of greater than "
+ "twice the size of memory since we expect a 2:1 compression "
+ "ratio. Note that ramzswap uses about 0.1%% of the size of "
+ "the swap device when not in use so a huge ramzswap is "
+ "\tMemory Size: %zu kB\n"
+ "\tSize you selected: %zu kB\n"
+ "Continuing anyway ...\n",
+ totalram_bytes >> 10, rzs->disksize >> 10
+ rzs->disksize &= PAGE_MASK;
+ * Swap header (1st page of swap device) contains information
+ * to identify it as a swap partition. Prepare such a header
+ * for the ramzswap device (ramzswap0) so that swapon can identify
+ * it as a swap partition. In case a backing swap device is provided,
+ * copy its swap header.
+static int setup_swap_header(struct ramzswap *rzs, union swap_header *s)
+ struct address_space *mapping;
+ union swap_header *backing_swap_header;
+ * There is no backing swap device. Create a swap header
+ * that is acceptable to swapon.
+ if (!rzs->backing_swap) {
+ s->info.version = 1;
+ s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
+ s->info.nr_badpages = 0;
+ memcpy(s->magic.magic, "SWAPSPACE2", 10);
+ * We have a backing swap device. Copy its swap header
+ * to the ramzswap device header. If this header contains
+ * invalid information (backing device not a swap
+ * partition, etc.), swapon will fail for ramzswap,
+ * which is correct behavior - we don't want to swap
+ * over a filesystem partition!
+ /* Read the backing swap header (code from sys_swapon) */
+ mapping = rzs->swap_file->f_mapping;
+ if (!mapping->a_ops->readpage) {
+ page = read_mapping_page(mapping, 0, rzs->swap_file);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ backing_swap_header = kmap(page);
+ memcpy(s, backing_swap_header, sizeof(*s));
+ if (s->info.nr_badpages) {
+ pr_info("Cannot use backing swap with bad pages (%u)\n",
+ s->info.nr_badpages);
+ * ramzswap disksize equals the number of usable pages in the backing
+ * swap. Set last_page in the swap header to match this disksize
+ * ('last_page' means 0-based index of last usable swap page).
+ s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
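For reference, a sketch of the layout swapon checks, abridged from the kernel's union swap_header in <linux/swap.h> (only the fields touched above are shown):

	union swap_header {
		struct {
			char reserved[PAGE_SIZE - 10];
			char magic[10];		/* "SWAPSPACE2" */
		} magic;
		struct {
			char	bootbits[1024];	/* space for disklabel etc. */
			__u32	version;	/* set to 1 above */
			__u32	last_page;	/* 0-based index of last usable page */
			__u32	nr_badpages;	/* set to 0 above */
			/* remaining fields omitted */
		} info;
	};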
+/*static void flush_dcache_page(struct page *page)
+#if defined(CONFIG_ARM)
+ * Ugly hack to get flush_dcache_page() to work on ARM.
+ * page_mapping(page) == NULL after clearing this swap cache flag.
+ * Without clearing this flag, flush_dcache_page() will simply set
+ * the "PG_dcache_dirty" bit and return.
+ /*if (PageSwapCache(page)) {
+ ClearPageSwapCache(page);
+ flush_dcache_page(page);
+#if defined(CONFIG_ARM)
+ SetPageSwapCache(page);
+static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
+ struct ramzswap_ioctl_stats *s)
+ strncpy(s->backing_swap_name, rzs->backing_swap_name,
+ MAX_SWAP_NAME_LEN - 1);
+ s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
+ s->disksize = rzs->disksize;
+ s->memlimit = rzs->memlimit;
+#if defined(CONFIG_RAMZSWAP_STATS)
+ struct ramzswap_stats *rs = &rzs->stats;
+ size_t succ_writes, mem_used;
+ unsigned int good_compress_perc = 0, no_compress_perc = 0;
+ mem_used = xv_get_total_size_bytes(rzs->mem_pool)
+ + (rs->pages_expand << PAGE_SHIFT);
+ succ_writes = stat64_read(rzs, &rs->num_writes) -
+ stat64_read(rzs, &rs->failed_writes);
+ if (succ_writes && rs->pages_stored) {
+ good_compress_perc = rs->good_compress * 100
+ / rs->pages_stored;
+ no_compress_perc = rs->pages_expand * 100
+ / rs->pages_stored;
+ s->num_reads = stat64_read(rzs, &rs->num_reads);
+ s->num_writes = stat64_read(rzs, &rs->num_writes);
+ s->failed_reads = stat64_read(rzs, &rs->failed_reads);
+ s->failed_writes = stat64_read(rzs, &rs->failed_writes);
+ s->invalid_io = stat64_read(rzs, &rs->invalid_io);
+ s->notify_free = stat64_read(rzs, &rs->notify_free);
+ s->pages_zero = rs->pages_zero;
+ s->good_compress_pct = good_compress_perc;
+ s->pages_expand_pct = no_compress_perc;
+ s->pages_stored = rs->pages_stored;
+ s->pages_used = mem_used >> PAGE_SHIFT;
+ s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
+ s->compr_data_size = rs->compr_size;
+ s->mem_used_total = mem_used;
+ s->bdev_num_reads = stat64_read(rzs, &rs->bdev_num_reads);
+ s->bdev_num_writes = stat64_read(rzs, &rs->bdev_num_writes);
+#endif /* CONFIG_RAMZSWAP_STATS */
+static int add_backing_swap_extent(struct ramzswap *rzs,
+ pgoff_t phy_pagenum,
+ struct list_head *head;
+ struct page *curr_page, *new_page;
+ unsigned int extents_per_page = PAGE_SIZE /
+ sizeof(struct ramzswap_backing_extent);
+ idx = rzs->num_extents % extents_per_page;
+ new_page = alloc_page(__GFP_ZERO);
+ if (rzs->num_extents) {
+ curr_page = virt_to_page(rzs->curr_extent);
+ head = &curr_page->lru;
+ head = &rzs->backing_swap_extent_list;
+ list_add(&new_page->lru, head);
+ rzs->curr_extent = page_address(new_page);
+ rzs->curr_extent->phy_pagenum = phy_pagenum;
+ rzs->curr_extent->num_pages = num_pages;
+ pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, "
+ "pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages,
+ phy_pagenum + num_pages - 1, rzs->curr_extent);
+ if (idx != extents_per_page - 1)
+ rzs->curr_extent++;
+static int setup_backing_swap_extents(struct ramzswap *rzs,
+ struct inode *inode, unsigned long *num_pages)
+ unsigned blocks_per_page;
+ pgoff_t contig_pages = 0, total_pages = 0;
+ pgoff_t pagenum = 0, prev_pagenum = 0;
+ sector_t probe_block = 0;
+ sector_t last_block;
+ blkbits = inode->i_blkbits;
+ blocks_per_page = PAGE_SIZE >> blkbits;
+ last_block = i_size_read(inode) >> blkbits;
+ while (probe_block + blocks_per_page <= last_block) {
+ unsigned block_in_page;
+ sector_t first_block;
+ first_block = bmap(inode, probe_block);
+ if (first_block == 0)
+ /* It must be PAGE_SIZE aligned on-disk */
+ if (first_block & (blocks_per_page - 1)) {
+ /* All blocks within this page must be contiguous on disk */
+ for (block_in_page = 1; block_in_page < blocks_per_page;
+ block = bmap(inode, probe_block + block_in_page);
+ if (block != first_block + block_in_page) {
+ /* Discontiguity */
+ * We found a PAGE_SIZE length, PAGE_SIZE aligned
+ pagenum = first_block >> (PAGE_SHIFT - blkbits);
+ if (total_pages && (pagenum != prev_pagenum + 1)) {
+ ret = add_backing_swap_extent(rzs, prev_pagenum -
+ (contig_pages - 1), contig_pages);
+ rzs->num_extents++;
+ prev_pagenum = pagenum;
+ probe_block += blocks_per_page;
+ if (contig_pages) {
+ pr_debug("adding last extent: pagenum=%lu, "
+ "contig_pages=%lu\n", pagenum, contig_pages);
+ ret = add_backing_swap_extent(rzs,
+ prev_pagenum - (contig_pages - 1), contig_pages);
+ rzs->num_extents++;
+ if (!rzs->num_extents) {
+ pr_err("No swap extents found!\n");
+ *num_pages = total_pages;
+ pr_info("Found %lu extents containing %luk\n",
+ rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
+ pr_err("Backing swapfile has holes\n");
+ while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
+ struct list_head *entry = rzs->backing_swap_extent_list.next;
+ page = list_entry(entry, struct page, lru);
+static void map_backing_swap_extents(struct ramzswap *rzs)
+ struct ramzswap_backing_extent *se;
+ struct page *table_page, *se_page;
+ unsigned long num_pages, num_table_pages, entry;
+ unsigned long se_idx, span;
+ unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
+ unsigned extents_per_page = PAGE_SIZE / sizeof(*se);
+ /* True for block device */
+ if (!rzs->num_extents)
+ se_page = list_entry(rzs->backing_swap_extent_list.next,
+ se = page_address(se_page);
+ span = se->num_pages;
+ num_pages = rzs->disksize >> PAGE_SHIFT;
+ num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
+ while (num_table_pages--) {
+ table_page = vmalloc_to_page(&rzs->table[entry]);
+ while (span <= entry) {
+ if (se_idx == rzs->num_extents)
+ if (!(se_idx % extents_per_page)) {
+ se_page = list_entry(se_page->lru.next,
+ se = page_address(se_page);
+ span += se->num_pages;
+ table_page->mapping = (struct address_space *)se;
+ table_page->private = se->num_pages - (span - entry);
+ pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
+ entry, span, table_page->mapping, table_page->private);
+ entry += entries_per_page;
+ * Check if value of backing_swap module param is sane.
+ * Claim this device and set ramzswap size equal to
+ * size of this block device.
+static int setup_backing_swap(struct ramzswap *rzs)
+ unsigned long num_pages = 0;
+ struct inode *inode;
+ struct file *swap_file;
+ struct address_space *mapping;
+ struct block_device *bdev = NULL;
+ if (!rzs->backing_swap_name[0]) {
+ pr_debug("backing_swap param not given\n");
+ pr_debug("Using backing swap device: %s\n", rzs->backing_swap_name);
+ swap_file = filp_open(rzs->backing_swap_name,
+ O_RDWR | O_LARGEFILE, 0);
+ if (IS_ERR(swap_file)) {
+ pr_err("Error opening backing device: %s\n",
+ rzs->backing_swap_name);
+ mapping = swap_file->f_mapping;
+ inode = mapping->host;
+ if (S_ISBLK(inode->i_mode)) {
+ bdev = I_BDEV(inode);
+ ret = bd_claim(bdev, setup_backing_swap);
+ disksize = i_size_read(inode);
+ pr_err("Error reading backing swap size.\n");
+ } else if (S_ISREG(inode->i_mode)) {
+ bdev = inode->i_sb->s_bdev;
+ if (IS_SWAPFILE(inode)) {
+ ret = setup_backing_swap_extents(rzs, inode, &num_pages);
+ disksize = num_pages << PAGE_SHIFT;
+ rzs->swap_file = swap_file;
+ rzs->backing_swap = bdev;
+ rzs->disksize = disksize;
+ filp_close(swap_file, NULL);
+ rzs->backing_swap = NULL;
+ * Map logical page number 'pagenum' to physical page number
+ * on backing swap device. For block device, this is a nop.
+static u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
+ u32 skip_pages, entries_per_page;
+ size_t delta, se_offset, skipped;
+ struct page *table_page, *se_page;
+ struct ramzswap_backing_extent *se;
+ if (!rzs->num_extents)
+ entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
+ table_page = vmalloc_to_page(&rzs->table[pagenum]);
+ se = (struct ramzswap_backing_extent *)table_page->mapping;
+ se_page = virt_to_page(se);
+ skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
+ se_offset = table_page->private + skip_pages;
+ if (se_offset < se->num_pages)
+ return se->phy_pagenum + se_offset;
+ skipped = se->num_pages - table_page->private;
+ struct ramzswap_backing_extent *se_base;
+ u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);
+ /* Get next swap extent */
+ se_base = (struct ramzswap_backing_extent *)
+ page_address(se_page);
+ if (se - se_base == se_entries_per_page - 1) {
+ se_page = list_entry(se_page->lru.next,
+ se = page_address(se_page);
+ skipped += se->num_pages;
+ } while (skipped < skip_pages);
+ delta = skipped - skip_pages;
+ se_offset = se->num_pages - delta;
+ return se->phy_pagenum + se_offset;
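A worked sketch of the arithmetic above, with made-up numbers: assume 4 KB pages, entries_per_page = 256, and a first extent covering physical pages 1000..1099 (num_pages = 100).

	/* Hypothetical lookup for pagenum = 300:
	 *   table_page = vmalloc_to_page(&rzs->table[300]);
	 *   skip_pages = 300 - (300 / 256 * 256) = 44;
	 *   se         = table_page->mapping;   (extent for this table page)
	 *   se_offset  = table_page->private + 44;
	 * If se_offset < se->num_pages, the result is simply
	 *   se->phy_pagenum + se_offset        (e.g. 1000 + se_offset);
	 * otherwise the do/while walks the extent list until the running
	 * total 'skipped' covers skip_pages, then indexes into that extent. */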
+static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
+#if defined(CONFIG_RAMZSWAP_STATS)
+ struct page *page = rzs->table[index].page;
+ u32 offset = rzs->table[index].offset;
+ if (unlikely(!page)) {
+ * No memory is allocated for zero filled pages.
+ * Simply clear zero page flag.
+ if (rzs_test_flag(rzs, index, RZS_ZERO)) {
+ rzs_clear_flag(rzs, index, RZS_ZERO);
+ stat_dec(&rzs->stats.pages_zero);
+ if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
+#if defined(CONFIG_RAMZSWAP_STATS)
+ stat_dec(&rzs->stats.pages_expand);
+ rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
+#if defined(CONFIG_RAMZSWAP_STATS)
+ obj = kmap_atomic(page, KM_USER0) + offset;
+ clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
+ kunmap_atomic(obj, KM_USER0);
+ if (clen <= PAGE_SIZE / 2)
+ stat_dec(&rzs->stats.good_compress);
+ xv_free(rzs->mem_pool, page, offset);
+#if defined(CONFIG_RAMZSWAP_STATS)
+ rzs->stats.compr_size -= clen;
+ stat_dec(&rzs->stats.pages_stored);
+ rzs->table[index].page = NULL;
+ rzs->table[index].offset = 0;
+static int handle_zero_page(struct bio *bio)
+ struct page *page = bio->bi_io_vec[0].bv_page;
+ user_mem = kmap_atomic(page, KM_USER0);
+ memset(user_mem, 0, PAGE_SIZE);
+ kunmap_atomic(user_mem, KM_USER0);
+ flush_dcache_page(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
+ unsigned char *user_mem, *cmem;
+ page = bio->bi_io_vec[0].bv_page;
+ index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ user_mem = kmap_atomic(page, KM_USER0);
+ cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
+ rzs->table[index].offset;
+ memcpy(user_mem, cmem, PAGE_SIZE);
+ kunmap_atomic(user_mem, KM_USER0);
+ kunmap_atomic(cmem, KM_USER1);
+ flush_dcache_page(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ * Called when the requested page is not present in ramzswap.
+ * It is either in the backing swap device (if present), or
+ * this is an attempt to read before any previous write
+ * to this location - this happens due to readahead when the
+ * swap device is read from user space (e.g. during swapon).
+static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
+ * Always forward such requests to the backing swap
+ * device (if present)
+ if (rzs->backing_swap) {
+ stat64_dec(rzs, &rzs->stats.num_reads);
+ stat64_inc(rzs, &rzs->stats.bdev_num_reads);
+ bio->bi_bdev = rzs->backing_swap;
+ * In case backing swap is a file, find the right offset within
+ * the file corresponding to logical position 'index'. For a block
+ * device, this is a nop.
+ pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ bio->bi_sector = map_backing_swap_page(rzs, pagenum)
+ << SECTORS_PER_PAGE_SHIFT;
+ * It's an unlikely event in case the backing dev is
+ pr_debug("Read before write on swap device: "
+ "sector=%lu, size=%u, offset=%u\n",
+ (ulong)(bio->bi_sector), bio->bi_size,
+ bio->bi_io_vec[0].bv_offset);
+ /* Do nothing. Just return success */
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
+ struct zobj_header *zheader;
+ unsigned char *user_mem, *cmem;
+ stat64_inc(rzs, &rzs->stats.num_reads);
+ page = bio->bi_io_vec[0].bv_page;
+ index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ if (rzs_test_flag(rzs, index, RZS_ZERO))
+ return handle_zero_page(bio);
+ /* Requested page is not present in compressed area */
+ if (!rzs->table[index].page)
+ return handle_ramzswap_fault(rzs, bio);
+ /* Page is stored uncompressed since it's incompressible */
+ if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
+ return handle_uncompressed_page(rzs, bio);
+ user_mem = kmap_atomic(page, KM_USER0);
+ cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
+ rzs->table[index].offset;
+ ret = lzo1x_decompress_safe(
+ cmem + sizeof(*zheader),
+ xv_get_object_size(cmem) - sizeof(*zheader),
+ kunmap_atomic(user_mem, KM_USER0);
+ kunmap_atomic(cmem, KM_USER1);
+ /* should NEVER happen */
+ if (unlikely(ret != LZO_E_OK)) {
+ pr_err("Decompression failed! err=%d, page=%u\n",
+ stat64_inc(rzs, &rzs->stats.failed_reads);
+ flush_dcache_page(page);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
+ int ret, fwd_write_request = 0;
+ struct zobj_header *zheader;
+ struct page *page, *page_store;
+ unsigned char *user_mem, *cmem, *src;
+ stat64_inc(rzs, &rzs->stats.num_writes);
+ page = bio->bi_io_vec[0].bv_page;
+ index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ src = rzs->compress_buffer;
+ if (rzs->table[index].page || rzs_test_flag(rzs, index, RZS_ZERO))
+ ramzswap_free_page(rzs, index);
+ mutex_lock(&rzs->lock);
+ user_mem = kmap_atomic(page, KM_USER0);
+ if (page_zero_filled(user_mem)) {
+ kunmap_atomic(user_mem, KM_USER0);
+ rzs_set_flag(rzs, index, RZS_ZERO);
+ mutex_unlock(&rzs->lock);
+ stat_inc(&rzs->stats.pages_zero);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio_endio(bio, 0);
+ if (rzs->backing_swap &&
+ (rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
+ kunmap_atomic(user_mem, KM_USER0);
+ mutex_unlock(&rzs->lock);
+ fwd_write_request = 1;
+ ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
+ rzs->compress_workmem);
+ kunmap_atomic(user_mem, KM_USER0);
+ if (unlikely(ret != LZO_E_OK)) {
+ mutex_unlock(&rzs->lock);
+ pr_err("Compression failed! err=%d\n", ret);
+ stat64_inc(rzs, &rzs->stats.failed_writes);
+ * Page is incompressible. Forward it to the backing swap
+ * if present. Otherwise, store it as-is (uncompressed)
+ * since we do not want to return too many swap write
+ * errors, which have the side effect of hanging the system.
+ if (unlikely(clen > max_zpage_size)) {
+ if (rzs->backing_swap) {
+ mutex_unlock(&rzs->lock);
+ fwd_write_request = 1;
+ page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+ if (unlikely(!page_store)) {
+ mutex_unlock(&rzs->lock);
+ pr_info("Error allocating memory for incompressible "
+ "page: %u\n", index);
+ stat64_inc(rzs, &rzs->stats.failed_writes);
+ rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
+ stat_inc(&rzs->stats.pages_expand);
+ rzs->table[index].page = page_store;
+ src = kmap_atomic(page, KM_USER0);
+ if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
+ &rzs->table[index].page, &offset,
+ GFP_NOIO | __GFP_HIGHMEM)) {
+ mutex_unlock(&rzs->lock);
+ pr_info("Error allocating memory for compressed "
+ "page: %u, size=%zu\n", index, clen);
+ stat64_inc(rzs, &rzs->stats.failed_writes);
+ if (rzs->backing_swap)
+ fwd_write_request = 1;
+ rzs->table[index].offset = offset;
+ cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
+ rzs->table[index].offset;
+ /* Back-reference needed for memory defragmentation */
+ if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
+ zheader = (struct zobj_header *)cmem;
+ zheader->table_idx = index;
+ cmem += sizeof(*zheader);
+ memcpy(cmem, src, clen);
+ kunmap_atomic(cmem, KM_USER1);
+ if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
+ kunmap_atomic(src, KM_USER0);
+ /* Update stats */
+ rzs->stats.compr_size += clen;
+ stat_inc(&rzs->stats.pages_stored);
+ if (clen <= PAGE_SIZE / 2)
+ stat_inc(&rzs->stats.good_compress);
+ mutex_unlock(&rzs->lock);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio_endio(bio, 0);
+ if (fwd_write_request) {
+ stat64_inc(rzs, &rzs->stats.bdev_num_writes);
+ bio->bi_bdev = rzs->backing_swap;
+ * TODO: We currently have linear mapping of ramzswap and
+ * backing swap sectors. This is not desired since we want
+ * to optimize writes to backing swap to minimize disk seeks
+ * or have effective wear leveling (for SSDs). Also, a
+ * non-linear mapping is required to implement compressed
+ * on-disk swapping.
+ bio->bi_sector = get_backing_swap_page()
+ << SECTORS_PER_PAGE_SHIFT;
+ * In case backing swap is a file, find the right offset within
+ * the file corresponding to logical position 'index'. For block
+ * device, this is a nop.
+ bio->bi_sector = map_backing_swap_page(rzs, index)
+ << SECTORS_PER_PAGE_SHIFT;
+ bio_io_error(bio);
+ * Check if request is within bounds and page aligned.
+static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio)
+ (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
+ (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
+ (bio->bi_vcnt != 1) ||
+ (bio->bi_size != PAGE_SIZE) ||
+ (bio->bi_io_vec[0].bv_offset != 0))) {
+ /* swap request is valid */
+ * Handler function for all ramzswap I/O requests.
+static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
+ struct ramzswap *rzs = queue->queuedata;
+ if (unlikely(!rzs->init_done)) {
+ bio_io_error(bio);
+ if (!valid_swap_request(rzs, bio)) {
+ stat64_inc(rzs, &rzs->stats.invalid_io);
+ bio_io_error(bio);
+ switch (bio_data_dir(bio)) {
+ ret = ramzswap_read(rzs, bio);
+ ret = ramzswap_write(rzs, bio);
+static void reset_device(struct ramzswap *rzs, struct block_device *bdev)
+ int is_backing_blkdev = 0;
+ size_t index, num_pages;
+ unsigned entries_per_page;
+ unsigned long num_table_pages, entry = 0;
+ rzs->init_done = 0;
+ if (rzs->backing_swap && !rzs->num_extents)
+ is_backing_blkdev = 1;
+ num_pages = rzs->disksize >> PAGE_SHIFT;
+ /* Free various per-device buffers */
+ kfree(rzs->compress_workmem);
+ free_pages((unsigned long)rzs->compress_buffer, 1);
+ rzs->compress_workmem = NULL;
+ rzs->compress_buffer = NULL;
+ /* Free all pages that are still in this ramzswap device */
+ for (index = 0; index < num_pages; index++) {
+ struct page *page;
+ page = rzs->table[index].page;
+ offset = rzs->table[index].offset;
+ if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
+ __free_page(page);
+ xv_free(rzs->mem_pool, page, offset);
+ entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
+ num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
+ * Set page->mapping to NULL for every table page.
+ * Otherwise, we will hit bad_page() during free.
+ while (rzs->num_extents && num_table_pages--) {
+ struct page *page;
+ page = vmalloc_to_page(&rzs->table[entry]);
+ page->mapping = NULL;
+ entry += entries_per_page;
+ vfree(rzs->table);
+ rzs->table = NULL;
+ xv_destroy_pool(rzs->mem_pool);
+ rzs->mem_pool = NULL;
+ /* Free all swap extent pages */
+ while (!list_empty(&rzs->backing_swap_extent_list)) {
+ struct page *page;
+ struct list_head *entry;
+ entry = rzs->backing_swap_extent_list.next;
+ page = list_entry(entry, struct page, lru);
+ __free_page(page);
+ INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
+ rzs->num_extents = 0;
+ /* Close backing swap device, if present */
+ if (rzs->backing_swap) {
+ if (is_backing_blkdev)
+ bd_release(rzs->backing_swap);
+ filp_close(rzs->swap_file, NULL);
+ rzs->backing_swap = NULL;
+ memset(rzs->backing_swap_name, 0, MAX_SWAP_NAME_LEN);
+ memset(&rzs->stats, 0, sizeof(rzs->stats));
+ rzs->disksize = 0;
+ rzs->memlimit = 0;
+static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
+ struct page *page;
+ union swap_header *swap_header;
+ if (rzs->init_done) {
+ pr_info("Device already initialized!\n");
+ dev_id = rzs - devices;
+ ret = setup_backing_swap(rzs);
+ if (rzs->backing_swap)
+ ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
+ ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
+ rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ if (!rzs->compress_workmem) {
+ pr_err("Error allocating compressor working memory!\n");
+ rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
+ if (!rzs->compress_buffer) {
+ pr_err("Error allocating compressor buffer space\n");
+ num_pages = rzs->disksize >> PAGE_SHIFT;
+ rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
+ if (!rzs->table) {
+ pr_err("Error allocating ramzswap address table\n");
+ /* To prevent accessing table entries during cleanup */
+ rzs->disksize = 0;
+ memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
+ map_backing_swap_extents(rzs);
+ page = alloc_page(__GFP_ZERO);
+ pr_err("Error allocating swap header page\n");
+ rzs->table[0].page = page;
+ rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);
+ swap_header = kmap(page);
+ ret = setup_swap_header(rzs, swap_header);
+ pr_err("Error setting swap header\n");
+ set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
+ * We have identity mapping of sectors between ramzswap and
+ * the backing swap device. So this queue flag
+ * should be set according to the backing dev.
+ if (!rzs->backing_swap ||
+ blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
+ rzs->mem_pool = xv_create_pool();
+ if (!rzs->mem_pool) {
+ pr_err("Error creating memory pool\n");
+ * Pages that compress to a size greater than this are forwarded
+ * to the physical swap disk (if a backing dev is provided).
+ * TODO: make this configurable
+ if (rzs->backing_swap)
+ max_zpage_size = max_zpage_size_bdev;
+ max_zpage_size = max_zpage_size_nobdev;
+ pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);
+ rzs->init_done = 1;
+ if (rzs->backing_swap) {
+ pr_info("/dev/ramzswap%d initialized: "
+ "backing_swap=%s, memlimit_kb=%zu\n",
+ dev_id, rzs->backing_swap_name, rzs->memlimit >> 10);
+ pr_info("/dev/ramzswap%d initialized: "
+ "disksize_kb=%zu\n", dev_id, rzs->disksize >> 10);
+ reset_device(rzs, NULL);
+ pr_err("Initialization failed: err=%d\n", ret);
+static int ramzswap_ioctl_reset_device(struct ramzswap *rzs,
+ struct block_device *bdev)
+ if (rzs->init_done)
+ reset_device(rzs, bdev);
+static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+ size_t disksize_kb, memlimit_kb;
+ struct ramzswap *rzs = bdev->bd_disk->private_data;
+ case RZSIO_SET_DISKSIZE_KB:
+ if (rzs->init_done) {
+ if (copy_from_user(&disksize_kb, (void *)arg,
+ _IOC_SIZE(cmd))) {
+ rzs->disksize = disksize_kb << 10;
+ pr_debug("Disk size set to %zu kB\n", disksize_kb);
+ case RZSIO_SET_MEMLIMIT_KB:
+ if (rzs->init_done) {
+ /* TODO: allow changing memlimit */
+ if (copy_from_user(&memlimit_kb, (void *)arg,
+ _IOC_SIZE(cmd))) {
+ rzs->memlimit = memlimit_kb << 10;
+ pr_debug("Memory limit set to %zu kB\n", memlimit_kb);
+ case RZSIO_SET_BACKING_SWAP:
+ if (rzs->init_done) {
+ if (copy_from_user(&rzs->backing_swap_name, (void *)arg,
+ _IOC_SIZE(cmd))) {
+ rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
+ pr_debug("Backing swap set to %s\n", rzs->backing_swap_name);
+ case RZSIO_GET_STATS:
+ struct ramzswap_ioctl_stats *stats;
+ if (!rzs->init_done) {
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ ramzswap_ioctl_get_stats(rzs, stats);
+ if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
+ ret = ramzswap_ioctl_init_device(rzs);
+ /* Do not reset an active device! */
+ if (bdev->bd_holders) {
+ ret = ramzswap_ioctl_reset_device(rzs, bdev);
+ pr_info("Invalid ioctl %u\n", cmd);
+#if defined(CONFIG_SWAP_FREE_NOTIFY)
+void ramzswap_slot_free_notify(struct block_device *bdev, sector_t bi_sector)
+ struct ramzswap *rzs = bdev->bd_disk->private_data;
+ ramzswap_free_page(rzs, bi_sector >> SECTORS_PER_PAGE_SHIFT);
+ stat64_inc(rzs, &rzs->stats.notify_free);
+static struct block_device_operations ramzswap_devops = {
+ .ioctl = ramzswap_ioctl,
+#if defined(CONFIG_SWAP_FREE_NOTIFY)
+ .swap_slot_free_notify = ramzswap_slot_free_notify,
+ .owner = THIS_MODULE
+static int create_device(struct ramzswap *rzs, int device_id)
+ mutex_init(&rzs->lock);
+ spin_lock_init(&rzs->stat64_lock);
+ INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
+ rzs->queue = blk_alloc_queue(GFP_KERNEL);
+ if (!rzs->queue) {
+ pr_err("Error allocating disk queue for device %d\n",
+ blk_queue_make_request(rzs->queue, ramzswap_make_request);
+ rzs->queue->queuedata = rzs;
+ /* gendisk structure */
+ rzs->disk = alloc_disk(1);
+ blk_cleanup_queue(rzs->queue);
+ pr_warning("Error allocating disk structure for device %d\n",
+ rzs->disk->major = ramzswap_major;
+ rzs->disk->first_minor = device_id;
+ rzs->disk->fops = &ramzswap_devops;
+ rzs->disk->queue = rzs->queue;
+ rzs->disk->private_data = rzs;
+ snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
+ * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
+ * or set equal to backing swap device (if provided)
+ set_capacity(rzs->disk, 0);
+ blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
+ blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
+ add_disk(rzs->disk);
+ rzs->disk->flags &= ~GENHD_FL_REMAP_SWAPPED_PAGES;
+ rzs->init_done = 0;
+static void destroy_device(struct ramzswap *rzs)
+ del_gendisk(rzs->disk);
+ put_disk(rzs->disk);
+ blk_cleanup_queue(rzs->queue);
+static int __init ramzswap_init(void)
+ struct ramzswap *rzs;
+ if (num_devices > max_num_devices) {
+ pr_warning("Invalid value for num_devices: %u\n",
+ ramzswap_major = register_blkdev(0, "ramzswap");
+ if (ramzswap_major <= 0) {
+ pr_warning("Unable to get major number\n");
+ if (!num_devices) {
+ pr_info("num_devices not specified. Using default: 1\n");
+ /* Allocate the device array and initialize each one */
+ pr_debug("Creating %u devices ...\n", num_devices);
+ devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
+ for (dev_id = 0; dev_id < num_devices; dev_id++) {
+ if (create_device(&devices[dev_id], dev_id)) {
+ goto free_devices;
+ * Initialize the first device (/dev/ramzswap0)
+ * if parameters are provided
+ rzs = &devices[0];
+ * User specifies either <disksize_kb> or <backing_swap, memlimit_kb>
+ if (disksize_kb) {
+ rzs->disksize = disksize_kb << 10;
+ ret = ramzswap_ioctl_init_device(rzs);
+ goto free_devices;
+ if (backing_swap[0]) {
+ rzs->memlimit = memlimit_kb << 10;
+ strncpy(rzs->backing_swap_name, backing_swap,
+ MAX_SWAP_NAME_LEN);
+ rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
+ ret = ramzswap_ioctl_init_device(rzs);
+ goto free_devices;
+ /* User specified memlimit_kb but not backing_swap */
+ if (memlimit_kb) {
+ pr_info("memlimit_kb parameter is valid only when "
+ "backing_swap is also specified. Aborting.\n");
+ goto free_devices;
+ destroy_device(&devices[--dev_id]);
+ unregister_blkdev(ramzswap_major, "ramzswap");
+static void __exit ramzswap_exit(void)
+ struct ramzswap *rzs;
+ for (i = 0; i < num_devices; i++) {
+ rzs = &devices[i];
+ destroy_device(rzs);
+ if (rzs->init_done)
+ reset_device(rzs, NULL);
+ unregister_blkdev(ramzswap_major, "ramzswap");
+ pr_debug("Cleanup done!\n");
+ * Module parameters
+/* Optional: default = 1 */
+module_param(num_devices, uint, 0);
+MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
+ * User specifies either <disksize_kb> or <backing_swap, memlimit_kb>
+ * parameters. You must specify these parameters if the first device
+ * has to be initialized on module load without using the rzscontrol utility.
+ * This is useful for embedded systems, where shipping an additional binary
+ * (rzscontrol) might not be desirable.
+ * These parameters are used to initialize just the first (/dev/ramzswap0)
+ * device. To initialize additional devices, use the rzscontrol utility. If
+ * these parameters are not provided, then the first device is also
+ * left in an uninitialized state.
+/* Optional: default = 25% of RAM */
+module_param(disksize_kb, ulong, 0);
+MODULE_PARM_DESC(disksize_kb, "Disksize in KB");
+/* Optional: default = 15% of RAM */
+module_param(memlimit_kb, ulong, 0);
+MODULE_PARM_DESC(memlimit_kb, "Memlimit in KB");
+/* Optional: default = <NULL> */
+module_param_string(backing_swap, backing_swap, sizeof(backing_swap), 0);
+MODULE_PARM_DESC(backing_swap, "Backing swap name");
+module_init(ramzswap_init);
+module_exit(ramzswap_exit);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
+MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.h
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.h
+ * Compressed RAM based swap device
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ * Project home: http://compcache.googlecode.com
+#ifndef _RAMZSWAP_DRV_H_
+#define _RAMZSWAP_DRV_H_
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include "ramzswap_ioctl.h"
+#include "xvmalloc.h"
+ * Some arbitrary value. This is just to catch an
+ * invalid value for the num_devices module parameter.
+static const unsigned max_num_devices = 32;
+ * Stored at the beginning of each compressed object.
+ * It stores a back-reference to the table entry which points to this
+ * object. This is required to support memory defragmentation or
+ * migrating compressed pages to a backing swap disk.
+struct zobj_header {
+/*-- Configurable parameters */
+/* Default ramzswap disk size: 25% of total RAM */
+static const unsigned default_disksize_perc_ram = 25;
+static const unsigned default_memlimit_perc_ram = 15;
+ * Max compressed page size when a backing device is provided.
+ * Pages that compress to a size greater than this are sent to the
+ * physical swap disk.
+static const unsigned max_zpage_size_bdev = PAGE_SIZE / 2;
+ * Max compressed page size when there is no backing dev.
+ * Pages that compress to a size greater than this are stored
+ * uncompressed in memory.
+static const unsigned max_zpage_size_nobdev = PAGE_SIZE / 4 * 3;
+ * NOTE: max_zpage_size_{bdev,nobdev} sizes must be
+ * less than or equal to:
+ * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
+ * since otherwise xv_malloc would always return failure.
+/*-- End of configurable params */
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
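A quick worked example of these constants, assuming the 4 KB pages used on this target (PAGE_SHIFT = 12, SECTOR_SHIFT = 9):

	/* SECTORS_PER_PAGE_SHIFT = 12 - 9 = 3, so SECTORS_PER_PAGE = 8.
	 * This is how the driver turns a bio's start sector into a
	 * page-sized swap slot index, as in ramzswap_read():
	 *   index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
	 * so sectors 0..7 map to slot 0, sectors 8..15 to slot 1, etc. */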
+/* Flags for ramzswap pages (table[page_no].flags) */
+enum rzs_pageflags {
+ /* Page is stored uncompressed */
+ /* Page consists entirely of zeros */
+ __NR_RZS_PAGEFLAGS,
+/*-- Data structures */
+ * Allocated for each swap slot, indexed by page no.
+ * These table entries must fit exactly in a page.
+ struct page *page;
+ u8 count; /* object ref count (not yet used) */
+} __attribute__((aligned(4)));
+ * Swap extent information in case backing swap is a regular
+ * file. These extent entries must fit exactly in a page.
+struct ramzswap_backing_extent {
+ pgoff_t phy_pagenum;
+ pgoff_t num_pages;
+} __attribute__((aligned(4)));
+struct ramzswap_stats {
+ size_t compr_size; /* compressed size of pages stored -
+ * needed to enforce memlimit */
+#if defined(CONFIG_RAMZSWAP_STATS)
+ u64 num_reads; /* failed + successful */
+ u64 num_writes; /* --do-- */
+ u64 failed_reads; /* should NEVER! happen */
+ u64 failed_writes; /* can happen when memory is too low */
+ u64 invalid_io; /* non-swap I/O requests */
+ u64 notify_free; /* no. of swap slot free notifications */
+ u32 pages_zero; /* no. of zero filled pages */
+ u32 pages_stored; /* no. of pages currently stored */
+ u32 good_compress; /* no. of pages with compression ratio <= 50% */
+ u32 pages_expand; /* no. of incompressible pages */
+ u64 bdev_num_reads; /* no. of reads on backing dev */
+ u64 bdev_num_writes; /* no. of writes on backing dev */
+ struct xv_pool *mem_pool;
+ void *compress_workmem;
+ void *compress_buffer;
+ struct table *table;
+ spinlock_t stat64_lock; /* protect 64-bit stats */
+ struct mutex lock;
+ struct request_queue *queue;
+ struct gendisk *disk;
+ * This is the limit on compressed data size (stats.compr_size).
+ * It's applicable only when a backing swap device is present.
+ size_t memlimit; /* bytes */
+ * This is the limit on the amount of *uncompressed* worth of data
+ * we can hold. When a backing swap device is provided, it is
+ * set equal to the device size.
+ size_t disksize; /* bytes */
+ struct ramzswap_stats stats;
+ /* backing swap device info */
+ struct ramzswap_backing_extent *curr_extent;
+ struct list_head backing_swap_extent_list;
+ unsigned long num_extents;
+ char backing_swap_name[MAX_SWAP_NAME_LEN];
+ struct block_device *backing_swap;
+ struct file *swap_file;
+/* Debugging and Stats */
+#if defined(CONFIG_RAMZSWAP_STATS)
+static void stat_inc(u32 *v)
+static void stat_dec(u32 *v)
+static void stat64_inc(struct ramzswap *rzs, u64 *v)
+ spin_lock(&rzs->stat64_lock);
+ spin_unlock(&rzs->stat64_lock);
+static void stat64_dec(struct ramzswap *rzs, u64 *v)
+ spin_lock(&rzs->stat64_lock);
+ spin_unlock(&rzs->stat64_lock);
+static u64 stat64_read(struct ramzswap *rzs, u64 *v)
+ spin_lock(&rzs->stat64_lock);
+ spin_unlock(&rzs->stat64_lock);
+#define stat_inc(v)
+#define stat_dec(v)
+#define stat64_inc(r, v)
+#define stat64_dec(r, v)
+#define stat64_read(r, v)
+#endif /* CONFIG_RAMZSWAP_STATS */
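A small illustration of the intent here (not from the patch): with CONFIG_RAMZSWAP_STATS disabled, the empty macro variants make call sites vanish at preprocessing, so the hot paths carry no accounting cost:

	/* With stats enabled: takes stat64_lock and bumps the counter.
	 * With stats disabled: the whole statement compiles to nothing. */
	stat64_inc(rzs, &rzs->stats.num_reads);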
Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_ioctl.h
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_ioctl.h
+ * Compressed RAM based swap device
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ * Project home: http://compcache.googlecode.com
+#ifndef _RAMZSWAP_IOCTL_H_
+#define _RAMZSWAP_IOCTL_H_
+#define MAX_SWAP_NAME_LEN 128
+struct ramzswap_ioctl_stats {
+ char backing_swap_name[MAX_SWAP_NAME_LEN];
+ u64 memlimit; /* only applicable if backing swap present */
+ u64 disksize; /* user specified or equal to backing swap
+ * size (if present) */
+ u64 num_reads; /* failed + successful */
+ u64 num_writes; /* --do-- */
+ u64 failed_reads; /* should NEVER! happen */
+ u64 failed_writes; /* can happen when memory is too low */
+ u64 invalid_io; /* non-swap I/O requests */
+ u64 notify_free; /* no. of swap slot free notifications */
+ u32 pages_zero; /* no. of zero filled pages */
+ u32 good_compress_pct; /* % of pages with compression ratio <= 50% */
+ u32 pages_expand_pct; /* % of incompressible pages */
+ u64 orig_data_size;
+ u64 compr_data_size;
+ u64 mem_used_total;
+ u64 bdev_num_reads; /* no. of reads on backing dev */
+ u64 bdev_num_writes; /* no. of writes on backing dev */
+} __attribute__ ((packed, aligned(4)));
+#define RZSIO_SET_DISKSIZE_KB _IOW('z', 0, size_t)
+#define RZSIO_SET_MEMLIMIT_KB _IOW('z', 1, size_t)
+#define RZSIO_SET_BACKING_SWAP _IOW('z', 2, unsigned char[MAX_SWAP_NAME_LEN])
+#define RZSIO_GET_STATS _IOR('z', 3, struct ramzswap_ioctl_stats)
+#define RZSIO_INIT _IO('z', 4)
+#define RZSIO_RESET _IO('z', 5)
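As a usage sketch, this is roughly what rzscontrol does to fetch stats. A minimal, illustrative userspace program (error handling abridged; it assumes this header is visible and that kernel-style u64/u32 types are provided, e.g. via <linux/types.h> typedefs):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include "ramzswap_ioctl.h"

	int main(void)
	{
		struct ramzswap_ioctl_stats stats;
		int fd = open("/dev/ramzswap0", O_RDONLY);

		if (fd < 0 || ioctl(fd, RZSIO_GET_STATS, &stats) < 0) {
			perror("ramzswap stats");
			return 1;
		}
		printf("orig=%llu compr=%llu total=%llu\n",
		       (unsigned long long)stats.orig_data_size,
		       (unsigned long long)stats.compr_data_size,
		       (unsigned long long)stats.mem_used_total);
		close(fd);
		return 0;
	}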
Index: kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.c
===================================================================
+++ kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.c
+ * xvmalloc memory allocator
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include "xvmalloc.h"
+#include "xvmalloc_int.h"
+static void stat_inc(u64 *value)
+ *value = *value + 1;
+static void stat_dec(u64 *value)
+ *value = *value - 1;
+static int test_flag(struct block_header *block, enum blockflags flag)
+ return block->prev & BIT(flag);
+static void set_flag(struct block_header *block, enum blockflags flag)
+ block->prev |= BIT(flag);
+static void clear_flag(struct block_header *block, enum blockflags flag)
+ block->prev &= ~BIT(flag);
+ * Given a <page, offset> pair, provide a dereferenceable pointer.
+ * This is called from the xv_malloc/xv_free path, so it
+ * needs to be fast.
+static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
+ unsigned char *base;
+ base = kmap_atomic(page, type);
+ return base + offset;
+static void put_ptr_atomic(void *ptr, enum km_type type)
+ kunmap_atomic(ptr, type);
+static u32 get_blockprev(struct block_header *block)
+ return block->prev & PREV_MASK;
+static void set_blockprev(struct block_header *block, u16 new_offset)
+ block->prev = new_offset | (block->prev & FLAGS_MASK);
+static struct block_header *BLOCK_NEXT(struct block_header *block)
+ return (struct block_header *)
+ ((char *)block + block->size + XV_ALIGN);
+ * Get index of free list containing blocks of maximum size
+ * which is less than or equal to given size.
+static u32 get_index_for_insert(u32 size)
+ if (unlikely(size > XV_MAX_ALLOC_SIZE))
+ size = XV_MAX_ALLOC_SIZE;
+ size &= ~FL_DELTA_MASK;
+ return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
+ * Get index of free list having blocks of size greater than
+ * or equal to requested size.
+static u32 get_index(u32 size)
+ if (unlikely(size < XV_MIN_ALLOC_SIZE))
+ size = XV_MIN_ALLOC_SIZE;
+ size = ALIGN(size, FL_DELTA);
+ return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
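A worked example of the two index functions, assuming the constants from xvmalloc_int.h (listed in the diffstat but not shown in this excerpt): XV_MIN_ALLOC_SIZE = 32, FL_DELTA = 8, FL_DELTA_SHIFT = 3.

	/* Filing a free block of size 100:
	 *   get_index_for_insert(100): 100 & ~7 = 96 -> (96 - 32) >> 3 = 8
	 * (rounded down, so every block on list 8 is at least 96 bytes).
	 * Serving an allocation request of size 100:
	 *   get_index(100): ALIGN(100, 8) = 104 -> (104 - 32) >> 3 = 9
	 * (rounded up, so any block found at list 9 or above fits). */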
+ * find_block - find block of at least given size
+ * @pool: memory pool to search from
+ * @size: size of block required
+ * @page: page containing required block
+ * @offset: offset within the page where block is located.
+ * Searches the two-level bitmap to locate a block of at least
+ * the given size. If such a block is found, it provides
+ * <page, offset> to identify this block and returns the index
+ * in the freelist where we found this block.
+ * Otherwise, returns 0 and <page, offset> params are not touched.
+static u32 find_block(struct xv_pool *pool, u32 size,
+ struct page **page, u32 *offset)
+ ulong flbitmap, slbitmap;
+ u32 flindex, slindex, slbitstart;
+ /* There are no free blocks in this pool */
+ if (!pool->flbitmap)
+ /* Get the freelist index corresponding to this size */
+ slindex = get_index(size);
+ slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
+ slbitstart = slindex % BITS_PER_LONG;
+ * If the freelist is not empty at this index, we found the
+ * block - the head of this list. This is an approximate best-fit match.
+ if (test_bit(slbitstart, &slbitmap)) {
+ *page = pool->freelist[slindex].page;
+ *offset = pool->freelist[slindex].offset;
+ * No best-fit found. Search a bit further in the bitmap for a free block.
+ * The second level bitmap consists of a series of BITS_PER_LONG-bit chunks.
+ * Search further in the chunk where we expected a best-fit, starting from
+ * the index location found above.
+ slbitmap >>= slbitstart;
+ /* Skip this search if we were already at end of this bitmap chunk */
+ if ((slbitstart != BITS_PER_LONG) && slbitmap) {
+ slindex += __ffs(slbitmap) + 1;
+ *page = pool->freelist[slindex].page;
+ *offset = pool->freelist[slindex].offset;
+ /* Now do a full two-level bitmap search to find next nearest fit */
+ flindex = slindex / BITS_PER_LONG;
+ flbitmap = (pool->flbitmap) >> (flindex + 1);
+ flindex += __ffs(flbitmap) + 1;
+ slbitmap = pool->slbitmap[flindex];
+ slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
+ *page = pool->freelist[slindex].page;
+ *offset = pool->freelist[slindex].offset;
+ * Insert block at <page, offset> in freelist of given pool.
+ * The freelist used depends on the block size.
+static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
+ struct block_header *block)
+ u32 flindex, slindex;
+ struct block_header *nextblock;
+ slindex = get_index_for_insert(block->size);
+ flindex = slindex / BITS_PER_LONG;
+ block->link.prev_page = 0;
+ block->link.prev_offset = 0;
+ block->link.next_page = pool->freelist[slindex].page;
+ block->link.next_offset = pool->freelist[slindex].offset;
+ pool->freelist[slindex].page = page;
+ pool->freelist[slindex].offset = offset;
+ if (block->link.next_page) {
+ nextblock = get_ptr_atomic(block->link.next_page,
+ block->link.next_offset, KM_USER1);
+ nextblock->link.prev_page = page;
+ nextblock->link.prev_offset = offset;
+ put_ptr_atomic(nextblock, KM_USER1);
+ __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
+ __set_bit(flindex, &pool->flbitmap);
+ * Remove block from head of freelist. Index 'slindex' identifies the freelist.
+static void remove_block_head(struct xv_pool *pool,
+ struct block_header *block, u32 slindex)
+ struct block_header *tmpblock;
+ u32 flindex = slindex / BITS_PER_LONG;
+ pool->freelist[slindex].page = block->link.next_page;
+ pool->freelist[slindex].offset = block->link.next_offset;
+ block->link.prev_page = 0;
+ block->link.prev_offset = 0;
+ if (!pool->freelist[slindex].page) {
+ __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
+ if (!pool->slbitmap[flindex])
+ __clear_bit(flindex, &pool->flbitmap);
+ * DEBUG ONLY: We need not reinitialize the freelist head's previous
+ * pointer to 0 - we never depend on its value. But just for
+ * sanity, let's do it.
+ tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
+ pool->freelist[slindex].offset, KM_USER1);
+ tmpblock->link.prev_page = 0;
+ tmpblock->link.prev_offset = 0;
+ put_ptr_atomic(tmpblock, KM_USER1);
2238 +/*
2239 + * Remove block from freelist. Index 'slindex' identifies the freelist.
2240 + */
2241 +static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
2242 +			struct block_header *block, u32 slindex)
2243 +{
2244 +	u32 flindex;
2245 +	struct block_header *tmpblock;
2246 +
2247 +	if (pool->freelist[slindex].page == page
2248 +	    && pool->freelist[slindex].offset == offset) {
2249 +		remove_block_head(pool, block, slindex);
2250 +		return;
2251 +	}
2252 +
2253 +	flindex = slindex / BITS_PER_LONG;
2254 +
2255 +	if (block->link.prev_page) {
2256 +		tmpblock = get_ptr_atomic(block->link.prev_page,
2257 +				block->link.prev_offset, KM_USER1);
2258 +		tmpblock->link.next_page = block->link.next_page;
2259 +		tmpblock->link.next_offset = block->link.next_offset;
2260 +		put_ptr_atomic(tmpblock, KM_USER1);
2261 +	}
2262 +
2263 +	if (block->link.next_page) {
2264 +		tmpblock = get_ptr_atomic(block->link.next_page,
2265 +				block->link.next_offset, KM_USER1);
2266 +		tmpblock->link.prev_page = block->link.prev_page;
2267 +		tmpblock->link.prev_offset = block->link.prev_offset;
2268 +		put_ptr_atomic(tmpblock, KM_USER1);
2269 +	}
2270 +}
2271 +
2272 +/*
2273 + * Allocate a page and add it to freelist of given pool.
2274 + */
2275 +static int grow_pool(struct xv_pool *pool, gfp_t flags)
2276 +{
2277 +	struct page *page;
2278 +	struct block_header *block;
2279 +
2280 +	page = alloc_page(flags);
2281 +	if (unlikely(!page))
2282 +		return -ENOMEM;
2283 +
2284 +	stat_inc(&pool->total_pages);
2285 +
2286 +	spin_lock(&pool->lock);
2287 +	block = get_ptr_atomic(page, 0, KM_USER0);
2288 +
2289 +	block->size = PAGE_SIZE - XV_ALIGN;
2290 +	set_flag(block, BLOCK_FREE);
2291 +	clear_flag(block, PREV_FREE);
2292 +	set_blockprev(block, 0);
2293 +
2294 +	insert_block(pool, page, 0, block);
2295 +
2296 +	put_ptr_atomic(block, KM_USER0);
2297 +	spin_unlock(&pool->lock);
2298 +
2299 +	return 0;
2300 +}
2301 +
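So a freshly grown page is a single free block spanning everything after the XV_ALIGN-byte header. With 4 KiB pages and XV_ALIGN = 4 (both hold for this target; see xvmalloc_int.h below), the layout is, as a sketch:

	offset:  0        4                                  4096
	         +--------+----------------------------------+
	         | header | free space, block->size = 4092   |
	         +--------+----------------------------------+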
2302 +/*
2303 + * Create a memory pool. Allocates freelist, bitmaps and other
2304 + * per-pool metadata.
2305 + */
2306 +struct xv_pool *xv_create_pool(void)
2307 +{
2308 +	u32 ovhd_size;
2309 +	struct xv_pool *pool;
2310 +
2311 +	ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
2312 +	pool = kzalloc(ovhd_size, GFP_KERNEL);
2313 +	if (!pool)
2314 +		return NULL;
2315 +
2316 +	spin_lock_init(&pool->lock);
2317 +
2318 +	return pool;
2319 +}
2320 +
2321 +void xv_destroy_pool(struct xv_pool *pool)
2322 +{
2323 +	kfree(pool);
2324 +}
2325 +
2326 +/**
2327 + * xv_malloc - Allocate block of given size from pool.
2328 + * @pool: pool to allocate from
2329 + * @size: size of block to allocate
2330 + * @page: page no. that holds the object
2331 + * @offset: location of object within page
2332 + *
2333 + * On success, <page, offset> identifies block allocated
2334 + * and 0 is returned. On failure, <page, offset> is set to
2335 + * 0 and -ENOMEM is returned.
2336 + *
2337 + * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
2338 + */
2339 +int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
2340 +		u32 *offset, gfp_t flags)
2341 +{
2342 +	int error;
2343 +	u32 index, tmpsize, origsize, tmpoffset;
2344 +	struct block_header *block, *tmpblock;
2345 +
2346 +	*page = NULL;
2347 +	*offset = 0;
2348 +	origsize = size;
2349 +
2350 +	if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
2351 +		return -ENOMEM;
2352 +
2353 +	size = ALIGN(size, XV_ALIGN);
2354 +
2355 +	spin_lock(&pool->lock);
2356 +
2357 +	index = find_block(pool, size, page, offset);
2358 +
2359 +	if (!*page) {
2360 +		spin_unlock(&pool->lock);
2361 +		if (flags & GFP_NOWAIT)
2362 +			return -ENOMEM;
2363 +		error = grow_pool(pool, flags);
2364 +		if (unlikely(error))
2365 +			return error;
2366 +
2367 +		spin_lock(&pool->lock);
2368 +		index = find_block(pool, size, page, offset);
2369 +	}
2370 +
2371 +	if (!*page) {
2372 +		spin_unlock(&pool->lock);
2373 +		return -ENOMEM;
2374 +	}
2375 +
2376 +	block = get_ptr_atomic(*page, *offset, KM_USER0);
2377 +
2378 +	remove_block_head(pool, block, index);
2379 +
2380 +	/* Split the block if required */
2381 +	tmpoffset = *offset + size + XV_ALIGN;
2382 +	tmpsize = block->size - size;
2383 +	tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
2384 +	if (tmpsize) {
2385 +		tmpblock->size = tmpsize - XV_ALIGN;
2386 +		set_flag(tmpblock, BLOCK_FREE);
2387 +		clear_flag(tmpblock, PREV_FREE);
2388 +
2389 +		set_blockprev(tmpblock, *offset);
2390 +		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
2391 +			insert_block(pool, *page, tmpoffset, tmpblock);
2392 +
2393 +		if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
2394 +			tmpblock = BLOCK_NEXT(tmpblock);
2395 +			set_blockprev(tmpblock, tmpoffset);
2396 +		}
2397 +	} else {
2398 +		/* This block is an exact fit */
2399 +		if (tmpoffset != PAGE_SIZE)
2400 +			clear_flag(tmpblock, PREV_FREE);
2401 +	}
2402 +
2403 +	block->size = origsize;
2404 +	clear_flag(block, BLOCK_FREE);
2405 +
2406 +	put_ptr_atomic(block, KM_USER0);
2407 +	spin_unlock(&pool->lock);
2408 +
2409 +	*offset += XV_ALIGN;
2410 +
2411 +	return 0;
2412 +}
2413 +
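A worked example of the split arithmetic (assuming PAGE_SIZE = 4096 and XV_ALIGN = 4): a 100-byte request from a fresh page finds the 4092-byte block at offset 0. Then tmpoffset = 0 + 100 + 4 = 104 and tmpsize = 4092 - 100 = 3992, so the remainder becomes a free block at offset 104 with size 3992 - 4 = 3988, which is re-inserted into its freelist; since 104 + 4 + 3988 = 4096, there is no following block whose PREV_FREE state needs fixing. The caller gets back <page, 4> (the offset is bumped past the header), and block->size keeps the original 100, not the aligned size, which is what xv_get_object_size() later reports.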
2414 +/*
2415 + * Free block identified with <page, offset>
2416 + */
2417 +void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
2418 +{
2419 +	void *page_start;
2420 +	struct block_header *block, *tmpblock;
2421 +
2422 +	offset -= XV_ALIGN;
2423 +
2424 +	spin_lock(&pool->lock);
2425 +
2426 +	page_start = get_ptr_atomic(page, 0, KM_USER0);
2427 +	block = (struct block_header *)((char *)page_start + offset);
2428 +
2429 +	/* Catch double free bugs */
2430 +	BUG_ON(test_flag(block, BLOCK_FREE));
2431 +
2432 +	block->size = ALIGN(block->size, XV_ALIGN);
2433 +
2434 +	tmpblock = BLOCK_NEXT(block);
2435 +	if (offset + block->size + XV_ALIGN == PAGE_SIZE)
2436 +		tmpblock = NULL;
2437 +
2438 +	/* Merge next block if it's free */
2439 +	if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
2440 +		/*
2441 +		 * Blocks smaller than XV_MIN_ALLOC_SIZE
2442 +		 * are not inserted in any free list.
2443 +		 */
2444 +		if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
2445 +			remove_block(pool, page,
2446 +				offset + block->size + XV_ALIGN, tmpblock,
2447 +				get_index_for_insert(tmpblock->size));
2448 +		}
2449 +		block->size += tmpblock->size + XV_ALIGN;
2450 +	}
2451 +
2452 +	/* Merge previous block if it's free */
2453 +	if (test_flag(block, PREV_FREE)) {
2454 +		tmpblock = (struct block_header *)((char *)(page_start) +
2455 +						get_blockprev(block));
2456 +		offset = offset - tmpblock->size - XV_ALIGN;
2457 +
2458 +		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
2459 +			remove_block(pool, page, offset, tmpblock,
2460 +				    get_index_for_insert(tmpblock->size));
2461 +
2462 +		tmpblock->size += block->size + XV_ALIGN;
2463 +		block = tmpblock;
2464 +	}
2465 +
2466 +	/* No used objects in this page. Free it. */
2467 +	if (block->size == PAGE_SIZE - XV_ALIGN) {
2468 +		put_ptr_atomic(page_start, KM_USER0);
2469 +		spin_unlock(&pool->lock);
2470 +
2471 +		__free_page(page);
2472 +		stat_dec(&pool->total_pages);
2473 +		return;
2474 +	}
2475 +
2476 +	set_flag(block, BLOCK_FREE);
2477 +	if (block->size >= XV_MIN_ALLOC_SIZE)
2478 +		insert_block(pool, page, offset, block);
2479 +
2480 +	if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
2481 +		tmpblock = BLOCK_NEXT(block);
2482 +		set_flag(tmpblock, PREV_FREE);
2483 +		set_blockprev(tmpblock, offset);
2484 +	}
2485 +
2486 +	put_ptr_atomic(page_start, KM_USER0);
2487 +	spin_unlock(&pool->lock);
2488 +}
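Continuing the allocation example above (PAGE_SIZE = 4096, XV_ALIGN = 4): freeing that 100-byte object steps offset back to 0, finds the next block at 0 + 100 + 4 = 104 free, removes it from its freelist and merges it, giving block->size = 100 + 3988 + 4 = 4092 = PAGE_SIZE - XV_ALIGN. The whole page is free again, so it is handed back to the kernel rather than kept on a freelist.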
2489 +
2490 +u32 xv_get_object_size(void *obj)
2491 +{
2492 +	struct block_header *blk;
2493 +
2494 +	blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
2495 +	return blk->size;
2496 +}
2497 +
2498 +/*
2499 + * Returns total memory used by allocator (userdata + metadata)
2500 + */
2501 +u64 xv_get_total_size_bytes(struct xv_pool *pool)
2502 +{
2503 +	return pool->total_pages << PAGE_SHIFT;
2504 +}
2505 Index: kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.h
2506 ===================================================================
2507 --- /dev/null
2508 +++ kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.h
2509 @@ -0,0 +1,30 @@
2510 +/*
2511 + * xvmalloc memory allocator
2512 + *
2513 + * Copyright (C) 2008, 2009, 2010 Nitin Gupta
2514 + *
2515 + * This code is released using a dual license strategy: BSD/GPL
2516 + * You can choose the licence that better fits your requirements.
2517 + *
2518 + * Released under the terms of 3-clause BSD License
2519 + * Released under the terms of GNU General Public License Version 2.0
2520 + */
2521 +
2522 +#ifndef _XV_MALLOC_H_
2523 +#define _XV_MALLOC_H_
2524 +
2525 +#include <linux/types.h>
2526 +
2527 +struct xv_pool;
2528 +
2529 +struct xv_pool *xv_create_pool(void);
2530 +void xv_destroy_pool(struct xv_pool *pool);
2531 +
2532 +int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
2533 +		u32 *offset, gfp_t flags);
2534 +void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
2535 +
2536 +u32 xv_get_object_size(void *obj);
2537 +u64 xv_get_total_size_bytes(struct xv_pool *pool);
2538 +
2539 +#endif
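As an illustration (not part of the patch), a minimal sketch of how a client such as ramzswap_drv.c drives this API; the GFP_NOIO flag and KM_USER0 slot are assumptions here, and error handling is trimmed:

	#include <linux/highmem.h>
	#include <linux/string.h>
	#include "xvmalloc.h"

	/* Store len bytes from src as one object in the pool */
	static int store_object(struct xv_pool *pool, const void *src, u32 len)
	{
		struct page *page;
		u32 offset;
		void *dst;

		if (xv_malloc(pool, len, &page, &offset, GFP_NOIO))
			return -ENOMEM;

		dst = kmap_atomic(page, KM_USER0) + offset;
		memcpy(dst, src, len);
		kunmap_atomic(dst - offset, KM_USER0);
		return 0;
	}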
2540 Index: kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc_int.h
2541 ===================================================================
2542 --- /dev/null
2543 +++ kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc_int.h
2544 @@ -0,0 +1,86 @@
2545 +/*
2546 + * xvmalloc memory allocator
2547 + *
2548 + * Copyright (C) 2008, 2009, 2010 Nitin Gupta
2549 + *
2550 + * This code is released using a dual license strategy: BSD/GPL
2551 + * You can choose the licence that better fits your requirements.
2552 + *
2553 + * Released under the terms of 3-clause BSD License
2554 + * Released under the terms of GNU General Public License Version 2.0
2555 + */
2556 +
2557 +#ifndef _XV_MALLOC_INT_H_
2558 +#define _XV_MALLOC_INT_H_
2559 +
2560 +#include <linux/kernel.h>
2561 +#include <linux/types.h>
2562 +
2563 +/* User configurable params */
2564 +
2565 +/* Must be power of two */
2566 +#define XV_ALIGN_SHIFT	2
2567 +#define XV_ALIGN	(1 << XV_ALIGN_SHIFT)
2568 +#define XV_ALIGN_MASK	(XV_ALIGN - 1)
2569 +
2570 +/* This must be greater than sizeof(link_free) */
2571 +#define XV_MIN_ALLOC_SIZE	32
2572 +#define XV_MAX_ALLOC_SIZE	(PAGE_SIZE - XV_ALIGN)
2573 +
2574 +/* Free lists are separated by FL_DELTA bytes */
2575 +#define FL_DELTA_SHIFT	3
2576 +#define FL_DELTA	(1 << FL_DELTA_SHIFT)
2577 +#define FL_DELTA_MASK	(FL_DELTA - 1)
2578 +#define NUM_FREE_LISTS	((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
2579 +				/ FL_DELTA + 1)
2580 +
2581 +#define MAX_FLI		DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
2582 +
2583 +/* End of user params */
2584 +
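Plugging in this target's 4 KiB pages: XV_MAX_ALLOC_SIZE = 4096 - 4 = 4092, so NUM_FREE_LISTS = (4092 - 32) / 8 + 1 = 508 size classes, one per 8-byte FL_DELTA band, and with 32-bit longs on ARM the second-level bitmap needs MAX_FLI = DIV_ROUND_UP(508, 32) = 16 words.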
2585 +enum blockflags {
2586 +	BLOCK_FREE,
2587 +	PREV_FREE,
2588 +	__NR_BLOCKFLAGS,
2589 +};
2590 +
2591 +#define FLAGS_MASK	XV_ALIGN_MASK
2592 +#define PREV_MASK	(~FLAGS_MASK)
2593 +
2594 +struct freelist_entry {
2595 +	struct page *page;
2596 +	u16 offset;
2597 +	u16 pad;
2598 +};
2599 +
2600 +struct link_free {
2601 +	struct page *prev_page;
2602 +	struct page *next_page;
2603 +	u16 prev_offset;
2604 +	u16 next_offset;
2605 +};
2606 +
2607 +struct block_header {
2608 +	union {
2609 +		/* This common header must be XV_ALIGN bytes */
2610 +		u8 common[XV_ALIGN];
2611 +		struct {
2612 +			u16 size;
2613 +			u16 prev;
2614 +		};
2615 +	};
2616 +	struct link_free link;
2617 +};
2618 +
2619 +struct xv_pool {
2620 +	ulong flbitmap;
2621 +	ulong slbitmap[MAX_FLI];
2622 +	spinlock_t lock;
2623 +
2624 +	struct freelist_entry freelist[NUM_FREE_LISTS];
2625 +
2626 +	/* stats */
2627 +	u64 total_pages;
2628 +};
2629 +
2630 +#endif
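A rough estimate (not from the patch) of what this struct costs on 32-bit ARM: 4 B of flbitmap + 16 x 4 B of slbitmap + 508 freelist entries of 8 B each (a 4-byte page pointer plus two u16s) + 8 B of stats comes to a little over 4 KiB, so xv_create_pool()'s roundup(sizeof(*pool), PAGE_SIZE) ends up allocating two pages of metadata per pool, paid once per ramzswap device.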