initial check-in of kp49 work (up to commit date)
[kernel-bfs] / kernel-bfs-2.6.28 / debian / patches / adding-ramzswap-driver.diff
1 ---
2  arch/arm/configs/rx51_defconfig         |    2 
3  drivers/block/Kconfig                   |    2 
4  drivers/block/Makefile                  |    1 
5  drivers/block/ramzswap/Kconfig          |   22 
6  drivers/block/ramzswap/Makefile         |    3 
7  drivers/block/ramzswap/compat.h         |   13 
8  drivers/block/ramzswap/ramzswap.txt     |   51 +
9  drivers/block/ramzswap/ramzswap_drv.c   | 1557 ++++++++++++++++++++++++++++++++
10  drivers/block/ramzswap/ramzswap_drv.h   |  210 ++++
11  drivers/block/ramzswap/ramzswap_ioctl.h |   50 +
12  drivers/block/ramzswap/xvmalloc.c       |  507 ++++++++++
13  drivers/block/ramzswap/xvmalloc.h       |   30 
14  drivers/block/ramzswap/xvmalloc_int.h   |   86 +
15  13 files changed, 2534 insertions(+)
16
17 Index: kernel-power-2.6.28/arch/arm/configs/rx51_defconfig
18 ===================================================================
19 --- kernel-power-2.6.28.orig/arch/arm/configs/rx51_defconfig
20 +++ kernel-power-2.6.28/arch/arm/configs/rx51_defconfig
21 @@ -830,6 +830,8 @@
22  # CONFIG_BLK_DEV_XIP is not set
23  # CONFIG_CDROM_PKTCDVD is not set
24  # CONFIG_ATA_OVER_ETH is not set
25 +CONFIG_RAMZSWAP=m
26 +# CONFIG_RAMZSWAP_STATS is not set
27  CONFIG_MISC_DEVICES=y
28  # CONFIG_EEPROM_93CX6 is not set
29  CONFIG_NOKIA_AV_DETECT=m
30 Index: kernel-power-2.6.28/drivers/block/Kconfig
31 ===================================================================
32 --- kernel-power-2.6.28.orig/drivers/block/Kconfig
33 +++ kernel-power-2.6.28/drivers/block/Kconfig
34 @@ -446,4 +446,6 @@
35  
36           If unsure, say N.
37  
38 +source "drivers/block/ramzswap/Kconfig"
39 +
40  endif # BLK_DEV
41 Index: kernel-power-2.6.28/drivers/block/Makefile
42 ===================================================================
43 --- kernel-power-2.6.28.orig/drivers/block/Makefile
44 +++ kernel-power-2.6.28/drivers/block/Makefile
45 @@ -30,5 +30,6 @@
46  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
47  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
48  obj-$(CONFIG_BLK_DEV_HD)       += hd.o
49 +obj-$(CONFIG_RAMZSWAP)         += ramzswap/
50  
51  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += xen-blkfront.o
52 Index: kernel-power-2.6.28/drivers/block/ramzswap/Kconfig
53 ===================================================================
54 --- /dev/null
55 +++ kernel-power-2.6.28/drivers/block/ramzswap/Kconfig
56 @@ -0,0 +1,22 @@
57 +
58 +config RAMZSWAP
59 +       tristate "Compressed in-memory swap device (ramzswap)"
60 +       depends on SWAP
61 +       select LZO_COMPRESS
62 +       select LZO_DECOMPRESS
63 +       default n
64 +       help
65 +         Creates virtual block devices which can (only) be used as swap
66 +         disks. Pages swapped to these disks are compressed and stored in
67 +         memory itself.
68 +
69 +         See ramzswap.txt for more information.
70 +         Project home: http://compcache.googlecode.com/
71 +
72 +config RAMZSWAP_STATS
73 +       bool "Enable ramzswap stats"
74 +       depends on RAMZSWAP
75 +       default y
76 +       help
77 +         Enable statistics collection for ramzswap. This adds only a minimal
78 +         overhead. If unsure, say Y.
79 Index: kernel-power-2.6.28/drivers/block/ramzswap/Makefile
80 ===================================================================
81 --- /dev/null
82 +++ kernel-power-2.6.28/drivers/block/ramzswap/Makefile
83 @@ -0,0 +1,3 @@
84 +ramzswap-objs  :=      ramzswap_drv.o xvmalloc.o
85 +
86 +obj-$(CONFIG_RAMZSWAP) +=      ramzswap.o
87 Index: kernel-power-2.6.28/drivers/block/ramzswap/compat.h
88 ===================================================================
89 --- /dev/null
90 +++ kernel-power-2.6.28/drivers/block/ramzswap/compat.h
91 @@ -0,0 +1,13 @@
92 +#ifndef _RAMZSWAP_COMPAT_H_
93 +#define _RAMZSWAP_COMPAT_H_
94 +
95 +/* Comment this out if your kernel does not have the swap free notify patch */
96 +#define CONFIG_SWAP_FREE_NOTIFY
97 +
98 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31))
99 +#define blk_queue_physical_block_size(q, size) \
100 +       blk_queue_hardsect_size(q, size)
101 +#define blk_queue_logical_block_size(q, size)
102 +#endif
103 +
104 +#endif
105 Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap.txt
106 ===================================================================
107 --- /dev/null
108 +++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap.txt
109 @@ -0,0 +1,51 @@
110 +ramzswap: Compressed RAM based swap device
111 +-------------------------------------------
112 +
113 +Project home: http://compcache.googlecode.com/
114 +
115 +* Introduction
116 +
117 +The ramzswap module creates RAM based block devices which can (only) be used as
118 +swap disks. Pages swapped to these devices are compressed and stored in memory
119 +itself. See project home for use cases, performance numbers and a lot more.
120 +
121 +Individual ramzswap devices are configured and initialized using rzscontrol
122 +userspace utility as shown in examples below. See rzscontrol man page for more
123 +details.
124 +
125 +* Usage
126 +
127 +The following shows a typical sequence of steps for using ramzswap.
128 +
129 +1) Load Modules:
130 +       modprobe ramzswap num_devices=4
131 +       This creates 4 (uninitialized) devices: /dev/ramzswap{0,1,2,3}
132 +       (num_devices parameter is optional. Default: 1)
133 +
134 +2) Initialize:
135 +       Use rzscontrol utility to configure and initialize individual
136 +       ramzswap devices. Example:
137 +       rzscontrol /dev/ramzswap2 --init # uses default value of disksize_kb
138 +
139 +       *See rzscontrol man page for more details and examples*
140 +
141 +3) Activate:
142 +       swapon /dev/ramzswap2 # or any other initialized ramzswap device
143 +
144 +4) Stats:
145 +       rzscontrol /dev/ramzswap2 --stats
146 +
147 +5) Deactivate:
148 +       swapoff /dev/ramzswap2
149 +
150 +6) Reset:
151 +       rzscontrol /dev/ramzswap2 --reset
152 +       (This frees all the memory allocated for this device).
153 +
154 +
155 +Please report any problems at:
156 + - Mailing list: linux-mm-cc at laptop dot org
157 + - Issue tracker: http://code.google.com/p/compcache/issues/list
158 +
159 +Nitin Gupta
160 +ngupta@vflare.org
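Note: the rzscontrol steps above ultimately reduce to a few ioctls on the ramzswap
block device, using the RZSIO_* request codes added by ramzswap_ioctl.h in this
patch. The fragment below is only a rough, untested sketch of that sequence; the
device node, size and include path are illustrative, and rzscontrol remains the
supported way to do this.

	/* Sketch: configure and activate /dev/ramzswap0 by hand */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/swap.h>
	#include <unistd.h>

	#include "ramzswap_ioctl.h"	/* RZSIO_* request codes from this patch */

	int main(void)
	{
		size_t disksize_kb = 32 * 1024;	/* example: 32 MB */
		int fd = open("/dev/ramzswap0", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* Size must be set before init; the driver returns EBUSY afterwards. */
		if (ioctl(fd, RZSIO_SET_DISKSIZE_KB, &disksize_kb) < 0 ||
		    ioctl(fd, RZSIO_INIT, 0) < 0) {
			perror("ioctl");
			close(fd);
			return 1;
		}
		close(fd);

		/* Equivalent of "swapon /dev/ramzswap0" */
		if (swapon("/dev/ramzswap0", 0) < 0) {
			perror("swapon");
			return 1;
		}
		return 0;
	}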
161 Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.c
162 ===================================================================
163 --- /dev/null
164 +++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.c
165 @@ -0,0 +1,1557 @@
166 +/*
167 + * Compressed RAM based swap device
168 + *
169 + * Copyright (C) 2008, 2009, 2010  Nitin Gupta
170 + *
171 + * This code is released using a dual license strategy: BSD/GPL
172 + * You can choose the licence that better fits your requirements.
173 + *
174 + * Released under the terms of 3-clause BSD License
175 + * Released under the terms of GNU General Public License Version 2.0
176 + *
177 + * Project home: http://compcache.googlecode.com
178 + */
179 +
180 +#define KMSG_COMPONENT "ramzswap"
181 +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
182 +
183 +#include <linux/module.h>
184 +#include <linux/kernel.h>
185 +#include <linux/bitops.h>
186 +#include <linux/blkdev.h>
187 +#include <linux/buffer_head.h>
188 +#include <linux/device.h>
189 +#include <linux/genhd.h>
190 +#include <linux/highmem.h>
191 +#include <linux/lzo.h>
192 +#include <linux/string.h>
193 +#include <linux/swap.h>
194 +#include <linux/swapops.h>
195 +#include <linux/vmalloc.h>
196 +#include <linux/version.h>
197 +
198 +#include "compat.h"
199 +#include "ramzswap_drv.h"
200 +
201 +/* Module params (documentation at end) */
202 +static unsigned int num_devices;
203 +static unsigned long disksize_kb;
204 +static unsigned long memlimit_kb;
205 +static char backing_swap[MAX_SWAP_NAME_LEN];
206 +
207 +/* Globals */
208 +static int ramzswap_major;
209 +static struct ramzswap *devices;
210 +
211 +/*
212 + * Pages that compress to larger than this size are
213 + * forwarded to backing swap, if present, or stored
214 + * uncompressed in memory otherwise.
215 + */
216 +static unsigned int max_zpage_size;
217 +
218 +static int rzs_test_flag(struct ramzswap *rzs, u32 index,
219 +                       enum rzs_pageflags flag)
220 +{
221 +       return rzs->table[index].flags & BIT(flag);
222 +}
223 +
224 +static void rzs_set_flag(struct ramzswap *rzs, u32 index,
225 +                       enum rzs_pageflags flag)
226 +{
227 +       rzs->table[index].flags |= BIT(flag);
228 +}
229 +
230 +static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
231 +                       enum rzs_pageflags flag)
232 +{
233 +       rzs->table[index].flags &= ~BIT(flag);
234 +}
235 +
236 +static int page_zero_filled(void *ptr)
237 +{
238 +       unsigned int pos;
239 +       unsigned long *page;
240 +
241 +       page = (unsigned long *)ptr;
242 +
243 +       for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
244 +               if (page[pos])
245 +                       return 0;
246 +       }
247 +
248 +       return 1;
249 +}
250 +
251 +/*
252 + * memlimit cannot be greater than backing disk size.
253 + */
254 +static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
255 +{
256 +       int memlimit_valid = 1;
257 +
258 +       if (!rzs->memlimit) {
259 +               pr_info("Memory limit not set.\n");
260 +               memlimit_valid = 0;
261 +       }
262 +
263 +       if (rzs->memlimit > rzs->disksize) {
264 +               pr_info("Memory limit cannot be greater than "
265 +                       "disksize: limit=%zu, disksize=%zu\n",
266 +                       rzs->memlimit, rzs->disksize);
267 +               memlimit_valid = 0;
268 +       }
269 +
270 +       if (!memlimit_valid) {
271 +               size_t mempart, disksize;
272 +               pr_info("Using default: smaller of (%u%% of RAM) and "
273 +                       "(backing disk size).\n",
274 +                       default_memlimit_perc_ram);
275 +               mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
276 +               disksize = rzs->disksize;
277 +               rzs->memlimit = mempart > disksize ? disksize : mempart;
278 +       }
279 +
280 +       if (rzs->memlimit > totalram_bytes / 2) {
281 +               pr_info(
282 +               "It's not advisable to set the limit to more than half the "
283 +               "size of memory since we expect a 2:1 compression ratio. "
284 +               "Limit represents amount of *compressed* data we can keep "
285 +               "in memory!\n"
286 +               "\tMemory Size: %zu kB\n"
287 +               "\tLimit you selected: %zu kB\n"
288 +               "Continuing anyway ...\n",
289 +               totalram_bytes >> 10, rzs->memlimit >> 10
290 +               );
291 +       }
292 +
293 +       rzs->memlimit &= PAGE_MASK;
294 +       BUG_ON(!rzs->memlimit);
295 +}
296 +
297 +static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
298 +{
299 +       if (!rzs->disksize) {
300 +               pr_info(
301 +               "disk size not provided. You can use disksize_kb module "
302 +               "param to specify size.\nUsing default: (%u%% of RAM).\n",
303 +               default_disksize_perc_ram
304 +               );
305 +               rzs->disksize = default_disksize_perc_ram *
306 +                                       (totalram_bytes / 100);
307 +       }
308 +
309 +       if (rzs->disksize > 2 * (totalram_bytes)) {
310 +               pr_info(
311 +               "There is little point creating a ramzswap of greater than "
312 +               "twice the size of memory since we expect a 2:1 compression "
313 +               "ratio. Note that ramzswap uses about 0.1%% of the size of "
314 +               "the swap device when not in use so a huge ramzswap is "
315 +               "wasteful.\n"
316 +               "\tMemory Size: %zu kB\n"
317 +               "\tSize you selected: %zu kB\n"
318 +               "Continuing anyway ...\n",
319 +               totalram_bytes >> 10, rzs->disksize >> 10
320 +               );
321 +       }
322 +
323 +       rzs->disksize &= PAGE_MASK;
324 +}
325 +
326 +/*
327 + * Swap header (1st page of swap device) contains information
328 + * to identify it as a swap partition. Prepare such a header
329 + * for ramzswap device (ramzswap0) so that swapon can identify
330 + * it as a swap partition. In case a backing swap device is provided,
331 + * copy its swap header.
332 + */
333 +static int setup_swap_header(struct ramzswap *rzs, union swap_header *s)
334 +{
335 +       int ret = 0;
336 +       struct page *page;
337 +       struct address_space *mapping;
338 +       union swap_header *backing_swap_header;
339 +
340 +       /*
341 +        * There is no backing swap device. Create a swap header
342 +        * that is acceptable to swapon.
343 +        */
344 +       if (!rzs->backing_swap) {
345 +               s->info.version = 1;
346 +               s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
347 +               s->info.nr_badpages = 0;
348 +               memcpy(s->magic.magic, "SWAPSPACE2", 10);
349 +               return 0;
350 +       }
351 +
352 +       /*
353 +        * We have a backing swap device. Copy its swap header
354 +        * to ramzswap device header. If this header contains
355 +        * invalid information (backing device not a swap
356 +        * partition, etc.), swapon will fail for ramzswap
357 +        * which is correct behavior - we don't want to swap
358 +        * over filesystem partition!
359 +        */
360 +
361 +       /* Read the backing swap header (code from sys_swapon) */
362 +       mapping = rzs->swap_file->f_mapping;
363 +       if (!mapping->a_ops->readpage) {
364 +               ret = -EINVAL;
365 +               goto out;
366 +       }
367 +
368 +       page = read_mapping_page(mapping, 0, rzs->swap_file);
369 +       if (IS_ERR(page)) {
370 +               ret = PTR_ERR(page);
371 +               goto out;
372 +       }
373 +
374 +       backing_swap_header = kmap(page);
375 +       memcpy(s, backing_swap_header, sizeof(*s));
376 +       if (s->info.nr_badpages) {
377 +               pr_info("Cannot use backing swap with bad pages (%u)\n",
378 +                       s->info.nr_badpages);
379 +               ret = -EINVAL;
380 +       }
381 +       /*
382 +        * ramzswap disksize equals number of usable pages in backing
383 +        * swap. Set last_page in swap header to match this disksize
384 +        * ('last_page' means 0-based index of last usable swap page).
385 +        */
386 +       s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
387 +       kunmap(page);
388 +
389 +out:
390 +       return ret;
391 +}
392 +
393 +/*static void flush_dcache_page(struct page *page)
394 +{
395 +#if defined(CONFIG_ARM)
396 +       int flag = 0;*/
397 +       /*
398 +        * Ugly hack to get flush_dcache_page() work on ARM.
399 +        * page_mapping(page) == NULL after clearing this swap cache flag.
400 +        * Without clearing this flag, flush_dcache_page() will simply set
401 +        * "PG_dcache_dirty" bit and return.
402 +        */
403 +       /*if (PageSwapCache(page)) {
404 +               flag = 1;
405 +               ClearPageSwapCache(page);
406 +       }
407 +#endif
408 +       flush_dcache_page(page);
409 +#if defined(CONFIG_ARM)
410 +       if (flag)
411 +               SetPageSwapCache(page);
412 +#endif
413 +}*/
414 +
415 +static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
416 +                       struct ramzswap_ioctl_stats *s)
417 +{
418 +       strncpy(s->backing_swap_name, rzs->backing_swap_name,
419 +               MAX_SWAP_NAME_LEN - 1);
420 +       s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
421 +
422 +       s->disksize = rzs->disksize;
423 +       s->memlimit = rzs->memlimit;
424 +
425 +#if defined(CONFIG_RAMZSWAP_STATS)
426 +       {
427 +       struct ramzswap_stats *rs = &rzs->stats;
428 +       size_t succ_writes, mem_used;
429 +       unsigned int good_compress_perc = 0, no_compress_perc = 0;
430 +
431 +       mem_used = xv_get_total_size_bytes(rzs->mem_pool)
432 +                       + (rs->pages_expand << PAGE_SHIFT);
433 +       succ_writes = stat64_read(rzs, &rs->num_writes) -
434 +                       stat64_read(rzs, &rs->failed_writes);
435 +
436 +       if (succ_writes && rs->pages_stored) {
437 +               good_compress_perc = rs->good_compress * 100
438 +                                       / rs->pages_stored;
439 +               no_compress_perc = rs->pages_expand * 100
440 +                                       / rs->pages_stored;
441 +       }
442 +
443 +       s->num_reads = stat64_read(rzs, &rs->num_reads);
444 +       s->num_writes = stat64_read(rzs, &rs->num_writes);
445 +       s->failed_reads = stat64_read(rzs, &rs->failed_reads);
446 +       s->failed_writes = stat64_read(rzs, &rs->failed_writes);
447 +       s->invalid_io = stat64_read(rzs, &rs->invalid_io);
448 +       s->notify_free = stat64_read(rzs, &rs->notify_free);
449 +       s->pages_zero = rs->pages_zero;
450 +
451 +       s->good_compress_pct = good_compress_perc;
452 +       s->pages_expand_pct = no_compress_perc;
453 +
454 +       s->pages_stored = rs->pages_stored;
455 +       s->pages_used = mem_used >> PAGE_SHIFT;
456 +       s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
457 +       s->compr_data_size = rs->compr_size;
458 +       s->mem_used_total = mem_used;
459 +
460 +       s->bdev_num_reads = stat64_read(rzs, &rs->bdev_num_reads);
461 +       s->bdev_num_writes = stat64_read(rzs, &rs->bdev_num_writes);
462 +       }
463 +#endif /* CONFIG_RAMZSWAP_STATS */
464 +}
465 +
466 +static int add_backing_swap_extent(struct ramzswap *rzs,
467 +                               pgoff_t phy_pagenum,
468 +                               pgoff_t num_pages)
469 +{
470 +       unsigned int idx;
471 +       struct list_head *head;
472 +       struct page *curr_page, *new_page;
473 +       unsigned int extents_per_page = PAGE_SIZE /
474 +                               sizeof(struct ramzswap_backing_extent);
475 +
476 +       idx = rzs->num_extents % extents_per_page;
477 +       if (!idx) {
478 +               new_page = alloc_page(__GFP_ZERO);
479 +               if (!new_page)
480 +                       return -ENOMEM;
481 +
482 +               if (rzs->num_extents) {
483 +                       curr_page = virt_to_page(rzs->curr_extent);
484 +                       head = &curr_page->lru;
485 +               } else {
486 +                       head = &rzs->backing_swap_extent_list;
487 +               }
488 +
489 +               list_add(&new_page->lru, head);
490 +               rzs->curr_extent = page_address(new_page);
491 +       }
492 +
493 +       rzs->curr_extent->phy_pagenum = phy_pagenum;
494 +       rzs->curr_extent->num_pages = num_pages;
495 +
496 +       pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, "
497 +               "pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages,
498 +               phy_pagenum + num_pages - 1, rzs->curr_extent);
499 +
500 +       if (idx != extents_per_page - 1)
501 +               rzs->curr_extent++;
502 +
503 +       return 0;
504 +}
505 +
506 +static int setup_backing_swap_extents(struct ramzswap *rzs,
507 +                               struct inode *inode, unsigned long *num_pages)
508 +{
509 +       int ret = 0;
510 +       unsigned blkbits;
511 +       unsigned blocks_per_page;
512 +       pgoff_t contig_pages = 0, total_pages = 0;
513 +       pgoff_t pagenum = 0, prev_pagenum = 0;
514 +       sector_t probe_block = 0;
515 +       sector_t last_block;
516 +
517 +       blkbits = inode->i_blkbits;
518 +       blocks_per_page = PAGE_SIZE >> blkbits;
519 +
520 +       last_block = i_size_read(inode) >> blkbits;
521 +       while (probe_block + blocks_per_page <= last_block) {
522 +               unsigned block_in_page;
523 +               sector_t first_block;
524 +
525 +               first_block = bmap(inode, probe_block);
526 +               if (first_block == 0)
527 +                       goto bad_bmap;
528 +
529 +               /* It must be PAGE_SIZE aligned on-disk */
530 +               if (first_block & (blocks_per_page - 1)) {
531 +                       probe_block++;
532 +                       goto probe_next;
533 +               }
534 +
535 +               /* All blocks within this page must be contiguous on disk */
536 +               for (block_in_page = 1; block_in_page < blocks_per_page;
537 +                                       block_in_page++) {
538 +                       sector_t block;
539 +
540 +                       block = bmap(inode, probe_block + block_in_page);
541 +                       if (block == 0)
542 +                               goto bad_bmap;
543 +                       if (block != first_block + block_in_page) {
544 +                               /* Discontiguity */
545 +                               probe_block++;
546 +                               goto probe_next;
547 +                       }
548 +               }
549 +
550 +               /*
551 +                * We found a PAGE_SIZE length, PAGE_SIZE aligned
552 +                * run of blocks.
553 +                */
554 +               pagenum = first_block >> (PAGE_SHIFT - blkbits);
555 +
556 +               if (total_pages && (pagenum != prev_pagenum + 1)) {
557 +                       ret = add_backing_swap_extent(rzs, prev_pagenum -
558 +                                       (contig_pages - 1), contig_pages);
559 +                       if (ret < 0)
560 +                               goto out;
561 +                       rzs->num_extents++;
562 +                       contig_pages = 0;
563 +               }
564 +               total_pages++;
565 +               contig_pages++;
566 +               prev_pagenum = pagenum;
567 +               probe_block += blocks_per_page;
568 +
569 +probe_next:
570 +               continue;
571 +       }
572 +
573 +       if (contig_pages) {
574 +               pr_debug("adding last extent: pagenum=%lu, "
575 +                       "contig_pages=%lu\n", pagenum, contig_pages);
576 +               ret = add_backing_swap_extent(rzs,
577 +                       prev_pagenum - (contig_pages - 1), contig_pages);
578 +               if (ret < 0)
579 +                       goto out;
580 +               rzs->num_extents++;
581 +       }
582 +       if (!rzs->num_extents) {
583 +               pr_err("No swap extents found!\n");
584 +               ret = -EINVAL;
585 +       }
586 +
587 +       if (!ret) {
588 +               *num_pages = total_pages;
589 +               pr_info("Found %lu extents containing %luk\n",
590 +                       rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
591 +       }
592 +       goto out;
593 +
594 +bad_bmap:
595 +       pr_err("Backing swapfile has holes\n");
596 +       ret = -EINVAL;
597 +
598 +out:
599 +       while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
600 +               struct page *page;
601 +               struct list_head *entry = rzs->backing_swap_extent_list.next;
602 +               page = list_entry(entry, struct page, lru);
603 +               list_del(entry);
604 +               __free_page(page);
605 +       }
606 +       return ret;
607 +}
608 +
609 +static void map_backing_swap_extents(struct ramzswap *rzs)
610 +{
611 +       struct ramzswap_backing_extent *se;
612 +       struct page *table_page, *se_page;
613 +       unsigned long num_pages, num_table_pages, entry;
614 +       unsigned long se_idx, span;
615 +       unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
616 +       unsigned extents_per_page = PAGE_SIZE / sizeof(*se);
617 +
618 +       /* True for block device */
619 +       if (!rzs->num_extents)
620 +               return;
621 +
622 +       se_page = list_entry(rzs->backing_swap_extent_list.next,
623 +                                       struct page, lru);
624 +       se = page_address(se_page);
625 +       span = se->num_pages;
626 +       num_pages = rzs->disksize >> PAGE_SHIFT;
627 +       num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
628 +                                                       PAGE_SIZE);
629 +
630 +       entry = 0;
631 +       se_idx = 0;
632 +       while (num_table_pages--) {
633 +               table_page = vmalloc_to_page(&rzs->table[entry]);
634 +               while (span <= entry) {
635 +                       se_idx++;
636 +                       if (se_idx == rzs->num_extents)
637 +                               BUG();
638 +
639 +                       if (!(se_idx % extents_per_page)) {
640 +                               se_page = list_entry(se_page->lru.next,
641 +                                               struct page, lru);
642 +                               se = page_address(se_page);
643 +                       } else
644 +                               se++;
645 +
646 +                       span += se->num_pages;
647 +               }
648 +               table_page->mapping = (struct address_space *)se;
649 +               table_page->private = se->num_pages - (span - entry);
650 +               pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
651 +                       entry, span, table_page->mapping, table_page->private);
652 +               entry += entries_per_page;
653 +       }
654 +}
655 +
656 +/*
657 + * Check if value of backing_swap module param is sane.
658 + * Claim this device and set ramzswap size equal to
659 + * size of this block device.
660 + */
661 +static int setup_backing_swap(struct ramzswap *rzs)
662 +{
663 +       int ret = 0;
664 +       size_t disksize;
665 +       unsigned long num_pages = 0;
666 +       struct inode *inode;
667 +       struct file *swap_file;
668 +       struct address_space *mapping;
669 +       struct block_device *bdev = NULL;
670 +
671 +       if (!rzs->backing_swap_name[0]) {
672 +               pr_debug("backing_swap param not given\n");
673 +               goto out;
674 +       }
675 +
676 +       pr_debug("Using backing swap device: %s\n", rzs->backing_swap_name);
677 +
678 +       swap_file = filp_open(rzs->backing_swap_name,
679 +                               O_RDWR | O_LARGEFILE, 0);
680 +       if (IS_ERR(swap_file)) {
681 +               pr_err("Error opening backing device: %s\n",
682 +                       rzs->backing_swap_name);
683 +               ret = -EINVAL;
684 +               goto out;
685 +       }
686 +
687 +       mapping = swap_file->f_mapping;
688 +       inode = mapping->host;
689 +
690 +       if (S_ISBLK(inode->i_mode)) {
691 +               bdev = I_BDEV(inode);
692 +               ret = bd_claim(bdev, setup_backing_swap);
693 +               if (ret < 0) {
694 +                       bdev = NULL;
695 +                       goto bad_param;
696 +               }
697 +               disksize = i_size_read(inode);
698 +               if (!disksize) {
699 +                       pr_err("Error reading backing swap size.\n");
700 +                       goto bad_param;
701 +               }
702 +       } else if (S_ISREG(inode->i_mode)) {
703 +               bdev = inode->i_sb->s_bdev;
704 +               if (IS_SWAPFILE(inode)) {
705 +                       ret = -EBUSY;
706 +                       goto bad_param;
707 +               }
708 +               ret = setup_backing_swap_extents(rzs, inode, &num_pages);
709 +               if (ret < 0)
710 +                       goto bad_param;
711 +               disksize = num_pages << PAGE_SHIFT;
712 +       } else {
713 +               goto bad_param;
714 +       }
715 +
716 +       rzs->swap_file = swap_file;
717 +       rzs->backing_swap = bdev;
718 +       rzs->disksize = disksize;
719 +
720 +       return 0;
721 +
722 +bad_param:
723 +       if (bdev)
724 +               bd_release(bdev);
725 +       filp_close(swap_file, NULL);
726 +
727 +out:
728 +       rzs->backing_swap = NULL;
729 +       return ret;
730 +}
731 +
732 +/*
733 + * Map logical page number 'pagenum' to physical page number
734 + * on backing swap device. For block device, this is a nop.
735 + */
736 +static u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
737 +{
738 +       u32 skip_pages, entries_per_page;
739 +       size_t delta, se_offset, skipped;
740 +       struct page *table_page, *se_page;
741 +       struct ramzswap_backing_extent *se;
742 +
743 +       if (!rzs->num_extents)
744 +               return pagenum;
745 +
746 +       entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
747 +
748 +       table_page = vmalloc_to_page(&rzs->table[pagenum]);
749 +       se = (struct ramzswap_backing_extent *)table_page->mapping;
750 +       se_page = virt_to_page(se);
751 +
752 +       skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
753 +       se_offset = table_page->private + skip_pages;
754 +
755 +       if (se_offset < se->num_pages)
756 +               return se->phy_pagenum + se_offset;
757 +
758 +       skipped = se->num_pages - table_page->private;
759 +       do {
760 +               struct ramzswap_backing_extent *se_base;
761 +               u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);
762 +
763 +               /* Get next swap extent */
764 +               se_base = (struct ramzswap_backing_extent *)
765 +                                               page_address(se_page);
766 +               if (se - se_base == se_entries_per_page - 1) {
767 +                       se_page = list_entry(se_page->lru.next,
768 +                                               struct page, lru);
769 +                       se = page_address(se_page);
770 +               } else {
771 +                       se++;
772 +               }
773 +
774 +               skipped += se->num_pages;
775 +       } while (skipped < skip_pages);
776 +
777 +       delta = skipped - skip_pages;
778 +       se_offset = se->num_pages - delta;
779 +
780 +       return se->phy_pagenum + se_offset;
781 +}
782 +
783 +static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
784 +{
785 +#if defined(CONFIG_RAMZSWAP_STATS)
786 +       u32 clen;
787 +       void *obj;
788 +#endif
789 +       struct page *page = rzs->table[index].page;
790 +       u32 offset = rzs->table[index].offset;
791 +
792 +       if (unlikely(!page)) {
793 +               /*
794 +                * No memory is allocated for zero filled pages.
795 +                * Simply clear zero page flag.
796 +                */
797 +               if (rzs_test_flag(rzs, index, RZS_ZERO)) {
798 +                       rzs_clear_flag(rzs, index, RZS_ZERO);
799 +                       stat_dec(&rzs->stats.pages_zero);
800 +               }
801 +               return;
802 +       }
803 +
804 +       if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
805 +#if defined(CONFIG_RAMZSWAP_STATS)
806 +               clen = PAGE_SIZE;
807 +#endif
808 +               stat_dec(&rzs->stats.pages_expand);
809 +               __free_page(page);
810 +               rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
811 +               goto out;
812 +       }
813 +
814 +#if defined(CONFIG_RAMZSWAP_STATS)
815 +       obj = kmap_atomic(page, KM_USER0) + offset;
816 +       clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
817 +       kunmap_atomic(obj, KM_USER0);
818 +       if (clen <= PAGE_SIZE / 2)
819 +               stat_dec(&rzs->stats.good_compress);
820 +#endif
821 +       xv_free(rzs->mem_pool, page, offset);
822 +
823 +out:
824 +#if defined(CONFIG_RAMZSWAP_STATS)
825 +       rzs->stats.compr_size -= clen;
826 +       stat_dec(&rzs->stats.pages_stored);
827 +#endif
828 +
829 +       rzs->table[index].page = NULL;
830 +       rzs->table[index].offset = 0;
831 +}
832 +
833 +static int handle_zero_page(struct bio *bio)
834 +{
835 +       void *user_mem;
836 +       struct page *page = bio->bi_io_vec[0].bv_page;
837 +
838 +       user_mem = kmap_atomic(page, KM_USER0);
839 +       memset(user_mem, 0, PAGE_SIZE);
840 +       kunmap_atomic(user_mem, KM_USER0);
841 +
842 +       flush_dcache_page(page);
843 +
844 +       set_bit(BIO_UPTODATE, &bio->bi_flags);
845 +       bio_endio(bio, 0);
846 +       return 0;
847 +}
848 +
849 +static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
850 +{
851 +       u32 index;
852 +       struct page *page;
853 +       unsigned char *user_mem, *cmem;
854 +
855 +       page = bio->bi_io_vec[0].bv_page;
856 +       index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
857 +
858 +       user_mem = kmap_atomic(page, KM_USER0);
859 +       cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
860 +                       rzs->table[index].offset;
861 +
862 +       memcpy(user_mem, cmem, PAGE_SIZE);
863 +       kunmap_atomic(user_mem, KM_USER0);
864 +       kunmap_atomic(cmem, KM_USER1);
865 +
866 +       flush_dcache_page(page);
867 +
868 +       set_bit(BIO_UPTODATE, &bio->bi_flags);
869 +       bio_endio(bio, 0);
870 +       return 0;
871 +}
872 +
873 +
874 +/*
875 + * Called when request page is not present in ramzswap.
876 + * It's either in the backing swap device (if present) or
877 + * this is an attempt to read before any previous write
878 + * to this location - this happens due to readahead when
879 + * swap device is read from user-space (e.g. during swapon)
880 + */
881 +static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
882 +{
883 +       /*
884 +        * Always forward such requests to backing swap
885 +        * device (if present)
886 +        */
887 +       if (rzs->backing_swap) {
888 +               u32 pagenum;
889 +               stat64_dec(rzs, &rzs->stats.num_reads);
890 +               stat64_inc(rzs, &rzs->stats.bdev_num_reads);
891 +               bio->bi_bdev = rzs->backing_swap;
892 +
893 +               /*
894 +                * In case backing swap is a file, find the right offset within
895 +                * the file corresponding to logical position 'index'. For block
896 +                * device, this is a nop.
897 +                */
898 +               pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
899 +               bio->bi_sector = map_backing_swap_page(rzs, pagenum)
900 +                                       << SECTORS_PER_PAGE_SHIFT;
901 +               return 1;
902 +       }
903 +
904 +       /*
905 +        * It's an unlikely event when backing dev is
906 +        * not present
907 +        */
908 +       pr_debug("Read before write on swap device: "
909 +               "sector=%lu, size=%u, offset=%u\n",
910 +               (ulong)(bio->bi_sector), bio->bi_size,
911 +               bio->bi_io_vec[0].bv_offset);
912 +
913 +       /* Do nothing. Just return success */
914 +       set_bit(BIO_UPTODATE, &bio->bi_flags);
915 +       bio_endio(bio, 0);
916 +       return 0;
917 +}
918 +
919 +static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
920 +{
921 +       int ret;
922 +       u32 index;
923 +       size_t clen;
924 +       struct page *page;
925 +       struct zobj_header *zheader;
926 +       unsigned char *user_mem, *cmem;
927 +
928 +       stat64_inc(rzs, &rzs->stats.num_reads);
929 +
930 +       page = bio->bi_io_vec[0].bv_page;
931 +       index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
932 +
933 +       if (rzs_test_flag(rzs, index, RZS_ZERO))
934 +               return handle_zero_page(bio);
935 +
936 +       /* Requested page is not present in compressed area */
937 +       if (!rzs->table[index].page)
938 +               return handle_ramzswap_fault(rzs, bio);
939 +
940 +       /* Page is stored uncompressed since it's incompressible */
941 +       if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
942 +               return handle_uncompressed_page(rzs, bio);
943 +
944 +       user_mem = kmap_atomic(page, KM_USER0);
945 +       clen = PAGE_SIZE;
946 +
947 +       cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
948 +                       rzs->table[index].offset;
949 +
950 +       ret = lzo1x_decompress_safe(
951 +               cmem + sizeof(*zheader),
952 +               xv_get_object_size(cmem) - sizeof(*zheader),
953 +               user_mem, &clen);
954 +
955 +       kunmap_atomic(user_mem, KM_USER0);
956 +       kunmap_atomic(cmem, KM_USER1);
957 +
958 +       /* should NEVER happen */
959 +       if (unlikely(ret != LZO_E_OK)) {
960 +               pr_err("Decompression failed! err=%d, page=%u\n",
961 +                       ret, index);
962 +               stat64_inc(rzs, &rzs->stats.failed_reads);
963 +               goto out;
964 +       }
965 +
966 +       flush_dcache_page(page);
967 +
968 +       set_bit(BIO_UPTODATE, &bio->bi_flags);
969 +       bio_endio(bio, 0);
970 +       return 0;
971 +
972 +out:
973 +       bio_io_error(bio);
974 +       return 0;
975 +}
976 +
977 +static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
978 +{
979 +       int ret, fwd_write_request = 0;
980 +       u32 offset, index;
981 +       size_t clen;
982 +       struct zobj_header *zheader;
983 +       struct page *page, *page_store;
984 +       unsigned char *user_mem, *cmem, *src;
985 +
986 +       stat64_inc(rzs, &rzs->stats.num_writes);
987 +
988 +       page = bio->bi_io_vec[0].bv_page;
989 +       index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
990 +
991 +       src = rzs->compress_buffer;
992 +
993 +       if (rzs->table[index].page || rzs_test_flag(rzs, index, RZS_ZERO))
994 +               ramzswap_free_page(rzs, index);
995 +
996 +       mutex_lock(&rzs->lock);
997 +
998 +       user_mem = kmap_atomic(page, KM_USER0);
999 +       if (page_zero_filled(user_mem)) {
1000 +               kunmap_atomic(user_mem, KM_USER0);
1001 +               rzs_set_flag(rzs, index, RZS_ZERO);
1002 +               mutex_unlock(&rzs->lock);
1003 +               stat_inc(&rzs->stats.pages_zero);
1004 +
1005 +               set_bit(BIO_UPTODATE, &bio->bi_flags);
1006 +               bio_endio(bio, 0);
1007 +               return 0;
1008 +       }
1009 +
1010 +       if (rzs->backing_swap &&
1011 +               (rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
1012 +               kunmap_atomic(user_mem, KM_USER0);
1013 +               mutex_unlock(&rzs->lock);
1014 +               fwd_write_request = 1;
1015 +               goto out;
1016 +       }
1017 +
1018 +       ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
1019 +                               rzs->compress_workmem);
1020 +
1021 +       kunmap_atomic(user_mem, KM_USER0);
1022 +
1023 +       if (unlikely(ret != LZO_E_OK)) {
1024 +               mutex_unlock(&rzs->lock);
1025 +               pr_err("Compression failed! err=%d\n", ret);
1026 +               stat64_inc(rzs, &rzs->stats.failed_writes);
1027 +               goto out;
1028 +       }
1029 +
1030 +       /*
1031 +        * Page is incompressible. Forward it to backing swap
1032 +        * if present. Otherwise, store it as-is (uncompressed)
1033 +        * since we do not want to return too many swap write
1034 +        * errors, which has the side effect of hanging the system.
1035 +        */
1036 +       if (unlikely(clen > max_zpage_size)) {
1037 +               if (rzs->backing_swap) {
1038 +                       mutex_unlock(&rzs->lock);
1039 +                       fwd_write_request = 1;
1040 +                       goto out;
1041 +               }
1042 +
1043 +               clen = PAGE_SIZE;
1044 +               page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
1045 +               if (unlikely(!page_store)) {
1046 +                       mutex_unlock(&rzs->lock);
1047 +                       pr_info("Error allocating memory for incompressible "
1048 +                               "page: %u\n", index);
1049 +                       stat64_inc(rzs, &rzs->stats.failed_writes);
1050 +                       goto out;
1051 +               }
1052 +
1053 +               offset = 0;
1054 +               rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
1055 +               stat_inc(&rzs->stats.pages_expand);
1056 +               rzs->table[index].page = page_store;
1057 +               src = kmap_atomic(page, KM_USER0);
1058 +               goto memstore;
1059 +       }
1060 +
1061 +       if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
1062 +                       &rzs->table[index].page, &offset,
1063 +                       GFP_NOIO | __GFP_HIGHMEM)) {
1064 +               mutex_unlock(&rzs->lock);
1065 +               pr_info("Error allocating memory for compressed "
1066 +                       "page: %u, size=%zu\n", index, clen);
1067 +               stat64_inc(rzs, &rzs->stats.failed_writes);
1068 +               if (rzs->backing_swap)
1069 +                       fwd_write_request = 1;
1070 +               goto out;
1071 +       }
1072 +
1073 +memstore:
1074 +       rzs->table[index].offset = offset;
1075 +
1076 +       cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
1077 +                       rzs->table[index].offset;
1078 +
1079 +#if 0
1080 +       /* Back-reference needed for memory defragmentation */
1081 +       if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
1082 +               zheader = (struct zobj_header *)cmem;
1083 +               zheader->table_idx = index;
1084 +               cmem += sizeof(*zheader);
1085 +       }
1086 +#endif
1087 +
1088 +       memcpy(cmem, src, clen);
1089 +
1090 +       kunmap_atomic(cmem, KM_USER1);
1091 +       if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
1092 +               kunmap_atomic(src, KM_USER0);
1093 +
1094 +       /* Update stats */
1095 +       rzs->stats.compr_size += clen;
1096 +       stat_inc(&rzs->stats.pages_stored);
1097 +       if (clen <= PAGE_SIZE / 2)
1098 +               stat_inc(&rzs->stats.good_compress);
1099 +
1100 +       mutex_unlock(&rzs->lock);
1101 +
1102 +       set_bit(BIO_UPTODATE, &bio->bi_flags);
1103 +       bio_endio(bio, 0);
1104 +       return 0;
1105 +
1106 +out:
1107 +       if (fwd_write_request) {
1108 +               stat64_inc(rzs, &rzs->stats.bdev_num_writes);
1109 +               bio->bi_bdev = rzs->backing_swap;
1110 +#if 0
1111 +               /*
1112 +                * TODO: We currently have linear mapping of ramzswap and
1113 +                * backing swap sectors. This is not desired since we want
1114 +                * to optimize writes to backing swap to minimize disk seeks
1115 +                * or have effective wear leveling (for SSDs). Also, a
1116 +                * non-linear mapping is required to implement compressed
1117 +                * on-disk swapping.
1118 +                */
1119 +                bio->bi_sector = get_backing_swap_page()
1120 +                                       << SECTORS_PER_PAGE_SHIFT;
1121 +#endif
1122 +               /*
1123 +                * In case backing swap is a file, find the right offset within
1124 +                * the file corresponding to logical position 'index'. For block
1125 +                * device, this is a nop.
1126 +                */
1127 +               bio->bi_sector = map_backing_swap_page(rzs, index)
1128 +                                       << SECTORS_PER_PAGE_SHIFT;
1129 +               return 1;
1130 +       }
1131 +
1132 +       bio_io_error(bio);
1133 +       return 0;
1134 +}
1135 +
1136 +
1137 +/*
1138 + * Check if request is within bounds and page aligned.
1139 + */
1140 +static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio)
1141 +{
1142 +       if (unlikely(
1143 +               (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
1144 +               (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
1145 +               (bio->bi_vcnt != 1) ||
1146 +               (bio->bi_size != PAGE_SIZE) ||
1147 +               (bio->bi_io_vec[0].bv_offset != 0))) {
1148 +
1149 +               return 0;
1150 +       }
1151 +
1152 +       /* swap request is valid */
1153 +       return 1;
1154 +}
1155 +
1156 +/*
1157 + * Handler function for all ramzswap I/O requests.
1158 + */
1159 +static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
1160 +{
1161 +       int ret = 0;
1162 +       struct ramzswap *rzs = queue->queuedata;
1163 +
1164 +       if (unlikely(!rzs->init_done)) {
1165 +               bio_io_error(bio);
1166 +               return 0;
1167 +       }
1168 +
1169 +       if (!valid_swap_request(rzs, bio)) {
1170 +               stat64_inc(rzs, &rzs->stats.invalid_io);
1171 +               bio_io_error(bio);
1172 +               return 0;
1173 +       }
1174 +
1175 +       switch (bio_data_dir(bio)) {
1176 +       case READ:
1177 +               ret = ramzswap_read(rzs, bio);
1178 +               break;
1179 +
1180 +       case WRITE:
1181 +               ret = ramzswap_write(rzs, bio);
1182 +               break;
1183 +       }
1184 +
1185 +       return ret;
1186 +}
1187 +
1188 +static void reset_device(struct ramzswap *rzs, struct block_device *bdev)
1189 +{
1190 +       int is_backing_blkdev = 0;
1191 +       size_t index, num_pages;
1192 +       unsigned entries_per_page;
1193 +       unsigned long num_table_pages, entry = 0;
1194 +
1195 +       if (bdev)
1196 +               fsync_bdev(bdev);
1197 +
1198 +       rzs->init_done = 0;
1199 +
1200 +       if (rzs->backing_swap && !rzs->num_extents)
1201 +               is_backing_blkdev = 1;
1202 +
1203 +       num_pages = rzs->disksize >> PAGE_SHIFT;
1204 +
1205 +       /* Free various per-device buffers */
1206 +       kfree(rzs->compress_workmem);
1207 +       free_pages((unsigned long)rzs->compress_buffer, 1);
1208 +
1209 +       rzs->compress_workmem = NULL;
1210 +       rzs->compress_buffer = NULL;
1211 +
1212 +       /* Free all pages that are still in this ramzswap device */
1213 +       for (index = 0; index < num_pages; index++) {
1214 +               struct page *page;
1215 +               u16 offset;
1216 +
1217 +               page = rzs->table[index].page;
1218 +               offset = rzs->table[index].offset;
1219 +
1220 +               if (!page)
1221 +                       continue;
1222 +
1223 +               if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
1224 +                       __free_page(page);
1225 +               else
1226 +                       xv_free(rzs->mem_pool, page, offset);
1227 +       }
1228 +
1229 +       entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
1230 +       num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
1231 +                                       PAGE_SIZE);
1232 +       /*
1233 +        * Set page->mapping to NULL for every table page.
1234 +        * Otherwise, we will hit bad_page() during free.
1235 +        */
1236 +       while (rzs->num_extents && num_table_pages--) {
1237 +               struct page *page;
1238 +               page = vmalloc_to_page(&rzs->table[entry]);
1239 +               page->mapping = NULL;
1240 +               entry += entries_per_page;
1241 +       }
1242 +       vfree(rzs->table);
1243 +       rzs->table = NULL;
1244 +
1245 +       xv_destroy_pool(rzs->mem_pool);
1246 +       rzs->mem_pool = NULL;
1247 +
1248 +       /* Free all swap extent pages */
1249 +       while (!list_empty(&rzs->backing_swap_extent_list)) {
1250 +               struct page *page;
1251 +               struct list_head *entry;
1252 +               entry = rzs->backing_swap_extent_list.next;
1253 +               page = list_entry(entry, struct page, lru);
1254 +               list_del(entry);
1255 +               __free_page(page);
1256 +       }
1257 +       INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
1258 +       rzs->num_extents = 0;
1259 +
1260 +       /* Close backing swap device, if present */
1261 +       if (rzs->backing_swap) {
1262 +               if (is_backing_blkdev)
1263 +                       bd_release(rzs->backing_swap);
1264 +               filp_close(rzs->swap_file, NULL);
1265 +               rzs->backing_swap = NULL;
1266 +               memset(rzs->backing_swap_name, 0, MAX_SWAP_NAME_LEN);
1267 +       }
1268 +
1269 +       /* Reset stats */
1270 +       memset(&rzs->stats, 0, sizeof(rzs->stats));
1271 +
1272 +       rzs->disksize = 0;
1273 +       rzs->memlimit = 0;
1274 +}
1275 +
1276 +static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
1277 +{
1278 +       int ret, dev_id;
1279 +       size_t num_pages;
1280 +       struct page *page;
1281 +       union swap_header *swap_header;
1282 +
1283 +       if (rzs->init_done) {
1284 +               pr_info("Device already initialized!\n");
1285 +               return -EBUSY;
1286 +       }
1287 +
1288 +       dev_id = rzs - devices;
1289 +
1290 +       ret = setup_backing_swap(rzs);
1291 +       if (ret)
1292 +               goto fail;
1293 +
1294 +       if (rzs->backing_swap)
1295 +               ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
1296 +       else
1297 +               ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
1298 +
1299 +       rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
1300 +       if (!rzs->compress_workmem) {
1301 +               pr_err("Error allocating compressor working memory!\n");
1302 +               ret = -ENOMEM;
1303 +               goto fail;
1304 +       }
1305 +
1306 +       rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
1307 +       if (!rzs->compress_buffer) {
1308 +               pr_err("Error allocating compressor buffer space\n");
1309 +               ret = -ENOMEM;
1310 +               goto fail;
1311 +       }
1312 +
1313 +       num_pages = rzs->disksize >> PAGE_SHIFT;
1314 +       rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
1315 +       if (!rzs->table) {
1316 +               pr_err("Error allocating ramzswap address table\n");
1317 +               /* To prevent accessing table entries during cleanup */
1318 +               rzs->disksize = 0;
1319 +               ret = -ENOMEM;
1320 +               goto fail;
1321 +       }
1322 +       memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
1323 +
1324 +       map_backing_swap_extents(rzs);
1325 +
1326 +       page = alloc_page(__GFP_ZERO);
1327 +       if (!page) {
1328 +               pr_err("Error allocating swap header page\n");
1329 +               ret = -ENOMEM;
1330 +               goto fail;
1331 +       }
1332 +       rzs->table[0].page = page;
1333 +       rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);
1334 +
1335 +       swap_header = kmap(page);
1336 +       ret = setup_swap_header(rzs, swap_header);
1337 +       kunmap(page);
1338 +       if (ret) {
1339 +               pr_err("Error setting swap header\n");
1340 +               goto fail;
1341 +       }
1342 +
1343 +       set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
1344 +
1345 +       /*
1346 +        * We have identity mapping of sectors for ramzswap
1347 +        * and the backing swap device. So, this queue flag
1348 +        * should be according to backing dev.
1349 +        */
1350 +       if (!rzs->backing_swap ||
1351 +                       blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
1352 +               queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
1353 +
1354 +       rzs->mem_pool = xv_create_pool();
1355 +       if (!rzs->mem_pool) {
1356 +               pr_err("Error creating memory pool\n");
1357 +               ret = -ENOMEM;
1358 +               goto fail;
1359 +       }
1360 +
1361 +       /*
1362 +        * Pages that compress to size greater than this are forwarded
1363 +        * to physical swap disk (if backing dev is provided)
1364 +        * TODO: make this configurable
1365 +        */
1366 +       if (rzs->backing_swap)
1367 +               max_zpage_size = max_zpage_size_bdev;
1368 +       else
1369 +               max_zpage_size = max_zpage_size_nobdev;
1370 +       pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);
1371 +
1372 +       rzs->init_done = 1;
1373 +
1374 +       if (rzs->backing_swap) {
1375 +               pr_info("/dev/ramzswap%d initialized: "
1376 +                       "backing_swap=%s, memlimit_kb=%zu\n",
1377 +                       dev_id, rzs->backing_swap_name, rzs->memlimit >> 10);
1378 +       } else {
1379 +               pr_info("/dev/ramzswap%d initialized: "
1380 +                       "disksize_kb=%zu\n", dev_id, rzs->disksize >> 10);
1381 +       }
1382 +       return 0;
1383 +
1384 +fail:
1385 +       reset_device(rzs, NULL);
1386 +
1387 +       pr_err("Initialization failed: err=%d\n", ret);
1388 +       return ret;
1389 +}
1390 +
1391 +static int ramzswap_ioctl_reset_device(struct ramzswap *rzs,
1392 +                               struct block_device *bdev)
1393 +{
1394 +       if (rzs->init_done)
1395 +               reset_device(rzs, bdev);
1396 +
1397 +       return 0;
1398 +}
1399 +
1400 +static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
1401 +                       unsigned int cmd, unsigned long arg)
1402 +{
1403 +       int ret = 0;
1404 +       size_t disksize_kb, memlimit_kb;
1405 +
1406 +       struct ramzswap *rzs = bdev->bd_disk->private_data;
1407 +
1408 +       switch (cmd) {
1409 +       case RZSIO_SET_DISKSIZE_KB:
1410 +               if (rzs->init_done) {
1411 +                       ret = -EBUSY;
1412 +                       goto out;
1413 +               }
1414 +               if (copy_from_user(&disksize_kb, (void *)arg,
1415 +                                               _IOC_SIZE(cmd))) {
1416 +                       ret = -EFAULT;
1417 +                       goto out;
1418 +               }
1419 +               rzs->disksize = disksize_kb << 10;
1420 +               pr_debug("Disk size set to %zu kB\n", disksize_kb);
1421 +               break;
1422 +
1423 +       case RZSIO_SET_MEMLIMIT_KB:
1424 +               if (rzs->init_done) {
1425 +                       /* TODO: allow changing memlimit */
1426 +                       ret = -EBUSY;
1427 +                       goto out;
1428 +               }
1429 +               if (copy_from_user(&memlimit_kb, (void *)arg,
1430 +                                               _IOC_SIZE(cmd))) {
1431 +                       ret = -EFAULT;
1432 +                       goto out;
1433 +               }
1434 +               rzs->memlimit = memlimit_kb << 10;
1435 +               pr_debug("Memory limit set to %zu kB\n", memlimit_kb);
1436 +               break;
1437 +
1438 +       case RZSIO_SET_BACKING_SWAP:
1439 +               if (rzs->init_done) {
1440 +                       ret = -EBUSY;
1441 +                       goto out;
1442 +               }
1443 +
1444 +               if (copy_from_user(&rzs->backing_swap_name, (void *)arg,
1445 +                                               _IOC_SIZE(cmd))) {
1446 +                       ret = -EFAULT;
1447 +                       goto out;
1448 +               }
1449 +               rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
1450 +               pr_debug("Backing swap set to %s\n", rzs->backing_swap_name);
1451 +               break;
1452 +
1453 +       case RZSIO_GET_STATS:
1454 +       {
1455 +               struct ramzswap_ioctl_stats *stats;
1456 +               if (!rzs->init_done) {
1457 +                       ret = -ENOTTY;
1458 +                       goto out;
1459 +               }
1460 +               stats = kzalloc(sizeof(*stats), GFP_KERNEL);
1461 +               if (!stats) {
1462 +                       ret = -ENOMEM;
1463 +                       goto out;
1464 +               }
1465 +               ramzswap_ioctl_get_stats(rzs, stats);
1466 +               if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
1467 +                       kfree(stats);
1468 +                       ret = -EFAULT;
1469 +                       goto out;
1470 +               }
1471 +               kfree(stats);
1472 +               break;
1473 +       }
1474 +       case RZSIO_INIT:
1475 +               ret = ramzswap_ioctl_init_device(rzs);
1476 +               break;
1477 +
1478 +       case RZSIO_RESET:
1479 +               /* Do not reset an active device! */
1480 +               if (bdev->bd_holders) {
1481 +                       ret = -EBUSY;
1482 +                       goto out;
1483 +               }
1484 +               ret = ramzswap_ioctl_reset_device(rzs, bdev);
1485 +               break;
1486 +
1487 +       default:
1488 +               pr_info("Invalid ioctl %u\n", cmd);
1489 +               ret = -ENOTTY;
1490 +       }
1491 +
1492 +out:
1493 +       return ret;
1494 +}
1495 +
1496 +#if defined(CONFIG_SWAP_FREE_NOTIFY)
1497 +
1498 +void ramzswap_slot_free_notify(struct block_device *bdev, sector_t bi_sector)
1499 +{
1500 +       struct ramzswap *rzs = bdev->bd_disk->private_data;
1501 +       ramzswap_free_page(rzs, bi_sector >> SECTORS_PER_PAGE_SHIFT);
1502 +       stat64_inc(rzs, &rzs->stats.notify_free);
1503 +}
1504 +
1505 +#endif
1506 +
1507 +static struct block_device_operations ramzswap_devops = {
1508 +       .ioctl = ramzswap_ioctl,
1509 +#if defined(CONFIG_SWAP_FREE_NOTIFY)
1510 +       .swap_slot_free_notify = ramzswap_slot_free_notify,
1511 +#endif
1512 +       .owner = THIS_MODULE
1513 +};
1514 +
1515 +static int create_device(struct ramzswap *rzs, int device_id)
1516 +{
1517 +       int ret = 0;
1518 +
1519 +       mutex_init(&rzs->lock);
1520 +       spin_lock_init(&rzs->stat64_lock);
1521 +       INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
1522 +
1523 +       rzs->queue = blk_alloc_queue(GFP_KERNEL);
1524 +       if (!rzs->queue) {
1525 +               pr_err("Error allocating disk queue for device %d\n",
1526 +                       device_id);
1527 +               ret = -ENOMEM;
1528 +               goto out;
1529 +       }
1530 +
1531 +       blk_queue_make_request(rzs->queue, ramzswap_make_request);
1532 +       rzs->queue->queuedata = rzs;
1533 +
1534 +       /* gendisk structure */
1535 +       rzs->disk = alloc_disk(1);
1536 +       if (!rzs->disk) {
1537 +               blk_cleanup_queue(rzs->queue);
1538 +               pr_warning("Error allocating disk structure for device %d\n",
1539 +                       device_id);
1540 +               ret = -ENOMEM;
1541 +               goto out;
1542 +       }
1543 +
1544 +       rzs->disk->major = ramzswap_major;
1545 +       rzs->disk->first_minor = device_id;
1546 +       rzs->disk->fops = &ramzswap_devops;
1547 +       rzs->disk->queue = rzs->queue;
1548 +       rzs->disk->private_data = rzs;
1549 +       snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
1550 +       /*
1551 +        * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
1552 +        * or set equal to backing swap device (if provided)
1553 +        */
1554 +       set_capacity(rzs->disk, 0);
1555 +
1556 +       blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
1557 +       blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
1558 +
1559 +       add_disk(rzs->disk);
1560 +       rzs->disk->flags &= ~GENHD_FL_REMAP_SWAPPED_PAGES;
1561 +
1562 +       rzs->init_done = 0;
1563 +
1564 +out:
1565 +       return ret;
1566 +}
1567 +
1568 +static void destroy_device(struct ramzswap *rzs)
1569 +{
1570 +       if (rzs->disk) {
1571 +               del_gendisk(rzs->disk);
1572 +               put_disk(rzs->disk);
1573 +       }
1574 +
1575 +       if (rzs->queue)
1576 +               blk_cleanup_queue(rzs->queue);
1577 +}
1578 +
1579 +static int __init ramzswap_init(void)
1580 +{
1581 +       int ret, dev_id;
1582 +       struct ramzswap *rzs;
1583 +
1584 +       if (num_devices > max_num_devices) {
1585 +               pr_warning("Invalid value for num_devices: %u\n",
1586 +                               num_devices);
1587 +               ret = -EINVAL;
1588 +               goto out;
1589 +       }
1590 +
1591 +       ramzswap_major = register_blkdev(0, "ramzswap");
1592 +       if (ramzswap_major <= 0) {
1593 +               pr_warning("Unable to get major number\n");
1594 +               ret = -EBUSY;
1595 +               goto out;
1596 +       }
1597 +
1598 +       if (!num_devices) {
1599 +               pr_info("num_devices not specified. Using default: 1\n");
1600 +               num_devices = 1;
1601 +       }
1602 +
1603 +       /* Allocate the device array and initialize each one */
1604 +       pr_debug("Creating %u devices ...\n", num_devices);
1605 +       devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
1606 +       if (!devices) {
1607 +               ret = -ENOMEM;
1608 +               goto unregister;
1609 +       }
1610 +
1611 +       for (dev_id = 0; dev_id < num_devices; dev_id++) {
1612 +               if (create_device(&devices[dev_id], dev_id)) {
1613 +                       ret = -ENOMEM;
1614 +                       goto free_devices;
1615 +               }
1616 +       }
1617 +
1618 +       /*
1619 +        * Initialize the first device (/dev/ramzswap0)
1620 +        * if parameters are provided
1621 +        */
1622 +       rzs = &devices[0];
1623 +
1624 +       /*
1625 +        * User specifies either <disksize_kb> or <backing_swap, memlimit_kb>
1626 +        */
1627 +       if (disksize_kb) {
1628 +               rzs->disksize = disksize_kb << 10;
1629 +               ret = ramzswap_ioctl_init_device(rzs);
1630 +               if (ret)
1631 +                       goto free_devices;
1632 +               goto out;
1633 +       }
1634 +
1635 +       if (backing_swap[0]) {
1636 +               rzs->memlimit = memlimit_kb << 10;
1637 +               strncpy(rzs->backing_swap_name, backing_swap,
1638 +                       MAX_SWAP_NAME_LEN);
1639 +               rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
1640 +               ret = ramzswap_ioctl_init_device(rzs);
1641 +               if (ret)
1642 +                       goto free_devices;
1643 +               goto out;
1644 +       }
1645 +
1646 +       /* User specified memlimit_kb but not backing_swap */
1647 +       if (memlimit_kb) {
1648 +               pr_info("memlimit_kb parameter is valid only when "
1649 +                       "backing_swap is also specified. Aborting.\n");
1650 +               ret = -EINVAL;
1651 +               goto free_devices;
1652 +       }
1653 +
1654 +       return 0;
1655 +
1656 +free_devices:
1657 +       while (dev_id)
1658 +               destroy_device(&devices[--dev_id]);
1659 +unregister:
1660 +       unregister_blkdev(ramzswap_major, "ramzswap");
1661 +out:
1662 +       return ret;
1663 +}
1664 +
1665 +static void __exit ramzswap_exit(void)
1666 +{
1667 +       int i;
1668 +       struct ramzswap *rzs;
1669 +
1670 +       for (i = 0; i < num_devices; i++) {
1671 +               rzs = &devices[i];
1672 +
1673 +               destroy_device(rzs);
1674 +               if (rzs->init_done)
1675 +                       reset_device(rzs, NULL);
1676 +       }
1677 +
1678 +       unregister_blkdev(ramzswap_major, "ramzswap");
1679 +
1680 +       kfree(devices);
1681 +       pr_debug("Cleanup done!\n");
1682 +}
1683 +
1684 +/*
1685 + * Module parameters
1686 + */
1687 +
1688 +/* Optional: default = 1 */
1689 +module_param(num_devices, uint, 0);
1690 +MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
1691 +
1692 +/*
1693 + * User specifies either <disksize_kb> or <backing_swap, memlimit_kb>
1694 + * parameters. You must specify these parameters if the first device
1695 + * has to be initialized on module load without using the rzscontrol
1696 + * utility. This is useful for embedded systems where shipping an
1697 + * additional binary (rzscontrol) might not be desirable.
1698 + *
1699 + * These parameters are used to initialize just the first (/dev/ramzswap0)
1700 + * device. To initialize additional devices, use the rzscontrol utility.
1701 + * If these parameters are not provided, then the first device is also
1702 + * left in an uninitialized state.
1703 + */
1704 +
1705 +/* Optional: default = 25% of RAM */
1706 +module_param(disksize_kb, ulong, 0);
1707 +MODULE_PARM_DESC(disksize_kb, "Disksize in KB");
1708 +
1709 +/* Optional: default = 15% of RAM */
1710 +module_param(memlimit_kb, ulong, 0);
1711 +MODULE_PARM_DESC(memlimit_kb, "Memlimit in KB");
1712 +
1713 +/* Optional: default = <NULL> */
1714 +module_param_string(backing_swap, backing_swap, sizeof(backing_swap), 0);
1715 +MODULE_PARM_DESC(backing_swap, "Backing swap name");
1716 +
1717 +module_init(ramzswap_init);
1718 +module_exit(ramzswap_exit);
1719 +
1720 +MODULE_LICENSE("Dual BSD/GPL");
1721 +MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1722 +MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
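
As an illustration of the ioctl protocol handled above (a minimal userspace sketch, not part of the patch: it assumes the /dev/ramzswap0 node exists and that ramzswap_ioctl.h has been made buildable from userspace, e.g. with its u32/u64 fields mapped to <stdint.h> types), setting a disk size and initializing the first device follows the same sequence the rzscontrol utility would use:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include "ramzswap_ioctl.h"     /* assumed adapted for userspace builds */

    int main(void)
    {
            size_t disksize_kb = 32 * 1024;         /* 32 MB compressed swap disk */
            int fd = open("/dev/ramzswap0", O_RDWR);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* The size must be set before RZSIO_INIT; the driver rejects
             * changes on an already-initialized device. */
            if (ioctl(fd, RZSIO_SET_DISKSIZE_KB, &disksize_kb) < 0 ||
                ioctl(fd, RZSIO_INIT) < 0) {
                    perror("ioctl");
                    close(fd);
                    return 1;
            }
            close(fd);
            return 0;
    }

Once RZSIO_INIT succeeds, the device can be enabled as swap with the usual mkswap and swapon sequence.
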
1723 Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.h
1724 ===================================================================
1725 --- /dev/null
1726 +++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_drv.h
1727 @@ -0,0 +1,210 @@
1728 +/*
1729 + * Compressed RAM based swap device
1730 + *
1731 + * Copyright (C) 2008, 2009, 2010  Nitin Gupta
1732 + *
1733 + * This code is released using a dual license strategy: BSD/GPL
1734 + * You can choose the licence that better fits your requirements.
1735 + *
1736 + * Released under the terms of 3-clause BSD License
1737 + * Released under the terms of GNU General Public License Version 2.0
1738 + *
1739 + * Project home: http://compcache.googlecode.com
1740 + */
1741 +
1742 +#ifndef _RAMZSWAP_DRV_H_
1743 +#define _RAMZSWAP_DRV_H_
1744 +
1745 +#include <linux/spinlock.h>
1746 +#include <linux/mutex.h>
1747 +
1748 +#include "ramzswap_ioctl.h"
1749 +#include "xvmalloc.h"
1750 +
1751 +/*
1752 + * Some arbitrary value. This is just to catch
1753 + * an invalid value for the num_devices module parameter.
1754 + */
1755 +static const unsigned max_num_devices = 32;
1756 +
1757 +/*
1758 + * Stored at beginning of each compressed object.
1759 + *
1760 + * It stores a back-reference to the table entry which points to this
1761 + * object. This is required to support memory defragmentation or
1762 + * migrating compressed pages to backing swap disk.
1763 + */
1764 +struct zobj_header {
1765 +#if 0
1766 +       u32 table_idx;
1767 +#endif
1768 +};
1769 +
1770 +/*-- Configurable parameters */
1771 +
1772 +/* Default ramzswap disk size: 25% of total RAM */
1773 +static const unsigned default_disksize_perc_ram = 25;
1774 +static const unsigned default_memlimit_perc_ram = 15;
1775 +
1776 +/*
1777 + * Max compressed page size when backing device is provided.
1778 + * Pages that compress to size greater than this are sent to
1779 + * physical swap disk.
1780 + */
1781 +static const unsigned max_zpage_size_bdev = PAGE_SIZE / 2;
1782 +
1783 +/*
1784 + * Max compressed page size when there is no backing dev.
1785 + * Pages that compress to size greater than this are stored
1786 + * uncompressed in memory.
1787 + */
1788 +static const unsigned max_zpage_size_nobdev = PAGE_SIZE / 4 * 3;
1789 +
1790 +/*
1791 + * NOTE: max_zpage_size_{bdev,nobdev} sizes must be
1792 + * less than or equal to:
1793 + *   XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
1794 + * since otherwise xv_malloc would always return failure.
1795 + */
1796 +
1797 +/*-- End of configurable params */
1798 +
1799 +#define SECTOR_SHIFT           9
1800 +#define SECTOR_SIZE            (1 << SECTOR_SHIFT)
1801 +#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
1802 +#define SECTORS_PER_PAGE       (1 << SECTORS_PER_PAGE_SHIFT)
1803 +
1804 +/* Flags for ramzswap pages (table[page_no].flags) */
1805 +enum rzs_pageflags {
1806 +       /* Page is stored uncompressed */
1807 +       RZS_UNCOMPRESSED,
1808 +
1809 +       /* Page consists entirely of zeros */
1810 +       RZS_ZERO,
1811 +
1812 +       __NR_RZS_PAGEFLAGS,
1813 +};
1814 +
1815 +/*-- Data structures */
1816 +
1817 +/*
1818 + * Allocated for each swap slot, indexed by page no.
1819 + * These table entries must fit exactly in a page.
1820 + */
1821 +struct table {
1822 +       struct page *page;
1823 +       u16 offset;
1824 +       u8 count;       /* object ref count (not yet used) */
1825 +       u8 flags;
1826 +} __attribute__((aligned(4)));
1827 +
1828 +/*
1829 + * Swap extent information in case backing swap is a regular
1830 + * file. These extent entries must fit exactly in a page.
1831 + */
1832 +struct ramzswap_backing_extent {
1833 +       pgoff_t phy_pagenum;
1834 +       pgoff_t num_pages;
1835 +} __attribute__((aligned(4)));
1836 +
1837 +struct ramzswap_stats {
1838 +       /* basic stats */
1839 +       size_t compr_size;      /* compressed size of pages stored -
1840 +                                * needed to enforce memlimit */
1841 +       /* more stats */
1842 +#if defined(CONFIG_RAMZSWAP_STATS)
1843 +       u64 num_reads;          /* failed + successful */
1844 +       u64 num_writes;         /* --do-- */
1845 +       u64 failed_reads;       /* should NEVER! happen */
1846 +       u64 failed_writes;      /* can happen when memory is too low */
1847 +       u64 invalid_io;         /* non-swap I/O requests */
1848 +       u64 notify_free;        /* no. of swap slot free notifications */
1849 +       u32 pages_zero;         /* no. of zero filled pages */
1850 +       u32 pages_stored;       /* no. of pages currently stored */
1851 +       u32 good_compress;      /* no. of pages with compression ratio<=50% */
1852 +       u32 pages_expand;       /* no. of incompressible pages */
1853 +       u64 bdev_num_reads;     /* no. of reads on backing dev */
1854 +       u64 bdev_num_writes;    /* no. of writes on backing dev */
1855 +#endif
1856 +};
1857 +
1858 +struct ramzswap {
1859 +       struct xv_pool *mem_pool;
1860 +       void *compress_workmem;
1861 +       void *compress_buffer;
1862 +       struct table *table;
1863 +       spinlock_t stat64_lock; /* protect 64-bit stats */
1864 +       struct mutex lock;
1865 +       struct request_queue *queue;
1866 +       struct gendisk *disk;
1867 +       int init_done;
1868 +       /*
1869 +        * This is the limit on compressed data size (stats.compr_size).
1870 +        * It applies only when a backing swap device is present.
1871 +        */
1872 +       size_t memlimit;        /* bytes */
1873 +       /*
1874 +        * This is the limit on the amount of *uncompressed* data we can
1875 +        * hold. When a backing swap device is provided, it is set equal
1876 +        * to the device size.
1877 +        */
1878 +       size_t disksize;        /* bytes */
1879 +
1880 +       struct ramzswap_stats stats;
1881 +
1882 +       /* backing swap device info */
1883 +       struct ramzswap_backing_extent *curr_extent;
1884 +       struct list_head backing_swap_extent_list;
1885 +       unsigned long num_extents;
1886 +       char backing_swap_name[MAX_SWAP_NAME_LEN];
1887 +       struct block_device *backing_swap;
1888 +       struct file *swap_file;
1889 +};
1890 +
1891 +/*-- */
1892 +
1893 +/* Debugging and Stats */
1894 +#if defined(CONFIG_RAMZSWAP_STATS)
1895 +static void stat_inc(u32 *v)
1896 +{
1897 +       *v = *v + 1;
1898 +}
1899 +
1900 +static void stat_dec(u32 *v)
1901 +{
1902 +       *v = *v - 1;
1903 +}
1904 +
1905 +static void stat64_inc(struct ramzswap *rzs, u64 *v)
1906 +{
1907 +       spin_lock(&rzs->stat64_lock);
1908 +       *v = *v + 1;
1909 +       spin_unlock(&rzs->stat64_lock);
1910 +}
1911 +
1912 +static void stat64_dec(struct ramzswap *rzs, u64 *v)
1913 +{
1914 +       spin_lock(&rzs->stat64_lock);
1915 +       *v = *v - 1;
1916 +       spin_unlock(&rzs->stat64_lock);
1917 +}
1918 +
1919 +static u64 stat64_read(struct ramzswap *rzs, u64 *v)
1920 +{
1921 +       u64 val;
1922 +
1923 +       spin_lock(&rzs->stat64_lock);
1924 +       val = *v;
1925 +       spin_unlock(&rzs->stat64_lock);
1926 +
1927 +       return val;
1928 +}
1929 +#else
1930 +#define stat_inc(v)
1931 +#define stat_dec(v)
1932 +#define stat64_inc(r, v)
1933 +#define stat64_dec(r, v)
1934 +#define stat64_read(r, v)
1935 +#endif /* CONFIG_RAMZSWAP_STATS */
1936 +
1937 +#endif
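
A quick worked example of the sector constants above: with 4 KB pages, PAGE_SHIFT is 12, so SECTORS_PER_PAGE_SHIFT = 12 - 9 = 3 and SECTORS_PER_PAGE = 8. The driver indexes table[] by page number, so an I/O starting at sector 2048 refers to entry 2048 >> 3 = 256; this is exactly the conversion ramzswap_slot_free_notify() applies to bi_sector before calling ramzswap_free_page().
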
1938 Index: kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_ioctl.h
1939 ===================================================================
1940 --- /dev/null
1941 +++ kernel-power-2.6.28/drivers/block/ramzswap/ramzswap_ioctl.h
1942 @@ -0,0 +1,50 @@
1943 +/*
1944 + * Compressed RAM based swap device
1945 + *
1946 + * Copyright (C) 2008, 2009, 2010  Nitin Gupta
1947 + *
1948 + * This code is released using a dual license strategy: BSD/GPL
1949 + * You can choose the licence that better fits your requirements.
1950 + *
1951 + * Released under the terms of 3-clause BSD License
1952 + * Released under the terms of GNU General Public License Version 2.0
1953 + *
1954 + * Project home: http://compcache.googlecode.com
1955 + */
1956 +
1957 +#ifndef _RAMZSWAP_IOCTL_H_
1958 +#define _RAMZSWAP_IOCTL_H_
1959 +
1960 +#define MAX_SWAP_NAME_LEN 128
1961 +
1962 +struct ramzswap_ioctl_stats {
1963 +       char backing_swap_name[MAX_SWAP_NAME_LEN];
1964 +       u64 memlimit;           /* only applicable if backing swap present */
1965 +       u64 disksize;           /* user specified or equal to backing swap
1966 +                                * size (if present) */
1967 +       u64 num_reads;          /* failed + successful */
1968 +       u64 num_writes;         /* --do-- */
1969 +       u64 failed_reads;       /* should NEVER! happen */
1970 +       u64 failed_writes;      /* can happen when memory is too low */
1971 +       u64 invalid_io;         /* non-swap I/O requests */
1972 +       u64 notify_free;        /* no. of swap slot free notifications */
1973 +       u32 pages_zero;         /* no. of zero filled pages */
1974 +       u32 good_compress_pct;  /* % of pages with compression ratio<=50% */
1975 +       u32 pages_expand_pct;   /* % of incompressible pages */
1976 +       u32 pages_stored;
1977 +       u32 pages_used;
1978 +       u64 orig_data_size;
1979 +       u64 compr_data_size;
1980 +       u64 mem_used_total;
1981 +       u64 bdev_num_reads;     /* no. of reads on backing dev */
1982 +       u64 bdev_num_writes;    /* no. of writes on backing dev */
1983 +} __attribute__ ((packed, aligned(4)));
1984 +
1985 +#define RZSIO_SET_DISKSIZE_KB  _IOW('z', 0, size_t)
1986 +#define RZSIO_SET_MEMLIMIT_KB  _IOW('z', 1, size_t)
1987 +#define RZSIO_SET_BACKING_SWAP _IOW('z', 2, unsigned char[MAX_SWAP_NAME_LEN])
1988 +#define RZSIO_GET_STATS                _IOR('z', 3, struct ramzswap_ioctl_stats)
1989 +#define RZSIO_INIT             _IO('z', 4)
1990 +#define RZSIO_RESET            _IO('z', 5)
1991 +
1992 +#endif
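
To show how the stats side of this interface is consumed (again an illustrative userspace sketch, not part of the patch, with the same header assumption as the earlier example), RZSIO_GET_STATS copies one struct ramzswap_ioctl_stats back to the caller and fails with ENOTTY until the device has been initialized:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include "ramzswap_ioctl.h"     /* assumed adapted for userspace builds */

    static int print_rzs_stats(const char *dev)
    {
            struct ramzswap_ioctl_stats s;
            int fd = open(dev, O_RDONLY);

            if (fd < 0)
                    return -1;
            if (ioctl(fd, RZSIO_GET_STATS, &s) < 0) {   /* ENOTTY before RZSIO_INIT */
                    close(fd);
                    return -1;
            }
            printf("orig_data_size:  %llu\n", (unsigned long long)s.orig_data_size);
            printf("compr_data_size: %llu\n", (unsigned long long)s.compr_data_size);
            printf("mem_used_total:  %llu\n", (unsigned long long)s.mem_used_total);
            close(fd);
            return 0;
    }
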
1993 Index: kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.c
1994 ===================================================================
1995 --- /dev/null
1996 +++ kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.c
1997 @@ -0,0 +1,507 @@
1998 +/*
1999 + * xvmalloc memory allocator
2000 + *
2001 + * Copyright (C) 2008, 2009, 2010  Nitin Gupta
2002 + *
2003 + * This code is released using a dual license strategy: BSD/GPL
2004 + * You can choose the licence that better fits your requirements.
2005 + *
2006 + * Released under the terms of 3-clause BSD License
2007 + * Released under the terms of GNU General Public License Version 2.0
2008 + */
2009 +
2010 +#include <linux/bitops.h>
2011 +#include <linux/errno.h>
2012 +#include <linux/highmem.h>
2013 +#include <linux/init.h>
2014 +#include <linux/string.h>
2015 +#include <linux/slab.h>
2016 +
2017 +#include "xvmalloc.h"
2018 +#include "xvmalloc_int.h"
2019 +
2020 +static void stat_inc(u64 *value)
2021 +{
2022 +       *value = *value + 1;
2023 +}
2024 +
2025 +static void stat_dec(u64 *value)
2026 +{
2027 +       *value = *value - 1;
2028 +}
2029 +
2030 +static int test_flag(struct block_header *block, enum blockflags flag)
2031 +{
2032 +       return block->prev & BIT(flag);
2033 +}
2034 +
2035 +static void set_flag(struct block_header *block, enum blockflags flag)
2036 +{
2037 +       block->prev |= BIT(flag);
2038 +}
2039 +
2040 +static void clear_flag(struct block_header *block, enum blockflags flag)
2041 +{
2042 +       block->prev &= ~BIT(flag);
2043 +}
2044 +
2045 +/*
2046 + * Given <page, offset> pair, provide a dereferenceable pointer.
2047 + * This is called from xv_malloc/xv_free path, so it
2048 + * needs to be fast.
2049 + */
2050 +static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
2051 +{
2052 +       unsigned char *base;
2053 +
2054 +       base = kmap_atomic(page, type);
2055 +       return base + offset;
2056 +}
2057 +
2058 +static void put_ptr_atomic(void *ptr, enum km_type type)
2059 +{
2060 +       kunmap_atomic(ptr, type);
2061 +}
2062 +
2063 +static u32 get_blockprev(struct block_header *block)
2064 +{
2065 +       return block->prev & PREV_MASK;
2066 +}
2067 +
2068 +static void set_blockprev(struct block_header *block, u16 new_offset)
2069 +{
2070 +       block->prev = new_offset | (block->prev & FLAGS_MASK);
2071 +}
2072 +
2073 +static struct block_header *BLOCK_NEXT(struct block_header *block)
2074 +{
2075 +       return (struct block_header *)
2076 +               ((char *)block + block->size + XV_ALIGN);
2077 +}
2078 +
2079 +/*
2080 + * Get index of free list containing blocks of maximum size
2081 + * which is less than or equal to given size.
2082 + */
2083 +static u32 get_index_for_insert(u32 size)
2084 +{
2085 +       if (unlikely(size > XV_MAX_ALLOC_SIZE))
2086 +               size = XV_MAX_ALLOC_SIZE;
2087 +       size &= ~FL_DELTA_MASK;
2088 +       return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
2089 +}
2090 +
2091 +/*
2092 + * Get index of free list having blocks of size greater than
2093 + * or equal to requested size.
2094 + */
2095 +static u32 get_index(u32 size)
2096 +{
2097 +       if (unlikely(size < XV_MIN_ALLOC_SIZE))
2098 +               size = XV_MIN_ALLOC_SIZE;
2099 +       size = ALIGN(size, FL_DELTA);
2100 +       return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
2101 +}
2102 +
2103 +/**
2104 + * find_block - find block of at least given size
2105 + * @pool: memory pool to search from
2106 + * @size: size of block required
2107 + * @page: page containing required block
2108 + * @offset: offset within the page where block is located.
2109 + *
2110 + * Searches two level bitmap to locate block of at least
2111 + * the given size. If such a block is found, it provides
2112 + * <page, offset> to identify this block and returns index
2113 + * in freelist where we found this block.
2114 + * Otherwise, returns 0 and <page, offset> params are not touched.
2115 + */
2116 +static u32 find_block(struct xv_pool *pool, u32 size,
2117 +                       struct page **page, u32 *offset)
2118 +{
2119 +       ulong flbitmap, slbitmap;
2120 +       u32 flindex, slindex, slbitstart;
2121 +
2122 +       /* There are no free blocks in this pool */
2123 +       if (!pool->flbitmap)
2124 +               return 0;
2125 +
2126 +       /* Get freelist index corresponding to this size */
2127 +       slindex = get_index(size);
2128 +       slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
2129 +       slbitstart = slindex % BITS_PER_LONG;
2130 +
2131 +       /*
2132 +        * If freelist is not empty at this index, we found the
2133 +        * block - head of this list. This is approximate best-fit match.
2134 +        */
2135 +       if (test_bit(slbitstart, &slbitmap)) {
2136 +               *page = pool->freelist[slindex].page;
2137 +               *offset = pool->freelist[slindex].offset;
2138 +               return slindex;
2139 +       }
2140 +
2141 +       /*
2142 +        * No best-fit found. Search a bit further in bitmap for a free block.
2143 +        * Second level bitmap consists of word-sized (BITS_PER_LONG) chunks.
2144 +        * Search further in the chunk where we expected a best-fit, starting
2145 +        * from the index location found above.
2146 +        */
2147 +       slbitstart++;
2148 +       slbitmap >>= slbitstart;
2149 +
2150 +       /* Skip this search if we were already at end of this bitmap chunk */
2151 +       if ((slbitstart != BITS_PER_LONG) && slbitmap) {
2152 +               slindex += __ffs(slbitmap) + 1;
2153 +               *page = pool->freelist[slindex].page;
2154 +               *offset = pool->freelist[slindex].offset;
2155 +               return slindex;
2156 +       }
2157 +
2158 +       /* Now do a full two-level bitmap search to find next nearest fit */
2159 +       flindex = slindex / BITS_PER_LONG;
2160 +
2161 +       flbitmap = (pool->flbitmap) >> (flindex + 1);
2162 +       if (!flbitmap)
2163 +               return 0;
2164 +
2165 +       flindex += __ffs(flbitmap) + 1;
2166 +       slbitmap = pool->slbitmap[flindex];
2167 +       slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
2168 +       *page = pool->freelist[slindex].page;
2169 +       *offset = pool->freelist[slindex].offset;
2170 +
2171 +       return slindex;
2172 +}
2173 +
2174 +/*
2175 + * Insert block at <page, offset> in freelist of given pool.
2176 + * freelist used depends on block size.
2177 + */
2178 +static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
2179 +                       struct block_header *block)
2180 +{
2181 +       u32 flindex, slindex;
2182 +       struct block_header *nextblock;
2183 +
2184 +       slindex = get_index_for_insert(block->size);
2185 +       flindex = slindex / BITS_PER_LONG;
2186 +
2187 +       block->link.prev_page = 0;
2188 +       block->link.prev_offset = 0;
2189 +       block->link.next_page = pool->freelist[slindex].page;
2190 +       block->link.next_offset = pool->freelist[slindex].offset;
2191 +       pool->freelist[slindex].page = page;
2192 +       pool->freelist[slindex].offset = offset;
2193 +
2194 +       if (block->link.next_page) {
2195 +               nextblock = get_ptr_atomic(block->link.next_page,
2196 +                                       block->link.next_offset, KM_USER1);
2197 +               nextblock->link.prev_page = page;
2198 +               nextblock->link.prev_offset = offset;
2199 +               put_ptr_atomic(nextblock, KM_USER1);
2200 +       }
2201 +
2202 +       __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
2203 +       __set_bit(flindex, &pool->flbitmap);
2204 +}
2205 +
2206 +/*
2207 + * Remove block from head of freelist. Index 'slindex' identifies the freelist.
2208 + */
2209 +static void remove_block_head(struct xv_pool *pool,
2210 +                       struct block_header *block, u32 slindex)
2211 +{
2212 +       struct block_header *tmpblock;
2213 +       u32 flindex = slindex / BITS_PER_LONG;
2214 +
2215 +       pool->freelist[slindex].page = block->link.next_page;
2216 +       pool->freelist[slindex].offset = block->link.next_offset;
2217 +       block->link.prev_page = 0;
2218 +       block->link.prev_offset = 0;
2219 +
2220 +       if (!pool->freelist[slindex].page) {
2221 +               __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
2222 +               if (!pool->slbitmap[flindex])
2223 +                       __clear_bit(flindex, &pool->flbitmap);
2224 +       } else {
2225 +               /*
2226 +                * DEBUG ONLY: We need not reinitialize freelist head previous
2227 +                * pointer to 0 - we never depend on its value. But just for
2228 +                * sanity, let's do it.
2229 +                */
2230 +               tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
2231 +                               pool->freelist[slindex].offset, KM_USER1);
2232 +               tmpblock->link.prev_page = 0;
2233 +               tmpblock->link.prev_offset = 0;
2234 +               put_ptr_atomic(tmpblock, KM_USER1);
2235 +       }
2236 +}
2237 +
2238 +/*
2239 + * Remove block from freelist. Index 'slindex' identifies the freelist.
2240 + */
2241 +static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
2242 +                       struct block_header *block, u32 slindex)
2243 +{
2244 +       u32 flindex;
2245 +       struct block_header *tmpblock;
2246 +
2247 +       if (pool->freelist[slindex].page == page
2248 +          && pool->freelist[slindex].offset == offset) {
2249 +               remove_block_head(pool, block, slindex);
2250 +               return;
2251 +       }
2252 +
2253 +       flindex = slindex / BITS_PER_LONG;
2254 +
2255 +       if (block->link.prev_page) {
2256 +               tmpblock = get_ptr_atomic(block->link.prev_page,
2257 +                               block->link.prev_offset, KM_USER1);
2258 +               tmpblock->link.next_page = block->link.next_page;
2259 +               tmpblock->link.next_offset = block->link.next_offset;
2260 +               put_ptr_atomic(tmpblock, KM_USER1);
2261 +       }
2262 +
2263 +       if (block->link.next_page) {
2264 +               tmpblock = get_ptr_atomic(block->link.next_page,
2265 +                               block->link.next_offset, KM_USER1);
2266 +               tmpblock->link.prev_page = block->link.prev_page;
2267 +               tmpblock->link.prev_offset = block->link.prev_offset;
2268 +               put_ptr_atomic(tmpblock, KM_USER1);
2269 +       }
2270 +}
2271 +
2272 +/*
2273 + * Allocate a page and add it to freelist of given pool.
2274 + */
2275 +static int grow_pool(struct xv_pool *pool, gfp_t flags)
2276 +{
2277 +       struct page *page;
2278 +       struct block_header *block;
2279 +
2280 +       page = alloc_page(flags);
2281 +       if (unlikely(!page))
2282 +               return -ENOMEM;
2283 +
2284 +       stat_inc(&pool->total_pages);
2285 +
2286 +       spin_lock(&pool->lock);
2287 +       block = get_ptr_atomic(page, 0, KM_USER0);
2288 +
2289 +       block->size = PAGE_SIZE - XV_ALIGN;
2290 +       set_flag(block, BLOCK_FREE);
2291 +       clear_flag(block, PREV_FREE);
2292 +       set_blockprev(block, 0);
2293 +
2294 +       insert_block(pool, page, 0, block);
2295 +
2296 +       put_ptr_atomic(block, KM_USER0);
2297 +       spin_unlock(&pool->lock);
2298 +
2299 +       return 0;
2300 +}
2301 +
2302 +/*
2303 + * Create a memory pool. Allocates freelist, bitmaps and other
2304 + * per-pool metadata.
2305 + */
2306 +struct xv_pool *xv_create_pool(void)
2307 +{
2308 +       u32 ovhd_size;
2309 +       struct xv_pool *pool;
2310 +
2311 +       ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
2312 +       pool = kzalloc(ovhd_size, GFP_KERNEL);
2313 +       if (!pool)
2314 +               return NULL;
2315 +
2316 +       spin_lock_init(&pool->lock);
2317 +
2318 +       return pool;
2319 +}
2320 +
2321 +void xv_destroy_pool(struct xv_pool *pool)
2322 +{
2323 +       kfree(pool);
2324 +}
2325 +
2326 +/**
2327 + * xv_malloc - Allocate block of given size from pool.
2328 + * @pool: pool to allocate from
2329 + * @size: size of block to allocate
2330 + * @page: page that holds the allocated object
2331 + * @offset: location of object within page
2332 + *
2333 + * On success, <page, offset> identifies block allocated
2334 + * and 0 is returned. On failure, <page, offset> is set to
2335 + * 0 and -ENOMEM is returned.
2336 + *
2337 + * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
2338 + */
2339 +int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
2340 +               u32 *offset, gfp_t flags)
2341 +{
2342 +       int error;
2343 +       u32 index, tmpsize, origsize, tmpoffset;
2344 +       struct block_header *block, *tmpblock;
2345 +
2346 +       *page = NULL;
2347 +       *offset = 0;
2348 +       origsize = size;
2349 +
2350 +       if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
2351 +               return -ENOMEM;
2352 +
2353 +       size = ALIGN(size, XV_ALIGN);
2354 +
2355 +       spin_lock(&pool->lock);
2356 +
2357 +       index = find_block(pool, size, page, offset);
2358 +
2359 +       if (!*page) {
2360 +               spin_unlock(&pool->lock);
2361 +               if (flags & GFP_NOWAIT)
2362 +                       return -ENOMEM;
2363 +               error = grow_pool(pool, flags);
2364 +               if (unlikely(error))
2365 +                       return error;
2366 +
2367 +               spin_lock(&pool->lock);
2368 +               index = find_block(pool, size, page, offset);
2369 +       }
2370 +
2371 +       if (!*page) {
2372 +               spin_unlock(&pool->lock);
2373 +               return -ENOMEM;
2374 +       }
2375 +
2376 +       block = get_ptr_atomic(*page, *offset, KM_USER0);
2377 +
2378 +       remove_block_head(pool, block, index);
2379 +
2380 +       /* Split the block if required */
2381 +       tmpoffset = *offset + size + XV_ALIGN;
2382 +       tmpsize = block->size - size;
2383 +       tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
2384 +       if (tmpsize) {
2385 +               tmpblock->size = tmpsize - XV_ALIGN;
2386 +               set_flag(tmpblock, BLOCK_FREE);
2387 +               clear_flag(tmpblock, PREV_FREE);
2388 +
2389 +               set_blockprev(tmpblock, *offset);
2390 +               if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
2391 +                       insert_block(pool, *page, tmpoffset, tmpblock);
2392 +
2393 +               if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
2394 +                       tmpblock = BLOCK_NEXT(tmpblock);
2395 +                       set_blockprev(tmpblock, tmpoffset);
2396 +               }
2397 +       } else {
2398 +               /* This block is exact fit */
2399 +               if (tmpoffset != PAGE_SIZE)
2400 +                       clear_flag(tmpblock, PREV_FREE);
2401 +       }
2402 +
2403 +       block->size = origsize;
2404 +       clear_flag(block, BLOCK_FREE);
2405 +
2406 +       put_ptr_atomic(block, KM_USER0);
2407 +       spin_unlock(&pool->lock);
2408 +
2409 +       *offset += XV_ALIGN;
2410 +
2411 +       return 0;
2412 +}
2413 +
2414 +/*
2415 + * Free block identified with <page, offset>
2416 + */
2417 +void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
2418 +{
2419 +       void *page_start;
2420 +       struct block_header *block, *tmpblock;
2421 +
2422 +       offset -= XV_ALIGN;
2423 +
2424 +       spin_lock(&pool->lock);
2425 +
2426 +       page_start = get_ptr_atomic(page, 0, KM_USER0);
2427 +       block = (struct block_header *)((char *)page_start + offset);
2428 +
2429 +       /* Catch double free bugs */
2430 +       BUG_ON(test_flag(block, BLOCK_FREE));
2431 +
2432 +       block->size = ALIGN(block->size, XV_ALIGN);
2433 +
2434 +       tmpblock = BLOCK_NEXT(block);
2435 +       if (offset + block->size + XV_ALIGN == PAGE_SIZE)
2436 +               tmpblock = NULL;
2437 +
2438 +       /* Merge next block if it is free */
2439 +       if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
2440 +               /*
2441 +                * Blocks smaller than XV_MIN_ALLOC_SIZE
2442 +                * are not inserted in any free list.
2443 +                */
2444 +               if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
2445 +                       remove_block(pool, page,
2446 +                                   offset + block->size + XV_ALIGN, tmpblock,
2447 +                                   get_index_for_insert(tmpblock->size));
2448 +               }
2449 +               block->size += tmpblock->size + XV_ALIGN;
2450 +       }
2451 +
2452 +       /* Merge previous block if it is free */
2453 +       if (test_flag(block, PREV_FREE)) {
2454 +               tmpblock = (struct block_header *)((char *)(page_start) +
2455 +                                               get_blockprev(block));
2456 +               offset = offset - tmpblock->size - XV_ALIGN;
2457 +
2458 +               if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
2459 +                       remove_block(pool, page, offset, tmpblock,
2460 +                                   get_index_for_insert(tmpblock->size));
2461 +
2462 +               tmpblock->size += block->size + XV_ALIGN;
2463 +               block = tmpblock;
2464 +       }
2465 +
2466 +       /* No used objects in this page. Free it. */
2467 +       if (block->size == PAGE_SIZE - XV_ALIGN) {
2468 +               put_ptr_atomic(page_start, KM_USER0);
2469 +               spin_unlock(&pool->lock);
2470 +
2471 +               __free_page(page);
2472 +               stat_dec(&pool->total_pages);
2473 +               return;
2474 +       }
2475 +
2476 +       set_flag(block, BLOCK_FREE);
2477 +       if (block->size >= XV_MIN_ALLOC_SIZE)
2478 +               insert_block(pool, page, offset, block);
2479 +
2480 +       if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
2481 +               tmpblock = BLOCK_NEXT(block);
2482 +               set_flag(tmpblock, PREV_FREE);
2483 +               set_blockprev(tmpblock, offset);
2484 +       }
2485 +
2486 +       put_ptr_atomic(page_start, KM_USER0);
2487 +       spin_unlock(&pool->lock);
2488 +}
2489 +
2490 +u32 xv_get_object_size(void *obj)
2491 +{
2492 +       struct block_header *blk;
2493 +
2494 +       blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
2495 +       return blk->size;
2496 +}
2497 +
2498 +/*
2499 + * Returns total memory used by allocator (userdata + metadata)
2500 + */
2501 +u64 xv_get_total_size_bytes(struct xv_pool *pool)
2502 +{
2503 +       return pool->total_pages << PAGE_SHIFT;
2504 +}
2505 Index: kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.h
2506 ===================================================================
2507 --- /dev/null
2508 +++ kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc.h
2509 @@ -0,0 +1,30 @@
2510 +/*
2511 + * xvmalloc memory allocator
2512 + *
2513 + * Copyright (C) 2008, 2009, 2010  Nitin Gupta
2514 + *
2515 + * This code is released using a dual license strategy: BSD/GPL
2516 + * You can choose the licence that better fits your requirements.
2517 + *
2518 + * Released under the terms of 3-clause BSD License
2519 + * Released under the terms of GNU General Public License Version 2.0
2520 + */
2521 +
2522 +#ifndef _XV_MALLOC_H_
2523 +#define _XV_MALLOC_H_
2524 +
2525 +#include <linux/types.h>
2526 +
2527 +struct xv_pool;
2528 +
2529 +struct xv_pool *xv_create_pool(void);
2530 +void xv_destroy_pool(struct xv_pool *pool);
2531 +
2532 +int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
2533 +                       u32 *offset, gfp_t flags);
2534 +void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
2535 +
2536 +u32 xv_get_object_size(void *obj);
2537 +u64 xv_get_total_size_bytes(struct xv_pool *pool);
2538 +
2539 +#endif
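
The API above never returns raw pointers: xv_malloc() reports a <page, offset> pair and the caller maps the page itself. A minimal kernel-side sketch of that pattern (an illustration of how ramzswap_drv.c uses the allocator, not part of the patch; the function name and the GFP_NOIO choice are just for the example):

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/highmem.h>
    #include <linux/string.h>

    #include "xvmalloc.h"

    /* Store a clen-byte compressed object and report where it lives. */
    static int store_object(struct xv_pool *pool, const void *src, u32 clen,
                            struct page **page, u32 *offset)
    {
            void *base;

            if (xv_malloc(pool, clen, page, offset, GFP_NOIO))
                    return -ENOMEM;

            base = kmap_atomic(*page, KM_USER0);    /* <page, offset> -> pointer */
            memcpy(base + *offset, src, clen);
            kunmap_atomic(base, KM_USER0);
            return 0;
    }

The object is later released with xv_free(pool, page, offset); when the last object in a page is freed, xv_free() returns the whole page to the system.
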
2540 Index: kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc_int.h
2541 ===================================================================
2542 --- /dev/null
2543 +++ kernel-power-2.6.28/drivers/block/ramzswap/xvmalloc_int.h
2544 @@ -0,0 +1,86 @@
2545 +/*
2546 + * xvmalloc memory allocator
2547 + *
2548 + * Copyright (C) 2008, 2009, 2010  Nitin Gupta
2549 + *
2550 + * This code is released using a dual license strategy: BSD/GPL
2551 + * You can choose the licence that better fits your requirements.
2552 + *
2553 + * Released under the terms of 3-clause BSD License
2554 + * Released under the terms of GNU General Public License Version 2.0
2555 + */
2556 +
2557 +#ifndef _XV_MALLOC_INT_H_
2558 +#define _XV_MALLOC_INT_H_
2559 +
2560 +#include <linux/kernel.h>
2561 +#include <linux/types.h>
2562 +
2563 +/* User configurable params */
2564 +
2565 +/* Must be power of two */
2566 +#define XV_ALIGN_SHIFT 2
2567 +#define XV_ALIGN       (1 << XV_ALIGN_SHIFT)
2568 +#define XV_ALIGN_MASK  (XV_ALIGN - 1)
2569 +
2570 +/* This must be greater than sizeof(struct link_free) */
2571 +#define XV_MIN_ALLOC_SIZE      32
2572 +#define XV_MAX_ALLOC_SIZE      (PAGE_SIZE - XV_ALIGN)
2573 +
2574 +/* Free lists are separated by FL_DELTA bytes */
2575 +#define FL_DELTA_SHIFT 3
2576 +#define FL_DELTA       (1 << FL_DELTA_SHIFT)
2577 +#define FL_DELTA_MASK  (FL_DELTA - 1)
2578 +#define NUM_FREE_LISTS ((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
2579 +                               / FL_DELTA + 1)
2580 +
2581 +#define MAX_FLI                DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
2582 +
2583 +/* End of user params */
2584 +
2585 +enum blockflags {
2586 +       BLOCK_FREE,
2587 +       PREV_FREE,
2588 +       __NR_BLOCKFLAGS,
2589 +};
2590 +
2591 +#define FLAGS_MASK     XV_ALIGN_MASK
2592 +#define PREV_MASK      (~FLAGS_MASK)
2593 +
2594 +struct freelist_entry {
2595 +       struct page *page;
2596 +       u16 offset;
2597 +       u16 pad;
2598 +};
2599 +
2600 +struct link_free {
2601 +       struct page *prev_page;
2602 +       struct page *next_page;
2603 +       u16 prev_offset;
2604 +       u16 next_offset;
2605 +};
2606 +
2607 +struct block_header {
2608 +       union {
2609 +               /* This common header must be XV_ALIGN bytes */
2610 +               u8 common[XV_ALIGN];
2611 +               struct {
2612 +                       u16 size;
2613 +                       u16 prev;
2614 +               };
2615 +       };
2616 +       struct link_free link;
2617 +};
2618 +
2619 +struct xv_pool {
2620 +       ulong flbitmap;
2621 +       ulong slbitmap[MAX_FLI];
2622 +       spinlock_t lock;
2623 +
2624 +       struct freelist_entry freelist[NUM_FREE_LISTS];
2625 +
2626 +       /* stats */
2627 +       u64 total_pages;
2628 +};
2629 +
2630 +#endif
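
Plugging in the usual 4 KB page size makes the sizing above concrete: XV_MAX_ALLOC_SIZE = 4096 - 4 = 4092 bytes, NUM_FREE_LISTS = (4092 - 32) / 8 + 1 = 508, and slbitmap[] therefore needs MAX_FLI = 16 longs on a 32-bit build. For a 100-byte request, get_index() in xvmalloc.c rounds up to ALIGN(100, 8) = 104 and starts searching at list (104 - 32) >> 3 = 9, while get_index_for_insert() rounds the same size down (100 & ~FL_DELTA_MASK = 96) and files a free 100-byte block on list (96 - 32) >> 3 = 8. Rounding lookups up and insertions down guarantees that a search never starts at a list that could hold blocks smaller than the request.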