2 files changed, 83 insertions, 70 deletions
diff --git a/Docs/source-object-backing-store b/Docs/source-object-backing-store
index e55a99db3..5d4d3049d 100644
--- a/Docs/source-object-backing-store
+++ b/Docs/source-object-backing-store
@@ -34,8 +34,8 @@ As the backing store only holds cache data one should not expect a
 great deal of effort to be expended converting formats (i.e. the cache
 may simply be discarded).
 
-Layout version 1
-----------------
+Layout version 1.1
+------------------
 
 An object has an identifier value generated from the url (NetSurf
 backing stores uses the url as the unique key). The value used is
@@ -54,7 +54,7 @@ overhead of reconstructing it at initialisation and to keep the data
 used to improve the eviction decisions.
 
 Each object is stored and retrived directly into the filesystem using
-a filename generated from a base64url encoding of an address
+a filename generated from a RFC4648 base32 encoding of an address
 value. The objects address is derived from the identifier by cropping
 it to a shorter length.
 
@@ -71,17 +71,27 @@ E.g. Linux based systems can easily cope with several megabytes of
 mmaped index but RISC OS might want to limit this to a few megabytes
 of heap at most.
 
-The files are stored on disc using their base64url address value.
+The files are stored on disc using their base32 address value.
 By creating a directory for each character of the encoded filename
 (except the last which is of course the leafname) we create a
-directory structure where no directory has more than 64 entries.
+directory structure where no directory has more than 32 entries.
 
-E.g. A 19bit address of 0x1 would be base64url encoded into AAAB
+E.g. A 19bit address of 0x1 would be base32 encoded into AAAB
 resulting in the data being stored in a file path of
-"/store/prefix/data/B/A/A/BAAAAA".
+"/store/prefix/d/B/A/A/BAAAAAA".
 
 An address of 0x00040001 encodes to BAAB and a file path of
-"/store/prefix/meta/B/A/A/BAABAA"
+"/store/prefix/m/B/A/A/BAABAAA"
+
+Version 1.0
+-----------
+
+The version 1.0 layout was identical to 1.1 except that base64url
+encoding was used. This proved problematic as some systems' filesystems
+were case insensitive, so upper and lower case letters collided.
+
+There is no upgrade provision from the previous version; simply delete
+the cache directory.
 
 Control files
 ~~~~~~~~~~~~~
@@ -99,7 +109,7 @@ filesystem.
 
 Each control file table entry is 28 bytes and consists of
 
- - signed 64 but value for last use time
+ - signed 64 bit value for last use time
 
  - 32bit full url hash allowing for index reconstruction and
    addiitonal collision detection. Also the possibility of increasing
diff --git a/content/fs_backing_store.c b/content/fs_backing_store.c
index d29fcaac7..fde17ed62 100644
--- a/content/fs_backing_store.c
+++ b/content/fs_backing_store.c
@@ -54,7 +54,7 @@
 #define DEFAULT_ENTRY_SIZE 16
 
 /** Backing store file format version */
-#define CONTROL_VERSION 100
+#define CONTROL_VERSION 110
 
 /** Get address from ident */
 #define BS_ADDRESS(ident, state) ((ident) & ((1 << state->ident_bits) - 1))
@@ -215,6 +215,26 @@ remove_store_entry(struct store_state *state,
 /**
  * Generate a filename for an object.
  *
+ * This generates the filename for an object on disc. It is necessary
+ * for this to generate a filename which conforms to the limitations
+ * of all the filesystems the cache can be placed upon.
+ *
+ * From http://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits
+ * the relevant subset is:
+ *  - path elements no longer than 8 characters
+ *  - acceptable characters are A-Z, 0-9
+ *  - short total path lengths (255 or less)
+ *
+ * The short total path lengths mean the encoding must represent as
+ * much data as possible in the least number of characters.
+ *
+ * To achieve all these goals we use RFC4648 base32 encoding which packs
+ * 5 bits into each character of the filename.
+ *
+ * @note Version 1.00 of the cache implementation used base64 to
+ * encode this; however, that did not meet the requirement for only
+ * using uppercase characters.
+ *
  * @param state The store state to use.
  * @param ident The identifier to use.
  * @return The filename string or NULL on allocation error.
@@ -225,96 +245,79 @@ store_fname(struct store_state *state,
 	    enum backing_store_flags flags)
 {
 	char *fname = NULL;
-	uint8_t b64u_i[7]; /* base64 ident */
-	uint8_t b64u_d[6][2]; /* base64 ident as separate components */
+	uint8_t b32u_i[8]; /* base32 encoded ident */
+	uint8_t b32u_d[6][2]; /* base32 ident as separate components */
 	const char *dat;
 
-	/** Base64url encoding table */
+	/* RFC4648 base32 encoding table */
 	static const uint8_t encoding_table[] = {
 		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 		'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 		'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
-		'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
-		'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
-		'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
-		'w', 'x', 'y', 'z', '0', '1', '2', '3',
-		'4', '5', '6', '7', '8', '9', '-', '_'
+		'Y', 'Z', '2', '3', '4', '5', '6', '7'
 	};
 
-	/* base64 encode ident */
-	b64u_i[0] = b64u_d[0][0] = encoding_table[(ident      ) & 0x3f];
-	b64u_i[1] = b64u_d[1][0] = encoding_table[(ident >>  6) & 0x3f];
-	b64u_i[2] = b64u_d[2][0] = encoding_table[(ident >> 12) & 0x3f];
-	b64u_i[3] = b64u_d[3][0] = encoding_table[(ident >> 18) & 0x3f];
-	b64u_i[4] = b64u_d[4][0] = encoding_table[(ident >> 24) & 0x3f];
-	b64u_i[5] = b64u_d[5][0] = encoding_table[(ident >> 30) & 0x3f];
+	/* base32 encode ident */
+	b32u_i[0] = b32u_d[0][0] = encoding_table[(ident      ) & 0x1f];
+	b32u_i[1] = b32u_d[1][0] = encoding_table[(ident >>  5) & 0x1f];
+	b32u_i[2] = b32u_d[2][0] = encoding_table[(ident >> 10) & 0x1f];
+	b32u_i[3] = b32u_d[3][0] = encoding_table[(ident >> 15) & 0x1f];
+	b32u_i[4] = b32u_d[4][0] = encoding_table[(ident >> 20) & 0x1f];
+	b32u_i[5] = b32u_d[5][0] = encoding_table[(ident >> 25) & 0x1f];
+	b32u_i[6] = encoding_table[(ident >> 30) & 0x1f];
 	/* null terminate strings */
-	b64u_i[6] = b64u_d[0][1] = b64u_d[1][1] = b64u_d[2][1] =
-		b64u_d[3][1] = b64u_d[4][1] = b64u_d[5][1] = 0;
+	b32u_i[7] = b32u_d[0][1] = b32u_d[1][1] = b32u_d[2][1] =
+		b32u_d[3][1] = b32u_d[4][1] = b32u_d[5][1] = 0;
 
 	if ((flags & BACKING_STORE_META) != 0) {
-		dat = "meta";
+		dat = "m"; /* metadata */
 	} else {
-		dat = "data";
+		dat = "d"; /* data */
 	}
 
-	/* number of chars with usefully encoded data in b64 */
-	switch(((state->ident_bits + 5) / 6)) {
+	/* number of chars with usefully encoded data in base 32 */
+	switch(((state->ident_bits + 4) / 5)) {
 	case 1:
-		netsurf_mkpath(&fname, NULL, 3,
-			       state->path,
-			       dat,
-			       b64u_i);
+		netsurf_mkpath(&fname, NULL, 3, state->path, dat,
+			       b32u_i);
 		break;
 
 	case 2:
-		netsurf_mkpath(&fname, NULL, 4,
-			       state->path,
-			       dat,
-			       b64u_d[0],
-			       b64u_i);
+		netsurf_mkpath(&fname, NULL, 4, state->path, dat,
+			       b32u_d[0],
+			       b32u_i);
 		break;
 
 	case 3:
-		netsurf_mkpath(&fname, NULL, 5,
-			       state->path,
-			       dat,
-			       b64u_d[0],
-			       b64u_d[1],
-			       b64u_i);
+		netsurf_mkpath(&fname, NULL, 5, state->path, dat,
+			       b32u_d[0], b32u_d[1],
+			       b32u_i);
 		break;
 
 	case 4:
-		netsurf_mkpath(&fname, NULL, 6,
-			       state->path,
-			       dat,
-			       b64u_d[0],
-			       b64u_d[1],
-			       b64u_d[2],
-			       b64u_i);
+		netsurf_mkpath(&fname, NULL, 6, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2],
+			       b32u_i);
 		break;
 
 	case 5:
-		netsurf_mkpath(&fname, NULL, 7,
-			       state->path,
-			       dat,
-			       b64u_d[0],
-			       b64u_d[1],
-			       b64u_d[2],
-			       b64u_d[3],
-			       b64u_i);
+		netsurf_mkpath(&fname, NULL, 7, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2], b32u_d[3],
+			       b32u_i);
 		break;
 
 	case 6:
-		netsurf_mkpath(&fname, NULL, 8,
-			       state->path,
-			       dat,
-			       b64u_d[0],
-			       b64u_d[1],
-			       b64u_d[2],
-			       b64u_d[3],
-			       b64u_d[4],
-			       b64u_i);
+		netsurf_mkpath(&fname, NULL, 8, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2], b32u_d[3],
+			       b32u_d[4],
+			       b32u_i);
+		break;
+
+	case 7:
+		netsurf_mkpath(&fname, NULL, 9, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2], b32u_d[3],
+			       b32u_d[4], b32u_d[5],
+			       b32u_i);
 		break;
 
 	default: