1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
|
; This source code in this file is licensed to You by Castle Technology
; Limited ("Castle") and its licensors on contractual terms and conditions
; ("Licence") which entitle you freely to modify and/or to distribute this
; source code subject to Your compliance with the terms of the Licence.
;
; This source code has been made available to You without any warranties
; whatsoever. Consequently, Your use, modification and distribution of this
; source code is entirely at Your own risk and neither Castle, its licensors
; nor any other person who has contributed to this source code shall be
; liable to You for any loss or damage which You may suffer as a result of
; Your use, modification or distribution of this source code.
;
; Full details of Your rights and obligations are set out in the Licence.
; You should have received a copy of the Licence with this source code file.
; If You have not received a copy, the text of the Licence is available
; online at www.castle-technology.co.uk/riscosbaselicence.htm
;
;
; s.UnSqueeze by RCC 25-Aug-87
; This is a bit of code to be included in self-decompressing images to
; expand the image in place. See elsewhere for details of the compression
; algorithm.
;
; ***********************************
; *** C h a n g e L i s t ***
; ***********************************
; Date Name Description
; ---- ---- -----------
; 13-Feb-90 TDobson Minor optimisation which saves 1 instruction for
; every output word that isn't a "short" or a "long".
; Data area.  It holds a single reserved word labelled |UnSqueeze_D$|,
; which is not referenced by any of the code shown in this file.
        AREA    |M2$$Data|, DATA

; Exported symbols: only the two labels that bracket the unsqueeze code
; are live.  The remaining EXPORT/IMPORT lines are disabled.
;       EXPORT  |UnSqueeze_C$|
;       EXPORT  |UnSqueeze_CS$|
        EXPORT  |UnSqueeze_UnSqueezeBase|
        EXPORT  |UnSqueeze_UnSqueezeLimit|
;       EXPORT  |UnSqueeze_D$|
;       EXPORT  |UnSqueeze_FindUnSqueezeCode|
;       IMPORT  |SYSTEM.STKOVF|
;       IMPORT  |SYSTEM.RAISE|

|UnSqueeze_D$|
        %       4                       ; reserve one word (4 bytes)
; Code area, followed by symbolic names for the sixteen ARM registers.
; Everything from here to END is assembled into this read-only area.
        AREA    |M2$$Code|, CODE, READONLY

R0      RN      0
R1      RN      1
R2      RN      2
R3      RN      3
R4      RN      4
R5      RN      5
R6      RN      6
R7      RN      7
R8      RN      8
R9      RN      9
R10     RN      10
R11     RN      11
R12     RN      12
R13     RN      13
LR      RN      14                      ; link register
PC      RN      15                      ; program counter
; Module-level constants.
;
; |UnSqueeze_CS$| and StackSize are not referenced by the code shown in
; this file; they are kept unchanged.  |UnSqueeze_C$| labels the start of
; the code area.
|UnSqueeze_CS$| EQU     40
|UnSqueeze_C$|
StackSize       EQU     64

; Byte offsets of the six header words that precede the unsqueeze code
; (see |UnsqueezeDataBlock|).  The start-up code loads all six with one
; LDMIA into R8-R13, in this order, rather than using these names.
decodedSize     EQU     0
encodedSize     EQU     4
tableSize       EQU     8
nShorts         EQU     12
nLongs          EQU     16
sizeToMove      EQU     20

; Build option: when {TRUE} the unsqueeze code compares its memory needs
; against the RAM limit before expanding, and raises an error instead of
; overwriting memory it does not own.
        GBLL    expand_memcheck
expand_memcheck SETL    {TRUE}

; SWI numbers are defined locally rather than pulled in from headers:
;       GET     hdr:ListOpts
;       GET     hdr:Macros
;       GET     hdr:System
;       GET     hdr:MsgTrans

; XOS_SynchroniseCodeAreas is used by moveCode whether or not
; expand_memcheck is set, so it must be defined unconditionally;
; otherwise assembling with expand_memcheck {FALSE} fails with an
; undefined symbol.
XOS_SynchroniseCodeAreas        EQU     &2006e

        [ expand_memcheck
; These three are only used by the memory-check code paths.
OS_GetEnv                       EQU     &10
OS_GenerateError                EQU     &2b
XMessageTrans_ErrorLookup       EQU     &61506
        ]
; Partition of the sixteen possible nibble values in the encoded data.
; These must match the corresponding values in mod.squeeze.
; As used by decodePair:
;       0                       the word zero
;       1                       literal word, four bytes follow
;       MinLong..MinShort-1     index into the longs table
;       MinShort..15            index into the shorts table
NibsLong        EQU     7
NibsShort       EQU     (14-NibsLong)
MinShort        EQU     (2+NibsLong)
MinLong         EQU     2
; The code between |UnSqueeze_UnSqueezeBase| and
; |UnSqueeze_UnSqueezeLimit| is copied into the start of a squeezed
; image; when the image is loaded, execution jumps to the start of it.
;
; Immediately before the unsqueeze code sit six words that tell it
; where the data is, how big it is, and so on.  The six NOPs below stand
; in for those words so that |UnsqueezeDataBlock| resolves to the
; address six words below |UnSqueeze_UnSqueezeBase|.
|UnsqueezeDataBlock|
        NOP                             ; word 0
        NOP                             ; word 1
        NOP                             ; word 2
        NOP                             ; word 3
        NOP                             ; word 4
        NOP                             ; word 5
|UnSqueeze_UnSqueezeBase|
|UnsqueezeAIFImage|
; Entry point for AIF images, reached by a BL.  The BL decompress
; instruction that brought us here is overwritten with MOV r0, r0.
;
; NB: xpand relies on the first instruction here being MOV r0, #<imm>.
; This prelude must stay exactly five instructions long (checked by the
; assertion at |UnsqueezeADFSImage|).
        MOV     R0, #&E1000000          ; R0 = &E1A00000, built in two
        ORR     R0, R0, #&00A00000      ;   steps: encoding of MOV r0, r0
        SUB     R1, LR, PC              ; mode independent status bit removal
        ADD     R1, PC, R1              ; R1 = LR (- any PSR bits if there) + 4
        STR     R0, [R1, #-8]!          ; overwrite the instruction we just BL'ed from
|UnsqueezeADFSImage|
; Build-time check: the AIF prelude above must be exactly 5 instructions.
        [ UnsqueezeADFSImage - UnsqueezeAIFImage <> 5*4
        ! 1, "Change AIFPRELUDE in squeeze.h"
        ]

; We arrive here knowing very little about anything.
; Step 1: find out where we are and where the tables start.
        ADR     R0, |UnsqueezeDataBlock|        ; R0 points to data (PC-relative)
        LDMIA   R0, {R8-R13}            ; load all six header words:
                                        ;   R8  := decodedSize
                                        ;   R9  := encodedSize
                                        ;   R10 := tableSize
                                        ;   R11 := nShorts
                                        ;   R12 := nLongs
                                        ;   R13 := sizeToMove
        SUB     R10, R0, R10            ; R10 := base of encoded tables
        SUB     R9, R10, R9             ; R9  := base of encoded data
        ADD     R8, R9, R8              ; R8  := top of decoded image

; nLongs and nShorts are only needed while the tables are being decoded;
; afterwards R11/R12 are re-used as pointers to the start of the tables.

; Step 2: choose where the decoded tables go.
; (Disabled alternative: take 16K of workspace below the RAM limit.)
;       SWI     &10                     ; GetEnv - returns RAM limit in R1
;       SUB     R6, R1, #&4000          ; grab 16K workspace, remember table base
        ADR     R6, |UnSqueeze_UnSqueezeLimit|+24       ; top of squeezed image
        CMP     R6, R8                  ; take the higher of the top of the
        MOVLO   R6, R8                  ;   squeezed and unsqueezed images
                                        ;   (use SWI if you prefer... UNIX ?)

; Step 3: allocate space for the tables, one word per entry.
        ADD     R1, R11, R12            ; nLongs + nShorts
        ADD     R7, R6, R1, LSL #2      ; curFree += (nLongs + nShorts) * 4

        [ expand_memcheck
; Step 4: refuse to expand if the workspace would pass the RAM limit.
        SWI     OS_GetEnv               ; returns RAM limit in R1
        ; R7 points to the end of the tables; add the space needed for
        ; the copied-up decode routine.
        ADD     R2, R7, #|UnSqueeze_UnSqueezeLimit|+24-decodeImage
        ; If PC < RAM limit and R2 > RAM limit, we're in trouble.
        ; (If PC > RAM limit we assume we're not in the application slot.)
        CMP     PC, R1
        CMPLO   R1, R2
        BLO     expand_would_overwrite
        ]

; Step 5: set up for decoding the first (shorts) table.
        MOV     R5, R10                 ; R5 is ptr into encoded tables
        MOV     R4, #0                  ; 0 => decoding the shorts table
decodeTab
; Decode one table of words (first the shorts, then the longs).
;
; On entry:  R11 = number of entries left to decode
;            R6  = pointer into the decoded table (next free word)
;            R5  = pointer into the encoded table bytes
;            R4  = 0 iff this is the shorts table (i.e. 4-byte values)
; Uses:      R0, R1 as scratch; R2 = base of this table; R3 = previous
;            entry written.
;
; Each entry is stored as a delta from the previous entry.  The first
; byte b of an encoded entry selects the form:
;       b = 0           literal delta: 3 bytes follow, low byte first
;                       (4 bytes when decoding the shorts table)
;       b = 1..9        run of b entries, each one more than the last
;       b = 10..91      delta = b - 10
;       b = 92..173     delta = (b - 92) << 8  | next byte
;       b >= 174        delta = (b - 174) << 16 | next byte
;                                               | following byte << 8
;
; I believe this loop could be made a good deal smaller and possibly
; faster, but it's only a couple of hundred bytes and it works.
        MOV     R2, R6                  ; stash away base of this table
        MOV     R3, #-1                 ; start as if previous entry was -1

decodeEntry
        SUBS    R11, R11, #1            ; while (--nEntries >= 0) {
        BLT     decodedTab              ;   (previous word is in R3)
        LDRB    R1, [R5], #1            ;   byte = *p++
        SUBS    R0, R1, #10             ;   R0 = byte - 10
        BGE     greaterThan9

literalOrOnes                           ; byte is 0..9
        CMP     R1, #0
        BNE     ones

literal                                 ; byte == 0: explicit delta follows
        LDRB    R0, [R5], #1            ; bits 0..7
        LDRB    R1, [R5], #1
        ORR     R0, R0, R1, LSL #8      ; bits 8..15
        LDRB    R1, [R5], #1
        ORR     R0, R0, R1, LSL #16     ; bits 16..23
        CMP     R4, #0                  ; in the 4-byte (short encodings) table?
        LDREQB  R1, [R5], #1            ; yes, so include the 4th byte
        ORREQ   R0, R0, R1, LSL #24     ;   in the resultant word
        ADD     R3, R3, R0              ; entry = previous + delta
        STR     R3, [R6], #4
        B       decodeEntry

ones                                    ; byte is 1..9: a run of +1 entries
        SUB     R11, R11, R1            ; the run yields R1 entries and one
        ADD     R11, R11, #1            ;   was already counted above
anotherOne                              ; R1 = increment-by-ones still to emit
        ADD     R3, R3, #1
        STR     R3, [R6], #4
        SUBS    R1, R1, #1
        BGT     anotherOne
        B       decodeEntry

greaterThan9                            ; byte >= 10, R0 = byte - 10
        CMP     R1, #92
        ADDLT   R3, R3, R0              ; 10..91: delta is byte - 10
        STRLT   R3, [R6], #4
        BLT     decodeEntry

greaterThan91                           ; byte >= 92
        SUBS    R0, R1, #174
        BLT     oneMore

twoMore                                 ; byte >= 174: two more bytes follow
        LDRB    R1, [R5], #1
        ORR     R0, R1, R0, LSL #16     ; (byte-174) << 16 | first byte
        LDRB    R1, [R5], #1
        ORR     R0, R0, R1, LSL #8      ;   | second byte << 8
        ADD     R3, R3, R0
        STR     R3, [R6], #4
        B       decodeEntry

oneMore                                 ; byte is 92..173: one more byte follows
        SUBS    R0, R1, #92
        LDRB    R1, [R5], #1
        ORR     R0, R1, R0, LSL #8      ; (byte-92) << 8 | next byte
        ADD     R3, R3, R0
        STR     R3, [R6], #4
        B       decodeEntry             ; } /* end while (--nEntries >= 0) */
decodedTab
; A whole table has been decoded.  R4 tells us which one it was.
        CMP     R4, #0                  ; was that the shorts table?
        BNE     finishLongs             ; no: both tables are now done

finishShorts
; Shorts are done: record where they live and go round again for longs.
        MOV     R11, R12                ; no of els to decode = nLongs
        MOV     R12, R2                 ; R12 = &shorts[0]
        MOV     R2, R6                  ; stash away start of longs table
        MOV     R4, #1                  ; next table is longs
        B       decodeTab
; Decoy instruction sequence for ROL's UnsqueezeAIF.  It is never
; executed: it follows the unconditional branch that ends finishShorts,
; and nothing branches to it except its own loop.
        [ {TRUE}
; Background
; ----------
; ROL has adopted a policy in their fork of the OS of not allowing
; compressed binaries to run unless their version of UnsqueezeAIF
; recognises characteristic instruction sequences within the
; application's unsqueeze code, which it knows how to patch up and run
; during Service_UKCompression 0.
;
; The justification is apparently that they didn't like the fact that
; the binary is still compressed during Service_UKCompression 1 if
; UnsqueezeAIF drops back to letting the application's unsqueeze code
; run itself when normal execution at &8000 starts.  So, basically,
; they're saying: if this is an application which UnsqueezeAIF doesn't
; currently know for a fact to handle its own cache coherency, then on
; the off-chance that one day in the future it might be desirable to be
; able to patch the application using AppPatcher - and yet it would for
; some reason be impractical to update UnsqueezeAIF at *that* point to
; recognise these cases (!?!) - then the OS should already refuse to
; run the application!
;
; On the other hand, there is a real downside: this policy hinders the
; development of alternative compression schemes or, as happened in
; squeeze 5.09, the fixing of certain bugs that have a demonstrable
; effect on real hardware.
;
; Since ROL has failed for nearly 7 years now to adapt their OS to cope
; with squeeze 5.09, we don't have much option but to try to work
; around it.  By inserting a pre-StrongARM code sequence here (where it
; will never be executed, but after the start of the unsqueeze code,
; where UnsqueezeAIF starts looking for it), we can trick UnsqueezeAIF
; into thinking it recognises us and trusts us to be run.
FakeUnsqSignature
        LDMIA   R5!,{R0-R3}
        STMIA   R7!,{R0-R3}
        CMP     R5,R6
        BLT     FakeUnsqSignature
        MOV     PC,R4
        ]
finishLongs
        MOV     R11, R2                 ; R11 = &longs[0]

decodedBothTabs
; Register state from here on:
;       R13     = sizeToMove
;       R12     = &shorts[0]
;       R11     = &longs[0]
;       R10     = top of encoded data
;       R9      = base of encoded data
;       R8      = top of decoded data
;       R7      = curFree - base of unused memory
;       R0..R6  unused

moveRestOfCode
; Decompression is going to scribble on us here, so copy the rest of the
; code (decodeImage onwards) up into free space and continue from there.
        ADR     R5, decodeImage         ; source start
        ADR     R6, |UnSqueeze_UnSqueezeLimit|+24       ; source end; allow for
                                        ;   branch to exec addr
        MOV     R4, R7                  ; we will jump to R4

; The following loop is what is recognised by UnSqzAIF to interfere in
; the decompression (to do OS_SynchroniseCodeAreas).  Changing it will
; stop it interfering.
moveCode
        LDMIA   R5!, {R0-R3}            ; copy four words per iteration
        STMIA   R7!, {R0-R3}            ; NB this updates free space pointer as we go
        CMP     R5, R6
        BLT     moveCode

        MOV     R1, R4                  ; this instruction causes a non-match in UnSqzAIF
        ADD     R2, R1, #|UnSqueeze_UnSqueezeLimit|+28-decodeImage
        MOV     R0, #1                  ; R1/R2 bound the area just written
        SWI     XOS_SynchroniseCodeAreas        ; we've written some code.
        MOV     PC, R4                  ; jump to the new copy of the rest of the code
; Error exit taken when the memory check at start-up fails.
        [ expand_memcheck
; If we were to let the expansion occur, either a data abort would
; occur, or we would overwrite our parent application.
;
; R0 is first pointed 6 words below error_block.  LDMIB then loads the
; six words at R0+4..R0+24 - the five zero words below plus the zero
; error number at error_block itself - into R1, R2 and R4-R7, and its
; writeback leaves R0 = error_block.
expand_would_overwrite
        ADR     R0, error_block - 6 * 4
        LDMIB   R0!, {R1,R2,R4-R7}      ; R1,R2,R4-R7 := 0; R0 := error_block
        SWI     XMessageTrans_ErrorLookup       ; look up the "NoMem" token
        LDR     R1,[R0]                 ; error number of the block returned
        TEQ     R1, #0
        ADRNE   R0, error_block_failed  ; non-zero: presumably the lookup itself
                                        ;   failed, so use the built-in text
        SWI     OS_GenerateError

        DCD     0, 0, 0, 0, 0           ; the five zero words read by LDMIB
error_block
        DCD     0                       ; error number
        DCB     "NoMem", 0              ; message token
        ALIGN
error_block_failed
        DCD     0                       ; error number
        DCB     "Not enough memory", 0  ; fallback message text
        ALIGN
        ]
decodeImage
; The code from here on is executed only after it has been copied
; elsewhere (see moveRestOfCode).  This is confusing, but necessary.
;
; Most of the data gets decoded in place, but we have to go round twice
; in case some of the encoded data must first be copied elsewhere.  So
; the first time round we use a higher R9 (bottom of encoded data).
        ADD     R9, R9, R13             ; base = base + sizeToMove

; Register state for the decode loop:
;       R10     = top of encoded data
;       R9      = base of encoded data
;       R8      = top of decoded data
;       R12     = ptr to shorts
;       R11     = ptr to longs
;       R0..R6  are free for workspace

; For the moment, we want to overwrite the first word of the image.
; I think this is just a kludge...
        SUB     R8, R8, #4
decodePair
; Decode two output words from one control byte, working downwards
; through the encoded data.  The low nibble of the control byte
; describes the first word (built in R4); the high nibble describes the
; second (built in R5 by the code at gotFirst), and both are then
; stored with a single STMDB.
        CMP     R10, R9                 ; Have we reached the base ?
        BLE     doneDecode
        LDRB    R6, [R10, #-1]!         ; control byte
        AND     R3, R6, #15             ; first nibble
        SUBS    R0, R3, #MinShort       ; idx = (nibble - MinShort)
        BLT     notshort0

short0                                  ; nibble >= MinShort
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R4, [R12, R0, LSL #2]   ; w = shorts[(nibble-MinShort)<<8 | *--p]
        B       gotFirst

notshort0
        SUBS    R0, R3, #MinLong        ; idx = (nibble - MinLong)
        BLT     notlong0

long0                                   ; MinLong <= nibble < MinShort
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R0, [R11, R0, LSL #2]   ; t = longs[(nibble-MinLong)<<8 | *--p]
        LDRB    R1, [R10, #-1]!
        ORR     R4, R1, R0, LSL #8      ; w = (t << 8) | *--p
        B       gotFirst

notlong0                                ; nibble is 0 or 1
        MOVS    R4, R3                  ; TMD 13-Feb-90: combine 2 instructions here
                                        ; used to be CMPS R3,#0; MOVEQ R4,R3
        BEQ     gotFirst                ; nibble 0: the word is zero

literal0                                ; nibble 1: four literal bytes follow
        LDRB    R0, [R10, #-1]!
        LDRB    R1, [R10, #-1]!
        ORR     R0, R0, R1, LSL #8
        LDRB    R1, [R10, #-1]!
        ORR     R0, R0, R1, LSL #16
        LDRB    R1, [R10, #-1]!
        ORR     R4, R0, R1, LSL #24
        ; falls through to gotFirst
gotFirst
; Phew!  We have the first word of the pair (in R4); now we have to do
; (almost) the same again for the high nibble of the control byte still
; held in R6, with the result in R5, and then STMDB both words.
        MOV     R3, R6, LSR #4          ; second nibble
        SUBS    R0, R3, #MinShort       ; idx = (nibble - MinShort)
        BLT     notshort1

short1                                  ; nibble >= MinShort
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R5, [R12, R0, LSL #2]   ; w = shorts[(nibble-MinShort)<<8 | *--p]
        STMDB   R8!, {R4,R5}
        B       decodePair

notshort1
        SUBS    R0, R3, #MinLong        ; idx = (nibble - MinLong)
        BLT     notlong1

long1                                   ; MinLong <= nibble < MinShort
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R0, [R11, R0, LSL #2]   ; t = longs[(nibble-MinLong)<<8 | *--p]
        LDRB    R1, [R10, #-1]!
        ORR     R5, R1, R0, LSL #8      ; w = (t << 8) | *--p
        STMDB   R8!, {R4,R5}
        B       decodePair

notlong1                                ; nibble is 0 or 1
        MOVS    R5, R3                  ; TMD 13-Feb-90: combine 2 instructions here
                                        ; used to be CMPS R3,#0; MOVEQ R5,R3
        ; This doesn't pay off much: it might be better to swap round
        ; literal and zero, to save 3S on the longer path ?
        STMEQDB R8!, {R4,R5}            ; nibble 0: the word is zero
        BEQ     decodePair

literal1                                ; nibble 1: four literal bytes follow
        ; If I had the right byte-sex and a couple of registers to spare,
        ; could do this in 15S instead of 22S using the load non-aligned
        ; word code given in ARM CPU Manual.
        LDRB    R0, [R10, #-1]!
        LDRB    R1, [R10, #-1]!
        ORR     R0, R0, R1, LSL #8
        LDRB    R1, [R10, #-1]!
        ORR     R0, R0, R1, LSL #16
        LDRB    R1, [R10, #-1]!
        ORR     R5, R0, R1, LSL #24
        STMDB   R8!, {R4,R5}
        B       decodePair
doneDecode
; The decode loop has reached the current base of the encoded data.
; If sizeToMove bytes at the bottom are still to be decoded, copy them
; up into free space first and then carry on decoding from the copy.
        CMP     R13, #0                 ; Any data need to be copied elsewhere ?
        BLE     runImage                ; No -- just run the image
        SUB     R6, R9, R13             ; R6 points to base of encoded data
        MOV     R9, R7                  ; R9 points to base of copied data
        ADD     R10, R9, R13            ; R10 is current pointer into copied data

moveEncoded
; Copies 16 bytes per pass.  R13 finishes <= 0, so the next visit to
; doneDecode goes straight to runImage.
        LDMIA   R6!, {R0-R3}
        STMIA   R7!, {R0-R3}
        SUBS    R13, R13, #16
        BGT     moveEncoded
        B       decodePair              ; Carry on decoding
; Now R8 should be a pointer to the first word of the decoded image,
; so let's cross our fingers and jump to it...
runImage
        ADR     R2, decodeImage-4
        MOV     R0, #1

; The instructions below are not assembled here; they describe what
; follows |UnSqueeze_UnSqueezeLimit| in a squeezed image.  (Presumably
; they are supplied when the image is built, and are what the "+24"
; added to |UnSqueeze_UnSqueezeLimit| elsewhere in this file allows
; room for - confirm against the squeeze tool.)
; [up to 3 SUB instructions here]   R8 adjusted to point back to AIF header
;       SUB     R1, R8, #4
;       SWI     XOS_SynchroniseCodeAreas
;       MOV     PC, R8
|UnSqueeze_UnSqueezeLimit|
; Now the bit of code that actually runs in the image compression
; program: it just tells the program where the decompression code
; lives.  Are you confused ?
;
; The procedure is disabled here; the two labels it would have returned
; are exported directly instead.
;
; Entry point to PROCEDURE FindUnSqueezeCode
; Parameters: base:  [FP,#-20]/R0   limit: [FP,#-16]/R1
;|UnSqueeze_FindUnSqueezeCode|
;       ADR     R2, |UnSqueeze_UnSqueezeBase|
;       STR     R2, [R0]
;       ADR     R2, |UnSqueeze_UnSqueezeLimit|
;       STR     R2, [R1]
;       MOV     PC, LR

        END
|