; unsqueeze.s

; This source code in this file is licensed to You by Castle Technology
; Limited ("Castle") and its licensors on contractual terms and conditions
; ("Licence") which entitle you freely to modify and/or to distribute this
; source code subject to Your compliance with the terms of the Licence.
; 
; This source code has been made available to You without any warranties
; whatsoever. Consequently, Your use, modification and distribution of this
; source code is entirely at Your own risk and neither Castle, its licensors
; nor any other person who has contributed to this source code shall be
; liable to You for any loss or damage which You may suffer as a result of
; Your use, modification or distribution of this source code.
; 
; Full details of Your rights and obligations are set out in the Licence.
; You should have received a copy of the Licence with this source code file.
; If You have not received a copy, the text of the Licence is available
; online at www.castle-technology.co.uk/riscosbaselicence.htm
; 
;
; s.UnSqueeze by RCC 25-Aug-87
; This is a bit of code to be included in self-decompressing images to
; expand the image in place.  See elsewhere for details of the compression
; algorithm.
;
; ***********************************
; ***    C h a n g e   L i s t    ***
; ***********************************

; Date       Name       Description
; ----       ----       -----------
; 13-Feb-90  TDobson    Minor optimisation which saves 1 instruction for
;                       every output word that isn't a "short" or a "long".

        ; Two areas are declared: a data area holding one reserved word
        ; labelled |UnSqueeze_D$|, then the read-only code area that
        ; contains everything that follows.  Only the two symbols that
        ; bracket the unsqueeze code are exported; the remaining
        ; EXPORT/IMPORT lines are commented out.
        AREA |M2$$Data|, DATA
;        EXPORT |UnSqueeze_C$|
;        EXPORT |UnSqueeze_CS$|
        EXPORT |UnSqueeze_UnSqueezeBase|
        EXPORT |UnSqueeze_UnSqueezeLimit|
;        EXPORT |UnSqueeze_D$|
;        EXPORT |UnSqueeze_FindUnSqueezeCode|
;        IMPORT  |SYSTEM.STKOVF|
;        IMPORT  |SYSTEM.RAISE|
|UnSqueeze_D$|
        %       4               ; reserve 4 bytes in the data area
        AREA |M2$$Code|, CODE, READONLY

; Register name bindings used throughout this file: R0-R13 name registers
; 0-13, LR names register 14 and PC names register 15.
R0 RN 0
R1 RN 1
R2 RN 2
R3 RN 3
R4 RN 4
R5 RN 5
R6 RN 6
R7 RN 7
R8 RN 8
R9 RN 9
R10 RN 10
R11 RN 11
R12 RN 12
R13 RN 13
LR RN 14
PC RN 15

; |UnSqueeze_CS$| and StackSize are defined here but are not referenced by
; any instruction in this file.
|UnSqueeze_CS$| EQU     40
|UnSqueeze_C$|

; Byte offsets of the six words in the data block that precedes the
; unsqueeze code.  The code does not refer to these names: it fetches all
; six words with a single LDMIA into R8-R13, so the order below is the
; register order.
StackSize   * 64
decodedSize * 0             ; loaded into R8
encodedSize * 4             ; loaded into R9
tableSize   * 8             ; loaded into R10
nShorts     * 12            ; loaded into R11
nLongs      * 16            ; loaded into R12
sizeToMove  * 20            ; loaded into R13

	; Assembly-time switch.  When {TRUE}, the unsqueeze code compares the
	; memory it is about to use against the RAM limit returned by
	; OS_GetEnv and raises an error instead of expanding past it.
	GBLL	expand_memcheck
expand_memcheck	SETL	{TRUE}

; XOS_SynchroniseCodeAreas is used by moveCode whether or not
; expand_memcheck is set, so it must be defined unconditionally; otherwise
; assembling with expand_memcheck {FALSE} fails on an undefined symbol.
XOS_SynchroniseCodeAreas EQU &2006e

	[ expand_memcheck
	; SWI numbers needed only by the memory check.  They are defined
	; locally rather than taken from the (commented-out) header files.
;	GET	hdr:ListOpts
;	GET	hdr:Macros
;	GET	hdr:System
;	GET	hdr:MsgTrans
OS_GetEnv             EQU &10
OS_GenerateError      EQU &2b
XMessageTrans_ErrorLookup EQU &61506
	]

; Constants defining partition of nibble value space: these must match
; corresponding values in mod.squeeze.
;
; With the values below, decodePair interprets each 4-bit code as:
;   0                 a zero word
;   1                 a literal word (four bytes follow in the stream)
;   MinLong  .. 8     index prefix into the longs table  (NibsLong values)
;   MinShort .. 15    index prefix into the shorts table (NibsShort values)

NibsLong    * 7
NibsShort   * (14-NibsLong)     ; = 7; not referenced by the code below
MinShort    * (2+NibsLong)      ; = 9
MinLong     * 2

; Code between UnSqueezeBase and UnSqueezeLimit will be copied into
; the start of a squeezed image, and when the image is loaded it will
; jump to the start.

; Before start of unsqueeze code there will be 6 words to tell
; it where the data is, how big it is etc.

; Placeholder for the six-word data block.  It lies before
; |UnSqueeze_UnSqueezeBase|, so it is outside the Base..Limit range that
; gets copied into a squeezed image; it exists so that the ADR in
; UnsqueezeADFSImage resolves to the six words immediately preceding the
; code.  The word order is the one given by the decodedSize..sizeToMove
; offsets.
|UnsqueezeDataBlock|
        NOP                     ; decodedSize
        NOP                     ; encodedSize
        NOP                     ; tableSize
        NOP                     ; nShorts
        NOP                     ; nLongs
        NOP                     ; sizeToMove

|UnSqueeze_UnSqueezeBase|
|UnsqueezeAIFImage|
        ; If it was an AIF image, we enter here and overwrite the BL decompress
        ; xpand relies on the first instruction here being MOV r0, #<imm>
        ;
        ; Entry:  LR = return address of the BL that brought us here.
        ; Exit:   the word holding that BL has been replaced by the
        ;         encoding of MOV R0, R0; R0 = &E1A00000; R1 = address of
        ;         the overwritten word.  Execution then falls through into
        ;         UnsqueezeADFSImage.
        ; This prelude must stay exactly five instructions long (checked
        ; by the assembly-time test at UnsqueezeADFSImage).
        MOV   R0, #&E1000000 ; hex for instruction MOV r0, r0
        ORR   R0, R0, #&00A00000     ; R0 = &E1A00000, built in two steps
        SUB   R1, LR, PC     ; mode independent status bit removal
        ADD   R1, PC, R1     ; R1 = LR (- any PSR bits if there) + 4
        STR   R0, [R1, #-8]! ; overwrite the instruction we just BL'ed from
                             ; (writeback leaves R1 = address stored to)

|UnsqueezeADFSImage|
        ; Assembly-time check: the AIF prelude above must be exactly five
        ; instructions long, otherwise assembly is stopped with the
        ; message below.
        [ UnsqueezeADFSImage - UnsqueezeAIFImage <> 5*4
        ! 1, "Change AIFPRELUDE in squeeze.h"
        ]
        ; We arrive here knowing very little about anything.
        ; First find out where we are, and where the tables start.

        ADR   R0, |UnsqueezeDataBlock|  ; R0 points to data (PC-relative)
        LDMIA R0, {R8-R13}  ; load all the data
                            ; R13 := sizeToMove
                            ; R12 := nLongs
                            ; R11 := nShorts
                            ; R10 := tableSize
                            ; R9  := encodedSize
                            ; R8  := decodedSize
        SUB   R10, R0, R10  ; R10 := base of encoded tables
        SUB   R9, R10, R9   ; R9  := base of encoded data
        ADD   R8, R9, R8    ; R8  := top of decoded image

        ; We only need nLongs and nShorts while we are decoding the tables.
        ; Afterwards we will re-use the registers for pointers to start
        ; of tables.

        ; SWI   &10            ; GetEnv - returns RAM limit in R1
        ; SUB   R6, R1, #&4000 ; grab 16K workspace, remember table base
        ADR   R6, |UnSqueeze_UnSqueezeLimit|+24 ; top of squeezed image
        CMP   R6, R8  ; find highest of top of squeezed and unsqueezed image
        MOVLO R6, R8  ; use SWI if you prefer... (UNIX ?)
                      ; R6 := where the decoded tables will be built

        ; Allocate space for tables
        ADD   R1, R11, R12  ; nLongs + nShorts
        ADD   R7, R6, R1, LSL #2 ; curFree += (nLongs + nShorts) * 4;
                                 ; R7 := first address after both tables

	[ expand_memcheck
	; Refuse to expand if the workspace would run past the RAM limit.
	; The SWI overwrites R1 (no longer needed: R7 is already computed).
	SWI   OS_GetEnv     ; returns RAM limit in R1
        ; R7 points to end of tables; add space required for copied-up
        ; decode routine.
        ADD   R2,R7, #|UnSqueeze_UnSqueezeLimit| + 24 - decodeImage
        ; if PC < the RAM limit, and R2 > the RAM limit, we're in trouble
        ; (if PC > the RAM limit, we assume we're not in the application slot)
        CMP   PC,R1         ; LO if we are running below the RAM limit
        CMPLO R1,R2         ; only then: LO if RAM limit < end of workspace
	BLO   expand_would_overwrite ; taken only when both tests gave LO
	]

        ; Decode the two lookup tables (shorts first, then longs) from the
        ; byte-coded form at R10 into words starting at R6.  Each decoded
        ; entry is the previous entry plus a delta, starting from -1.
        ; The first byte of each code selects the delta:
        ;   0          literal delta follows, least significant byte first:
        ;              4 bytes for the shorts table, 3 bytes for the longs
        ;   1..9       that many consecutive entries, each previous + 1
        ;   10..91     delta = byte - 10
        ;   92..173    delta = ((byte - 92) << 8) | next byte
        ;   174..255   delta = ((byte - 174) << 16) | next byte
        ;                      | (byte after that << 8)
        MOV   R5, R10       ; R5 is ptr into encoded tables
        MOV   R4, #0        ; this is the first table el
decodeTab
        ; Require:  R11 -- no of els left to decode
        ;           R6  -- ptr into decoded table
        ;           R5  -- ptr into encoding
        ;           R4  -- = 0 iff this is the shorts table (i.e. 4-byte vals)

; I believe this loop could be made a good deal smaller and possibly
; faster, but it's only a couple of hundred bytes and it works.


        MOV   R2, R6        ; stash away base of the table being decoded
        MOV   R3, #-1       ; start as if previous entry was -1
decodeEntry
        SUBS  R11, R11, #1  ; while (--nEntries >= 0) {
        BLT   decodedTab    ; assert: previous word is in R3
        LDRB  R1, [R5], #1  ; byte = *p++
        SUBS  R0, R1, #10   ; R0 = byte - 10 (the delta for codes 10..91)
        BGE   greaterThan9
literalOrOnes
        CMPS  R1, #0
        BNE   ones
literal
        ; Code 0: assemble the delta from the following bytes.
        LDRB  R0, [R5], #1
        LDRB  R1, [R5], #1
        ORR   R0, R0, R1, LSL #8
        LDRB  R1, [R5], #1
        ORR   R0, R0, R1, LSL #16
        CMPS  R4, #0                 ; in the 4-byte (short encodings) table?
        LDREQB R1, [R5], #1          ; yes, so include the 4th byte
        ORREQ  R0, R0, R1, LSL #24   ; in the resultant word
        ADD   R3, R3, R0
        STR   R3, [R6], #4
        B     decodeEntry
ones
        ; Codes 1..9: this one code produces R1 entries.  decodeEntry has
        ; already counted one of them, so take off the other R1 - 1.
        SUB   R11, R11, R1
        ADD   R11, R11, #1
anotherOne        ; Have number of increment-by-ones in R1
        ADD   R3, R3, #1
        STR   R3, [R6], #4
        SUBS  R1, R1, #1
        BGT   anotherOne
        B     decodeEntry
greaterThan9
        ; Codes 10..91: the delta is already in R0.  The three LT
        ; instructions all depend on the single compare, since neither
        ; ADD nor STR alters the flags.
        CMPS  R1, #92
        ADDLT R3, R3, R0
        STRLT R3, [R6], #4
        BLT   decodeEntry
greaterThan91
        SUBS  R0, R1, #174  ; R0 = byte - 174; negative for codes 92..173
        BLT   oneMore
twoMore
        ; Codes 174..255: two more bytes; the first is bits 0-7 of the
        ; delta, the second is bits 8-15.
        LDRB  R1, [R5], #1
        ORR   R0, R1, R0, LSL #16
        LDRB  R1, [R5], #1
        ORR   R0, R0, R1, LSL #8
        ADD   R3, R3, R0
        STR   R3, [R6], #4
        B     decodeEntry
oneMore
        ; Codes 92..173: one more byte supplies bits 0-7 of the delta.
        SUBS  R0, R1, #92
        LDRB  R1, [R5], #1
        ORR   R0, R1, R0, LSL #8
        ADD   R3, R3, R0
        STR   R3, [R6], #4
        B     decodeEntry   ; } /* end while (--nEntries >= 0) { */

decodedTab
        CMPS  R4, #0        ; if isShorts then
        BNE   finishLongs   ; else finishLongs
finishShorts
        MOV   R11, R12      ; no of els to decode = nLongs
        MOV   R12, R2       ; R12 = &shorts[0]
        MOV   R2, R6        ; stash away start of longs table
        MOV   R4, #1        ; next table is longs
        B     decodeTab
      [ {TRUE}
        ; ROL has adopted a policy in their fork of the OS of not allowing
        ; compressed binaries to run unless their version of UnsqueezeAIF
        ; recognises characteristic instruction sequences within the
        ; application's unsqueeze code, which it knows how to patch up and
        ; run during Service_UKCompression 0. The justification for this is
        ; apparently that they didn't like the fact that the binary is
        ; still compressed during Service_UKCompression 1 if UnsqueezeAIF
        ; drops back to letting the application unsqueeze code run itself
        ; when normal execution at &8000 starts. So, basically they're
        ; saying: if this is an application which UnsqueezeAIF doesn't
        ; currently know for a fact to handle its own cache coherency, then
        ; on the off-chance that one day in the future it might be
        ; desirable to be able to patch the application using AppPatcher -
        ; and yet it would for some reason be impractical to update
        ; UnsqueezeAIF at *that* point to recognise these cases (!?!) -
        ; then the OS should already refuse to run the application!
        ;
        ; On the other hand, there is a real downside, in that this policy
        ; hinders the development of alternative compression schemes, or as
        ; happened in squeeze 5.09, the fixing of certain bugs that have
        ; a demonstrable effect on real hardware.
        ;
        ; Since ROL has failed for nearly 7 years now to adapt their OS
        ; to cope with squeeze 5.09, we don't have much option but to try
        ; to work around it. By inserting a pre-StrongARM code sequence
        ; here (where it will never be executed, but after the start of
        ; the unsqueeze code where UnsqueezeAIF starts looking for it), we
        ; can trick UnsqueezeAIF into thinking it recognises us and trusts
        ; us to be run.
        ;
        ; These five instructions are never executed: the instruction
        ; before them is an unconditional branch, nothing branches to
        ; FakeUnsqSignature except its own loop, and finishLongs (which
        ; follows) is reached only by the BNE in decodedTab.  They have
        ; the same load/store/compare/branch shape as the real moveCode
        ; loop, but are followed directly by MOV PC,R4.
FakeUnsqSignature
        LDMIA   R5!,{R0-R3}
        STMIA   R7!,{R0-R3}
        CMP     R5,R6
        BLT     FakeUnsqSignature
        MOV     PC,R4
      ]
finishLongs
        ; Reached from decodedTab once the second (longs) table is done;
        ; R2 still holds the base saved at the top of decodeTab.
        MOV   R11, R2       ; R11 = &longs[0]
decodedBothTabs
        ; Now have:  R13 = sizeToMove
        ;            R12 = &shorts[0]
        ;            R11 = &longs[0]
        ;            R10 = top of encoded data
        ;            R9  = base of encoded data
        ;            R8  = top of decoded data
        ;            R7  = curFree - base of unused memory
        ;            R0..R6 unused

moveRestOfCode
        ; Decompression is going to scribble on us here, so copy the
        ; rest of the code up into free space.
        ADR   R5, decodeImage
        ADR   R6, |UnSqueeze_UnSqueezeLimit|+24 ; allow for branch to exec addr
        MOV   R4, R7  ; we will jump to R4

        ; The following code is what is recognised by UnSqzAIF to interfere
        ; in the decompression (to do OS_SynchroniseCodeAreas).  Changing it
        ; will stop it interfering.
        ;
        ; The loop copies four words per pass and tests afterwards, so it
        ; can copy up to three words beyond R6.
moveCode
        LDMIA R5!, {R0-R3}
        STMIA R7!, {R0-R3}  ; NB this updates free space pointer as we go
        CMPS  R5, R6
        BLT   moveCode
        MOV   R1, R4        ; this instruction causes a non-match in UnSqzAIF
                            ; R1 = start of the copied code
        ADD   R2, R1, #|UnSqueeze_UnSqueezeLimit|+28-decodeImage
                            ; R2 = R1 + length of the copied code + 4
        MOV   R0, #1        ; presumably selects "range in R1..R2" for the
                            ; SWI - confirm against the SWI documentation
        SWI   XOS_SynchroniseCodeAreas  ; we've written some code.
        MOV   PC, R4        ; jump to the new copy of the rest of the code

	[ expand_memcheck
	; If we were to let the expansion occur, either a data abort would
	; occur, or we would overwrite our parent application.
	;
	; Raise an error instead.  The ADR/LDMIB pair sets up every register
	; argument in two instructions: R0 starts 6 words below error_block,
	; LDMIB skips the first of those words (the SWI OS_GenerateError
	; instruction) and loads the five zero words into R1, R2, R4, R5, R6
	; and the zero word at error_block itself into R7; writeback leaves
	; R0 = error_block.  R3 is not loaded.
expand_would_overwrite
	ADR   R0, error_block - 6 * 4
	LDMIB R0!, {R1,R2,R4-R7}
	SWI   XMessageTrans_ErrorLookup
	; R0 now points at an error block.  A non-zero error number is taken
	; to mean the "NoMem" token could not be looked up (presumably the
	; SWI then returns its own error block - confirm against the SWI
	; documentation), so substitute the fixed English text.
	LDR   R1,[R0]
	TEQ   R1, #0
 	ADRNE R0, error_block_failed
	SWI   OS_GenerateError
	; Data follows the SWI directly, so control is not expected to
	; come back here.
	DCD   0, 0, 0, 0, 0	; the five zero words loaded by the LDMIB above
error_block
	DCD   0			; error number
	DCB   "NoMem", 0	; message token to look up
	ALIGN
error_block_failed
	DCD   0			; error number
	DCB   "Not enough memory", 0
	ALIGN
	]

decodeImage
        ; The code from here on gets executed only after it is copied
        ; elsewhere.  This is confusing, but necessary.
        ;
        ; Decoding runs backwards: R10 walks down through the encoded
        ; bytes and R8 walks down through the output, two words per STMDB.
        ; Each control byte holds two 4-bit codes.  The low nibble gives
        ; the word stored at the lower address (R4), the high nibble the
        ; word stored at the higher address (R5).  For a nibble n:
        ;   n >= MinShort   word = shorts[((n-MinShort)<<8) | next byte]
        ;   n >= MinLong    word = (longs[((n-MinLong)<<8) | next byte] << 8)
        ;                          | the byte after that
        ;   n == 0          word = 0
        ;   otherwise       word = the next four bytes, the first one read
        ;                   being the least significant
        ; "Next byte" always means the next lower address (pre-decrement).

        ; Most of the data gets decoded in place, but we have to go round twice
        ; just in case we have to copy some data elsewhere, so first
        ; time round we use a higher R9 (bottom of encoded data).

        ADD   R9, R9, R13   ; base = base + sizeToMove

        ; top of encoded data in R10
        ; base of encoded data in R9
        ; top of decoded data in R8
        ; ptr to shorts in R12
        ; ptr to longs  in R11
        ; R0..R6 are free for workspace

; For the moment, we want to overwrite the first word of the image,
; I think this is just a kludge...
        SUB   R8, R8, #4

decodePair
        ; NOTE(review): BLE is a signed comparison of two addresses.
        CMPS  R10, R9           ; Have we reached the base ?
        BLE   doneDecode
        LDRB  R6, [R10, #-1]!   ; byte value
        ; The words will be put in R4 and R5, to be STMDB'd
        AND   R3, R6, #15       ; first nibble
        SUBS  R0, R3, #MinShort ; idx = (val - MinShort)
        BLT   notshort0
short0
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R4, [R12, R0, LSL #2]    ; w = shorts[(nibble-MinShort)<<8 | *--p]
        B     gotFirst
notshort0
        SUBS  R0, R3, #MinLong         ; idx = (val - MinLong)
        BLT   notlong0
long0
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R0, [R11, R0, LSL #2]    ; w = longs[(nibble-MinLong)<<8 | *--p]
        LDRB  R1, [R10, #-1]!          ; low byte comes from the stream
        ORR   R4, R1, R0, LSL #8       ; word = (w << 8) | low byte
        B     gotFirst
notlong0
        MOVS  R4, R3            ; TMD 13-Feb-90: combine 2 instructions here
                                ; used to be CMPS R3,#0; MOVEQ R4,R3
        BEQ   gotFirst          ; nibble 0: the word is zero
literal0
        ; Four literal bytes, least significant read first.
        LDRB  R0, [R10, #-1]!
        LDRB  R1, [R10, #-1]!
        ORR   R0, R0, R1, LSL #8
        LDRB  R1, [R10, #-1]!
        ORR   R0, R0, R1, LSL #16
        LDRB  R1, [R10, #-1]!
        ORR   R4, R0, R1, LSL #24

gotFirst
        ; Phew!  We have the first word of the pair (in R4), now we have
        ; to do (almost) the same again, result in R5, and STMDB.

        MOV   R3, R6, LSR #4     ; second nibble
        SUBS  R0, R3, #MinShort  ; idx = (val - MinShort)
        BLT   notshort1
short1
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R5, [R12, R0, LSL #2]    ; w = shorts[(nibble-MinShort)<<8 | *--p]
        STMDB R8!, {R4,R5}
        B     decodePair
notshort1
        SUBS  R0, R3, #MinLong        ; idx = (val - MinLong)
        BLT   notlong1
long1
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R0, [R11, R0, LSL #2]    ; w = longs[(nibble-MinLong)<<8 | *--p]
        LDRB  R1, [R10, #-1]!          ; low byte comes from the stream
        ORR   R5, R1, R0, LSL #8       ; word = (w << 8) | low byte
        STMDB R8!, {R4,R5}
        B     decodePair
notlong1
        MOVS  R5, R3            ; TMD 13-Feb-90: combine 2 instructions here
                                ; used to be CMPS R3,#0; MOVEQ R5,R3

                                       ; This doesn't pay off much
        STMEQDB R8!, {R4,R5}           ; might be better to swap round
        BEQ   decodePair               ; literal and zero, to save 3S on
literal1                               ; the longer path ?
        LDRB  R0, [R10, #-1]!
        LDRB  R1, [R10, #-1]!          ; If I had the right byte-sex and
        ORR   R0, R0, R1, LSL #8       ; a couple of registers to spare,
        LDRB  R1, [R10, #-1]!          ; could do this in 15S instead of 22S
        ORR   R0, R0, R1, LSL #16      ; using the load non-aligned word code
        LDRB  R1, [R10, #-1]!          ; given in ARM CPU Manual.
        ORR   R5, R0, R1, LSL #24
        STMDB R8!, {R4,R5}
        B     decodePair

doneDecode
        ; Reached when R10 has come down to R9.  On the first visit R9 is
        ; (base of encoded data + sizeToMove); if sizeToMove > 0 the
        ; lowest sizeToMove bytes of encoded data are copied up to free
        ; memory at R7 and decoding resumes from the copy.  The copy loop
        ; leaves R13 <= 0, so the second visit goes straight to runImage.
        CMPS  R13, #0      ; Any data need to be copied elsewhere ?
        BLE   runImage     ; No -- just run the image
        SUB   R6, R9, R13  ; R6 points to base of encoded data
        MOV   R9, R7       ; R9 points to base of copied data
        ADD   R10, R9, R13 ; R10 is current pointer into copied data
moveEncoded
        ; Copies 16 bytes per pass, so up to 15 bytes more than R13 may
        ; be copied.
        LDMIA R6!, {R0-R3}
        STMIA R7!, {R0-R3}
        SUBS  R13, R13, #16
        BGT   moveEncoded
        B     decodePair   ; Carry on decoding

; Now R8 should be a pointer to the first word of the decoded image,
; so let's cross our fingers and jump to it...
;
; Only the first two instructions of this sequence are assembled here.
; The commented-out lines show what is expected to occupy the words after
; |UnSqueeze_UnSqueezeLimit| in a finished image (presumably written by
; the squeeze tool - confirm there); with up to three SUBs that is at most
; six instructions, which is the "+24" added to |UnSqueeze_UnSqueezeLimit|
; elsewhere in this file.
runImage
        ADR   r2, decodeImage-4 ; evaluated in the copied-up code, so this
                                ; is the word below the copy of decodeImage
        MOV   R0, #1
;       [up to 3 SUB instructions here] R8 adjusted to point back to AIF header
;       SUB   R1, R8, #4
;       SWI   XOS_SynchroniseCodeAreas
;       MOV   PC, R8

|UnSqueeze_UnSqueezeLimit|

; Now the bit of code that actually runs in the image compression program:
; this just tells it where the decompression code lives.  Are you confused ?

; Entry point to PROCEDURE FindUnSqueezeCode
; Parameters: base: [FP,#-20]/R0  limit: [FP,#-16]/R1
;|UnSqueeze_FindUnSqueezeCode|
;        ADR   R2, |UnSqueeze_UnSqueezeBase|
;        STR   R2, [R0]
;        ADR   R2, |UnSqueeze_UnSqueezeLimit|
;        STR   R2, [R1]
;        MOV   PC, LR

        END