summaryrefslogtreecommitdiff
path: root/utils/ascii.h
blob: 94d988aedd7554ccbab9300c4073c9b3f7717957 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
/*
 * Copyright 2016 Michael Drake <tlsa@netsurf-browser.org>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \file utils/ascii.h
 * \brief Helpers for ASCII string handling.
 *
 * These helpers for string parsing will have the correct effect for parsing
 * ASCII text (as used by most web specs), regardless of system locale.
 */

#ifndef _NETSURF_UTILS_ASCII_H_
#define _NETSURF_UTILS_ASCII_H_

#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

/**
 * Check whether a character is ASCII whitespace.
 *
 * The accepted characters are space, horizontal tab, line feed,
 * vertical tab, form feed and carriage return.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is one of the whitespace characters, false otherwise.
 */
static inline bool ascii_is_space(char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\v':
	case '\f':
	case '\r':
		return true;
	default:
		return false;
	}
}

/**
 * Check whether a character is a lower-case ASCII letter.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` lies in the range 'a' to 'z' inclusive, else false.
 */
static inline bool ascii_is_alpha_lower(char c)
{
	/* Reject anything outside the closed range ['a', 'z'] */
	if (c < 'a' || c > 'z') {
		return false;
	}

	return true;
}

/**
 * Check whether a character is an upper-case ASCII letter.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` lies in the range 'A' to 'Z' inclusive, else false.
 */
static inline bool ascii_is_alpha_upper(char c)
{
	const bool below_range = (c < 'A');
	const bool above_range = (c > 'Z');

	return !(below_range || above_range);
}

/**
 * Check whether a character is an ASCII letter of either case.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is a lower-case or upper-case letter, else false.
 */
static inline bool ascii_is_alpha(char c)
{
	if (ascii_is_alpha_lower(c)) {
		return true;
	}

	return ascii_is_alpha_upper(c);
}

/**
 * Check whether a character is an ASCII decimal digit.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` lies in the range '0' to '9' inclusive, else false.
 */
static inline bool ascii_is_digit(char c)
{
	const bool at_least_zero = (c >= '0');
	const bool at_most_nine = (c <= '9');

	return at_least_zero && at_most_nine;
}

/**
 * Check whether a character is a numerical sign.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is '-' or '+', else false.
 */
static inline bool ascii_is_sign(char c)
{
	switch (c) {
	case '-':
	case '+':
		return true;
	default:
		return false;
	}
}

/**
 * Check whether a character is an ASCII letter (either case) or a
 * decimal digit.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is a letter or a digit, else false.
 */
static inline bool ascii_is_alphanumerical(char c)
{
	if (ascii_is_alpha(c)) {
		return true;
	}

	return ascii_is_digit(c);
}

/**
 * Check whether a character is one of the lower-case letters 'a' to 'f'.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` lies in the range 'a' to 'f' inclusive, else false.
 */
static inline bool ascii_is_af_lower(char c)
{
	/* Reject anything outside the closed range ['a', 'f'] */
	if (c < 'a' || c > 'f') {
		return false;
	}

	return true;
}

/**
 * Check whether a character is a hexadecimal digit, accepting only the
 * lower-case letters.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is a decimal digit or 'a' to 'f', else false.
 */
static inline bool ascii_is_hex_lower(char c)
{
	if (ascii_is_digit(c)) {
		return true;
	}

	return ascii_is_af_lower(c);
}

/**
 * Check whether a character is one of the upper-case letters 'A' to 'F'.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` lies in the range 'A' to 'F' inclusive, else false.
 */
static inline bool ascii_is_af_upper(char c)
{
	const bool below_range = (c < 'A');
	const bool above_range = (c > 'F');

	return !(below_range || above_range);
}

/**
 * Check whether a character is a hexadecimal digit, accepting only the
 * upper-case letters.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is a decimal digit or 'A' to 'F', else false.
 */
static inline bool ascii_is_hex_upper(char c)
{
	if (!ascii_is_digit(c)) {
		/* Not 0-9, so only A-F can still qualify */
		return ascii_is_af_upper(c);
	}

	return true;
}

/**
 * Check whether a character is a hexadecimal digit in either case.
 *
 * \param[in] c  Character to classify.
 * \return true if `c` is a decimal digit, 'A' to 'F' or 'a' to 'f',
 *         else false.
 */
static inline bool ascii_is_hex(char c)
{
	if (ascii_is_digit(c)) {
		return true;
	}

	if (ascii_is_af_upper(c)) {
		return true;
	}

	return ascii_is_af_lower(c);
}

/**
 * Map a single hexadecimal digit character to its numeric value.
 *
 * Both upper-case and lower-case letters are accepted.
 *
 * \param[in] c  Character to convert.
 * \return the digit's value (0-15), or -256 if `c` is not a hexadecimal
 *         character.
 */
static inline int ascii_hex_to_value(char c)
{
	/* Start from the "invalid hex" sentinel; overwritten on a match */
	int value = -256;

	if (ascii_is_digit(c)) {
		value = c - '0';
	} else if (ascii_is_af_lower(c)) {
		value = 10 + (c - 'a');
	} else if (ascii_is_af_upper(c)) {
		value = 10 + (c - 'A');
	}

	return value;
}

/**
 * Combine two hexadecimal digit characters into one number.
 *
 * Each digit is converted with ascii_hex_to_value(), which yields -256 for
 * a non-hexadecimal character.  That sentinel is large enough that the
 * combined result is negative whenever either input is invalid.
 *
 * \param[in] c1  most significant hex digit.
 * \param[in] c2  least significant hex digit.
 * \return the value of the two digit hex number (0-255),
 *         or a negative value if either input is not a hex digit.
 */
static inline int ascii_hex_to_value_2_chars(char c1, char c2)
{
	const int high = ascii_hex_to_value(c1);
	const int low = ascii_hex_to_value(c2);

	return (high * 16) + low;
}

/**
 * Produce the lower-case form of an upper-case ASCII letter.
 *
 * Any character that is not an upper-case letter is passed through
 * untouched.
 *
 * \param[in] c  Character to convert.
 * \return the lower-case equivalent of `c`, or `c` itself if it is not an
 *         upper-case letter.
 */
static inline char ascii_to_lower(char c)
{
	if (!ascii_is_alpha_upper(c)) {
		return c;
	}

	/* Shift from the 'A'-'Z' range into the 'a'-'z' range */
	return c + 'a' - 'A';
}

/**
 * Produce the upper-case form of a lower-case ASCII letter.
 *
 * Any character that is not a lower-case letter is passed through
 * untouched.
 *
 * \param[in] c  Character to convert.
 * \return the upper-case equivalent of `c`, or `c` itself if it is not a
 *         lower-case letter.
 */
static inline char ascii_to_upper(char c)
{
	if (!ascii_is_alpha_lower(c)) {
		return c;
	}

	/* Shift from the 'a'-'z' range into the 'A'-'Z' range */
	return c + 'A' - 'a';
}

/**
 * Measure the run of lower-case letters at the start of a string.
 *
 * Scanning stops at the first character that is not a lower-case letter,
 * which includes the string terminator.
 *
 * \param[in] str  String to scan.
 * \return number of consecutive lower case characters at start of `str`.
 */
static inline size_t ascii_count_alpha_lower(const char *str)
{
	const char *cursor = str;

	while (ascii_is_alpha_lower(*cursor)) {
		cursor++;
	}

	return (size_t)(cursor - str);
}

/**
 * Measure the run of upper-case letters at the start of a string.
 *
 * Scanning stops at the first character that is not an upper-case letter,
 * which includes the string terminator.
 *
 * \param[in] str  String to scan.
 * \return number of consecutive upper case characters at start of `str`.
 */
static inline size_t ascii_count_alpha_upper(const char *str)
{
	size_t run;

	for (run = 0; ascii_is_alpha_upper(str[run]); run++) {
		/* Nothing to do; the loop header does the counting */
	}

	return run;
}

/**
 * Measure the run of letters (either case) at the start of a string.
 *
 * Scanning stops at the first character that is not a letter, which
 * includes the string terminator.
 *
 * \param[in] str  String to scan.
 * \return number of consecutive alphabetical characters at start of `str`.
 */
static inline size_t ascii_count_alpha(const char *str)
{
	size_t run = 0;

	for (const char *cursor = str; ascii_is_alpha(*cursor); cursor++) {
		run++;
	}

	return run;
}

/**
 * Measure the run of decimal digits at the start of a string.
 *
 * Scanning stops at the first character that is not a decimal digit,
 * which includes the string terminator.
 *
 * \param[in] str  String to scan.
 * \return number of consecutive decimal digit characters at start of `str`.
 */
static inline size_t ascii_count_digit(const char *str)
{
	size_t index = 0;

	while (ascii_is_digit(str[index])) {
		index++;
	}

	return index;
}

/**
 * Measure the run of characters at the start of a string that are each
 * either a decimal digit or a colon.
 *
 * Scanning stops at the first character that is neither, which includes
 * the string terminator.
 *
 * \param[in] str  String to scan.
 * \return number of consecutive decimal or ':' characters at start of `str`.
 */
static inline size_t ascii_count_digit_or_colon(const char *str)
{
	const char *cursor = str;

	for (;;) {
		if (!ascii_is_digit(*cursor) && *cursor != ':') {
			break;
		}
		cursor++;
	}

	return (size_t)(cursor - str);
}

/**
 * Compare two strings for equality, ignoring the case of ASCII letters.
 *
 * \param[in] s1  First string to compare.
 * \param[in] s2  Second string to compare.
 * \return true if the strings match once ASCII letters are case-folded,
 *         else false.
 */
static inline bool ascii_strings_equal_caseless(
		const char *s1, const char *s2)
{
	for (; *s1 != '\0'; s1++, s2++) {
		if (ascii_to_lower(*s1) != ascii_to_lower(*s2)) {
			/* Also covers s2 ending before s1 does */
			return false;
		}
	}

	/* s1 is exhausted; the strings match only if s2 ends here too */
	return (*s2 == '\0');
}

/**
 * Compare two strings for exact (case sensitive) equality.
 *
 * \param[in] s1  First string to compare.
 * \param[in] s2  Second string to compare.
 * \return true if both strings hold the same characters and have the same
 *         length, else false.
 */
static inline bool ascii_strings_equal(
		const char *s1, const char *s2)
{
	for (; *s1 != '\0'; s1++, s2++) {
		if (*s1 != *s2) {
			/* Also covers s2 ending before s1 does */
			return false;
		}
	}

	/* s1 is exhausted; the strings match only if s2 ends here too */
	return (*s2 == '\0');
}

/**
 * Measure the common prefix of two strings, ignoring the case of ASCII
 * letters.
 *
 * Counting stops at the end of `s1` or at the first position where the
 * case-folded characters differ.
 *
 * \param[in] s1  First string to compare.
 * \param[in] s2  Second string to compare.
 * \return number of equivalent characters.
 */
static inline size_t ascii_strings_count_equal_caseless(
		const char *s1, const char *s2)
{
	size_t matched = 0;

	while (s1[matched] != '\0' &&
			ascii_to_lower(s1[matched]) ==
			ascii_to_lower(s2[matched])) {
		matched++;
	}

	return matched;
}

/**
 * Measure the common prefix of two strings (case sensitive).
 *
 * Counting stops at the end of `s1` or at the first position where the
 * characters differ.
 *
 * \param[in] s1  First string to compare.
 * \param[in] s2  Second string to compare.
 * \return number of equal characters.
 */
static inline size_t ascii_strings_count_equal(
		const char *s1, const char *s2)
{
	size_t matched = 0;

	while (s1[matched] != '\0' && s1[matched] == s2[matched]) {
		matched++;
	}

	return matched;
}

/**
 * Parse an int out of a string.
 *
 * The conversion is performed by strtoll() in base 10, so whatever prefix
 * strtoll() accepts before the digits counts towards the number of
 * characters consumed.
 *
 * `errno` is cleared before the conversion so that a stale ERANGE left
 * behind by an earlier, unrelated call cannot be mistaken for an overflow
 * here.  If the conversion does not set `errno`, the caller's previous
 * value is put back.
 *
 * \param[in]  str  String to parse integer out of.
 * \param[out] res  Returns parsed integer.  Only written on success.
 * \return The number of characters consumed in `str`.
 *         Returning 0 indicates failure to parse an integer out of the
 *         string: either nothing could be converted, or the value does not
 *         fit in an `int`.
 */
static inline size_t ascii_string_to_int(const char *str, int *res)
{
	char *end = NULL;
	const int saved_errno = errno;
	long long temp;
	int parse_errno;

	/* strtoll() only ever sets errno, so it must start out clear */
	errno = 0;
	temp = strtoll(str, &end, 10);
	parse_errno = errno;

	if (parse_errno == 0) {
		/* Nothing was reported; don't clobber the caller's errno */
		errno = saved_errno;
	}

	if (end == str || parse_errno == ERANGE ||
			temp < INT_MIN || temp > INT_MAX) {
		return 0;
	}

	*res = (int)temp;
	return (size_t)(end - str);
}

#endif