Bug Summary

File: utf8.c
Warning: line 136, column 16
Potential leak of memory pointed to by 'unistr'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name utf8.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/utf8.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdlib.h>
19#include <string.h>
20#include "foma.h"
21
22unsigned char *int2utf8str(int codepoint);
23
24static int hexstrtoint(char *str);
25
/*
 * Strips trailing characters from s in place.
 *
 * Scanning backwards from the end of the string, every character that is
 * equal to c, a space, or a tab is overwritten with '\0'. The scan stops at
 * the first character that is none of these.
 *
 * Returns s. A NULL argument is returned unchanged (same convention as trim).
 */
char *remove_trailing(char *s, char c) {
    size_t end;

    if (s == NULL)
        return s;

    /* end is the length of the part of the string that is kept so far */
    end = strlen(s);
    while (end > 0 && (s[end - 1] == c || s[end - 1] == ' ' || s[end - 1] == '\t')) {
        end--;
        s[end] = '\0';
    }
    return s;
}
38
/*
 * Removes trailing spaces and tabs from string, in place.
 *
 * Each trailing ' ' or '\t' is overwritten with '\0'; the scan stops at the
 * first other character found when walking backwards from the end.
 *
 * Returns string. A NULL argument is returned unchanged.
 */
char *trim(char *string) {
    size_t end;

    if (string == NULL)
        return string;

    /* end is the length of the part of the string that is kept so far */
    end = strlen(string);
    while (end > 0 && (string[end - 1] == ' ' || string[end - 1] == '\t')) {
        end--;
        string[end] = '\0';
    }
    return string;
}
51
/*
 * Reverses the bytes of str in place and returns str.
 * A NULL pointer or an empty string is returned untouched.
 */
char *xstrrev(char *str) {
    char *front, *back, held;

    if (str == NULL || *str == '\0')
        return str;

    front = str;
    back = str + strlen(str) - 1;   /* last byte before the terminator */
    while (back > front) {
        held = *front;
        *front = *back;
        *back = held;
        front++;
        back--;
    }
    return str;
}
64
/*
 * Returns a copy of string in which every occurrence of chr is preceded by
 * a backslash.
 *
 * Ownership: when string contains no occurrence of chr, the ORIGINAL pointer
 * is returned and nothing is allocated. Otherwise a newly allocated,
 * NUL-terminated buffer is returned. Callers can compare the result with
 * their argument to tell which case applied.
 *
 * Returns NULL if the new buffer cannot be allocated.
 */
char *escape_string(char *string, char chr) {
    size_t len, hits, i, j;
    char *newstring;

    len = strlen(string);
    for (i = 0, hits = 0; i < len; i++) {
        if (string[i] == chr) {
            hits++;
        }
    }
    if (hits == 0) {
        return(string);
    }

    /* one extra byte per escaped character, plus one for the terminator */
    newstring = calloc(len + hits + 1, sizeof(char));
    if (newstring == NULL) {
        return(NULL);
    }
    for (i = 0, j = 0; i < len; i++) {
        if (string[i] == chr) {
            newstring[j++] = '\\';
        }
        newstring[j++] = string[i];
    }
    newstring[j] = '\0';
    return(newstring);
}
88
/*
 * Truncates s at its first newline: the first '\n' found, if any, is
 * overwritten with '\0'. A string without a newline is left unchanged.
 */
void strip_newline(char *s) {
    char *newline = strchr(s, '\n');

    if (newline != NULL) {
        *newline = '\0';
    }
}
/*
 * Removes an enclosing pair of double quotes from s, in place, and then
 * passes the unquoted text to decode_quoted() to expand escape sequences.
 *
 * The string is only modified when it is at least two characters long and
 * both its first and last characters are '"' (0x22). A lone '"' is not
 * treated as a quoted string, because the same character would otherwise
 * count as both the opening and the closing quote.
 */
void dequote_string(char *s) {
    size_t len;

    len = strlen(s);
    if (len >= 2 && s[0] == 0x22 && s[len - 1] == 0x22) {
        /* shift the len-2 inner characters down over the opening quote */
        memmove(s, s + 1, len - 2);
        s[len - 2] = '\0';
        decode_quoted(s);
    }
}
114
/*
 * Decodes escape sequences in s, in place.
 *
 * Every backslash-u escape followed by four hexadecimal digits (six bytes in
 * total) is replaced by the UTF-8 string returned by utf8code16tostr() for
 * those digits. All other characters are copied through unchanged, one
 * UTF-8 sequence at a time. The decoded text is never longer than the
 * input, so the rewrite is done in the same buffer.
 *
 * The buffer returned by utf8code16tostr() is freed after it has been copied.
 * If that call returns NULL, the six bytes of the escape are kept verbatim.
 */
void decode_quoted(char *s) {
    int len, i, j, k, skip;
    unsigned char *unistr, *p;

    len = strlen(s);
    for (i = 0, j = 0; i < len; ) {
        /* 0x5c is '\\' and 0x75 is 'u'; at least 6 bytes must remain */
        if (*(s+i) == 0x5c && len-i > 5 && *(s+i+1) == 0x75 && ishexstr(s+i+2)) {
            unistr = utf8code16tostr(s+i+2);
            if (unistr != NULL) {
                /* walk a second pointer so unistr itself can still be freed */
                for (p = unistr; *p; p++, j++) {
                    *(s+j) = *p;
                }
                free(unistr);
                i += 6;
            } else {
                for (k = 0; k < 6; k++) {
                    *(s+j) = *(s+i);
                    i++; j++;
                }
            }
        } else {
            skip = utf8skip(s+i) + 1;
            /* utf8skip() returns -1 for a byte that cannot start a sequence; */
            /* copy such a byte on its own so that the loop always advances */
            if (skip < 1) {
                skip = 1;
            }
            /* i < len keeps a truncated final sequence from running past the end */
            for ( ; skip > 0 && i < len; skip--) {
                *(s+j) = *(s+i);
                i++; j++;
            }
        }
    }
    *(s+j) = '\0';
}
138
139
/*
 * Replaces every occurrence of oldstring in s by newstring, in place.
 *
 * The two strings are expected to have the same length; exactly
 * strlen(oldstring) bytes of newstring are copied over each match.
 * Occurrences are found left to right and do not overlap: after a
 * replacement the search resumes immediately behind the replaced bytes, so
 * the function terminates even when newstring contains oldstring.
 *
 * s is returned unchanged when oldstring is empty or when newstring is
 * shorter than oldstring (copying would read past the end of newstring).
 *
 * Returns s.
 */
char *streqrep(char *s, char *oldstring, char *newstring) {
    char *ptr;
    size_t len;

    len = strlen(oldstring);
    if (len == 0 || strlen(newstring) < len) {
        return(s);
    }

    ptr = s;
    while ((ptr = strstr(ptr, oldstring)) != NULL) {
        memcpy(ptr, newstring, len);
        ptr += len;
    }
    return(s);
}
151
/*
 * Returns 1 if the first four characters of str are all hexadecimal digits
 * (0-9, A-F or a-f, tested by character code), and 0 otherwise.
 * A string shorter than four characters yields 0: its terminator is not a
 * hexadecimal digit, so the scan stops there.
 */
int ishexstr (char *str) {
    const char *cursor = str;
    const char *stop = str + 4;
    char ch;

    while (cursor < stop) {
        ch = *cursor;
        if (!((ch >= 0x30 && ch <= 0x39) ||    /* '0'..'9' */
              (ch >= 0x41 && ch <= 0x46) ||    /* 'A'..'F' */
              (ch >= 0x61 && ch <= 0x66))) {   /* 'a'..'f' */
            return 0;
        }
        cursor++;
    }
    return 1;
}
/*
 * Returns the number of UTF-8 encoded characters in str.
 *
 * The string is walked one sequence at a time, using utf8skip() to find how
 * many bytes follow each lead byte. A byte that utf8skip() rejects (return
 * value -1) is counted as one character of its own, so the walk always
 * advances. A sequence that is cut off by the end of the string is counted
 * once, and nothing beyond the terminator is read.
 */
int utf8strlen(char *str) {
    int i, count, len, skip;

    len = strlen(str);
    for (i = 0, count = 0; i < len; count++) {
        skip = utf8skip(str+i);
        if (skip < 0) {
            skip = 0;
        }
        i = i + skip + 1;
    }
    return count;
}
169
/*
 * Tests whether the string s begins with a combining character from one of
 * these blocks (listed by the original author as per Unicode 7.0):
 *
 *   0300-036F  Combining Diacritical Marks             (bytes CC 80 .. CD AF)
 *   1AB0-1ABE  Combining Diacritical Marks Extended    (bytes E1 AA B0..BE)
 *   1DC0-1DFF  Combining Diacritical Marks Supplement  (bytes E1 B7 80..BF)
 *   20D0-20F0  Combining Diacritical Marks for Symbols (bytes E2 83 90..B0)
 *   FE20-FE2D  Combining Half Marks                    (bytes EF B8 A0..AD)
 *
 * Returns the length in bytes of the character's encoding (2 or 3), or 0 if
 * the string does not start with such a character.
 */
int utf8iscombining(unsigned char *s) {
    unsigned char lead, second, third;

    lead = s[0];
    if (lead == '\0')
        return 0;
    second = s[1];
    if (second == '\0')
        return 0;

    switch (lead) {
    case 0xcc:
        /* 0300-033F */
        return (second >= 0x80 && second <= 0xbf) ? 2 : 0;
    case 0xcd:
        /* 0340-036F */
        return (second >= 0x80 && second <= 0xaf) ? 2 : 0;
    case 0xe1:
    case 0xe2:
    case 0xef:
        break;
    default:
        return 0;
    }

    /* the remaining candidates are three bytes long */
    third = s[2];
    if (third == '\0')
        return 0;

    if (lead == 0xe1) {
        /* 1AB0-1ABE */
        if (second == 0xaa && third >= 0xb0 && third <= 0xbe)
            return 3;
        /* 1DC0-1DFF */
        if (second == 0xb7 && third >= 0x80 && third <= 0xbf)
            return 3;
        return 0;
    }
    if (lead == 0xe2) {
        /* 20D0-20F0 */
        return (second == 0x83 && third >= 0x90 && third <= 0xb0) ? 3 : 0;
    }
    /* lead == 0xef: FE20-FE2D */
    return (second == 0xb8 && third >= 0xa0 && third <= 0xad) ? 3 : 0;
}
205
/*
 * Classifies the first byte of str as the start of a UTF-8 sequence.
 *
 * Returns the number of bytes that follow the first one in the sequence:
 *   0 for a byte below 0x80,
 *   1, 2 or 3 for the lead byte of a two-, three- or four-byte sequence,
 *  -1 for any other byte.
 */
int utf8skip(char *str) {
    /* bit pattern of each accepted first byte and the bytes that follow it */
    static const struct {
        unsigned char mask;
        unsigned char bits;
        int following;
    } forms[] = {
        { 0x80, 0x00, 0 },
        { 0xe0, 0xc0, 1 },
        { 0xf0, 0xe0, 2 },
        { 0xf8, 0xf0, 3 }
    };
    const unsigned char first = (unsigned char)(*str);
    size_t k;

    for (k = 0; k < sizeof forms / sizeof forms[0]; k++) {
        if ((first & forms[k].mask) == forms[k].bits) {
            return forms[k].following;
        }
    }
    return -1;
}
223
/*
 * Converts the four hexadecimal digits at the start of str into a 16-bit
 * code point and returns its UTF-8 encoding as produced by int2utf8str().
 *
 * The first two digits form the high byte and the last two the low byte.
 * The result is whatever int2utf8str() returns for that value; it is
 * allocated there, so the caller is responsible for freeing it.
 */
unsigned char *utf8code16tostr(char *str) {
    const int high_byte = hexstrtoint(str);
    const int low_byte = hexstrtoint(str + 2);

    return int2utf8str((high_byte << 8) + low_byte);
}
229
/*
 * Encodes codepoint as a NUL-terminated UTF-8 string.
 *
 * Code points 0x0000-0xFFFF are supported and produce one, two or three
 * bytes. The result is allocated with malloc() and must be freed by the
 * caller.
 *
 * Returns NULL when codepoint is negative or 0x10000 and above, or when the
 * allocation fails. The range is checked before allocating, so nothing is
 * leaked on the rejected path.
 */
unsigned char *int2utf8str(int codepoint) {
    unsigned char *value;

    if (codepoint < 0 || codepoint >= 0x10000) {
        return(NULL);
    }

    value = malloc(sizeof(unsigned char)*5);
    if (value == NULL) {
        return(NULL);
    }

    if (codepoint < 0x80) {
        /* 0xxxxxxx */
        *(value) = (unsigned char)(codepoint);
        *(value+1) = 0;
    } else if (codepoint < 0x800) {
        /* 110xxxxx 10xxxxxx */
        *(value) = (0xc0 | (unsigned char)(codepoint >> 6));
        *(value+1) = (0x80 | (unsigned char)(codepoint & 0x3f));
        *(value+2) = 0;
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        *(value) = (0xe0 | (unsigned char)(codepoint >> 12));
        *(value+1) = (0x80 | (unsigned char)((codepoint >> 6) & 0x3f));
        *(value+2) = (0x80 | (unsigned char)(codepoint & 0x3f));
        *(value+3) = 0;
    }
    return(value);
}
253
/*
 * Maps one hexadecimal digit character to its numeric value by character
 * code: codes above 0x60 are read as 'a'-'f', codes above 0x40 as 'A'-'F',
 * and everything else as '0'-'9'. The input is not validated.
 */
static int hexdigit_value(char digit) {
    if (digit > 0x60)
        return digit - 0x57;
    if (digit > 0x40)
        return digit - 0x37;
    return digit - 0x30;
}

/*
 * Converts the two hexadecimal digits at the start of str to an integer:
 * the first digit is the high nibble, the second the low nibble.
 * The characters are not checked for being valid hexadecimal digits.
 */
int hexstrtoint(char *str) {
    const int high_nibble = hexdigit_value(str[0]);
    const int low_nibble = hexdigit_value(str[1]);

    return (high_nibble << 4) + low_nibble;
}