Statistics
| Revision:

root / trunk / plugins / decoder / decoder.c @ 1857

History | View | Annotate | Download (32.8 KB)

1
/* 
2
    Copyright (C) 2005-2010  Erik van Pienbroek
3

                
4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

                
9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

                
14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

                
19
#include 
20
#include 
21
#include 
22
#include 
23
#include 
24
#include 
25
#include 
26
#include 
27

                
28
#include "nntpgrab_plugin.h"
29
#include "yydecode.h"
30
#include "crc32.h"
31
#include "marshalers.h"
32

                
33
#include "config.h"
34

                
35
// yydecode.c
36
int decode(const char *inname, const char *forced_outname, struct decoded_file **decoded_list, int *saved_errno, char **errmsg);
37

                
38
static NNTPGrabDecoderRes
39
nntpgrab_plugin_decoder_decode_file(NGPlugin *plugin_data, const char *collection_name, NNTPFile *file, const char *temp_directory, const char *target_directory, char **real_filename, int *saved_errno, char **errmsg);
40

                
41
void
42
nntpgrab_plugin_initialize(NGPlugin *plugin_data)
43
{
44
    ng_plugin_set_name(plugin_data, "Decoder");
45
    ng_plugin_set_version(plugin_data, PACKAGE_VERSION);
46
    ng_plugin_set_author(plugin_data, "Erik van Pienbroek");
47
    ng_plugin_set_url(plugin_data, "https://www.nntpgrab.nl");
48
    ng_plugin_set_description(plugin_data, "A plugin which is responsible for decoding yEnc and UU-encoded files");
49

                
50
    ng_plugin_register_function(plugin_data,
51
                                "decode_file",
52
                                NG_PLUGIN_FUNCTION(nntpgrab_plugin_decoder_decode_file),
53
                                ng_plugin_marshal_INT__STRING_POINTER_STRING_STRING_POINTER_POINTER_POINTER,
54
                                G_TYPE_INT,
55
                                7, G_TYPE_STRING, G_TYPE_POINTER, G_TYPE_STRING,
56
                                   G_TYPE_STRING, G_TYPE_POINTER, G_TYPE_POINTER, G_TYPE_POINTER);
57
}
58

                
59
ngboolean
60
nntpgrab_plugin_load(NGPlugin *plugin_data, char **errmsg)
61
{
62
    return TRUE;
63
}
64

                
65
ngboolean
66
nntpgrab_plugin_can_unload(NGPlugin *plugin_data, char **reason)
67
{
68
    return TRUE;
69
}
70

                
71
void
72
nntpgrab_plugin_unload(NGPlugin *plugin_data)
73
{
74
}
75

                
76
void
77
nntpgrab_plugin_destroy(NGPlugin *plugin_data)
78
{
79
}
80

                
81
int
82
nntpgrab_plugin_get_version(void)
83
{
84
    return NNTPGRAB_PLUGIN_API_VERSION;
85
}
86

                
87
/* MD5 code copied from plugins/jsonrpc/mongoose.c */
88
typedef struct MD5Context {
89
        guint32         buf[4];
90
        guint32         bits[2];
91
        unsigned char   in[64];
92
} MD5_CTX;
93

                
94
#if 0 
95
#if __BYTE_ORDER == 1234
96
#define byteReverse(buf, len)        /* Nothing */
97
#else
98
/* 
99
 * Note: this code is harmless on little-endian machines.
100
 */
101
static void
102
byteReverse(unsigned char *buf, unsigned longs)
103
{
104
        guint32 t;
105
        do {
106
                t = (guint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
107
                        ((unsigned) buf[1] << 8 | buf[0]);
108
                *(guint32 *) buf = t;
109
                buf += 4;
110
        } while (--longs);
111
}
112
#endif /* __BYTE_ORDER */
113

                
114
/* The four core functions - F1 is optimized somewhat */

/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))

/*
 * This is the central step in the MD5 algorithm:
 * add f(x, y, z) and the data word to w, rotate w left by s bits, add x.
 * 'w' must be a 32-bit unsigned lvalue and 0 < s < 32.
 */
#define MD5STEP(f, w, x, y, z, data, s) \
( (w) += f(x, y, z) + (data),  (w) = (w) << (s) | (w) >> (32 - (s)),  (w) += (x) )
125

                
126
/* 
127
 * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
128
 * initialization constants.
129
 */
130
static void
131
MD5Init(MD5_CTX *ctx)
132
{
133
        ctx->buf[0] = 0x67452301;
134
        ctx->buf[1] = 0xefcdab89;
135
        ctx->buf[2] = 0x98badcfe;
136
        ctx->buf[3] = 0x10325476;
137

                
138
        ctx->bits[0] = 0;
139
        ctx->bits[1] = 0;
140

                
141
        memset(ctx->in, 0, sizeof(ctx->in));
142
}
143

                
144
/* 
145
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
146
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
147
 * the data and converts bytes into longwords for this routine.
148
 */
149
static void
150
MD5Transform(guint32 buf[4], guint32 const in[16])
151
{
152
        register guint32 a, b, c, d;
153

                
154
        a = buf[0];
155
        b = buf[1];
156
        c = buf[2];
157
        d = buf[3];
158

                
159
        MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
160
        MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
161
        MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
162
        MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
163
        MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
164
        MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
165
        MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
166
        MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
167
        MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
168
        MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
169
        MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
170
        MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
171
        MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
172
        MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
173
        MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
174
        MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
175

                
176
        MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
177
        MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
178
        MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
179
        MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
180
        MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
181
        MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
182
        MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
183
        MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
184
        MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
185
        MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
186
        MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
187
        MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
188
        MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
189
        MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
190
        MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
191
        MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
192

                
193
        MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
194
        MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
195
        MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
196
        MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
197
        MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
198
        MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
199
        MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
200
        MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
201
        MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
202
        MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
203
        MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
204
        MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
205
        MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
206
        MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
207
        MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
208
        MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
209

                
210
        MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
211
        MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
212
        MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
213
        MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
214
        MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
215
        MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
216
        MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
217
        MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
218
        MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
219
        MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
220
        MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
221
        MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
222
        MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
223
        MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
224
        MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
225
        MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
226

                
227
        buf[0] += a;
228
        buf[1] += b;
229
        buf[2] += c;
230
        buf[3] += d;
231
}
232

                
233
/* 
234
 * Update context to reflect the concatenation of another buffer full
235
 * of bytes.
236
 */
237
static void
238
MD5Update(MD5_CTX *ctx, unsigned char const *buf, unsigned len)
239
{
240
        guint32 t;
241

                
242
        if (len == 0) {
243
            return;
244
        }
245

                
246
        /* Update bitcount */
247

                
248
        t = ctx->bits[0];
249
        if ((ctx->bits[0] = t + ((guint32) len << 3)) < t)
250
                ctx->bits[1]++;                /* Carry from low to high */
251
        ctx->bits[1] += len >> 29;
252

                
253
        t = (t >> 3) & 0x3f;        /* Bytes already in shsInfo->data */
254

                
255
        /* Handle any leading odd-sized chunks */
256

                
257
        if (t) {
258
                unsigned char *p = (unsigned char *) ctx->in + t;
259

                
260
                t = 64 - t;
261
                if (len < t) {
262
                        memcpy(p, buf, len);
263
                        return;
264
                }
265
                memcpy(p, buf, t);
266
                byteReverse(ctx->in, 16);
267
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
268
                buf += t;
269
                len -= t;
270
        }
271
        /* Process data in 64-byte chunks */
272

                
273
        while (len >= 64) {
274
                memcpy(ctx->in, buf, 64);
275
                byteReverse(ctx->in, 16);
276
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
277
                buf += 64;
278
                len -= 64;
279
        }
280

                
281
        /* Handle any remaining bytes of data. */
282

                
283
        memcpy(ctx->in, buf, len);
284
}
285

                
286
/* 
287
 * Final wrapup - pad to 64-byte boundary with the bit pattern
288
 * 1 0* (64-bit count of bits processed, MSB-first)
289
 */
290
static void
291
MD5Final(unsigned char digest[16], MD5_CTX *ctx)
292
{
293
        unsigned count;
294
        unsigned char *p;
295

                
296
        /* Compute number of bytes mod 64 */
297
        count = (ctx->bits[0] >> 3) & 0x3F;
298

                
299
        /* Set the first char of padding to 0x80. This is safe since there is 
300
           always at least one byte free */
301
        p = ctx->in + count;
302
        *p++ = 0x80;
303

                
304
        /* Bytes of padding needed to make 64 bytes */
305
        count = 64 - 1 - count;
306

                
307
        /* Pad out to 56 mod 64 */
308
        if (count < 8) {
309
                /* Two lots of padding: Pad the first block to 64 bytes */
310
                memset(p, 0, count);
311
                byteReverse(ctx->in, 16);
312
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
313

                
314
                /* Now fill the next block with 56 bytes */
315
                memset(ctx->in, 0, 56);
316
        } else {
317
                /* Pad block to 56 bytes */
318
                memset(p, 0, count - 8);
319
        }
320
        byteReverse(ctx->in, 14);
321

                
322
        /* Append length in bits and transform */
323
        ((guint32 *) ctx->in)[14] = ctx->bits[0];
324
        ((guint32 *) ctx->in)[15] = ctx->bits[1];
325

                
326
        MD5Transform(ctx->buf, (guint32 *) ctx->in);
327
        byteReverse((unsigned char *) ctx->buf, 4);
328
        memcpy(digest, ctx->buf, 16);
329
        memset((char *) ctx, 0, sizeof(ctx));        /* In case it's sensitive */
330
}
331
#endif
332

                
333
/*
 * Remove one trailing '\n' and then one trailing '\r' from 'line', in place.
 * A "\r\n" or "\n" line ending is therefore removed completely; a NULL or
 * empty string is left untouched.
 */
static void
strip_newline(char *line)
{
    size_t len;

    if (line == NULL) {
        return;
    }

    len = strlen(line);

    /* The length checks keep us from looking at line[-1] when the string
     * is empty, or becomes empty after the '\n' has been removed */
    if (len > 0 && line[len - 1] == '\n') {
        line[--len] = '\0';
    }

    if (len > 0 && line[len - 1] == '\r') {
        line[--len] = '\0';
    }
}
344

                
345
static gboolean
346
parse_ybegin_header(const char *line, int *part, int *num_lines, guint64 *size, char *name, int name_length)
347
{
348
    int i;
349
    int len;
350

                
351
    /* line must be in one of the following notations: */
352
    /* =ybegin line=128 size=100000 name=abc.rar */
353
    /* =ybegin part=1 line=128 size=50000000 name=abc.rar */
354

                
355
    g_return_val_if_fail(line != NULL, FALSE);
356
    g_return_val_if_fail(part != NULL, FALSE);
357
    g_return_val_if_fail(line != NULL, FALSE);
358
    g_return_val_if_fail(size != NULL, FALSE);
359
    g_return_val_if_fail(name != NULL, FALSE);
360
    g_return_val_if_fail(name_length > 0, FALSE);
361

                
362
    *part = -1;
363
    *num_lines = -1;
364
    *size = -1;
365
    memset(name, 0, name_length);
366

                
367
    len = strlen(line);
368
    for (i = 0; i < len; i++) {
369
        if (!strncmp(line + i, "part=", 5)) {
370
            *part = atoi(line + i + 5);
371
        } else if (!strncmp(line + i, "line=", 5)) {
372
            *num_lines = atoi(line + i + 5);
373
        } else if (!strncmp(line + i, "size=", 5)) {
374
            *size = g_ascii_strtoull (line + i + 5, NULL, 10);
375
        } else if (!strncmp(line + i, "name=", 5)) {
376
            strcpy(name, line + i + 5);
377
            strip_newline(name);
378
        }
379
    }
380

                
381
#if 0 
382
g_print("part = %i\n", *part);
383
g_print("num_lines = %i\n", *num_lines);
384
g_print("size = %li\n", *size);
385
g_print("filename = %s\n\n", name);
386
#endif
387

                
388
    if (*num_lines > 0 && *size > 0 && strlen(name) > 0) {  /* NOTE: The part field is optional */
389
        return TRUE;
390
    } else {
391
        return FALSE;
392
    }
393
}
394

                
395
static gboolean
396
parse_ypart_header(const char *line, guint64 *begin, guint64 *end)
397
{
398
    int i;
399
    int len;
400

                
401
    /* line must be in the following notation: */
402
    /* =ypart begin=1 end=640000 */
403

                
404
    g_return_val_if_fail(line != NULL, FALSE);
405
    g_return_val_if_fail(begin != NULL, FALSE);
406
    g_return_val_if_fail(end != NULL, FALSE);
407

                
408
    *begin = -1;
409
    *end = -1;
410

                
411
    len = strlen(line);
412
    for (i = 0; i < len; i++) {
413
        if (!strncmp(line + i, "begin=", 6)) {
414
            *begin = g_ascii_strtoull(line + i + 6, NULL, 10);
415
        } else if (!strncmp(line + i, "end=", 4)) {
416
            *end = g_ascii_strtoull(line + i + 4, NULL, 10);
417
        }
418
    }
419

                
420
#if 0 
421
g_print("begin = %li\n", *begin);
422
g_print("end = %li\n", *end);
423
#endif
424

                
425
    if (*begin >= 0 && *end > 0) {
426
        return TRUE;
427
    } else {
428
        return FALSE;
429
    }
430
}
431

                
432
static gboolean
433
try_to_use_new_decoder(NGPlugin *plugin_data, const char *filename_in, const char *target_directory, NNTPGrabDecoderRes *decoder_result, char **errmsg, FILE **prev_file_out_fp, char *prev_filename_out, int prev_filename_out_length, char *prev_real_filename_out, int prev_real_filename_out_length, MD5_CTX *md5_ctx)
434
{
435
    int part = -1;
436
    int num_lines = -1;
437
    guint64 size = -1;
438
    char filename_out[1024];
439
    guint64 begin = -1;
440
    guint64 end = -1;
441
    FILE *fp;
442
    gboolean escape_char_detected = FALSE;
443
    char path[4096];
444
    char buf[1024];
445
    struct crc32_ctx crc32_context;
446

                
447
    g_return_val_if_fail(filename_in != NULL, FALSE);
448
    g_return_val_if_fail(target_directory != NULL, FALSE);
449
    g_return_val_if_fail(decoder_result != NULL, FALSE);
450
    /* NOTE: errmsg MIGHT be NULL */
451
    g_return_val_if_fail(prev_file_out_fp != NULL, FALSE);
452
    g_return_val_if_fail(prev_filename_out != NULL, FALSE);
453
    g_return_val_if_fail(prev_filename_out_length > 0, FALSE);
454

                
455
    *decoder_result = DECODER_RESULT_COMPLETE;
456

                
457
    /* Try to read the first line from the given filename to detected if this is an yEnc encoded file */
458
    fp = g_fopen(filename_in, "rb");
459

                
460
    if (!fp) {
461
        *decoder_result = DECODER_RESULT_ERROR;
462
        if (errmsg) {
463
            *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to open file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
464
        }
465

                
466
        return TRUE;
467
    }
468

                
469
    /* Try to read the header from this file */
470
    do {
471
        if (!fgets(buf, sizeof(buf) - 1, fp)) {
472
            if (feof(fp)) {
473
                /* The file doesn't contain any data at all */
474
                *decoder_result = DECODER_RESULT_INCOMPLETE;
475
                ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i File %s is empty\n", __LINE__, filename_in);
476
            } else {
477
                *decoder_result = DECODER_RESULT_ERROR;
478
                if (errmsg) {
479
                    *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read the first line from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
480
                }
481
            }
482

                
483
            fclose(fp);
484

                
485
            return TRUE;
486
        }
487

                
488
        strip_newline(buf);
489

                
490
        if (strlen(buf) == 0) {
491
            continue;
492
        }
493

                
494
        if (strncmp(buf, "=ybegin", 7) != 0) {
495
            /* Other encoding found. Process this with a different decoder */
496
            fclose(fp);
497

                
498
            return FALSE;
499
        }
500

                
501
        break;
502
    } while (TRUE);
503

                
504
    /* yEnc encoded file found! */
505

                
506
    /* The header is now in buf. Try to extract information from this header */
507
    memset(filename_out, 0, sizeof(filename_out));
508
    if (!parse_ybegin_header(buf, &part, &num_lines, &size, filename_out, sizeof(filename_out) - 1)) {
509
        /* Header incomplete. Ignore this file */
510
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i yBegin-header incomplete, buf = %s\n", __LINE__, buf);
511
        *decoder_result = DECODER_RESULT_INCOMPLETE;
512
        fclose(fp);
513

                
514
        return TRUE;
515
    }
516

                
517
    /* Translate the filename to UTF-8 if it isn't already */
518
    if (!g_utf8_validate(filename_out, -1, NULL)) {
519
        /* Filename is probably in Windows-1252 charset */
520
        char *tmp = g_convert(filename_out, -1, "utf-8", "windows-1252", NULL, NULL, NULL);
521
        if (!tmp) {
522
            *decoder_result = DECODER_RESULT_ERROR;
523
            if (errmsg) {
524
                *errmsg = g_strdup_printf(_("%s:%i The filename '%s' is using an unknown character set. Decoding cannot continue"), __FILE__, __LINE__, filename_out);
525
            }
526

                
527
            fclose(fp);
528

                
529
            return TRUE;
530
        }
531

                
532
        memset(filename_out, 0, sizeof(filename_out));
533
        strncpy(filename_out, tmp, sizeof(filename_out) - 1);
534
        g_free(tmp);
535
    }
536

                
537
    /* If this is a multipart file, another header is sent */
538
    if (part != -1) {
539
        if (!fgets(buf, sizeof(buf) - 1, fp)) {
540
            *decoder_result = DECODER_RESULT_ERROR;
541
            if (errmsg) {
542
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read the first line from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
543
            }
544

                
545
            fclose(fp);
546

                
547
            return TRUE;
548
        }
549

                
550
        if (!parse_ypart_header(buf, &begin, &end)) {
551
            /* Header incomplete. Ignore this file */
552
            ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i Part-header incomplete, buf = %s\n", __LINE__, buf);
553
            *decoder_result = DECODER_RESULT_INCOMPLETE;
554
            fclose(fp);
555

                
556
            return TRUE;
557
        }
558
    }
559

                
560
    /* Open the output file (if it wasn't already open) */
561
    memset(path, 0, sizeof(path));
562
    g_snprintf(path, sizeof(path) - 1, "%s%s%s", target_directory, G_DIR_SEPARATOR_S, filename_out);
563

                
564
    if (strcmp(filename_out, prev_filename_out) != 0) {
565
        char path_orig[4096];
566
        int suffix = 1;
567
        char *filename_tmp;
568

                
569
        /* Output file wasn't open yet. Open it now */
570
        if (*prev_file_out_fp != NULL) {
571
            if (fclose(*prev_file_out_fp) != 0) {
572
                *decoder_result = DECODER_RESULT_ERROR;
573
                if (errmsg) {
574
                    *errmsg = g_strdup_printf(_("%s:%i Unable to close file '%s': %s"), __FILE__, __LINE__, prev_filename_out, strerror(errno));
575
                }
576
                *prev_file_out_fp = NULL;
577
                return TRUE;
578
            }
579
        }
580

                
581
        /* If there's already a file at 'path', append a suffix to it and try again */
582
        strcpy(path_orig, path);
583
        while (g_file_test(path, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) {
584
            g_snprintf(path, sizeof(path) - 1, "%s.%i", path_orig, suffix);
585
            suffix++;
586
        }
587

                
588
        *prev_file_out_fp = g_fopen(path, "wb");
589

                
590
        if (!(*prev_file_out_fp)) {
591
            *decoder_result = DECODER_RESULT_ERROR;
592
            if (errmsg) {
593
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to open file for writing '%s': %s"), __FILE__, __LINE__, path, strerror(errno));
594
            }
595

                
596
            fclose(fp);
597

                
598
            return TRUE;
599
        }
600

                
601
        strncpy(prev_filename_out, filename_out, prev_filename_out_length);
602
        filename_tmp = g_path_get_basename(path);
603
        strncpy(prev_real_filename_out, filename_tmp, prev_real_filename_out_length);
604
        g_free(filename_tmp);
605
    }
606

                
607
    g_return_val_if_fail(*prev_file_out_fp != NULL, TRUE);
608

                
609
    /* Seek to the right position in the output file if we're decoding a multi-part file */
610
    if (begin > 0) {
611
        (void) fseek(*prev_file_out_fp, begin - 1, SEEK_SET);
612
    }
613

                
614
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, "Now starting decoding file at position %"G_GUINT64_FORMAT"\n", begin);
615

                
616
    crc32_init_ctx(&crc32_context);
617

                
618
    /* Start the real decoding */
619
    do {
620
        int len = fread(buf, 1, sizeof(buf), fp);
621
        int i;
622
        char buf_out[1024];
623
        int len_out = 0;
624

                
625
        memset(buf_out, 0, sizeof(buf_out));
626

                
627
        if (len == 0) {
628
            if (feof(fp)) {
629
                /* All data is read */
630
                break;
631
            }
632
        } else if (len < 0) {
633
            /* Some error occured while reading */
634
            *decoder_result = DECODER_RESULT_ERROR;
635
            if (errmsg) {
636
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
637
            }
638

                
639
            fclose(fp);
640

                
641
            return TRUE;
642
        }
643

                
644
        for (i = 0; i < len; i++) {
645
            unsigned char ch = buf[i];
646

                
647
            if (ch == '\r' || ch == '\n') {
648
                /* Ignore newlines */
649
                continue;
650
            } else if (ch == '=') {
651
                /* Escape character detected. Un-escape the next byte */
652
                escape_char_detected = TRUE;
653
            } else if (escape_char_detected && ch == 'y') {     // =y detected. This is probably the end of the part
654
                char buf_new[1024];
655
                int buf_new_length;
656
                char *ptr;
657
                guint32 crc32_expected = 0;
658

                
659
                memset(&buf_new, 0, sizeof(buf_new));
660
                buf_new_length = sizeof(buf) - i - 1;
661

                
662
                memcpy(buf_new, buf + i + 1, buf_new_length);
663

                
664
                /* Try to put more data in the buffer */
665
                if (fread(buf_new + buf_new_length, 1, sizeof(buf_new) - buf_new_length, fp) < 0) {
666
                    *decoder_result = DECODER_RESULT_ERROR;
667
                    if (errmsg) {
668
                        *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
669
                    }
670

                
671
                    fclose(fp);
672

                
673
                    return TRUE;
674
                }
675

                
676
                /* Do we really have an =yend ? */
677
                if (strncmp(buf_new, "end", 3) != 0) {
678
                    *decoder_result = DECODER_RESULT_INCOMPLETE;
679
                }
680

                
681
                /* Flush the buffer */
682
                if (len_out > 0 && fwrite(buf_out, 1, len_out, *prev_file_out_fp) <= 0) {
683
                    *decoder_result = DECODER_RESULT_ERROR;
684
                    if (errmsg) {
685
                        *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to write to file '%s': %s"), __FILE__, __LINE__, filename_out, strerror(errno));
686
                    }
687
                }
688

                
689
                fclose(fp);
690

                
691
#if 0 
692
                /* Update the MD5 */
693
                MD5Update(md5_ctx, (unsigned char*) buf_out, len_out);
694
#endif
695

                
696
                /* Calculate the CRC value of this part */
697
                crc32_process_bytes(buf_out, len_out, &crc32_context);
698

                
699
                /* Verify the CRC value of this part */
700
                if ((ptr = strstr(buf_new, "pcrc32="))) {
701
                    char crc[9];
702
                    memcpy(crc, ptr + 7, 8);
703
                    crc[8] = '\0';
704

                
705
                    crc32_expected = atocrc32(crc);
706
                } else if ((ptr = strstr(buf_new, "crc32="))) {
707
                    char crc[9];
708
                    memcpy(crc, ptr + 6, 8);
709
                    crc[8] = '\0';
710

                
711
                    crc32_expected = atocrc32(crc);
712
                } else {
713
                    /* No CRC32 value found. Ignore verification */
714
                    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i No CRC32 value found in file %s\n", __LINE__, filename_in);
715
                }
716

                
717
                if (crc32_expected != 0 && crc32_read_ctx(&crc32_context) != crc32_expected) {
718
                    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, "Part CRC32 error for file %s -- got 0x%08x, should be 0x%08x\n", filename_in, crc32_read_ctx(&crc32_context), crc32_expected);
719
                    *decoder_result = DECODER_RESULT_INCOMPLETE;
720
                }
721

                
722
                crc32_finish_ctx(&crc32_context);
723

                
724
                return TRUE;
725
            } else {
726
                ch = ch - 42 - (escape_char_detected ? 64 : 0);
727
                if (ch < 0) {
728
                    ch += 256;
729
                }
730

                
731
                buf_out[len_out++] = ch;
732
                g_return_val_if_fail(len_out <= sizeof(buf_out), TRUE);
733

                
734
                escape_char_detected = FALSE;
735
            }
736
        }
737

                
738
        /* Calculate the CRC and MD5 values of this part */
739
        crc32_process_bytes(buf_out, len_out, &crc32_context);
740
#if 0 
741
        MD5Update(md5_ctx, (unsigned char*) buf_out, len_out);
742
#endif
743

                
744
        /* Flush the buffer */
745
        if (fwrite(buf_out, 1, len_out, *prev_file_out_fp) <= 0) {
746
            *decoder_result = DECODER_RESULT_ERROR;
747
            if (errmsg) {
748
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to write to file '%s': %s"), __FILE__, __LINE__, filename_out, strerror(errno));
749
            }
750

                
751
            fclose(fp);
752

                
753
            return TRUE;
754
        }
755
    } while (TRUE);
756

                
757
    fclose(fp);
758

                
759
    /* Unexpected end of file detected, file must be incomplete */
760
    *decoder_result = DECODER_RESULT_INCOMPLETE;
761

                
762
    return TRUE;
763
}
764

                
765
static NNTPGrabDecoderRes
766
nntpgrab_plugin_decoder_decode_file(NGPlugin *plugin_data, const char *collection_name, NNTPFile *file, const char *temp_directory, const char *target_directory, char **real_filename, int *saved_errno, char **errmsg)
767
{
768
    int i;
769
#if 0 
770
    GList *lines = NULL;
771
#endif
772
    NNTPGrabDecoderRes ret = DECODER_RESULT_COMPLETE;
773
    gboolean file_found = FALSE;
774
    struct decoded_file *decoded_list = NULL;
775
    struct decoded_file *list;
776
    gboolean new_decoder_used = FALSE;
777
    char filename[1024];
778
    gboolean incomplete_file = FALSE;
779
    char target_path[4096];
780
    char prev_filename_out[1024];
781
    char prev_real_filename_out[1024];
782
    FILE *prev_file_fp = NULL;
783
    MD5_CTX md5_ctx;
784

                
785
    g_return_val_if_fail(collection_name != NULL, DECODER_RESULT_ERROR);
786
    g_return_val_if_fail(file != NULL, DECODER_RESULT_ERROR);
787
    g_return_val_if_fail(temp_directory != NULL, DECODER_RESULT_ERROR);
788
    g_return_val_if_fail(target_directory != NULL, DECODER_RESULT_ERROR);
789
    g_return_val_if_fail(real_filename != NULL, DECODER_RESULT_ERROR);
790
    g_return_val_if_fail(saved_errno != NULL, DECODER_RESULT_ERROR);
791

                
792
    *saved_errno = 0;
793

                
794
    memset(prev_filename_out, 0, sizeof(prev_filename_out));
795
    memset(prev_real_filename_out, 0, sizeof(prev_real_filename_out));
796

                
797
    memset(target_path, 0, sizeof(target_path));
798
    g_snprintf(target_path, sizeof(target_path) - 1, "%s"G_DIR_SEPARATOR_S"%s", target_directory, collection_name);
799

                
800
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("File '%s' of collection '%s' is now being decoded to directory '%s'"), file->subject, collection_name, target_path);
801

                
802
    if (g_mkdir_with_parents(target_path, 0755) == -1) {
803
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Creation of folder '%s' failed, errno = %i, %s\n"), target_path, errno, strerror(errno));
804
    }
805

                
806
    // The yydecode code assumes all the resulting files will be saved in the current directory
807
    // so we need to change to the wanted directory before starting the decode
808
    if (g_chdir(target_path) == -1) {
809
        if (errmsg) {
810
            *errmsg = g_strdup_printf(_("Unable to navigate to folder '%s': %s"), target_path, strerror(errno));
811
        }
812
        *saved_errno = ENOENT;
813

                
814
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Unable to navigate to folder '%s': %s"), target_path, strerror(errno));
815

                
816
        return DECODER_RESULT_ERROR;
817
    }
818

                
819
#if 0 
820
    MD5Init(&md5_ctx);
821
#endif
822

                
823
    // Load all the individual parts
824
    for (i = 0; i < file->num_parts; i++) {
825
        struct decoded_file *list;
826

                
827
        memset(filename, 0, sizeof(filename) - 1);
828
        g_snprintf(filename, sizeof(filename) - 1, "%s%s%s.%i", temp_directory, G_DIR_SEPARATOR_S, file->tmp_filename, i + 1);
829

                
830
        if (!g_file_test(filename, G_FILE_TEST_IS_REGULAR | G_FILE_TEST_EXISTS)) {
831
            continue;
832
        }
833

                
834
        file_found = TRUE;
835

                
836
        if (try_to_use_new_decoder(plugin_data, filename, target_path, &ret, errmsg, &prev_file_fp, prev_filename_out, sizeof(prev_filename_out) - 1, prev_real_filename_out, sizeof(prev_real_filename_out) - 1, &md5_ctx)) {
837
#if 0 
838
g_print("prev_file_fp = %#x\n", prev_file_fp);
839
g_print("prev_real_filename_out = %s\n", prev_real_filename_out);
840
g_print("ret = %i\n", ret);
841
#endif
842
            *real_filename = g_strdup(prev_real_filename_out);
843
            new_decoder_used = TRUE;
844
            if (ret == DECODER_RESULT_ERROR) {
845
                g_print("Error during decode\n");
846
                if (errmsg && *errmsg) {
847
                    g_print("errmsg = %s\n", *errmsg);
848
                }
849

                
850
                break;
851
            } else if (ret == DECODER_RESULT_INCOMPLETE) {
852
                incomplete_file = TRUE;
853
            }
854

                
855
            continue;
856
        }
857

                
858
        if (decode(filename, NULL, &decoded_list, saved_errno, errmsg) != EXIT_SUCCESS) {
859
            g_print("Error during decode\n");
860
            if (errmsg && *errmsg) {
861
                g_print("errmsg = %s\n", *errmsg);
862
            }
863

                
864
            ret = DECODER_RESULT_ERROR;
865

                
866
            while (decoded_list) {
867
                list = decoded_list->next;
868

                
869
                if (decoded_list->handle) {
870
                    fclose(decoded_list->handle);
871
                    decoded_list->handle = NULL;
872
                }
873

                
874
                if (decoded_list->filename) free(decoded_list->filename);
875
                if (decoded_list->outname)  free(decoded_list->outname);
876
                if (decoded_list->status)   free(decoded_list->status);
877
                free(decoded_list);
878
                decoded_list = list;
879
            }
880
        }
881

                
882
        if (ret == DECODER_RESULT_ERROR) {
883
            break;
884
        }
885
    }
886

                
887
    if (new_decoder_used) {
888
#if 0 
889
        unsigned char md5_digest[16];
890
        int j;
891
#endif
892

                
893
        /* Close any file descriptors which may have left open */
894
        if (prev_file_fp) {
895
            if (fclose(prev_file_fp) != 0) {
896
                ret = DECODER_RESULT_ERROR;
897
                if (errmsg) {
898
                    *errmsg = g_strdup_printf(_("%s:%i Unable to close file '%s': %s"), __FILE__, __LINE__, prev_filename_out, strerror(errno));
899
                }
900
            }
901

                
902
            prev_file_fp = NULL;
903
        }
904
#if 0 
905
        MD5Final(md5_digest, &md5_ctx);
906
#endif
907

                
908
        memset(file->md5sum, 0, sizeof(file->md5sum));
909
#if 0 
910
        for (j = 0; j < 16; j++) {
911
            sprintf(file->md5sum + j * 2, "%02x", md5_digest[j]);
912
        }
913
#endif
914

                
915
        if (incomplete_file && ret != DECODER_RESULT_ERROR) {
916
            ret = DECODER_RESULT_INCOMPLETE;
917
        }
918
    } else {
919
        if (decoded_list) {
920
            *real_filename = g_strdup(decoded_list->outname);
921
            for(i = 0; i < decoded_list->total_parts; i++) {
922
                switch(decoded_list->status[i]) {
923
                    case part_missing:
924
                    case part_broken:
925
                        ret = DECODER_RESULT_INCOMPLETE;
926
                        break;
927

                
928
                    case part_intact:
929
                    case part_duplicated:
930
                        // don't do anything
931
                        break;
932
                }
933
            }
934

                
935
            while (decoded_list) {
936
                list = decoded_list->next;
937

                
938
                if (decoded_list->handle) {
939
                    fclose(decoded_list->handle);
940
                    decoded_list->handle = NULL;
941
                }
942

                
943
                if (decoded_list->filename) free(decoded_list->filename);
944
                if (decoded_list->outname)  free(decoded_list->outname);
945
                if (decoded_list->status)   free(decoded_list->status);
946
                free(decoded_list);
947
                decoded_list = list;
948
            }
949
        }
950
    }
951

                
952
    if (!file_found) {
953
        if (errmsg) {
954
            *errmsg = g_strdup(_("No data to decode"));
955
        }
956

                
957
        if (saved_errno) {
958
            *saved_errno = -1;
959
        }
960

                
961
        ret = DECODER_RESULT_NO_PARTS_AVAIL;
962
    }
963

                
964
    if (ret != DECODER_RESULT_ERROR) {
965
        char filename[1024];
966

                
967
        // Remove all the temp parts on success
968
        for (i = 0; i < file->num_parts; i++) {
969
            memset(&filename, 0, sizeof(filename));
970
            g_snprintf(filename, sizeof(filename) - 1, "%s%s%s.%i", temp_directory, G_DIR_SEPARATOR_S, file->tmp_filename, i + 1);
971
            g_unlink(filename);
972

                
973
        }
974
    }
975

                
976
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Decoding completed with return value %i"), ret);
977

                
978
    // Change the current directory back to another directory to prevent the NNTPGrab process
979
    // from 'claiming' the directory in which the decoded files were saved
980
    g_chdir(target_directory);
981

                
982
    return ret;
983
}