Statistics
| Revision:

root / trunk / plugins / decoder / decoder.c @ 1797

History | View | Annotate | Download (32.7 KB)

1
/* 
2
    Copyright (C) 2005-2010  Erik van Pienbroek
3

                
4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

                
9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

                
14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

                
19
#include 
20
#include 
21
#include 
22
#include 
23
#include 
24
#include 
25
#include 
26
#include 
27

                
28
#include "nntpgrab_plugin.h"
29
#include "yydecode.h"
30
#include "crc32.h"
31
#include "marshalers.h"
32

                
33
#include "config.h"
34

                
35
// yydecode.c
36
int decode(const char *inname, const char *forced_outname, struct decoded_file **decoded_list, int *saved_errno, char **errmsg);
37

                
38
static NNTPGrabDecoderRes
39
nntpgrab_plugin_decoder_decode_file(NGPlugin *plugin_data, const char *collection_name, NNTPFile *file, const char *temp_directory, const char *target_directory, int *saved_errno, char **errmsg);
40

                
41
void
42
nntpgrab_plugin_initialize(NGPlugin *plugin_data)
43
{
44
    ng_plugin_set_name(plugin_data, "Decoder");
45
    ng_plugin_set_version(plugin_data, PACKAGE_VERSION);
46
    ng_plugin_set_author(plugin_data, "Erik van Pienbroek");
47
    ng_plugin_set_url(plugin_data, "https://www.nntpgrab.nl");
48
    ng_plugin_set_description(plugin_data, "A plugin which is responsible for decoding yEnc and UU-encoded files");
49

                
50
    ng_plugin_register_function(plugin_data,
51
                                "decode_file",
52
                                NG_PLUGIN_FUNCTION(nntpgrab_plugin_decoder_decode_file),
53
                                ng_plugin_marshal_INT__STRING_POINTER_STRING_STRING_POINTER_POINTER,
54
                                G_TYPE_INT,
55
                                6, G_TYPE_STRING, G_TYPE_POINTER, G_TYPE_STRING,
56
                                   G_TYPE_STRING, G_TYPE_POINTER, G_TYPE_POINTER);
57
}
58

                
59
ngboolean
60
nntpgrab_plugin_load(NGPlugin *plugin_data, char **errmsg)
61
{
62
    return TRUE;
63
}
64

                
65
ngboolean
66
nntpgrab_plugin_can_unload(NGPlugin *plugin_data, char **reason)
67
{
68
    return TRUE;
69
}
70

                
71
void
72
nntpgrab_plugin_unload(NGPlugin *plugin_data)
73
{
74
}
75

                
76
void
77
nntpgrab_plugin_destroy(NGPlugin *plugin_data)
78
{
79
}
80

                
81
int
82
nntpgrab_plugin_get_version(void)
83
{
84
    return NNTPGRAB_PLUGIN_API_VERSION;
85
}
86

                
87
/* MD5 code copied from plugins/jsonrpc/mongoose.c */
88
typedef struct MD5Context {
89
        guint32         buf[4];
90
        guint32         bits[2];
91
        unsigned char   in[64];
92
} MD5_CTX;
93

                
94
#if __BYTE_ORDER == 1234
95
#define byteReverse(buf, len)        /* Nothing */
96
#else
97
/* 
98
 * Note: this code is harmless on little-endian machines.
99
 */
100
static void
101
byteReverse(unsigned char *buf, unsigned longs)
102
{
103
        guint32 t;
104
        do {
105
                t = (guint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
106
                        ((unsigned) buf[1] << 8 | buf[0]);
107
                *(guint32 *) buf = t;
108
                buf += 4;
109
        } while (--longs);
110
}
111
#endif /* __BYTE_ORDER */
112

                
113
/* The four core functions - F1 is optimized somewhat */

/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))

/*
 * This is the central step in the MD5 algorithm:
 * add f(x, y, z) and the data word to w, rotate w left by s bits
 * (w must be a 32-bit unsigned value, 0 < s < 32), then add x.
 */
#define MD5STEP(f, w, x, y, z, data, s) \
( w += f(x, y, z) + (data),  w = (w << (s)) | (w >> (32 - (s))),  w += x )
124

                
125
/* 
126
 * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
127
 * initialization constants.
128
 */
129
static void
130
MD5Init(MD5_CTX *ctx)
131
{
132
        ctx->buf[0] = 0x67452301;
133
        ctx->buf[1] = 0xefcdab89;
134
        ctx->buf[2] = 0x98badcfe;
135
        ctx->buf[3] = 0x10325476;
136

                
137
        ctx->bits[0] = 0;
138
        ctx->bits[1] = 0;
139

                
140
        memset(ctx->in, 0, sizeof(ctx->in));
141
}
142

                
143
/* 
144
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
145
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
146
 * the data and converts bytes into longwords for this routine.
147
 */
148
static void
149
MD5Transform(guint32 buf[4], guint32 const in[16])
150
{
151
        register guint32 a, b, c, d;
152

                
153
        a = buf[0];
154
        b = buf[1];
155
        c = buf[2];
156
        d = buf[3];
157

                
158
        MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
159
        MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
160
        MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
161
        MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
162
        MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
163
        MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
164
        MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
165
        MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
166
        MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
167
        MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
168
        MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
169
        MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
170
        MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
171
        MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
172
        MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
173
        MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
174

                
175
        MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
176
        MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
177
        MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
178
        MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
179
        MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
180
        MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
181
        MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
182
        MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
183
        MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
184
        MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
185
        MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
186
        MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
187
        MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
188
        MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
189
        MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
190
        MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
191

                
192
        MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
193
        MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
194
        MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
195
        MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
196
        MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
197
        MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
198
        MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
199
        MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
200
        MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
201
        MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
202
        MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
203
        MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
204
        MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
205
        MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
206
        MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
207
        MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
208

                
209
        MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
210
        MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
211
        MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
212
        MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
213
        MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
214
        MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
215
        MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
216
        MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
217
        MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
218
        MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
219
        MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
220
        MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
221
        MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
222
        MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
223
        MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
224
        MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
225

                
226
        buf[0] += a;
227
        buf[1] += b;
228
        buf[2] += c;
229
        buf[3] += d;
230
}
231

                
232
/* 
233
 * Update context to reflect the concatenation of another buffer full
234
 * of bytes.
235
 */
236
static void
237
MD5Update(MD5_CTX *ctx, unsigned char const *buf, unsigned len)
238
{
239
        guint32 t;
240

                
241
        if (len == 0) {
242
            return;
243
        }
244

                
245
        /* Update bitcount */
246

                
247
        t = ctx->bits[0];
248
        if ((ctx->bits[0] = t + ((guint32) len << 3)) < t)
249
                ctx->bits[1]++;                /* Carry from low to high */
250
        ctx->bits[1] += len >> 29;
251

                
252
        t = (t >> 3) & 0x3f;        /* Bytes already in shsInfo->data */
253

                
254
        /* Handle any leading odd-sized chunks */
255

                
256
        if (t) {
257
                unsigned char *p = (unsigned char *) ctx->in + t;
258

                
259
                t = 64 - t;
260
                if (len < t) {
261
                        memcpy(p, buf, len);
262
                        return;
263
                }
264
                memcpy(p, buf, t);
265
                byteReverse(ctx->in, 16);
266
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
267
                buf += t;
268
                len -= t;
269
        }
270
        /* Process data in 64-byte chunks */
271

                
272
        while (len >= 64) {
273
                memcpy(ctx->in, buf, 64);
274
                byteReverse(ctx->in, 16);
275
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
276
                buf += 64;
277
                len -= 64;
278
        }
279

                
280
        /* Handle any remaining bytes of data. */
281

                
282
        memcpy(ctx->in, buf, len);
283
}
284

                
285
/* 
286
 * Final wrapup - pad to 64-byte boundary with the bit pattern
287
 * 1 0* (64-bit count of bits processed, MSB-first)
288
 */
289
static void
290
MD5Final(unsigned char digest[16], MD5_CTX *ctx)
291
{
292
        unsigned count;
293
        unsigned char *p;
294

                
295
        /* Compute number of bytes mod 64 */
296
        count = (ctx->bits[0] >> 3) & 0x3F;
297

                
298
        /* Set the first char of padding to 0x80. This is safe since there is 
299
           always at least one byte free */
300
        p = ctx->in + count;
301
        *p++ = 0x80;
302

                
303
        /* Bytes of padding needed to make 64 bytes */
304
        count = 64 - 1 - count;
305

                
306
        /* Pad out to 56 mod 64 */
307
        if (count < 8) {
308
                /* Two lots of padding: Pad the first block to 64 bytes */
309
                memset(p, 0, count);
310
                byteReverse(ctx->in, 16);
311
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
312

                
313
                /* Now fill the next block with 56 bytes */
314
                memset(ctx->in, 0, 56);
315
        } else {
316
                /* Pad block to 56 bytes */
317
                memset(p, 0, count - 8);
318
        }
319
        byteReverse(ctx->in, 14);
320

                
321
        /* Append length in bits and transform */
322
        ((guint32 *) ctx->in)[14] = ctx->bits[0];
323
        ((guint32 *) ctx->in)[15] = ctx->bits[1];
324

                
325
        MD5Transform(ctx->buf, (guint32 *) ctx->in);
326
        byteReverse((unsigned char *) ctx->buf, 4);
327
        memcpy(digest, ctx->buf, 16);
328
        memset((char *) ctx, 0, sizeof(ctx));        /* In case it's sensitive */
329
}
330

                
331
/*
 * Remove one trailing '\n' and then one trailing '\r' from 'line', in place.
 *
 * NULL and empty strings are left alone; the length is tracked explicitly
 * so the byte in front of the string is never inspected or modified.
 */
static void
strip_newline(char *line)
{
    size_t len;

    if (line == NULL) {
        return;
    }

    len = strlen(line);

    if (len > 0 && line[len - 1] == '\n') {
        line[--len] = '\0';
    }

    if (len > 0 && line[len - 1] == '\r') {
        line[--len] = '\0';
    }
}
342

                
343
static gboolean
344
parse_ybegin_header(const char *line, int *part, int *num_lines, guint64 *size, char *name, int name_length)
345
{
346
    int i;
347
    int len;
348

                
349
    /* line must be in one of the following notations: */
350
    /* =ybegin line=128 size=100000 name=abc.rar */
351
    /* =ybegin part=1 line=128 size=50000000 name=abc.rar */
352

                
353
    g_return_val_if_fail(line != NULL, FALSE);
354
    g_return_val_if_fail(part != NULL, FALSE);
355
    g_return_val_if_fail(line != NULL, FALSE);
356
    g_return_val_if_fail(size != NULL, FALSE);
357
    g_return_val_if_fail(name != NULL, FALSE);
358
    g_return_val_if_fail(name_length > 0, FALSE);
359

                
360
    *part = -1;
361
    *num_lines = -1;
362
    *size = -1;
363
    memset(name, 0, name_length);
364

                
365
    len = strlen(line);
366
    for (i = 0; i < len; i++) {
367
        if (!strncmp(line + i, "part=", 5)) {
368
            *part = atoi(line + i + 5);
369
        } else if (!strncmp(line + i, "line=", 5)) {
370
            *num_lines = atoi(line + i + 5);
371
        } else if (!strncmp(line + i, "size=", 5)) {
372
            *size = g_ascii_strtoull (line + i + 5, NULL, 10);
373
        } else if (!strncmp(line + i, "name=", 5)) {
374
            strcpy(name, line + i + 5);
375
            strip_newline(name);
376
        }
377
    }
378

                
379
#if 0 
380
g_print("part = %i\n", *part);
381
g_print("num_lines = %i\n", *num_lines);
382
g_print("size = %li\n", *size);
383
g_print("filename = %s\n\n", name);
384
#endif
385

                
386
    if (*num_lines > 0 && *size > 0 && strlen(name) > 0) {  /* NOTE: The part field is optional */
387
        return TRUE;
388
    } else {
389
        return FALSE;
390
    }
391
}
392

                
393
static gboolean
394
parse_ypart_header(const char *line, guint64 *begin, guint64 *end)
395
{
396
    int i;
397
    int len;
398

                
399
    /* line must be in the following notation: */
400
    /* =ypart begin=1 end=640000 */
401

                
402
    g_return_val_if_fail(line != NULL, FALSE);
403
    g_return_val_if_fail(begin != NULL, FALSE);
404
    g_return_val_if_fail(end != NULL, FALSE);
405

                
406
    *begin = -1;
407
    *end = -1;
408

                
409
    len = strlen(line);
410
    for (i = 0; i < len; i++) {
411
        if (!strncmp(line + i, "begin=", 6)) {
412
            *begin = g_ascii_strtoull(line + i + 6, NULL, 10);
413
        } else if (!strncmp(line + i, "end=", 4)) {
414
            *end = g_ascii_strtoull(line + i + 4, NULL, 10);
415
        }
416
    }
417

                
418
#if 0 
419
g_print("begin = %li\n", *begin);
420
g_print("end = %li\n", *end);
421
#endif
422

                
423
    if (*begin >= 0 && *end > 0) {
424
        return TRUE;
425
    } else {
426
        return FALSE;
427
    }
428
}
429

                
430
static gboolean
431
try_to_use_new_decoder(NGPlugin *plugin_data, const char *filename_in, const char *target_directory, NNTPGrabDecoderRes *decoder_result, char **errmsg, FILE **prev_file_out_fp, char *prev_filename_out, int prev_filename_out_length, char *prev_real_filename_out, int prev_real_filename_out_length, MD5_CTX *md5_ctx)
432
{
433
    int part = -1;
434
    int num_lines = -1;
435
    guint64 size = -1;
436
    char filename_out[1024];
437
    guint64 begin = -1;
438
    guint64 end = -1;
439
    FILE *fp;
440
    gboolean escape_char_detected = FALSE;
441
    char path[4096];
442
    char buf[1024];
443
    struct crc32_ctx crc32_context;
444

                
445
    g_return_val_if_fail(filename_in != NULL, FALSE);
446
    g_return_val_if_fail(target_directory != NULL, FALSE);
447
    g_return_val_if_fail(decoder_result != NULL, FALSE);
448
    /* NOTE: errmsg MIGHT be NULL */
449
    g_return_val_if_fail(prev_file_out_fp != NULL, FALSE);
450
    g_return_val_if_fail(prev_filename_out != NULL, FALSE);
451
    g_return_val_if_fail(prev_filename_out_length > 0, FALSE);
452

                
453
    *decoder_result = DECODER_RESULT_COMPLETE;
454

                
455
    /* Try to read the first line from the given filename to detected if this is an yEnc encoded file */
456
    fp = g_fopen(filename_in, "rb");
457

                
458
    if (!fp) {
459
        *decoder_result = DECODER_RESULT_ERROR;
460
        if (errmsg) {
461
            *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to open file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
462
        }
463

                
464
        return TRUE;
465
    }
466

                
467
    /* Try to read the header from this file */
468
    do {
469
        if (!fgets(buf, sizeof(buf) - 1, fp)) {
470
            if (feof(fp)) {
471
                /* The file doesn't contain any data at all */
472
                *decoder_result = DECODER_RESULT_INCOMPLETE;
473
                ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i File %s is empty\n", __LINE__, filename_in);
474
            } else {
475
                *decoder_result = DECODER_RESULT_ERROR;
476
                if (errmsg) {
477
                    *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read the first line from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
478
                }
479
            }
480

                
481
            fclose(fp);
482

                
483
            return TRUE;
484
        }
485

                
486
        strip_newline(buf);
487

                
488
        if (strlen(buf) == 0) {
489
            continue;
490
        }
491

                
492
        if (strncmp(buf, "=ybegin", 7) != 0) {
493
            /* Other encoding found. Process this with a different decoder */
494
            fclose(fp);
495

                
496
            return FALSE;
497
        }
498

                
499
        break;
500
    } while (TRUE);
501

                
502
    /* yEnc encoded file found! */
503

                
504
    /* The header is now in buf. Try to extract information from this header */
505
    memset(filename_out, 0, sizeof(filename_out));
506
    if (!parse_ybegin_header(buf, &part, &num_lines, &size, filename_out, sizeof(filename_out) - 1)) {
507
        /* Header incomplete. Ignore this file */
508
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i yBegin-header incomplete, buf = %s\n", __LINE__, buf);
509
        *decoder_result = DECODER_RESULT_INCOMPLETE;
510
        fclose(fp);
511

                
512
        return TRUE;
513
    }
514

                
515
    /* Translate the filename to UTF-8 if it isn't already */
516
    if (!g_utf8_validate(filename_out, -1, NULL)) {
517
        /* Filename is probably in Windows-1252 charset */
518
        char *tmp = g_convert(filename_out, -1, "utf-8", "windows-1252", NULL, NULL, NULL);
519
        if (!tmp) {
520
            *decoder_result = DECODER_RESULT_ERROR;
521
            if (errmsg) {
522
                *errmsg = g_strdup_printf(_("%s:%i The filename '%s' is using an unknown character set. Decoding cannot continue"), __FILE__, __LINE__, filename_out);
523
            }
524

                
525
            fclose(fp);
526

                
527
            return TRUE;
528
        }
529

                
530
        memset(filename_out, 0, sizeof(filename_out));
531
        strncpy(filename_out, tmp, sizeof(filename_out) - 1);
532
        g_free(tmp);
533
    }
534

                
535
    /* If this is a multipart file, another header is sent */
536
    if (part != -1) {
537
        if (!fgets(buf, sizeof(buf) - 1, fp)) {
538
            *decoder_result = DECODER_RESULT_ERROR;
539
            if (errmsg) {
540
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read the first line from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
541
            }
542

                
543
            fclose(fp);
544

                
545
            return TRUE;
546
        }
547

                
548
        if (!parse_ypart_header(buf, &begin, &end)) {
549
            /* Header incomplete. Ignore this file */
550
            ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i Part-header incomplete, buf = %s\n", __LINE__, buf);
551
            *decoder_result = DECODER_RESULT_INCOMPLETE;
552
            fclose(fp);
553

                
554
            return TRUE;
555
        }
556
    }
557

                
558
    /* Open the output file (if it wasn't already open) */
559
    memset(path, 0, sizeof(path));
560
    g_snprintf(path, sizeof(path) - 1, "%s%s%s", target_directory, G_DIR_SEPARATOR_S, filename_out);
561

                
562
    if (strcmp(filename_out, prev_filename_out) != 0) {
563
        char path_orig[4096];
564
        int suffix = 1;
565
        char *filename_tmp;
566

                
567
        /* Output file wasn't open yet. Open it now */
568
        if (*prev_file_out_fp != NULL) {
569
            if (fclose(*prev_file_out_fp) != 0) {
570
                *decoder_result = DECODER_RESULT_ERROR;
571
                if (errmsg) {
572
                    *errmsg = g_strdup_printf(_("%s:%i Unable to close file '%s': %s"), __FILE__, __LINE__, prev_filename_out, strerror(errno));
573
                }
574
                *prev_file_out_fp = NULL;
575
                return TRUE;
576
            }
577
        }
578

                
579
        /* If there's already a file at 'path', append a suffix to it and try again */
580
        strcpy(path_orig, path);
581
        while (g_file_test(path, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) {
582
            g_snprintf(path, sizeof(path) - 1, "%s.%i", path_orig, suffix);
583
            suffix++;
584
        }
585

                
586
        *prev_file_out_fp = g_fopen(path, "wb");
587

                
588
        if (!(*prev_file_out_fp)) {
589
            *decoder_result = DECODER_RESULT_ERROR;
590
            if (errmsg) {
591
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to open file for writing '%s': %s"), __FILE__, __LINE__, path, strerror(errno));
592
            }
593

                
594
            fclose(fp);
595

                
596
            return TRUE;
597
        }
598

                
599
        strncpy(prev_filename_out, filename_out, prev_filename_out_length);
600
        filename_tmp = g_path_get_basename(path);
601
        strncpy(prev_real_filename_out, filename_tmp, prev_real_filename_out_length);
602
        g_free(filename_tmp);
603
    }
604

                
605
    g_return_val_if_fail(*prev_file_out_fp != NULL, TRUE);
606

                
607
    /* Seek to the right position in the output file if we're decoding a multi-part file */
608
    if (begin > 0) {
609
        (void) fseek(*prev_file_out_fp, begin - 1, SEEK_SET);
610
    }
611

                
612
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, "Now starting decoding file at position %"G_GUINT64_FORMAT"\n", begin);
613

                
614
    crc32_init_ctx(&crc32_context);
615

                
616
    /* Start the real decoding */
617
    do {
618
        int len = fread(buf, 1, sizeof(buf), fp);
619
        int i;
620
        char buf_out[1024];
621
        int len_out = 0;
622

                
623
        memset(buf_out, 0, sizeof(buf_out));
624

                
625
        if (len == 0) {
626
            if (feof(fp)) {
627
                /* All data is read */
628
                break;
629
            }
630
        } else if (len < 0) {
631
            /* Some error occured while reading */
632
            *decoder_result = DECODER_RESULT_ERROR;
633
            if (errmsg) {
634
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
635
            }
636

                
637
            fclose(fp);
638

                
639
            return TRUE;
640
        }
641

                
642
        for (i = 0; i < len; i++) {
643
            unsigned char ch = buf[i];
644

                
645
            if (ch == '\r' || ch == '\n') {
646
                /* Ignore newlines */
647
                continue;
648
            } else if (ch == '=') {
649
                /* Escape character detected. Un-escape the next byte */
650
                escape_char_detected = TRUE;
651
            } else if (escape_char_detected && ch == 'y') {     // =y detected. This is probably the end of the part
652
                char buf_new[1024];
653
                int buf_new_length;
654
                char *ptr;
655
                guint32 crc32_expected = 0;
656

                
657
                memset(&buf_new, 0, sizeof(buf_new));
658
                buf_new_length = sizeof(buf) - i - 1;
659

                
660
                memcpy(buf_new, buf + i + 1, buf_new_length);
661

                
662
                /* Try to put more data in the buffer */
663
                if (fread(buf_new + buf_new_length, 1, sizeof(buf_new) - buf_new_length, fp) < 0) {
664
                    *decoder_result = DECODER_RESULT_ERROR;
665
                    if (errmsg) {
666
                        *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
667
                    }
668

                
669
                    fclose(fp);
670

                
671
                    return TRUE;
672
                }
673

                
674
                /* Do we really have an =yend ? */
675
                if (strncmp(buf_new, "end", 3) != 0) {
676
                    *decoder_result = DECODER_RESULT_INCOMPLETE;
677
                }
678

                
679
                /* Flush the buffer */
680
                if (len_out > 0 && fwrite(buf_out, 1, len_out, *prev_file_out_fp) <= 0) {
681
                    *decoder_result = DECODER_RESULT_ERROR;
682
                    if (errmsg) {
683
                        *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to write to file '%s': %s"), __FILE__, __LINE__, filename_out, strerror(errno));
684
                    }
685
                }
686

                
687
                fclose(fp);
688

                
689
#if 0 
690
                /* Update the MD5 */
691
                MD5Update(md5_ctx, (unsigned char*) buf_out, len_out);
692
#endif
693

                
694
                /* Calculate the CRC value of this part */
695
                crc32_process_bytes(buf_out, len_out, &crc32_context);
696

                
697
                /* Verify the CRC value of this part */
698
                if ((ptr = strstr(buf_new, "pcrc32="))) {
699
                    char crc[9];
700
                    memcpy(crc, ptr + 7, 8);
701
                    crc[8] = '\0';
702

                
703
                    crc32_expected = atocrc32(crc);
704
                } else if ((ptr = strstr(buf_new, "crc32="))) {
705
                    char crc[9];
706
                    memcpy(crc, ptr + 6, 8);
707
                    crc[8] = '\0';
708

                
709
                    crc32_expected = atocrc32(crc);
710
                } else {
711
                    /* No CRC32 value found. Ignore verification */
712
                    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i No CRC32 value found in file %s\n", __LINE__, filename_in);
713
                }
714

                
715
                if (crc32_expected != 0 && crc32_read_ctx(&crc32_context) != crc32_expected) {
716
                    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, "Part CRC32 error for file %s -- got 0x%08x, should be 0x%08x\n", filename_in, crc32_read_ctx(&crc32_context), crc32_expected);
717
                    *decoder_result = DECODER_RESULT_INCOMPLETE;
718
                }
719

                
720
                crc32_finish_ctx(&crc32_context);
721

                
722
                return TRUE;
723
            } else {
724
                ch = ch - 42 - (escape_char_detected ? 64 : 0);
725
                if (ch < 0) {
726
                    ch += 256;
727
                }
728

                
729
                buf_out[len_out++] = ch;
730
                g_return_val_if_fail(len_out <= sizeof(buf_out), TRUE);
731

                
732
                escape_char_detected = FALSE;
733
            }
734
        }
735

                
736
        /* Calculate the CRC and MD5 values of this part */
737
        crc32_process_bytes(buf_out, len_out, &crc32_context);
738
#if 0 
739
        MD5Update(md5_ctx, (unsigned char*) buf_out, len_out);
740
#endif
741

                
742
        /* Flush the buffer */
743
        if (fwrite(buf_out, 1, len_out, *prev_file_out_fp) <= 0) {
744
            *decoder_result = DECODER_RESULT_ERROR;
745
            if (errmsg) {
746
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to write to file '%s': %s"), __FILE__, __LINE__, filename_out, strerror(errno));
747
            }
748

                
749
            fclose(fp);
750

                
751
            return TRUE;
752
        }
753
    } while (TRUE);
754

                
755
    fclose(fp);
756

                
757
    /* Unexpected end of file detected, file must be incomplete */
758
    *decoder_result = DECODER_RESULT_INCOMPLETE;
759

                
760
    return TRUE;
761
}
762

                
763
static NNTPGrabDecoderRes
764
nntpgrab_plugin_decoder_decode_file(NGPlugin *plugin_data, const char *collection_name, NNTPFile *file, const char *temp_directory, const char *target_directory, int *saved_errno, char **errmsg)
765
{
766
    int i;
767
#if 0 
768
    GList *lines = NULL;
769
#endif
770
    NNTPGrabDecoderRes ret = DECODER_RESULT_COMPLETE;
771
    gboolean file_found = FALSE;
772
    struct decoded_file *decoded_list = NULL;
773
    struct decoded_file *list;
774
    gboolean new_decoder_used = FALSE;
775
    char filename[1024];
776
    gboolean incomplete_file = FALSE;
777
    char target_path[4096];
778
    char prev_filename_out[1024];
779
    char prev_real_filename_out[1024];
780
    FILE *prev_file_fp = NULL;
781
    MD5_CTX md5_ctx;
782

                
783
    g_return_val_if_fail(collection_name != NULL, DECODER_RESULT_ERROR);
784
    g_return_val_if_fail(file != NULL, DECODER_RESULT_ERROR);
785
    g_return_val_if_fail(temp_directory != NULL, DECODER_RESULT_ERROR);
786
    g_return_val_if_fail(target_directory != NULL, DECODER_RESULT_ERROR);
787
    g_return_val_if_fail(saved_errno != NULL, DECODER_RESULT_ERROR);
788

                
789
    *saved_errno = 0;
790

                
791
    memset(prev_filename_out, 0, sizeof(prev_filename_out));
792
    memset(prev_real_filename_out, 0, sizeof(prev_real_filename_out));
793

                
794
    memset(target_path, 0, sizeof(target_path));
795
    g_snprintf(target_path, sizeof(target_path) - 1, "%s"G_DIR_SEPARATOR_S"%s", target_directory, collection_name);
796

                
797
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("File '%s' of collection '%s' is now being decoded to directory '%s'"), file->subject, collection_name, target_path);
798

                
799
    if (g_mkdir_with_parents(target_path, 0755) == -1) {
800
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Creation of folder '%s' failed, errno = %i, %s\n"), target_path, errno, strerror(errno));
801
    }
802

                
803
    // The yydecode code assumes all the resulting files will be saved in the current directory
804
    // so we need to change to the wanted directory before starting the decode
805
    if (g_chdir(target_path) == -1) {
806
        if (errmsg) {
807
            *errmsg = g_strdup_printf(_("Unable to navigate to folder '%s': %s"), target_path, strerror(errno));
808
        }
809
        *saved_errno = ENOENT;
810

                
811
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Unable to navigate to folder '%s': %s"), target_path, strerror(errno));
812

                
813
        return DECODER_RESULT_ERROR;
814
    }
815

                
816
    MD5Init(&md5_ctx);
817

                
818
    // Load all the individual parts
819
    for (i = 0; i < file->num_parts; i++) {
820
        struct decoded_file *list;
821

                
822
        memset(filename, 0, sizeof(filename) - 1);
823
        g_snprintf(filename, sizeof(filename) - 1, "%s%s%s.%i", temp_directory, G_DIR_SEPARATOR_S, file->tmp_filename, i + 1);
824

                
825
        if (!g_file_test(filename, G_FILE_TEST_IS_REGULAR | G_FILE_TEST_EXISTS)) {
826
            continue;
827
        }
828

                
829
        file_found = TRUE;
830

                
831
        if (try_to_use_new_decoder(plugin_data, filename, target_path, &ret, errmsg, &prev_file_fp, prev_filename_out, sizeof(prev_filename_out) - 1, prev_real_filename_out, sizeof(prev_real_filename_out) - 1, &md5_ctx)) {
832
#if 0 
833
g_print("prev_file_fp = %#x\n", prev_file_fp);
834
g_print("prev_real_filename_out = %s\n", prev_real_filename_out);
835
g_print("ret = %i\n", ret);
836
#endif
837
            strncpy((char*) file->real_filename, prev_real_filename_out, sizeof(file->real_filename) - 1);
838
            new_decoder_used = TRUE;
839
            if (ret == DECODER_RESULT_ERROR) {
840
                g_print("Error during decode\n");
841
                if (errmsg && *errmsg) {
842
                    g_print("errmsg = %s\n", *errmsg);
843
                }
844

                
845
                break;
846
            } else if (ret == DECODER_RESULT_INCOMPLETE) {
847
                incomplete_file = TRUE;
848
            }
849

                
850
            continue;
851
        }
852

                
853
        if (decode(filename, NULL, &decoded_list, saved_errno, errmsg) != EXIT_SUCCESS) {
854
            g_print("Error during decode\n");
855
            if (errmsg && *errmsg) {
856
                g_print("errmsg = %s\n", *errmsg);
857
            }
858

                
859
            ret = DECODER_RESULT_ERROR;
860

                
861
            while (decoded_list) {
862
                list = decoded_list->next;
863

                
864
                if (decoded_list->handle) {
865
                    fclose(decoded_list->handle);
866
                    decoded_list->handle = NULL;
867
                }
868

                
869
                if (decoded_list->filename) free(decoded_list->filename);
870
                if (decoded_list->outname)  free(decoded_list->outname);
871
                if (decoded_list->status)   free(decoded_list->status);
872
                free(decoded_list);
873
                decoded_list = list;
874
            }
875
        }
876

                
877
        if (ret == DECODER_RESULT_ERROR) {
878
            break;
879
        }
880
    }
881

                
882
    if (new_decoder_used) {
883
        unsigned char md5_digest[16];
884
        int j;
885

                
886
        /* Close any file descriptors which may have left open */
887
        if (prev_file_fp) {
888
            if (fclose(prev_file_fp) != 0) {
889
                ret = DECODER_RESULT_ERROR;
890
                if (errmsg) {
891
                    *errmsg = g_strdup_printf(_("%s:%i Unable to close file '%s': %s"), __FILE__, __LINE__, prev_filename_out, strerror(errno));
892
                }
893
            }
894

                
895
            prev_file_fp = NULL;
896
        }
897

                
898
        MD5Final(md5_digest, &md5_ctx);
899

                
900
        memset(file->md5sum, 0, sizeof(file->md5sum));
901
        for (j = 0; j < 16; j++) {
902
            sprintf(file->md5sum + j * 2, "%02x", md5_digest[j]);
903
        }
904

                
905
        if (incomplete_file && ret != DECODER_RESULT_ERROR) {
906
            ret = DECODER_RESULT_INCOMPLETE;
907
        }
908
    } else {
909
        if (decoded_list) {
910
            strncpy((char*) file->real_filename, decoded_list->outname, sizeof(file->real_filename) - 1);
911
            for(i = 0; i < decoded_list->total_parts; i++) {
912
                switch(decoded_list->status[i]) {
913
                    case part_missing:
914
                    case part_broken:
915
                        ret = DECODER_RESULT_INCOMPLETE;
916
                        break;
917

                
918
                    case part_intact:
919
                    case part_duplicated:
920
                        // don't do anything
921
                        break;
922
                }
923
            }
924

                
925
            while (decoded_list) {
926
                list = decoded_list->next;
927

                
928
                if (decoded_list->handle) {
929
                    fclose(decoded_list->handle);
930
                    decoded_list->handle = NULL;
931
                }
932

                
933
                if (decoded_list->filename) free(decoded_list->filename);
934
                if (decoded_list->outname)  free(decoded_list->outname);
935
                if (decoded_list->status)   free(decoded_list->status);
936
                free(decoded_list);
937
                decoded_list = list;
938
            }
939
        }
940
    }
941

                
942
    if (!file_found) {
943
        if (errmsg) {
944
            *errmsg = g_strdup(_("No data to decode"));
945
        }
946

                
947
        if (saved_errno) {
948
            *saved_errno = -1;
949
        }
950

                
951
        ret = DECODER_RESULT_NO_PARTS_AVAIL;
952
    }
953

                
954
    if (ret != DECODER_RESULT_ERROR) {
955
        char filename[1024];
956

                
957
        // Remove all the temp parts on success
958
        for (i = 0; i < file->num_parts; i++) {
959
            memset(&filename, 0, sizeof(filename));
960
            g_snprintf(filename, sizeof(filename) - 1, "%s%s%s.%i", temp_directory, G_DIR_SEPARATOR_S, file->tmp_filename, i + 1);
961
            g_unlink(filename);
962

                
963
        }
964
    }
965

                
966
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Decoding completed with return value %i"), ret);
967

                
968
    // Change the current directory back to another directory to prevent the NNTPGrab process
969
    // from 'claiming' the directory in which the decoded files were saved
970
    g_chdir(target_directory);
971

                
972
    return ret;
973
}