Statistics
| Revision:

root / trunk / plugins / decoder / decoder.c @ 1914

History | View | Annotate | Download (33 KB)

1
/* 
2
    Copyright (C) 2005-2010  Erik van Pienbroek
3

                
4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

                
9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

                
14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

                
19
#include 
20
#include 
21
#include 
22
#include 
23
#include 
24
#include 
25
#include 
26
#include 
27

                
28
#include "nntpgrab_plugin.h"
29
#include "collections.h"
30
#include "yydecode.h"
31
#include "crc32.h"
32
#include "marshalers.h"
33

                
34
#include "config.h"
35

                
36
// yydecode.c
37
int decode(const char *inname, const char *forced_outname, struct decoded_file **decoded_list, int *saved_errno, char **errmsg);
38

                
39
static NNTPGrabDecoderRes
40
nntpgrab_plugin_decoder_decode_file(NGPlugin *plugin_data, const char *collection_name, NNTPFile *file, const char *temp_directory, const char *target_directory, char **real_filename, int *saved_errno, char **errmsg);
41

                
42
void
43
nntpgrab_plugin_initialize(NGPlugin *plugin_data)
44
{
45
    ng_plugin_set_name(plugin_data, "Decoder");
46
    ng_plugin_set_version(plugin_data, PACKAGE_VERSION);
47
    ng_plugin_set_author(plugin_data, "Erik van Pienbroek");
48
    ng_plugin_set_url(plugin_data, "https://www.nntpgrab.nl");
49
    ng_plugin_set_description(plugin_data, "A plugin which is responsible for decoding yEnc and UU-encoded files");
50

                
51
    ng_plugin_register_function(plugin_data,
52
                                "decode_file",
53
                                NG_PLUGIN_FUNCTION(nntpgrab_plugin_decoder_decode_file),
54
                                ng_plugin_marshal_INT__STRING_POINTER_STRING_STRING_POINTER_POINTER_POINTER,
55
                                G_TYPE_INT,
56
                                7, G_TYPE_STRING, G_TYPE_POINTER, G_TYPE_STRING,
57
                                   G_TYPE_STRING, G_TYPE_POINTER, G_TYPE_POINTER, G_TYPE_POINTER);
58
}
59

                
60
ngboolean
61
nntpgrab_plugin_load(NGPlugin *plugin_data, char **errmsg)
62
{
63
    return TRUE;
64
}
65

                
66
ngboolean
67
nntpgrab_plugin_can_unload(NGPlugin *plugin_data, char **reason)
68
{
69
    return TRUE;
70
}
71

                
72
void
73
nntpgrab_plugin_unload(NGPlugin *plugin_data)
74
{
75
}
76

                
77
void
78
nntpgrab_plugin_destroy(NGPlugin *plugin_data)
79
{
80
}
81

                
82
int
83
nntpgrab_plugin_get_version(void)
84
{
85
    return NNTPGRAB_PLUGIN_API_VERSION;
86
}
87

                
88
NGVariant *
89
nntpgrab_plugin_call_plugin_method(NGPlugin *plugin_data, const char *method, NGVariant *parameters, char **errmsg)
90
{
91
    return NULL;
92
}
93

                
94
/* MD5 code copied from plugins/jsonrpc/mongoose.c */
95
typedef struct MD5Context {
96
        guint32         buf[4];
97
        guint32         bits[2];
98
        unsigned char   in[64];
99
} MD5_CTX;
100

                
101
#if 0 
102
#if __BYTE_ORDER == 1234
103
#define byteReverse(buf, len)        /* Nothing */
104
#else
105
/* 
106
 * Note: this code is harmless on little-endian machines.
107
 */
108
static void
109
byteReverse(unsigned char *buf, unsigned longs)
110
{
111
        guint32 t;
112
        do {
113
                t = (guint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
114
                        ((unsigned) buf[1] << 8 | buf[0]);
115
                *(guint32 *) buf = t;
116
                buf += 4;
117
        } while (--longs);
118
}
119
#endif /* __BYTE_ORDER */
120

                
121
/* The four core functions - F1 is optimized somewhat */

/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))

/*
 * This is the central step in the MD5 algorithm: add the round function
 * and the data word to w, rotate w left by s bits, then add x.
 * w must be a 32-bit unsigned lvalue and s must be in the range 1..31.
 */
#define MD5STEP(f, w, x, y, z, data, s) \
        ( w += f(x, y, z) + data,  w = (w << (s)) | (w >> (32 - (s))),  w += x )
132

                
133
/* 
134
 * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
135
 * initialization constants.
136
 */
137
static void
138
MD5Init(MD5_CTX *ctx)
139
{
140
        ctx->buf[0] = 0x67452301;
141
        ctx->buf[1] = 0xefcdab89;
142
        ctx->buf[2] = 0x98badcfe;
143
        ctx->buf[3] = 0x10325476;
144

                
145
        ctx->bits[0] = 0;
146
        ctx->bits[1] = 0;
147

                
148
        memset(ctx->in, 0, sizeof(ctx->in));
149
}
150

                
151
/* 
152
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
153
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
154
 * the data and converts bytes into longwords for this routine.
155
 */
156
static void
157
MD5Transform(guint32 buf[4], guint32 const in[16])
158
{
159
        register guint32 a, b, c, d;
160

                
161
        a = buf[0];
162
        b = buf[1];
163
        c = buf[2];
164
        d = buf[3];
165

                
166
        MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
167
        MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
168
        MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
169
        MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
170
        MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
171
        MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
172
        MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
173
        MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
174
        MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
175
        MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
176
        MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
177
        MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
178
        MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
179
        MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
180
        MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
181
        MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
182

                
183
        MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
184
        MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
185
        MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
186
        MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
187
        MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
188
        MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
189
        MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
190
        MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
191
        MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
192
        MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
193
        MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
194
        MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
195
        MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
196
        MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
197
        MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
198
        MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
199

                
200
        MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
201
        MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
202
        MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
203
        MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
204
        MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
205
        MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
206
        MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
207
        MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
208
        MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
209
        MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
210
        MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
211
        MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
212
        MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
213
        MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
214
        MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
215
        MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
216

                
217
        MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
218
        MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
219
        MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
220
        MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
221
        MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
222
        MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
223
        MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
224
        MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
225
        MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
226
        MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
227
        MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
228
        MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
229
        MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
230
        MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
231
        MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
232
        MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
233

                
234
        buf[0] += a;
235
        buf[1] += b;
236
        buf[2] += c;
237
        buf[3] += d;
238
}
239

                
240
/* 
241
 * Update context to reflect the concatenation of another buffer full
242
 * of bytes.
243
 */
244
static void
245
MD5Update(MD5_CTX *ctx, unsigned char const *buf, unsigned len)
246
{
247
        guint32 t;
248

                
249
        if (len == 0) {
250
            return;
251
        }
252

                
253
        /* Update bitcount */
254

                
255
        t = ctx->bits[0];
256
        if ((ctx->bits[0] = t + ((guint32) len << 3)) < t)
257
                ctx->bits[1]++;                /* Carry from low to high */
258
        ctx->bits[1] += len >> 29;
259

                
260
        t = (t >> 3) & 0x3f;        /* Bytes already in shsInfo->data */
261

                
262
        /* Handle any leading odd-sized chunks */
263

                
264
        if (t) {
265
                unsigned char *p = (unsigned char *) ctx->in + t;
266

                
267
                t = 64 - t;
268
                if (len < t) {
269
                        memcpy(p, buf, len);
270
                        return;
271
                }
272
                memcpy(p, buf, t);
273
                byteReverse(ctx->in, 16);
274
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
275
                buf += t;
276
                len -= t;
277
        }
278
        /* Process data in 64-byte chunks */
279

                
280
        while (len >= 64) {
281
                memcpy(ctx->in, buf, 64);
282
                byteReverse(ctx->in, 16);
283
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
284
                buf += 64;
285
                len -= 64;
286
        }
287

                
288
        /* Handle any remaining bytes of data. */
289

                
290
        memcpy(ctx->in, buf, len);
291
}
292

                
293
/* 
294
 * Final wrapup - pad to 64-byte boundary with the bit pattern
295
 * 1 0* (64-bit count of bits processed, MSB-first)
296
 */
297
static void
298
MD5Final(unsigned char digest[16], MD5_CTX *ctx)
299
{
300
        unsigned count;
301
        unsigned char *p;
302

                
303
        /* Compute number of bytes mod 64 */
304
        count = (ctx->bits[0] >> 3) & 0x3F;
305

                
306
        /* Set the first char of padding to 0x80. This is safe since there is 
307
           always at least one byte free */
308
        p = ctx->in + count;
309
        *p++ = 0x80;
310

                
311
        /* Bytes of padding needed to make 64 bytes */
312
        count = 64 - 1 - count;
313

                
314
        /* Pad out to 56 mod 64 */
315
        if (count < 8) {
316
                /* Two lots of padding: Pad the first block to 64 bytes */
317
                memset(p, 0, count);
318
                byteReverse(ctx->in, 16);
319
                MD5Transform(ctx->buf, (guint32 *) ctx->in);
320

                
321
                /* Now fill the next block with 56 bytes */
322
                memset(ctx->in, 0, 56);
323
        } else {
324
                /* Pad block to 56 bytes */
325
                memset(p, 0, count - 8);
326
        }
327
        byteReverse(ctx->in, 14);
328

                
329
        /* Append length in bits and transform */
330
        ((guint32 *) ctx->in)[14] = ctx->bits[0];
331
        ((guint32 *) ctx->in)[15] = ctx->bits[1];
332

                
333
        MD5Transform(ctx->buf, (guint32 *) ctx->in);
334
        byteReverse((unsigned char *) ctx->buf, 4);
335
        memcpy(digest, ctx->buf, 16);
336
        memset((char *) ctx, 0, sizeof(ctx));        /* In case it's sensitive */
337
}
338
#endif
339

                
340
/*
 * Remove one trailing "\n" and then one trailing "\r" from 'line', in place.
 * This turns "text\r\n", "text\n" and "text\r" into "text".
 *
 * 'line' must be a NUL-terminated string. An empty string, or a string
 * which becomes empty after removing the "\n", is left alone: there is no
 * last character to inspect, and looking at line[-1] would be out of bounds.
 */
static void
strip_newline(char *line)
{
    size_t len = strlen(line);

    if (len > 0 && line[len - 1] == '\n') {
        line[--len] = '\0';
    }

    if (len > 0 && line[len - 1] == '\r') {
        line[--len] = '\0';
    }
}
351

                
352
static gboolean
353
parse_ybegin_header(const char *line, int *part, int *num_lines, guint64 *size, char *name, int name_length)
354
{
355
    int i;
356
    int len;
357

                
358
    /* line must be in one of the following notations: */
359
    /* =ybegin line=128 size=100000 name=abc.rar */
360
    /* =ybegin part=1 line=128 size=50000000 name=abc.rar */
361

                
362
    g_return_val_if_fail(line != NULL, FALSE);
363
    g_return_val_if_fail(part != NULL, FALSE);
364
    g_return_val_if_fail(line != NULL, FALSE);
365
    g_return_val_if_fail(size != NULL, FALSE);
366
    g_return_val_if_fail(name != NULL, FALSE);
367
    g_return_val_if_fail(name_length > 0, FALSE);
368

                
369
    *part = -1;
370
    *num_lines = -1;
371
    *size = -1;
372
    memset(name, 0, name_length);
373

                
374
    len = strlen(line);
375
    for (i = 0; i < len; i++) {
376
        if (!strncmp(line + i, "part=", 5)) {
377
            *part = atoi(line + i + 5);
378
        } else if (!strncmp(line + i, "line=", 5)) {
379
            *num_lines = atoi(line + i + 5);
380
        } else if (!strncmp(line + i, "size=", 5)) {
381
            *size = g_ascii_strtoull (line + i + 5, NULL, 10);
382
        } else if (!strncmp(line + i, "name=", 5)) {
383
            strcpy(name, line + i + 5);
384
            strip_newline(name);
385
        }
386
    }
387

                
388
#if 0 
389
g_print("part = %i\n", *part);
390
g_print("num_lines = %i\n", *num_lines);
391
g_print("size = %li\n", *size);
392
g_print("filename = %s\n\n", name);
393
#endif
394

                
395
    if (*num_lines > 0 && *size > 0 && strlen(name) > 0) {  /* NOTE: The part field is optional */
396
        return TRUE;
397
    } else {
398
        return FALSE;
399
    }
400
}
401

                
402
static gboolean
403
parse_ypart_header(const char *line, guint64 *begin, guint64 *end)
404
{
405
    int i;
406
    int len;
407

                
408
    /* line must be in the following notation: */
409
    /* =ypart begin=1 end=640000 */
410

                
411
    g_return_val_if_fail(line != NULL, FALSE);
412
    g_return_val_if_fail(begin != NULL, FALSE);
413
    g_return_val_if_fail(end != NULL, FALSE);
414

                
415
    *begin = -1;
416
    *end = -1;
417

                
418
    len = strlen(line);
419
    for (i = 0; i < len; i++) {
420
        if (!strncmp(line + i, "begin=", 6)) {
421
            *begin = g_ascii_strtoull(line + i + 6, NULL, 10);
422
        } else if (!strncmp(line + i, "end=", 4)) {
423
            *end = g_ascii_strtoull(line + i + 4, NULL, 10);
424
        }
425
    }
426

                
427
#if 0 
428
g_print("begin = %li\n", *begin);
429
g_print("end = %li\n", *end);
430
#endif
431

                
432
    if (*begin >= 0 && *end > 0) {
433
        return TRUE;
434
    } else {
435
        return FALSE;
436
    }
437
}
438

                
439
static gboolean
440
try_to_use_new_decoder(NGPlugin *plugin_data, const char *filename_in, const char *target_directory, NNTPGrabDecoderRes *decoder_result, char **errmsg, FILE **prev_file_out_fp, char *prev_filename_out, int prev_filename_out_length, char *prev_real_filename_out, int prev_real_filename_out_length, MD5_CTX *md5_ctx)
441
{
442
    int part = -1;
443
    int num_lines = -1;
444
    guint64 size = -1;
445
    char filename_out[1024];
446
    guint64 begin = -1;
447
    guint64 end = -1;
448
    FILE *fp;
449
    gboolean escape_char_detected = FALSE;
450
    char path[4096];
451
    char buf[1024];
452
    struct crc32_ctx crc32_context;
453

                
454
    g_return_val_if_fail(filename_in != NULL, FALSE);
455
    g_return_val_if_fail(target_directory != NULL, FALSE);
456
    g_return_val_if_fail(decoder_result != NULL, FALSE);
457
    /* NOTE: errmsg MIGHT be NULL */
458
    g_return_val_if_fail(prev_file_out_fp != NULL, FALSE);
459
    g_return_val_if_fail(prev_filename_out != NULL, FALSE);
460
    g_return_val_if_fail(prev_filename_out_length > 0, FALSE);
461

                
462
    *decoder_result = DECODER_RESULT_COMPLETE;
463

                
464
    /* Try to read the first line from the given filename to detected if this is an yEnc encoded file */
465
    fp = g_fopen(filename_in, "rb");
466

                
467
    if (!fp) {
468
        *decoder_result = DECODER_RESULT_ERROR;
469
        if (errmsg) {
470
            *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to open file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
471
        }
472

                
473
        return TRUE;
474
    }
475

                
476
    /* Try to read the header from this file */
477
    do {
478
        if (!fgets(buf, sizeof(buf) - 1, fp)) {
479
            if (feof(fp)) {
480
                /* The file doesn't contain any data at all */
481
                *decoder_result = DECODER_RESULT_INCOMPLETE;
482
                ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i File %s is empty\n", __LINE__, filename_in);
483
            } else {
484
                *decoder_result = DECODER_RESULT_ERROR;
485
                if (errmsg) {
486
                    *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read the first line from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
487
                }
488
            }
489

                
490
            fclose(fp);
491

                
492
            return TRUE;
493
        }
494

                
495
        strip_newline(buf);
496

                
497
        if (strlen(buf) == 0) {
498
            continue;
499
        }
500

                
501
        if (strncmp(buf, "=ybegin", 7) != 0) {
502
            /* Other encoding found. Process this with a different decoder */
503
            fclose(fp);
504

                
505
            return FALSE;
506
        }
507

                
508
        break;
509
    } while (TRUE);
510

                
511
    /* yEnc encoded file found! */
512

                
513
    /* The header is now in buf. Try to extract information from this header */
514
    memset(filename_out, 0, sizeof(filename_out));
515
    if (!parse_ybegin_header(buf, &part, &num_lines, &size, filename_out, sizeof(filename_out) - 1)) {
516
        /* Header incomplete. Ignore this file */
517
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i yBegin-header incomplete, buf = %s\n", __LINE__, buf);
518
        *decoder_result = DECODER_RESULT_INCOMPLETE;
519
        fclose(fp);
520

                
521
        return TRUE;
522
    }
523

                
524
    /* Translate the filename to UTF-8 if it isn't already */
525
    if (!g_utf8_validate(filename_out, -1, NULL)) {
526
        /* Filename is probably in Windows-1252 charset */
527
        char *tmp = g_convert(filename_out, -1, "utf-8", "windows-1252", NULL, NULL, NULL);
528
        if (!tmp) {
529
            *decoder_result = DECODER_RESULT_ERROR;
530
            if (errmsg) {
531
                *errmsg = g_strdup_printf(_("%s:%i The filename '%s' is using an unknown character set. Decoding cannot continue"), __FILE__, __LINE__, filename_out);
532
            }
533

                
534
            fclose(fp);
535

                
536
            return TRUE;
537
        }
538

                
539
        memset(filename_out, 0, sizeof(filename_out));
540
        strncpy(filename_out, tmp, sizeof(filename_out) - 1);
541
        g_free(tmp);
542
    }
543

                
544
    /* If this is a multipart file, another header is sent */
545
    if (part != -1) {
546
        if (!fgets(buf, sizeof(buf) - 1, fp)) {
547
            *decoder_result = DECODER_RESULT_ERROR;
548
            if (errmsg) {
549
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read the first line from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
550
            }
551

                
552
            fclose(fp);
553

                
554
            return TRUE;
555
        }
556

                
557
        if (!parse_ypart_header(buf, &begin, &end)) {
558
            /* Header incomplete. Ignore this file */
559
            ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i Part-header incomplete, buf = %s\n", __LINE__, buf);
560
            *decoder_result = DECODER_RESULT_INCOMPLETE;
561
            fclose(fp);
562

                
563
            return TRUE;
564
        }
565
    }
566

                
567
    /* Open the output file (if it wasn't already open) */
568
    memset(path, 0, sizeof(path));
569
    g_snprintf(path, sizeof(path) - 1, "%s%s%s", target_directory, G_DIR_SEPARATOR_S, filename_out);
570

                
571
    if (strcmp(filename_out, prev_filename_out) != 0) {
572
        char path_orig[4096];
573
        int suffix = 1;
574
        char *filename_tmp;
575

                
576
        /* Output file wasn't open yet. Open it now */
577
        if (*prev_file_out_fp != NULL) {
578
            if (fclose(*prev_file_out_fp) != 0) {
579
                *decoder_result = DECODER_RESULT_ERROR;
580
                if (errmsg) {
581
                    *errmsg = g_strdup_printf(_("%s:%i Unable to close file '%s': %s"), __FILE__, __LINE__, prev_filename_out, strerror(errno));
582
                }
583
                *prev_file_out_fp = NULL;
584
                return TRUE;
585
            }
586
        }
587

                
588
        /* If there's already a file at 'path', append a suffix to it and try again */
589
        strcpy(path_orig, path);
590
        while (g_file_test(path, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) {
591
            g_snprintf(path, sizeof(path) - 1, "%s.%i", path_orig, suffix);
592
            suffix++;
593
        }
594

                
595
        *prev_file_out_fp = g_fopen(path, "wb");
596

                
597
        if (!(*prev_file_out_fp)) {
598
            *decoder_result = DECODER_RESULT_ERROR;
599
            if (errmsg) {
600
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to open file for writing '%s': %s"), __FILE__, __LINE__, path, strerror(errno));
601
            }
602

                
603
            fclose(fp);
604

                
605
            return TRUE;
606
        }
607

                
608
        strncpy(prev_filename_out, filename_out, prev_filename_out_length);
609
        filename_tmp = g_path_get_basename(path);
610
        strncpy(prev_real_filename_out, filename_tmp, prev_real_filename_out_length);
611
        g_free(filename_tmp);
612
    }
613

                
614
    g_return_val_if_fail(*prev_file_out_fp != NULL, TRUE);
615

                
616
    /* Seek to the right position in the output file if we're decoding a multi-part file */
617
    if (begin > 0) {
618
        (void) fseek(*prev_file_out_fp, begin - 1, SEEK_SET);
619
    }
620

                
621
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, "Now starting decoding file at position %"G_GUINT64_FORMAT"\n", begin);
622

                
623
    crc32_init_ctx(&crc32_context);
624

                
625
    /* Start the real decoding */
626
    do {
627
        int len = fread(buf, 1, sizeof(buf), fp);
628
        int i;
629
        char buf_out[1024];
630
        int len_out = 0;
631

                
632
        memset(buf_out, 0, sizeof(buf_out));
633

                
634
        if (len == 0) {
635
            if (feof(fp)) {
636
                /* All data is read */
637
                break;
638
            }
639
        } else if (len < 0) {
640
            /* Some error occured while reading */
641
            *decoder_result = DECODER_RESULT_ERROR;
642
            if (errmsg) {
643
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
644
            }
645

                
646
            fclose(fp);
647

                
648
            return TRUE;
649
        }
650

                
651
        for (i = 0; i < len; i++) {
652
            unsigned char ch = buf[i];
653

                
654
            if (ch == '\r' || ch == '\n') {
655
                /* Ignore newlines */
656
                continue;
657
            } else if (ch == '=') {
658
                /* Escape character detected. Un-escape the next byte */
659
                escape_char_detected = TRUE;
660
            } else if (escape_char_detected && ch == 'y') {     // =y detected. This is probably the end of the part
661
                char buf_new[1024];
662
                int buf_new_length;
663
                char *ptr;
664
                guint32 crc32_expected = 0;
665

                
666
                memset(&buf_new, 0, sizeof(buf_new));
667
                buf_new_length = sizeof(buf) - i - 1;
668

                
669
                memcpy(buf_new, buf + i + 1, buf_new_length);
670

                
671
                /* Try to put more data in the buffer */
672
                if (fread(buf_new + buf_new_length, 1, sizeof(buf_new) - buf_new_length, fp) < 0) {
673
                    *decoder_result = DECODER_RESULT_ERROR;
674
                    if (errmsg) {
675
                        *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to read from file '%s': %s"), __FILE__, __LINE__, filename_in, strerror(errno));
676
                    }
677

                
678
                    fclose(fp);
679

                
680
                    return TRUE;
681
                }
682

                
683
                /* Do we really have an =yend ? */
684
                if (strncmp(buf_new, "end", 3) != 0) {
685
                    *decoder_result = DECODER_RESULT_INCOMPLETE;
686
                }
687

                
688
                /* Flush the buffer */
689
                if (len_out > 0 && fwrite(buf_out, 1, len_out, *prev_file_out_fp) <= 0) {
690
                    *decoder_result = DECODER_RESULT_ERROR;
691
                    if (errmsg) {
692
                        *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to write to file '%s': %s"), __FILE__, __LINE__, filename_out, strerror(errno));
693
                    }
694
                }
695

                
696
                fclose(fp);
697

                
698
#if 0 
699
                /* Update the MD5 */
700
                MD5Update(md5_ctx, (unsigned char*) buf_out, len_out);
701
#endif
702

                
703
                /* Calculate the CRC value of this part */
704
                crc32_process_bytes(buf_out, len_out, &crc32_context);
705

                
706
                /* Verify the CRC value of this part */
707
                if ((ptr = strstr(buf_new, "pcrc32="))) {
708
                    char crc[9];
709
                    memcpy(crc, ptr + 7, 8);
710
                    crc[8] = '\0';
711

                
712
                    crc32_expected = atocrc32(crc);
713
                } else if ((ptr = strstr(buf_new, "crc32="))) {
714
                    char crc[9];
715
                    memcpy(crc, ptr + 6, 8);
716
                    crc[8] = '\0';
717

                
718
                    crc32_expected = atocrc32(crc);
719
                } else {
720
                    /* No CRC32 value found. Ignore verification */
721
                    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_DEBUG, __FILE__ ":%i No CRC32 value found in file %s\n", __LINE__, filename_in);
722
                }
723

                
724
                if (crc32_expected != 0 && crc32_read_ctx(&crc32_context) != crc32_expected) {
725
                    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, "Part CRC32 error for file %s -- got 0x%08x, should be 0x%08x\n", filename_in, crc32_read_ctx(&crc32_context), crc32_expected);
726
                    *decoder_result = DECODER_RESULT_INCOMPLETE;
727
                }
728

                
729
                crc32_finish_ctx(&crc32_context);
730

                
731
                return TRUE;
732
            } else {
733
                ch = ch - 42 - (escape_char_detected ? 64 : 0);
734
                if (ch < 0) {
735
                    ch += 256;
736
                }
737

                
738
                buf_out[len_out++] = ch;
739
                g_return_val_if_fail(len_out <= sizeof(buf_out), TRUE);
740

                
741
                escape_char_detected = FALSE;
742
            }
743
        }
744

                
745
        /* Calculate the CRC and MD5 values of this part */
746
        crc32_process_bytes(buf_out, len_out, &crc32_context);
747
#if 0 
748
        MD5Update(md5_ctx, (unsigned char*) buf_out, len_out);
749
#endif
750

                
751
        /* Flush the buffer */
752
        if (fwrite(buf_out, 1, len_out, *prev_file_out_fp) <= 0) {
753
            *decoder_result = DECODER_RESULT_ERROR;
754
            if (errmsg) {
755
                *errmsg = g_strdup_printf(_("%s:%i The decoder wasn't able to write to file '%s': %s"), __FILE__, __LINE__, filename_out, strerror(errno));
756
            }
757

                
758
            fclose(fp);
759

                
760
            return TRUE;
761
        }
762
    } while (TRUE);
763

                
764
    fclose(fp);
765

                
766
    /* Unexpected end of file detected, file must be incomplete */
767
    *decoder_result = DECODER_RESULT_INCOMPLETE;
768

                
769
    return TRUE;
770
}
771

                
772
static NNTPGrabDecoderRes
773
nntpgrab_plugin_decoder_decode_file(NGPlugin *plugin_data, const char *collection_name, NNTPFile *file, const char *temp_directory, const char *target_directory, char **real_filename, int *saved_errno, char **errmsg)
774
{
775
    int i;
776
#if 0 
777
    GList *lines = NULL;
778
#endif
779
    NNTPGrabDecoderRes ret = DECODER_RESULT_COMPLETE;
780
    gboolean file_found = FALSE;
781
    struct decoded_file *decoded_list = NULL;
782
    struct decoded_file *list;
783
    gboolean new_decoder_used = FALSE;
784
    char filename[1024];
785
    gboolean incomplete_file = FALSE;
786
    char target_path[4096];
787
    char prev_filename_out[1024];
788
    char prev_real_filename_out[1024];
789
    FILE *prev_file_fp = NULL;
790
    MD5_CTX md5_ctx;
791

                
792
    g_return_val_if_fail(collection_name != NULL, DECODER_RESULT_ERROR);
793
    g_return_val_if_fail(file != NULL, DECODER_RESULT_ERROR);
794
    g_return_val_if_fail(temp_directory != NULL, DECODER_RESULT_ERROR);
795
    g_return_val_if_fail(target_directory != NULL, DECODER_RESULT_ERROR);
796
    g_return_val_if_fail(real_filename != NULL, DECODER_RESULT_ERROR);
797
    g_return_val_if_fail(saved_errno != NULL, DECODER_RESULT_ERROR);
798

                
799
    *saved_errno = 0;
800

                
801
    memset(prev_filename_out, 0, sizeof(prev_filename_out));
802
    memset(prev_real_filename_out, 0, sizeof(prev_real_filename_out));
803

                
804
    memset(target_path, 0, sizeof(target_path));
805
    g_snprintf(target_path, sizeof(target_path) - 1, "%s"G_DIR_SEPARATOR_S"%s", target_directory, collection_name);
806

                
807
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("File '%s' of collection '%s' is now being decoded to directory '%s'"), file->subject, collection_name, target_path);
808

                
809
    if (g_mkdir_with_parents(target_path, 0755) == -1) {
810
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Creation of folder '%s' failed, errno = %i, %s\n"), target_path, errno, strerror(errno));
811
    }
812

                
813
    // The yydecode code assumes all the resulting files will be saved in the current directory
814
    // so we need to change to the wanted directory before starting the decode
815
    if (g_chdir(target_path) == -1) {
816
        if (errmsg) {
817
            *errmsg = g_strdup_printf(_("Unable to navigate to folder '%s': %s"), target_path, strerror(errno));
818
        }
819
        *saved_errno = ENOENT;
820

                
821
        ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Unable to navigate to folder '%s': %s"), target_path, strerror(errno));
822

                
823
        return DECODER_RESULT_ERROR;
824
    }
825

                
826
#if 0 
827
    MD5Init(&md5_ctx);
828
#endif
829

                
830
    // Load all the individual parts
831
    for (i = 0; i < file->num_parts; i++) {
832
        struct decoded_file *list;
833

                
834
        memset(filename, 0, sizeof(filename) - 1);
835
        g_snprintf(filename, sizeof(filename) - 1, "%s%s%s.%i", temp_directory, G_DIR_SEPARATOR_S, file->tmp_filename, i + 1);
836

                
837
        if (!g_file_test(filename, G_FILE_TEST_IS_REGULAR | G_FILE_TEST_EXISTS)) {
838
            continue;
839
        }
840

                
841
        file_found = TRUE;
842

                
843
        if (try_to_use_new_decoder(plugin_data, filename, target_path, &ret, errmsg, &prev_file_fp, prev_filename_out, sizeof(prev_filename_out) - 1, prev_real_filename_out, sizeof(prev_real_filename_out) - 1, &md5_ctx)) {
844
#if 0 
845
g_print("prev_file_fp = %#x\n", prev_file_fp);
846
g_print("prev_real_filename_out = %s\n", prev_real_filename_out);
847
g_print("ret = %i\n", ret);
848
#endif
849
            *real_filename = g_strdup(prev_real_filename_out);
850
            new_decoder_used = TRUE;
851
            if (ret == DECODER_RESULT_ERROR) {
852
                g_print("Error during decode\n");
853
                if (errmsg && *errmsg) {
854
                    g_print("errmsg = %s\n", *errmsg);
855
                }
856

                
857
                break;
858
            } else if (ret == DECODER_RESULT_INCOMPLETE) {
859
                incomplete_file = TRUE;
860
            }
861

                
862
            continue;
863
        }
864

                
865
        if (decode(filename, NULL, &decoded_list, saved_errno, errmsg) != EXIT_SUCCESS) {
866
            g_print("Error during decode\n");
867
            if (errmsg && *errmsg) {
868
                g_print("errmsg = %s\n", *errmsg);
869
            }
870

                
871
            ret = DECODER_RESULT_ERROR;
872

                
873
            while (decoded_list) {
874
                list = decoded_list->next;
875

                
876
                if (decoded_list->handle) {
877
                    fclose(decoded_list->handle);
878
                    decoded_list->handle = NULL;
879
                }
880

                
881
                if (decoded_list->filename) free(decoded_list->filename);
882
                if (decoded_list->outname)  free(decoded_list->outname);
883
                if (decoded_list->status)   free(decoded_list->status);
884
                free(decoded_list);
885
                decoded_list = list;
886
            }
887
        }
888

                
889
        if (ret == DECODER_RESULT_ERROR) {
890
            break;
891
        }
892
    }
893

                
894
    if (new_decoder_used) {
895
#if 0 
896
        unsigned char md5_digest[16];
897
        int j;
898
#endif
899

                
900
        /* Close any file descriptors which may have left open */
901
        if (prev_file_fp) {
902
            if (fclose(prev_file_fp) != 0) {
903
                ret = DECODER_RESULT_ERROR;
904
                if (errmsg) {
905
                    *errmsg = g_strdup_printf(_("%s:%i Unable to close file '%s': %s"), __FILE__, __LINE__, prev_filename_out, strerror(errno));
906
                }
907
            }
908

                
909
            prev_file_fp = NULL;
910
        }
911
#if 0 
912
        MD5Final(md5_digest, &md5_ctx);
913
#endif
914

                
915
        memset(file->md5sum, 0, sizeof(file->md5sum));
916
#if 0 
917
        for (j = 0; j < 16; j++) {
918
            sprintf(file->md5sum + j * 2, "%02x", md5_digest[j]);
919
        }
920
#endif
921

                
922
        if (incomplete_file && ret != DECODER_RESULT_ERROR) {
923
            ret = DECODER_RESULT_INCOMPLETE;
924
        }
925
    } else {
926
        if (decoded_list) {
927
            *real_filename = g_strdup(decoded_list->outname);
928
            for(i = 0; i < decoded_list->total_parts; i++) {
929
                switch(decoded_list->status[i]) {
930
                    case part_missing:
931
                    case part_broken:
932
                        ret = DECODER_RESULT_INCOMPLETE;
933
                        break;
934

                
935
                    case part_intact:
936
                    case part_duplicated:
937
                        // don't do anything
938
                        break;
939
                }
940
            }
941

                
942
            while (decoded_list) {
943
                list = decoded_list->next;
944

                
945
                if (decoded_list->handle) {
946
                    fclose(decoded_list->handle);
947
                    decoded_list->handle = NULL;
948
                }
949

                
950
                if (decoded_list->filename) free(decoded_list->filename);
951
                if (decoded_list->outname)  free(decoded_list->outname);
952
                if (decoded_list->status)   free(decoded_list->status);
953
                free(decoded_list);
954
                decoded_list = list;
955
            }
956
        }
957
    }
958

                
959
    if (!file_found) {
960
        if (errmsg) {
961
            *errmsg = g_strdup(_("No data to decode"));
962
        }
963

                
964
        if (saved_errno) {
965
            *saved_errno = -1;
966
        }
967

                
968
        ret = DECODER_RESULT_NO_PARTS_AVAIL;
969
    }
970

                
971
    if (ret != DECODER_RESULT_ERROR) {
972
        char filename[1024];
973

                
974
        // Remove all the temp parts on success
975
        for (i = 0; i < file->num_parts; i++) {
976
            memset(&filename, 0, sizeof(filename));
977
            g_snprintf(filename, sizeof(filename) - 1, "%s%s%s.%i", temp_directory, G_DIR_SEPARATOR_S, file->tmp_filename, i + 1);
978
            g_unlink(filename);
979

                
980
        }
981
    }
982

                
983
    ng_plugin_emit_log_msg(plugin_data, NG_LOG_LEVEL_INFO, _("Decoding completed with return value %i"), ret);
984

                
985
    // Change the current directory back to another directory to prevent the NNTPGrab process
986
    // from 'claiming' the directory in which the decoded files were saved
987
    g_chdir(target_directory);
988

                
989
    return ret;
990
}