indra/llmessage/llmime.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613

/** 
 * @file llmime.cpp
 * @author Phoenix
 * @date 2006-12-20
 * @brief Implementation of mime tools.
 *
 * Copyright (c) 2006-$CurrentYear$, Linden Research, Inc.
 * $License$
 */

#include "linden_common.h"
#include "llmime.h"

#include <vector>

#include "llmemorystream.h"

/**
 * Useful constants.
 */
// Headers specified in rfc-2045 will be canonicalized below.
static const std::string CONTENT_LENGTH("Content-Length");
static const std::string CONTENT_TYPE("Content-Type");
static const S32 KNOWN_HEADER_COUNT = 6;
static const std::string KNOWN_HEADER[KNOWN_HEADER_COUNT] =
{
	CONTENT_LENGTH,
	CONTENT_TYPE,
	std::string("MIME-Version"),
	std::string("Content-Transfer-Encoding"),
	std::string("Content-ID"),
	std::string("Content-Description"),
};

// parser helpers
static const std::string MULTIPART("multipart");
static const std::string BOUNDARY("boundary");
static const std::string END_OF_CONTENT_PARAMETER("\r\n ;\t");
static const std::string SEPARATOR_PREFIX("--");
//static const std::string SEPARATOR_SUFFIX("\r\n");

/*
Content-Type: multipart/mixed; boundary="segment"
Content-Length: 24832

--segment
Content-Type: image/j2c
Content-Length: 23715

<data>

--segment
Content-Type: text/xml; charset=UTF-8

<meta data>
EOF

*/

/**
 * LLMimeIndex
 */

/** 
 * @class LLMimeIndex::Impl
 * @brief Implementation details of the mime index class.
 * @see LLMimeIndex
 */
class LLMimeIndex::Impl
{
public:
	Impl() : mOffset(-1), mUseCount(1)
	{}
	Impl(LLSD headers, S32 offset) :
		mHeaders(headers), mOffset(offset), mUseCount(1)
	{}
public:
	LLSD mHeaders;
	S32 mOffset;
	S32 mUseCount;

	typedef std::vector<LLMimeIndex> sub_part_t;
	sub_part_t mAttachments;
};

LLSD LLMimeIndex::headers() const
{
	return mImpl->mHeaders;
}

S32 LLMimeIndex::offset() const
{
	return mImpl->mOffset;
}

S32 LLMimeIndex::contentLength() const
{
	// Find the content length in the headers.
	S32 length = -1;
	LLSD content_length = mImpl->mHeaders[CONTENT_LENGTH];
	if(content_length.isDefined())
	{
		length = content_length.asInteger();
	}
	return length;
}

std::string LLMimeIndex::contentType() const
{
	std::string type;
	LLSD content_type = mImpl->mHeaders[CONTENT_TYPE];
	if(content_type.isDefined())
	{
		type = content_type.asString();
	}
	return type;
}

bool LLMimeIndex::isMultipart() const
{
	bool multipart = false;
	LLSD content_type = mImpl->mHeaders[CONTENT_TYPE];
	if(content_type.isDefined())
	{
		std::string type = content_type.asString();
		int comp = type.compare(0, MULTIPART.size(), MULTIPART);
		if(0 == comp)
		{
			multipart = true;
		}
	}
	return multipart;
}

S32 LLMimeIndex::subPartCount() const
{
	return mImpl->mAttachments.size();
}

LLMimeIndex LLMimeIndex::subPart(S32 index) const
{
	LLMimeIndex part;
	if((index >= 0) && (index < (S32)mImpl->mAttachments.size()))
	{
		part = mImpl->mAttachments[index];
	}
	return part;
}

LLMimeIndex::LLMimeIndex() : mImpl(new LLMimeIndex::Impl)
{
}

LLMimeIndex::LLMimeIndex(LLSD headers, S32 content_offset) :
	mImpl(new LLMimeIndex::Impl(headers, content_offset))
{
}

LLMimeIndex::LLMimeIndex(const LLMimeIndex& mime) :
	mImpl(mime.mImpl)
{
	++mImpl->mUseCount;
}

LLMimeIndex::~LLMimeIndex()
{
	if(0 == --mImpl->mUseCount)
	{
		delete mImpl;
	}
}

LLMimeIndex& LLMimeIndex::operator=(const LLMimeIndex& mime)
{
	// Increment use count first so that we handle self assignment
	// automatically.
	++mime.mImpl->mUseCount;
	if(0 == --mImpl->mUseCount)
	{
		delete mImpl;
	}
	mImpl = mime.mImpl;
	return *this;
}

bool LLMimeIndex::attachSubPart(LLMimeIndex sub_part)
{
	// *FIX: Should we check for multi-part?
	if(mImpl->mAttachments.size() < S32_MAX)
	{
		mImpl->mAttachments.push_back(sub_part);
		return true;
	}
	return false;
}

/**
 * LLMimeParser
 */
/** 
 * @class LLMimeParser::Impl
 * @brief Implementation details of the mime parser class.
 * @see LLMimeParser
 */
class LLMimeParser::Impl
{
public:
	// @brief Constructor.
	Impl();

	// @brief Reset this for a new parse.
	void reset();

	/** 
	 * @brief Parse a mime entity to find the index information.
	 *
	 * This method will scan the istr until a single complete mime
	 * entity is read, an EOF, or limit bytes have been scanned. The
	 * istr will be modified by this parsing, so pass in a temporary
	 * stream or rewind/reset the stream after this call.
	 * @param istr An istream which contains a mime entity.
	 * @param limit The maximum number of bytes to scan.
	 * @param separator The multipart separator if it is known.
	 * @param is_subpart Set true if parsing a multipart sub part.
	 * @param index[out] The parsed output.
	 * @return Returns true if an index was parsed and no errors occurred.
	 */
	bool parseIndex(
		std::istream& istr,
		S32 limit,
		const std::string& separator,
		bool is_subpart,
		LLMimeIndex& index);

protected:
	/**
	 * @brief parse the headers.
	 *
	 * At the end of a successful parse, mScanCount will be at the
	 * start of the content.
	 * @param istr The input stream.
	 * @param limit maximum number of bytes to process
	 * @param headers[out] A map of the headers found.
	 * @return Returns true if the parse was successful.
	 */
	bool parseHeaders(std::istream& istr, S32 limit, LLSD& headers);

	/**
	 * @brief Figure out the separator string from a content type header.
	 * 
	 * @param multipart_content_type The content type value from the headers.
	 * @return Returns the separator string.
	 */
	std::string findSeparator(std::string multipart_content_type);

	/**
	 * @brief Scan through istr past the separator.
	 *
	 * @param istr The input stream.
	 * @param limit Maximum number of bytes to scan.
	 * @param separator The multipart separator.
	 */
	void scanPastSeparator(
		std::istream& istr,
		S32 limit,
		const std::string& separator);

	/**
	 * @brief Scan through istr past the content of the current mime part.
	 *
	 * @param istr The input stream.
	 * @param limit Maximum number of bytes to scan.
	 * @param headers The headers for this mime part.
	 * @param separator The multipart separator if known.
	 */
	void scanPastContent(
		std::istream& istr,
		S32 limit,
		LLSD headers,
		const std::string separator);

	/**
	 * @brief Eat CRLF.
	 *
	 * This method has no concept of the limit, so ensure you have at
	 * least 2 characters left to eat before hitting the limit. This
	 * method will increment mScanCount as it goes.
	 * @param istr The input stream.
	 * @return Returns true if CRLF was found and consumed off of istr.
	 */
	bool eatCRLF(std::istream& istr);

	// @brief Returns true if parsing should continue.
	bool continueParse() const { return (!mError && mContinue); }

	// @brief anonymous enumeration for parse buffer size.
	enum
	{
		LINE_BUFFER_LENGTH = 1024
	};

protected:
	S32 mScanCount;
	bool mContinue;
	bool mError;
	char mBuffer[LINE_BUFFER_LENGTH];
};

LLMimeParser::Impl::Impl()
{
	reset();
}

void LLMimeParser::Impl::reset()
{
	mScanCount = 0;
	mContinue = true;
	mError = false;
	mBuffer[0] = '\0';
}

bool LLMimeParser::Impl::parseIndex(
	std::istream& istr,
	S32 limit,
	const std::string& separator,
	bool is_subpart,
	LLMimeIndex& index)
{
	LLSD headers;
	bool parsed_something = false;
	if(parseHeaders(istr, limit, headers))
	{
		parsed_something = true;
		LLMimeIndex mime(headers, mScanCount);
		index = mime;
		if(index.isMultipart())
		{
			// Figure out the separator, scan past it, and recurse.
			std::string ct = headers[CONTENT_TYPE].asString();
			std::string sep = findSeparator(ct);
			scanPastSeparator(istr, limit, sep);
			while(continueParse() && parseIndex(istr, limit, sep, true, mime))
			{
				index.attachSubPart(mime);
			}
		}
		else
		{
			// Scan to the end of content.
			scanPastContent(istr, limit, headers, separator);
			if(is_subpart)
			{
				scanPastSeparator(istr, limit, separator);
			}
		}
	}
	if(mError) return false;
	return parsed_something;
}

bool LLMimeParser::Impl::parseHeaders(
	std::istream& istr,
	S32 limit,
	LLSD& headers)
{
	while(continueParse())
	{
		// Get the next line.
		// We subtract 1 from the limit so that we make sure
		// not to read past limit when we get() the newline.
		S32 max_get = llmin((S32)LINE_BUFFER_LENGTH, limit - mScanCount - 1);
		istr.getline(mBuffer, max_get, '\r');
		mScanCount += istr.gcount();
		int c = istr.get();
		if(EOF == c)
		{
			mContinue = false;
			return false;
		}
		++mScanCount;
		if(c != '\n')
		{
			mError = true;
			return false;
		}
		if(mScanCount >= limit)
		{
			mContinue = false;
		}

		// Check if that's the end of headers.
		if('\0' == mBuffer[0])
		{
			break;
		}

		// Split out the name and value.
		// *NOTE: The use of strchr() here is safe since mBuffer is
		// guaranteed to be NULL terminated from the call to getline()
		// above.
		char* colon = strchr(mBuffer, ':');
		if(!colon)
		{
			mError = true;
			return false;
		}

		// Cononicalize the name part, and store the name: value in
		// the headers structure. We do this by iterating through
		// 'known' headers and replacing the value found with the
		// correct one.
		// *NOTE: Not so efficient, but iterating through a small
		// subset should not be too much of an issue.
		std::string name(mBuffer, colon++ - mBuffer);
		while(isspace(*colon)) ++colon;
		std::string value(colon);
		for(S32 ii = 0; ii < KNOWN_HEADER_COUNT; ++ii)
		{
			if(0 == LLString::compareInsensitive(
				name.c_str(),
				KNOWN_HEADER[ii].c_str()))
			{
				name = KNOWN_HEADER[ii];
				break;
			}
		}
		headers[name] = value;
	}
	if(headers.isUndefined()) return false;
	return true;
}

std::string LLMimeParser::Impl::findSeparator(std::string header)
{
	//                               01234567890
	//Content-Type: multipart/mixed; boundary="segment"
	std::string separator;
	std::string::size_type pos = header.find(BOUNDARY);
	if(std::string::npos == pos) return separator;
	pos += BOUNDARY.size() + 1;
	std::string::size_type end;
	if(header[pos] == '"')
	{
		// the boundary is quoted, find the end from pos, and take the
		// substring.
		end = header.find('"', ++pos);
		if(std::string::npos == end)
		{
			// poorly formed boundary.
			mError = true;
		}
	}
	else
	{
		// otherwise, it's every character until a whitespace, end of
		// line, or another parameter begins.
		end = header.find_first_of(END_OF_CONTENT_PARAMETER, pos);
		if(std::string::npos == end)
		{
			// it goes to the end of the string.
			end = header.size();
		}
	}
	if(!mError) separator = header.substr(pos, end - pos);
	return separator;
}

void LLMimeParser::Impl::scanPastSeparator(
	std::istream& istr,
	S32 limit,
	const std::string& sep)
{
	std::ostringstream ostr;
	ostr << SEPARATOR_PREFIX << sep;
	std::string separator = ostr.str();
	bool found_separator = false;
	while(!found_separator && continueParse())
	{
		// Subtract 1 from the limit so that we make sure not to read
		// past limit when we get() the newline.
		S32 max_get = llmin((S32)LINE_BUFFER_LENGTH, limit - mScanCount - 1);
		istr.getline(mBuffer, max_get, '\r');
		mScanCount += istr.gcount();
		if(istr.gcount() >= LINE_BUFFER_LENGTH - 1)
		{
			// that's way too long to be a separator, so ignore it.
			continue;
		}
		int c = istr.get();
		if(EOF == c)
		{
			mContinue = false;
			return;
		}
		++mScanCount;
		if(c != '\n')
		{
			mError = true;
			return;
		}
		if(mScanCount >= limit)
		{
			mContinue = false;
		}
		if(0 == LLString::compareStrings(mBuffer, separator.c_str()))
		{
			found_separator = true;
		}
	}
}

void LLMimeParser::Impl::scanPastContent(
	std::istream& istr,
	S32 limit,
	LLSD headers,
	const std::string separator)
{
	if(headers.has(CONTENT_LENGTH))
	{
		S32 content_length = headers[CONTENT_LENGTH].asInteger();
		// Subtract 2 here for the \r\n after the content.
		S32 max_skip = llmin(content_length, limit - mScanCount - 2);
		istr.ignore(max_skip);
		mScanCount += max_skip;

		// *NOTE: Check for hitting the limit and eof here before
		// checking for the trailing EOF, because our mime parser has
		// to gracefully handle incomplete mime entites.
		if((mScanCount >= limit) || istr.eof())
		{
			mContinue = false;
		}
		else if(!eatCRLF(istr))
		{
			mError = true;
			return;
		}
	}
}

bool LLMimeParser::Impl::eatCRLF(std::istream& istr)
{
	int c = istr.get();
	++mScanCount;
	if(c != '\r')
	{
		return false;
	}
	c = istr.get();
	++mScanCount;
	if(c != '\n')
	{
		return false;
	}
	return true;
}
	

LLMimeParser::LLMimeParser() : mImpl(* new LLMimeParser::Impl)
{
}

LLMimeParser::~LLMimeParser()
{
	delete & mImpl;
}

void LLMimeParser::reset()
{
	mImpl.reset();
}

bool LLMimeParser::parseIndex(std::istream& istr, LLMimeIndex& index)
{
	std::string separator;
	return mImpl.parseIndex(istr, S32_MAX, separator, false, index);
}

bool LLMimeParser::parseIndex(
	const std::vector<U8>& buffer,
	LLMimeIndex& index)
{
	LLMemoryStream mstr(&buffer[0], buffer.size());
	return parseIndex(mstr, buffer.size() + 1, index);
}

bool LLMimeParser::parseIndex(
	std::istream& istr,
	S32 limit,
	LLMimeIndex& index)
{
	std::string separator;
	return mImpl.parseIndex(istr, limit, separator, false, index);
}

bool LLMimeParser::parseIndex(const U8* buffer, S32 length, LLMimeIndex& index)
{
	LLMemoryStream mstr(buffer, length);
	return parseIndex(mstr, length + 1, index);
}

/*
bool LLMimeParser::verify(std::istream& isr, LLMimeIndex& index) const
{
	return false;
}

bool LLMimeParser::verify(U8* buffer, S32 length, LLMimeIndex& index) const
{
	LLMemoryStream mstr(buffer, length);
	return verify(mstr, index);
}
*/