MongoDB C++ Driver  legacy-1.1.2
json.h
1 /* Copyright 2014 MongoDB Inc.
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #pragma once
17 
18 #include <string>
19 
20 #include "mongo/bson/bsonobj.h"
21 #include "mongo/base/status.h"
22 #include "mongo/client/export_macros.h"
23 
24 namespace mongo {
25 
/**
 * Create a BSONObj from a JSON string (see http://www.json.org and
 * http://www.ietf.org/rfc/rfc4627.txt).
 *
 * @param str the JSON text to convert.
 * @return the BSONObj built from str.
 *
 * NOTE(review): how malformed input is reported is not visible from this
 * declaration -- confirm against the implementation.
 */
39 MONGO_CLIENT_API BSONObj MONGO_CLIENT_FUNC fromjson(const std::string& str);
40 
/**
 * C-string overload of fromjson(): create a BSONObj from JSON text.
 *
 * @param str the JSON text to convert.
 * @param len optional out-parameter, defaults to NULL.
 *            NOTE(review): presumably receives the number of bytes of str
 *            that were consumed -- confirm against the implementation.
 * @return the BSONObj built from str.
 */
42 MONGO_CLIENT_API BSONObj MONGO_CLIENT_FUNC fromjson(const char* str, int* len = NULL);
43 
/**
 * Tests whether the JSON string is an Array.
 *
 * @param str the JSON text to inspect.
 * @return true if str is a JSON array, false otherwise.
 */
59 MONGO_CLIENT_API bool isArray(const StringData& str);
60 
/**
 * Convert a BSONArray to a JSON string.
 *
 * @param arr    the array to convert.
 * @param format formatting mode for the generated JSON; defaults to Strict
 *               (strict RFC format).
 * @param pretty defaults to false.
 *               NOTE(review): presumably enables pretty-printed output --
 *               confirm against the implementation.
 * @return the JSON text.
 */
68 MONGO_CLIENT_API std::string tojson(const BSONArray& arr,
69  JsonStringFormat format = Strict,
70  bool pretty = false);
71 
/**
 * Convert a BSONObj to a JSON string.
 *
 * @param obj    the object to convert.
 * @param format formatting mode for the generated JSON; defaults to Strict
 *               (strict RFC format).
 * @param pretty defaults to false.
 *               NOTE(review): presumably enables pretty-printed output --
 *               confirm against the implementation.
 * @return the JSON text.
 */
79 MONGO_CLIENT_API std::string tojson(const BSONObj& obj,
80  JsonStringFormat format = Strict,
81  bool pretty = false);
82 
/**
 * Parser class.
 *
 * Each grammar production documented in the comments below has a matching
 * member function that returns a Status and takes the field name plus the
 * BSONObjBuilder that receives output. The parser keeps a cursor (_input)
 * into a null terminated input buffer (_buf .. _input_end).
 */
88 class JParse {
89 public:
    // NOTE(review): presumably the parser reads directly from str's buffer
    // (the members below are raw pointers) -- confirm that str must outlive
    // the parser.
90  explicit JParse(const StringData& str);
91 
92  /*
93  * Notation: All-uppercase symbols denote non-terminals; all other
94  * symbols are literals.
95  */
96 
97  /*
98  * VALUE :
99  * STRING
100  * | NUMBER
101  * | NUMBERINT
102  * | NUMBERLONG
103  * | OBJECT
104  * | ARRAY
105  *
106  * | true
107  * | false
108  * | null
109  * | undefined
110  *
111  * | NaN
112  * | Infinity
113  * | -Infinity
114  *
115  * | DATE
116  * | TIMESTAMP
117  * | REGEX
118  * | OBJECTID
119  * | DBREF
120  *
121  * | new CONSTRUCTOR
122  */
123 private:
124  Status value(const StringData& fieldName, BSONObjBuilder&);
125 
126  /*
127  * OBJECT :
128  * {}
129  * | { MEMBERS }
130  * | SPECIALOBJECT
131  *
132  * MEMBERS :
133  * PAIR
134  * | PAIR , MEMBERS
135  *
136  * PAIR :
137  * FIELD : VALUE
138  *
139  * SPECIALOBJECT :
140  * OIDOBJECT
141  * | BINARYOBJECT
142  * | DATEOBJECT
143  * | TIMESTAMPOBJECT
144  * | REGEXOBJECT
145  * | REFOBJECT
146  * | UNDEFINEDOBJECT
147  * | NUMBERLONGOBJECT
148  * | MINKEYOBJECT
149  * | MAXKEYOBJECT
150  *
151  */
152 public:
    // Parses the OBJECT production described above.
    // NOTE(review): the meaning of subObj (default true) is not visible
    // here -- presumably it selects whether the object is appended as a
    // nested sub-object under fieldName; confirm against the implementation.
153  Status object(const StringData& fieldName, BSONObjBuilder&, bool subObj = true);
    // NOTE(review): presumably the top-level entry point that parses the
    // whole input into builder -- confirm against the implementation.
154  Status parse(BSONObjBuilder& builder);
    // NOTE(review): presumably reports whether the input being parsed is a
    // JSON array -- confirm against the implementation.
155  bool isArray();
156 
157 private:
158  /* The following functions are called with the '{' and the first
159  * field already parsed since they are both implied given the
160  * context. */
161  /*
162  * OIDOBJECT :
163  * { FIELD("$oid") : <24 character hex string> }
164  */
165  Status objectIdObject(const StringData& fieldName, BSONObjBuilder&);
166 
167  /*
168  * BINARYOBJECT :
169  * { FIELD("$binary") : <base64 representation of a binary string>,
170  * FIELD("$type") : <hexadecimal representation of a single byte
171  * indicating the data type> }
172  */
173  Status binaryObject(const StringData& fieldName, BSONObjBuilder&);
174 
175  /*
176  * DATEOBJECT :
177  * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> }
178  */
179  Status dateObject(const StringData& fieldName, BSONObjBuilder&);
180 
181  /*
182  * TIMESTAMPOBJECT :
183  * { FIELD("$timestamp") : {
184  * FIELD("t") : <32 bit unsigned integer for seconds since epoch>,
185  * FIELD("i") : <32 bit unsigned integer for the increment> } }
186  */
187  Status timestampObject(const StringData& fieldName, BSONObjBuilder&);
188 
189  /*
190  * NOTE: the rules for the body of the regex are different here,
191  * since it is quoted instead of surrounded by slashes.
192  * REGEXOBJECT :
193  * { FIELD("$regex") : <string representing body of regex> }
194  * | { FIELD("$regex") : <string representing body of regex>,
195  * FIELD("$options") : <string representing regex options> }
196  */
197  Status regexObject(const StringData& fieldName, BSONObjBuilder&);
198 
199  /*
200  * REFOBJECT :
201  * { FIELD("$ref") : <string representing collection name>,
202  * FIELD("$id") : <24 character hex string> }
203  * | { FIELD("$ref") : STRING , FIELD("$id") : OBJECTID }
204  * | { FIELD("$ref") : STRING , FIELD("$id") : OIDOBJECT }
205  */
206  Status dbRefObject(const StringData& fieldName, BSONObjBuilder&);
207 
208  /*
209  * UNDEFINEDOBJECT :
210  * { FIELD("$undefined") : true }
211  */
212  Status undefinedObject(const StringData& fieldName, BSONObjBuilder&);
213 
214  /*
215  * NUMBERLONGOBJECT :
216  * { FIELD("$numberLong") : "<number>" }
217  */
218  Status numberLongObject(const StringData& fieldName, BSONObjBuilder&);
219 
220  /*
221  * MINKEYOBJECT :
222  * { FIELD("$minKey") : 1 }
223  */
224  Status minKeyObject(const StringData& fieldName, BSONObjBuilder& builder);
225 
226  /*
227  * MAXKEYOBJECT :
228  * { FIELD("$maxKey") : 1 }
229  */
230  Status maxKeyObject(const StringData& fieldName, BSONObjBuilder& builder);
231 
232  /*
233  * ARRAY :
234  * []
235  * | [ ELEMENTS ]
236  *
237  * ELEMENTS :
238  * VALUE
239  * | VALUE , ELEMENTS
240  */
241  Status array(const StringData& fieldName, BSONObjBuilder&, bool subObj = true);
242 
243  /*
244  * NOTE: Currently only Date can be preceded by the "new" keyword
245  * CONSTRUCTOR :
246  * DATE
247  */
248  Status constructor(const StringData& fieldName, BSONObjBuilder&);
249 
250  /* The following functions only parse the body of the constructor
251  * between the parentheses, not including the constructor name */
252  /*
253  * DATE :
254  * Date( <64 bit signed integer for milliseconds since epoch> )
255  */
256  Status date(const StringData& fieldName, BSONObjBuilder&);
257 
258  /*
259  * TIMESTAMP :
260  * Timestamp( <32 bit unsigned integer for seconds since epoch>,
261  * <32 bit unsigned integer for the increment> )
262  */
263  Status timestamp(const StringData& fieldName, BSONObjBuilder&);
264 
265  /*
266  * OBJECTID :
267  * ObjectId( <24 character hex string> )
268  */
269  Status objectId(const StringData& fieldName, BSONObjBuilder&);
270 
271  /*
272  * NUMBERLONG :
273  * NumberLong( <number> )
274  */
275  Status numberLong(const StringData& fieldName, BSONObjBuilder&);
276 
277  /*
278  * NUMBERINT :
279  * NumberInt( <number> )
280  */
281  Status numberInt(const StringData& fieldName, BSONObjBuilder&);
282 
283  /*
284  * DBREF :
285  * Dbref( <namespace string> , <24 character hex string> )
286  */
287  Status dbRef(const StringData& fieldName, BSONObjBuilder&);
288 
289  /*
290  * REGEX :
291  * / REGEXCHARS / REGEXOPTIONS
292  *
293  * REGEXCHARS :
294  * REGEXCHAR
295  * | REGEXCHAR REGEXCHARS
296  *
297  * REGEXCHAR :
298  * any-Unicode-character-except-/-or-\-or-CONTROLCHAR
299  * | \"
300  * | \'
301  * | \\
302  * | \/
303  * | \b
304  * | \f
305  * | \n
306  * | \r
307  * | \t
308  * | \v
309  * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
310  * | \any-Unicode-character-except-x-or-[0-7]
311  *
312  * REGEXOPTIONS :
313  * REGEXOPTION
314  * | REGEXOPTION REGEXOPTIONS
315  *
316  * REGEXOPTION :
317  * g | i | m | s
318  */
319  Status regex(const StringData& fieldName, BSONObjBuilder&);
320  Status regexPat(std::string* result);
321  Status regexOpt(std::string* result);
322  Status regexOptCheck(const StringData& opt);
323 
324  /*
325  * NUMBER :
326  *
327  * NOTE: Number parsing is based on standard library functions, not
328  * necessarily on the JSON numeric grammar.
329  *
330  * Number as value - strtoll and strtod
331  * Date - strtoll
332  * Timestamp - strtoul for both timestamp and increment and '-'
333  * before a number explicitly disallowed
334  */
335  Status number(const StringData& fieldName, BSONObjBuilder&);
336 
337  /*
338  * FIELD :
339  * STRING
340  * | [a-zA-Z$_] FIELDCHARS
341  *
342  * FIELDCHARS :
343  * [a-zA-Z0-9$_]
344  * | [a-zA-Z0-9$_] FIELDCHARS
345  */
346  Status field(std::string* result);
347 
348  /*
349  * STRING :
350  * " "
351  * | ' '
352  * | " CHARS "
353  * | ' CHARS '
354  */
355  Status quotedString(std::string* result);
356 
357  /*
358  * CHARS :
359  * CHAR
360  * | CHAR CHARS
361  *
362  * Note: " or ' may be allowed depending on whether the string is
363  * double or single quoted
364  *
365  * CHAR :
366  * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR
367  * | \"
368  * | \'
369  * | \\
370  * | \/
371  * | \b
372  * | \f
373  * | \n
374  * | \r
375  * | \t
376  * | \v
377  * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
378  * | \any-Unicode-character-except-x-or-[0-9]
379  *
380  * HEXDIGIT : [0..9a..fA..F]
381  *
382  * per http://www.ietf.org/rfc/rfc4627.txt, control characters are
383  * (U+0000 through U+001F). U+007F is not mentioned as a control
384  * character.
385  * CONTROLCHAR : [0x00..0x1F]
386  *
387  * If there is not an error, result will contain a null terminated
388  * string, but there is no guarantee that it will not contain other
389  * null characters.
390  */
391  Status chars(std::string* result, const char* terminalSet, const char* allowedSet = NULL);
392 
    // NOTE(review): presumably converts the two bytes decoded from a
    // "\u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT" escape (see CHAR above) into
    // their UTF-8 encoding -- confirm against the implementation.
399  std::string encodeUTF8(unsigned char first, unsigned char second) const;
400 
    // NOTE(review): presumably checks whether token is next in the input
    // without moving the cursor -- confirm against the implementation.
407  inline bool peekToken(const char* token);
408 
    // NOTE(review): presumably checks whether token is next in the input
    // and consumes it on success -- confirm against the implementation.
415  inline bool readToken(const char* token);
416 
    // NOTE(review): presumably the shared implementation behind peekToken()
    // and readToken(); advance (default true) looks like it selects whether
    // the cursor moves past a matched token -- confirm.
423  bool readTokenImpl(const char* token, bool advance = true);
424 
    // NOTE(review): presumably tries to read the given field name from the
    // input and reports whether it was found -- confirm.
429  bool readField(const StringData& field);
430 
    // NOTE(review): presumably reports whether matchChar occurs in the
    // character set matchSet -- confirm.
435  bool match(char matchChar, const char* matchSet) const;
436 
    // NOTE(review): presumably true when the argument consists only of
    // HEXDIGIT characters -- confirm.
440  bool isHexString(const StringData&) const;
441 
    // NOTE(review): presumably true when the argument consists only of
    // base64 characters -- confirm.
446  bool isBase64String(const StringData&) const;
447 
    // NOTE(review): presumably builds the error Status returned by the
    // parsing functions from msg -- confirm what context it adds.
452  Status parseError(const StringData& msg);
453 
454 public:
    // Returns how far the cursor (_input) has advanced from the start of the
    // input buffer (_buf), in bytes. The pointer difference is returned as
    // an int.
455  inline int offset() {
456  return (_input - _buf);
457  }
458 
459 private:
460  /*
461  * _buf - start of our input buffer
462  * _input - cursor we advance in our input buffer
463  * _input_end - sentinel for the end of our input buffer
464  *
465  * _buf is the null terminated buffer containing the JSON string we
466  * are parsing. _input_end points to the null byte at the end of
467  * the buffer. strtoll, strtol, and strtod will access the null
468  * byte at the end of the buffer because they are assuming a c-style
469  * string.
470  */
471  const char* const _buf;
472  const char* _input;
473  const char* const _input_end;
474 };
475 
476 } // namespace mongo
JsonStringFormat
Formatting mode for generating JSON from BSON.
Definition: oid.h:205
Status represents an error state or the absence thereof.
Definition: status.h:50
MONGO_CLIENT_API BSONObj MONGO_CLIENT_FUNC fromjson(const std::string &str)
Create a BSONObj from a JSON string (see http://www.json.org and http://www.ietf.org/rfc/rfc4627.txt).
A StringData object wraps a 'const string&' or a 'const char*' without copying its contents...
Definition: string_data.h:43
Utility functions for parsing numbers from strings.
Definition: compare_numbers.h:20
Parser class.
Definition: json.h:88
strict RFC format
Definition: oid.h:207
Utility for creating a BSONObj.
Definition: bsonobjbuilder.h:53
MONGO_CLIENT_API bool isArray(const StringData &str)
Tests whether the JSON string is an Array.
MONGO_CLIENT_API std::string tojson(const BSONArray &arr, JsonStringFormat format=Strict, bool pretty=false)
Convert a BSONArray to a JSON string.