MongoDB C++ Driver  legacy-1.0.5
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
json.h
1 /* Copyright 2014 MongoDB Inc.
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #pragma once
17 
18 #include <string>
19 
20 #include "mongo/bson/bsonobj.h"
21 #include "mongo/base/status.h"
22 #include "mongo/client/export_macros.h"
23 
24 namespace mongo {
25  /* Create a BSONObj from a JSON string (JSON as described at http://www.json.org and http://www.ietf.org/rfc/rfc4627.txt). str is the JSON text. */
39  MONGO_CLIENT_API BSONObj MONGO_CLIENT_FUNC fromjson(const std::string& str);
40  /* Overload of fromjson taking a C string. len (default NULL) is presumably an optional out-parameter receiving the length of the JSON text that was consumed -- TODO confirm against json.cpp. */
42  MONGO_CLIENT_API BSONObj MONGO_CLIENT_FUNC fromjson(const char* str, int* len=NULL);
43  /* Tests whether the JSON string str is an array. */
59  MONGO_CLIENT_API bool isArray(const StringData& str);
60  /* Convert a BSONArray to a JSON string. format defaults to Strict (the strict RFC format); pretty defaults to false and presumably enables indented, human-readable output -- TODO confirm. */
68  MONGO_CLIENT_API std::string tojson(
69  const BSONArray& arr,
70  JsonStringFormat format = Strict,
71  bool pretty = false
72  );
73  /* Overload of tojson taking a BSONObj; presumably converts obj to a JSON string -- TODO confirm. format defaults to Strict and pretty defaults to false. */
81  MONGO_CLIENT_API std::string tojson(
82  const BSONObj& obj,
83  JsonStringFormat format = Strict,
84  bool pretty = false
85  );
86  /* Parser class. Keeps a cursor (_input) into a null-terminated JSON buffer (_buf); the member functions are named after the grammar productions documented in the comments below and return a Status. */
92  class JParse {
93  public:
94  explicit JParse(const StringData& str); // str: presumably the JSON text to parse -- confirm in json.cpp
95 
96  /*
97  * Notation: All-uppercase symbols denote non-terminals; all other
98  * symbols are literals.
99  */
100 
101  /*
102  * VALUE :
103  * STRING
104  * | NUMBER
105  * | NUMBERINT
106  * | NUMBERLONG
107  * | OBJECT
108  * | ARRAY
109  *
110  * | true
111  * | false
112  * | null
113  * | undefined
114  *
115  * | NaN
116  * | Infinity
117  * | -Infinity
118  *
119  * | DATE
120  * | TIMESTAMP
121  * | REGEX
122  * | OBJECTID
123  * | DBREF
124  *
125  * | new CONSTRUCTOR
126  */
127  private:
128  Status value(const StringData& fieldName, BSONObjBuilder&);
129 
130  /*
131  * OBJECT :
132  * {}
133  * | { MEMBERS }
134  * | SPECIALOBJECT
135  *
136  * MEMBERS :
137  * PAIR
138  * | PAIR , MEMBERS
139  *
140  * PAIR :
141  * FIELD : VALUE
142  *
143  * SPECIALOBJECT :
144  * OIDOBJECT
145  * | BINARYOBJECT
146  * | DATEOBJECT
147  * | TIMESTAMPOBJECT
148  * | REGEXOBJECT
149  * | REFOBJECT
150  * | UNDEFINEDOBJECT
151  * | NUMBERLONGOBJECT
152  * | MINKEYOBJECT
153  * | MAXKEYOBJECT
154  *
155  */
156  public:
157  Status object(const StringData& fieldName, BSONObjBuilder&, bool subObj=true);
158  Status parse(BSONObjBuilder& builder); // presumably the top-level entry point that parses the input into builder -- confirm
159  bool isArray(); // presumably true when the input is a JSON array (cf. the free function mongo::isArray) -- confirm
160 
161  private:
162  /* The following functions are called with the '{' and the first
163  * field already parsed since they are both implied given the
164  * context. */
165  /*
166  * OIDOBJECT :
167  * { FIELD("$oid") : <24 character hex string> }
168  */
169  Status objectIdObject(const StringData& fieldName, BSONObjBuilder&);
170 
171  /*
172  * BINARYOBJECT :
173  * { FIELD("$binary") : <base64 representation of a binary string>,
174  * FIELD("$type") : <hexadecimal representation of a single byte
175  * indicating the data type> }
176  */
177  Status binaryObject(const StringData& fieldName, BSONObjBuilder&);
178 
179  /*
180  * DATEOBJECT :
181  * { FIELD("$date") : <64 bit signed integer for milliseconds since epoch> }
182  */
183  Status dateObject(const StringData& fieldName, BSONObjBuilder&);
184 
185  /*
186  * TIMESTAMPOBJECT :
187  * { FIELD("$timestamp") : {
188  * FIELD("t") : <32 bit unsigned integer for seconds since epoch>,
189  * FIELD("i") : <32 bit unsigned integer for the increment> } }
190  */
191  Status timestampObject(const StringData& fieldName, BSONObjBuilder&);
192 
193  /*
194  * NOTE: the rules for the body of the regex are different here,
195  * since it is quoted instead of surrounded by slashes.
196  * REGEXOBJECT :
197  * { FIELD("$regex") : <string representing body of regex> }
198  * | { FIELD("$regex") : <string representing body of regex>,
199  * FIELD("$options") : <string representing regex options> }
200  */
201  Status regexObject(const StringData& fieldName, BSONObjBuilder&);
202 
203  /*
204  * REFOBJECT :
205  * { FIELD("$ref") : <string representing collection name>,
206  * FIELD("$id") : <24 character hex string> }
207  * | { FIELD("$ref") : STRING , FIELD("$id") : OBJECTID }
208  * | { FIELD("$ref") : STRING , FIELD("$id") : OIDOBJECT }
209  */
210  Status dbRefObject(const StringData& fieldName, BSONObjBuilder&);
211 
212  /*
213  * UNDEFINEDOBJECT :
214  * { FIELD("$undefined") : true }
215  */
216  Status undefinedObject(const StringData& fieldName, BSONObjBuilder&);
217 
218  /*
219  * NUMBERLONGOBJECT :
220  * { FIELD("$numberLong") : "<number>" }
221  */
222  Status numberLongObject(const StringData& fieldName, BSONObjBuilder&);
223 
224  /*
225  * MINKEYOBJECT :
226  * { FIELD("$minKey") : 1 }
227  */
228  Status minKeyObject(const StringData& fieldName, BSONObjBuilder& builder);
229 
230  /*
231  * MAXKEYOBJECT :
232  * { FIELD("$maxKey") : 1 }
233  */
234  Status maxKeyObject(const StringData& fieldName, BSONObjBuilder& builder);
235 
236  /*
237  * ARRAY :
238  * []
239  * | [ ELEMENTS ]
240  *
241  * ELEMENTS :
242  * VALUE
243  * | VALUE , ELEMENTS
244  */
245  Status array(const StringData& fieldName, BSONObjBuilder&, bool subObj=true);
246 
247  /*
248  * NOTE: Currently only Date can be preceded by the "new" keyword
249  * CONSTRUCTOR :
250  * DATE
251  */
252  Status constructor(const StringData& fieldName, BSONObjBuilder&);
253 
254  /* The following functions only parse the body of the constructor
255  * between the parentheses, not including the constructor name */
256  /*
257  * DATE :
258  * Date( <64 bit signed integer for milliseconds since epoch> )
259  */
260  Status date(const StringData& fieldName, BSONObjBuilder&);
261 
262  /*
263  * TIMESTAMP :
264  * Timestamp( <32 bit unsigned integer for seconds since epoch>,
265  * <32 bit unsigned integer for the increment> )
266  */
267  Status timestamp(const StringData& fieldName, BSONObjBuilder&);
268 
269  /*
270  * OBJECTID :
271  * ObjectId( <24 character hex string> )
272  */
273  Status objectId(const StringData& fieldName, BSONObjBuilder&);
274 
275  /*
276  * NUMBERLONG :
277  * NumberLong( <number> )
278  */
279  Status numberLong(const StringData& fieldName, BSONObjBuilder&);
280 
281  /*
282  * NUMBERINT :
283  * NumberInt( <number> )
284  */
285  Status numberInt(const StringData& fieldName, BSONObjBuilder&);
286 
287  /*
288  * DBREF :
289  * Dbref( <namespace string> , <24 character hex string> )
290  */
291  Status dbRef(const StringData& fieldName, BSONObjBuilder&);
292 
293  /*
294  * REGEX :
295  * / REGEXCHARS / REGEXOPTIONS
296  *
297  * REGEXCHARS :
298  * REGEXCHAR
299  * | REGEXCHAR REGEXCHARS
300  *
301  * REGEXCHAR :
302  * any-Unicode-character-except-/-or-\-or-CONTROLCHAR
303  * | \"
304  * | \'
305  * | \\
306  * | \/
307  * | \b
308  * | \f
309  * | \n
310  * | \r
311  * | \t
312  * | \v
313  * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
314  * | \any-Unicode-character-except-x-or-[0-7]
315  *
316  * REGEXOPTIONS :
317  * REGEXOPTION
318  * | REGEXOPTION REGEXOPTIONS
319  *
320  * REGEXOPTION :
321  * g | i | m | s
322  */
323  Status regex(const StringData& fieldName, BSONObjBuilder&);
324  Status regexPat(std::string* result);
325  Status regexOpt(std::string* result);
326  Status regexOptCheck(const StringData& opt);
327 
328  /*
329  * NUMBER :
330  *
331  * NOTE: Number parsing is based on standard library functions, not
332  * necessarily on the JSON numeric grammar.
333  *
334  * Number as value - strtoll and strtod
335  * Date - strtoll
336  * Timestamp - strtoul for both timestamp and increment and '-'
337  * before a number explicitly disallowed
338  */
339  Status number(const StringData& fieldName, BSONObjBuilder&);
340 
341  /*
342  * FIELD :
343  * STRING
344  * | [a-zA-Z$_] FIELDCHARS
345  *
346  * FIELDCHARS :
347  * [a-zA-Z0-9$_]
348  * | [a-zA-Z0-9$_] FIELDCHARS
349  */
350  Status field(std::string* result);
351 
352  /*
353  * STRING :
354  * " "
355  * | ' '
356  * | " CHARS "
357  * | ' CHARS '
358  */
359  Status quotedString(std::string* result);
360 
361  /*
362  * CHARS :
363  * CHAR
364  * | CHAR CHARS
365  *
366  * Note: " or ' may be allowed depending on whether the string is
367  * double or single quoted
368  *
369  * CHAR :
370  * any-Unicode-character-except-"-or-'-or-\-or-CONTROLCHAR
371  * | \"
372  * | \'
373  * | \\
374  * | \/
375  * | \b
376  * | \f
377  * | \n
378  * | \r
379  * | \t
380  * | \v
381  * | \u HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
382  * | \any-Unicode-character-except-x-or-[0-9]
383  *
384  * HEXDIGIT : [0..9a..fA..F]
385  *
386  * per http://www.ietf.org/rfc/rfc4627.txt, control characters are
387  * (U+0000 through U+001F). U+007F is not mentioned as a control
388  * character.
389  * CONTROLCHAR : [0x00..0x1F]
390  *
391  * If there is not an error, result will contain a null terminated
392  * string, but there is no guarantee that it will not contain other
393  * null characters.
394  */
395  Status chars(std::string* result, const char* terminalSet, const char* allowedSet=NULL);
396  /* NOTE(review): the original doc comment is absent from this listing. Presumably returns the UTF-8 encoding of the two-byte code point given by first/second (cf. the four-hex-digit unicode escape in CHAR) -- confirm in json.cpp. */
403  std::string encodeUTF8(unsigned char first, unsigned char second) const;
404  /* Presumably tests whether token comes next in the input without moving the cursor _input -- confirm in json.cpp. */
411  inline bool peekToken(const char* token);
412  /* Presumably tests whether token comes next in the input and moves the cursor _input past it on a match -- confirm in json.cpp. */
419  inline bool readToken(const char* token);
420  /* Token-matching helper; advance (default true) presumably controls whether the cursor _input moves past a matched token -- confirm in json.cpp. */
427  bool readTokenImpl(const char* token, bool advance=true);
428  /* Presumably true when the next field name in the input equals field -- confirm in json.cpp. */
433  bool readField(const StringData& field);
434  /* Presumably true when matchChar occurs in matchSet; the result for a NULL matchSet is not visible here -- confirm in json.cpp. */
439  bool match(char matchChar, const char* matchSet) const;
440  /* Presumably true when every character of the argument is a hexadecimal digit -- confirm in json.cpp. */
444  bool isHexString(const StringData&) const;
445  /* Presumably true when every character of the argument is a valid base64 character -- confirm in json.cpp. */
450  bool isBase64String(const StringData&) const;
451  /* Presumably builds a failure Status carrying msg -- confirm in json.cpp. */
456  Status parseError(const StringData& msg);
457  public:
458  inline int offset() { return (_input - _buf); } // distance of the cursor _input from the buffer start _buf; NOTE(review): the pointer difference is narrowed to int
459 
460  private:
461  /*
462  * _buf - start of our input buffer
463  * _input - cursor we advance in our input buffer
464  * _input_end - sentinel for the end of our input buffer
465  *
466  * _buf is the null terminated buffer containing the JSON string we
467  * are parsing. _input_end points to the null byte at the end of
468  * the buffer. strtoll, strtol, and strtod will access the null
469  * byte at the end of the buffer because they are assuming a c-style
470  * string.
471  */
472  const char* const _buf;
473  const char* _input;
474  const char* const _input_end;
475  };
476 
477 } // namespace mongo
JsonStringFormat
Formatting mode for generating JSON from BSON.
Definition: oid.h:204
MONGO_CLIENT_API BSONObj MONGO_CLIENT_FUNC fromjson(const std::string &str)
Create a BSONObj from a JSON string (see http://www.json.org and http://www.ietf.org/rfc/rfc4627.txt).
the main MongoDB namespace
Definition: bulk_operation_builder.h:24
Parser class.
Definition: json.h:92
strict RFC format
Definition: oid.h:206
Utility for creating a BSONObj.
Definition: bsonobjbuilder.h:53
MONGO_CLIENT_API bool isArray(const StringData &str)
Tests whether the JSON string is an Array.
MONGO_CLIENT_API Status(MONGO_CLIENT_FUNC *saslClientAuthenticate)(DBClientWithCommands *client
Attempts to authenticate "client" using the SASL protocol.
MONGO_CLIENT_API std::string tojson(const BSONArray &arr, JsonStringFormat format=Strict, bool pretty=false)
Convert a BSONArray to a JSON string.