// privacore-open-source-searc.../Json.cpp
// Ivan Skytte Jørgensen beeddcf35d Got rid of gb-include.h
// 2018-07-26 17:29:51 +02:00
//
// 556 lines
// 14 KiB
// C++

#include "Json.h"
#include "Log.h"
#include "Errno.h"
#include "utf8_fast.h"
#include "fctypes.h"
#include <stdlib.h>
// Carves a new JsonItem out of m_sb at the current write position, links it
// to the end of the item list and attaches it to the parent currently on top
// of the parse stack (if any).
// Returns NULL (after logging) if m_sb has fewer than sizeof(JsonItem) bytes
// available. The new item has type JT_NULL; its name fields are only set here
// when a parent exists, in which case they are copied from that parent.
class JsonItem *Json::addNewItem () {
	// the new item will live at the current end of the buffer
	JsonItem *item = (JsonItem *)m_sb.getBufPtr();

	// refuse to write past the space the buffer has available
	const int32_t itemSize = (int32_t)sizeof(JsonItem);
	if ( m_sb.getAvail() < itemSize ) {
		log("json: preventing buffer breach");
		return NULL;
	}

	// claim the bytes for the item
	m_sb.incrementLength(sizeof(JsonItem));

	// append to the doubly linked list of items
	if ( m_prev ) {
		m_prev->m_next = item;
	}
	item->m_prev = m_prev;
	item->m_next = NULL;
	m_prev = item;

	// no value yet
	item->m_type = JT_NULL;

	// the enclosing object/array, if any, is on top of the stack
	JsonItem *top = ( m_stackPtr > 0 ) ? m_stack[m_stackPtr-1] : NULL;
	item->m_parent = top;

	// . start out with the parent's name, e.g. for elements of an array
	// . callers may overwrite the name afterwards
	if ( top ) {
		item->m_name    = top->m_name;
		item->m_nameLen = top->m_nameLen;
	}

	return item;
}
// Returns the first JsonItem stored in m_sb, or NULL when m_sb is empty.
JsonItem *Json::getFirstItem ( ) {
	const bool haveItems = ( m_sb.length() > 0 );
	// items are stored from the very start of the buffer
	return haveItems ? (JsonItem *)m_sb.getBufStart() : NULL;
}
// Returns the first item of type JT_STRING, JT_NUMBER or JT_ARRAY whose name
// equals "name" (exact, case-sensitive strcmp), walking the item list in
// order starting at getFirstItem(). Items of any other type are ignored.
// Returns NULL if "name" is NULL or no such item exists.
JsonItem *Json::getItem ( char *name ) {
	// strcmp() on a NULL pointer is undefined behavior
	if ( ! name ) return NULL;
	// traverse the json
	for ( JsonItem *ji = getFirstItem() ; ji ; ji = ji->m_next ) {
		// only strings, numbers and arrays can match
		if ( ji->m_type != JT_STRING &&
		     ji->m_type != JT_NUMBER &&
		     ji->m_type != JT_ARRAY )
			continue;
		// . a nameless item (e.g. a top-level array) can never match,
		//   but it must not end the search: named items may follow it
		const char *itemName = ji->m_name;
		if ( ! itemName ) continue;
		if ( strcmp(itemName,name) == 0 ) return ji;
	}
	return NULL;
}
#include "Mem.h" // strlen()
// Parses the NUL-terminated text "json" into a linked list of JsonItems that
// are stored in m_sb. Field names and decoded values are stored in m_sb as
// well, each terminated by a \0.
//
// . one item is created per '{' (JT_OBJECT), per '[' (JT_ARRAY), per quoted
//   string value (JT_STRING) and per number or true/false literal
//   (JT_NUMBER; true is stored as 1 and false as 0)
// . a quoted string followed by ':' is a field name; it becomes the name of
//   the items that follow it
// . returns the first item, or NULL if "json" is NULL, no items were
//   produced, buffer space could not be reserved, an item could not be
//   added, nesting exceeds MAXJSONPARENTS, or a value could not be decoded
// . returns NULL with g_errno set to EBADJSONPARSER if a field name is the
//   first thing in the text or a quoted string value has no field name
JsonItem *Json::parseJsonStringIntoJsonItems (const char *json ) {
	// reset the parser state
	m_prev = NULL;
	m_stackPtr = 0;
	m_sb.purge();
	if ( ! json ) return NULL;

	// how much space will we need to avoid any reallocs?
	const char *p = json;
	bool inQuote = false;
	int32_t need = 0;
	for ( ; *p ; p++ ) {
		// ignore any escaped char. also \x1234
		if ( *p == '\\' ) {
			if ( p[1] ) p++;
			continue;
		}
		if ( *p == '\"' )
			inQuote = ! inQuote;
		if ( inQuote )
			continue;
		// each of these may introduce a new item
		if ( *p == '{' ||
		     *p == ',' ||
		     *p == '[' ||
		     *p == ':' )
			// +1 for null terminating string of each item
			need += sizeof(JsonItem) +1;
	}
	// plus the length of the string to store it decoded etc.
	need += p - json;
	// plus a \0 for the value and a \0 for the name of each jsonitem
	need += 2;
	// prevent cores for now
	need += 10;
	// . to prevent safebuf from reallocating do this
	// . original author's note: safeMemcpy() calls reserve(m_length+len)
	//   and reserve tries to alloc m_length + (m_length+len) so since
	//   m_length+len should never be more than "need" we need to
	//   double up here
	need *= 2;
	// this should be enough
	if ( ! m_sb.reserve ( need ) ) return NULL;

	// remember where the buffer is so we can detect a realloc at the end
	char *mem = m_sb.getBufStart();

	// byte size of the character at "p"; how far the scan advances
	int32_t size = 0;
	// the "name cursor": field name that applies to the next value
	char *NAME = NULL;
	int32_t NAMELEN = 0;
	JsonItem *ji = NULL;

	// reset p
	p = json;
	// json maybe bad utf8 causing us to miss the \0 char, so use "pend"
	const char *pend = json + strlen(json);

	// scan
	for ( ; p < pend ; p += size ) {
		// get size
		size = getUtf8CharSize ( p );
		// skip spaces
		if ( is_wspace_a (*p) )
			continue;
		// skip commas
		if ( *p == ',' ) continue;

		// . a '{' starts a json object, like in
		//   \"stats\":{\"fetchTime\":2069,....}
		// . the object item becomes the parent of the items inside
		//   the {}'s
		if ( *p == '{' ) {
			ji = addNewItem();
			if ( ! ji ) return NULL;
			ji->m_type = JT_OBJECT;
			// set the name
			ji->m_name = NAME;
			ji->m_nameLen = NAMELEN;
			ji->m_valueLen = 0;
			// this goes on the stack
			if ( m_stackPtr >= MAXJSONPARENTS ) return NULL;
			m_stack[m_stackPtr++] = ji;
			ji = NULL;
			continue;
		}

		// end of object: pop the stack
		if ( *p == '}' ) {
			// just pop it and restore name cursor
			if ( m_stackPtr > 0 ) {
				JsonItem *px = m_stack[m_stackPtr-1];
				NAME = px->m_name;
				NAMELEN = px->m_nameLen;
				m_stackPtr--;
			}
			continue;
		}

		// array of things?
		if ( *p == '[' ) {
			// make a newitem to put on stack
			ji = addNewItem();
			if ( ! ji ) return NULL;
			ji->m_type = JT_ARRAY;
			// . start of array hack: remember where the array
			//   text begins so its length can be computed at ']'
			ji->m_valueArray = p;
			// set the name
			ji->m_name = NAME;
			ji->m_nameLen = NAMELEN;
			// init to a safe value. should be set at the ']'.
			ji->m_valueLen = 0;
			// this goes on the stack
			if ( m_stackPtr >= MAXJSONPARENTS ) return NULL;
			m_stack[m_stackPtr++] = ji;
			ji = NULL;
			continue;
		}

		// end of array: pop the stack
		if ( *p == ']' ) {
			// just pop it and restore name cursor
			if ( m_stackPtr > 0 ) {
				JsonItem *px = m_stack[m_stackPtr-1];
				NAME = px->m_name;
				NAMELEN = px->m_nameLen;
				// . only array items have m_valueArray set,
				//   so do not touch it when malformed json
				//   closes an object with ']'
				if ( px->m_type == JT_ARRAY ) {
					const char *start =
						(const char *)px->m_valueArray;
					// include ending ']' in length
					px->m_valueLen = p - start + 1;
				}
				m_stackPtr--;
			}
			continue;
		}

		// a quote?
		if ( *p == '\"' ) {
			// find end of quote
			const char *end = p + 1;
			for ( ; *end ; end++ ) {
				// skip two chars if escaped
				if ( *end == '\\' && end[1] ) {
					end++;
					continue;
				}
				// this quote is unescaped then
				if ( *end == '\"' ) break;
			}
			// . step over the closing quote
			// . if the string is unterminated "end" is on the
			//   \0 and we must not read past it
			const char *x = *end ? end + 1 : end;
			// skip spaces
			for ( ; *x && is_wspace_a(*x) ; x++ );
			// define the string
			const char *str = p + 1;
			int32_t slen = end - str;
			// . if a colon follows, it was a field
			if ( *x == ':' ) {
				// we can't be the first thing in the safebuf
				// json must start with { or [ i guess
				// otherwise getFirstItem() won't work!
				if ( m_sb.length() == 0 ) {
					log("json: length is 0");
					g_errno = EBADJSONPARSER;
					return NULL;
				}
				// let's push this now so we can \0 term
				char *savedStr = m_sb.getBufPtr();
				m_sb.safeMemcpy ( str , slen );
				m_sb.pushChar('\0');
				// just set the name cursor
				NAME = savedStr;
				NAMELEN = slen;
			}
			// . otherwise, it was field value, so index it
			// . TODO: later make field names compounded to
			//   better represent nesting?
			// . the NAME check handles json=\"too small\"
			else if ( NAME ) {
				// make a new one in safebuf
				ji = addNewItem();
				if ( ! ji ) return NULL;
				// we are a string
				ji->m_type = JT_STRING;
				// use name cursor
				ji->m_name = NAME;
				ji->m_nameLen = NAMELEN;
				// store decoded string right after jsonitem
				int32_t curr = m_sb.length();
				if ( !m_sb.safeDecodeJSONToUtf8 (str,slen))
					return NULL;
				// store length decoded json
				ji->m_valueLen = m_sb.length() - curr;
				// end with a \0
				m_sb.pushChar('\0');
				// ok, this one is done
				ji = NULL;
			}
			else {
				log("json: fieldless name in json");
				g_errno = EBADJSONPARSER;
				return NULL;
			}
			// skip over the string
			size = 0;
			p = x;
			continue;
		}

		// true or false?
		if ( (*p == 't' && strncmp(p,"true",4)==0) ||
		     (*p == 'f' && strncmp(p,"false",5)==0) ) {
			// make a new one
			ji = addNewItem();
			if ( ! ji ) return NULL;
			// true is 1, false is 0
			const bool isTrue = ( *p == 't' );
			int32_t slen = isTrue ? 4 : 5;
			ji->m_valueLong   = isTrue ? 1 : 0;
			ji->m_value64     = isTrue ? 1 : 0;
			ji->m_valueDouble = isTrue ? 1.0 : 0;
			// copy the literal as a string as well, right
			// after the jsonitem
			int32_t curr = m_sb.length();
			if ( !m_sb.safeDecodeJSONToUtf8 (p,slen))
				return NULL;
			// store length decoded json
			ji->m_valueLen = m_sb.length() - curr;
			// end with a \0
			m_sb.pushChar('\0');
			ji->m_type = JT_NUMBER;
			// use name cursor
			ji->m_name = NAME;
			ji->m_nameLen = NAMELEN;
			ji = NULL;
			// skip over the whole literal
			size = slen;
			continue;
		}

		// if we hit a digit they might not be in quotes like
		// "crawled":123
		if ( is_digit ( *p ) ||
		     // like .123 ?
		     ( *p == '.' && is_digit(p[1]) ) ) {
			// find end of the number
			const char *end = p + 1;
			// . allow '.' for decimal numbers
			// . TODO: allow E for exponent
			for ( ; *end && (is_digit(*end) || *end=='.');end++) ;
			// define the string
			const char *str = p;
			int32_t slen = end - str;
			// make a new one
			ji = addNewItem();
			if ( ! ji ) return NULL;
			// . back up over negative sign?
			// . grow the length too so the stored text keeps
			//   its last digit
			if ( str > json && str[-1] == '-' ) {
				str--;
				slen++;
			}
			// decode
			ji->m_valueLong = atol(str);
			ji->m_value64 = atoll(str);
			ji->m_valueDouble = atof(str);
			// copy the number as a string as well, right
			// after the jsonitem
			int32_t curr = m_sb.length();
			if ( !m_sb.safeDecodeJSONToUtf8 ( str, slen))
				return NULL;
			// store length decoded json
			ji->m_valueLen = m_sb.length() - curr;
			// end with a \0
			m_sb.pushChar('\0');
			ji->m_type = JT_NUMBER;
			// use name cursor
			ji->m_name = NAME;
			ji->m_nameLen = NAMELEN;
			ji = NULL;
			// skip over the number
			size = 0;
			p = end;
			continue;
		}
		// anything else is skipped one character at a time
	}

	// . complain if we had to do a realloc. should never happen but
	//   the original author saw it happen once, so do not core on that.
	char *memEnd = m_sb.getBufStart();
	if ( mem != memEnd )
		log("json: json parser reallocated buffer. inefficient.");

	// return NULL if no json items were found
	if ( m_sb.length() <= 0 ) return NULL;
	return (JsonItem *)m_sb.getBufStart();
}
// Builds this item's dotted compound name, like "meta.twitter.title", into
// "nameBuf" by joining its own name and the names of its ancestors,
// outermost first. Any ':' in the result is replaced by '.'.
// Returns false if writing to nameBuf fails, true otherwise.
bool JsonItem::getCompoundName ( SafeBuf &nameBuf ) {
	// reset, but don't free mem etc. just set m_length to 0
	nameBuf.reset();

	// collect names walking from this item up through its parents
	char *names[20];
	int32_t count = 0;
	char *prevName = NULL;
	for ( JsonItem *node = this ; node ; node = node->m_parent ) {
		char *name = node->m_name;
		// . skip missing and empty names
		// . skip a name that is the very same string as the one
		//   just collected. can happen with arrays, as in
		//   "dupname":[{"a":"b"},{"c":"d"}]
		if ( ! name || ! name[0] || name == prevName ) continue;
		prevName = name;
		names[count++] = name;
		// stop well before the array is full
		if ( count >= 15 ) {
			log("build: too many names in json tag");
			break;
		}
	}

	// innermost name was collected first, so emit from the back
	for ( int32_t i = count - 1 ; i >= 0 ; i-- ) {
		if ( ! nameBuf.safeStrcpy ( names[i] ) ) return false;
		// separate names with periods
		if ( ! nameBuf.pushChar('.') ) return false;
	}

	// drop the trailing period and terminate
	nameBuf.removeLastChar('.');
	if ( ! nameBuf.nullTerm() ) return false;

	// change all :'s in names to .'s since : is reserved!
	for ( char *c = nameBuf.getBufStart() ; *c ; c++ ) {
		if ( *c == ':' ) *c = '.';
	}
	return true;
}
// is this json item in an array of json items?
bool JsonItem::isInArray ( ) {
JsonItem *p = this;//ji;
for ( ; p ; p = p->m_parent ) {
// empty name? it's just a "value item" then, i guess.
//if ( ! p->m_name ) continue;
//if ( ! p->m_name[0] ) continue;
if ( p->m_type == JT_ARRAY ) return true;
}
return false;
}
// Returns this item's value as text and stores its length in *valueLen.
// . JT_STRING items return their own value
// . for other items, if the 32-bit or 64-bit integer value almost equals
//   the double value, that integer is printed into a static buffer which is
//   overwritten by the next such call
// . otherwise the bytes located directly behind this JsonItem are returned,
//   with m_valueLen as the length
char *JsonItem::getValueAsString ( int32_t *valueLen ) {
	// strings need no conversion
	if ( m_type == JT_STRING ) {
		*valueLen = getValueLen();
		return getValue();
	}

	// . shared scratch space for printed integers
	// . original author's note: when this was 64 bytes it overflowed
	//   into s_vbuf in Version.cpp
	static char s_numBuf[256];

	// does the value fit a 32-bit integer?
	const bool isLong = almostEqualDouble((double)m_valueLong, m_valueDouble);
	if ( isLong ) {
		*valueLen = snprintf ( s_numBuf,255,"%" PRId32, m_valueLong );
		return s_numBuf;
	}

	// does it fit a 64-bit integer?
	const bool is64 = almostEqualDouble((double)m_value64, m_valueDouble);
	if ( is64 ) {
		*valueLen = snprintf ( s_numBuf,255,"%" PRId64, m_value64 );
		return s_numBuf;
	}

	// otherwise return the number as it was written in the json
	// because it might have too many digits for printing as a double
	*valueLen = m_valueLen;
	char *self = (char *)this;
	return self + sizeof(JsonItem);
}
// Returns true if the text s[0..slen-1] ends in a '}', ignoring one optional
// trailing \0 and trailing whitespace. Only about the last 30 characters are
// examined. Returns false for a NULL or empty input.
bool endsInCurly ( char *s , int32_t slen ) {
	// nothing to look at; also avoids forming a pointer before "s"
	if ( ! s || slen <= 0 ) return false;
	// last character of the text
	char *e = s + slen - 1;
	// . don't backup more than 30 chars
	// . clamp to the start of the text without ever computing an
	//   address in front of it
	char *m = ( slen > 31 ) ? e - 30 : s;
	// \0?
	if ( e > m && *e == '\0' ) e--;
	// scan backwards, skipping whitespace
	for ( ; e > m && is_wspace_a(*e) ; e-- );
	// should be a } now to be valid json
	return ( e >= m && *e == '}' );
}
// Inserts "keyVal" (a "key":val pair) as the first member of the first
// object in "jsonStr", i.e. right after the first '{'.
// . if the object already has members, ",\n" is inserted in front of the
//   first one so the result stays a valid member list
// . returns false, leaving jsonStr untouched, if keyVal is NULL, there is
//   no '{', the '{' is the last character, or only whitespace follows it
// . returns false if an insert fails; if the separator was inserted but
//   keyVal was not, the separator stays in jsonStr
// . otherwise returns the result of inserting keyVal
bool Json::prependKey(SafeBuf& jsonStr, char* keyVal) {
	if ( ! keyVal ) return false;
	const int32_t brace = jsonStr.indexOf('{');
	const int32_t jsonStrLen = jsonStr.length();
	// no object? try array? fail for now
	if ( brace == -1 || brace == jsonStrLen - 1 ) return false;
	// the insert pos is right behind the '{'
	const int32_t ndx = brace + 1;
	// find if the object had any other keys
	int32_t i = ndx;
	// cast because isspace() is undefined for negative char values
	while ( i < jsonStrLen &&
		isspace(static_cast<unsigned char>(jsonStr[i])) ) i++;
	if ( i == jsonStrLen ) return false;
	if ( jsonStr[i] != '}' ) {
		// . existing members follow, separate them from the new one
		// . bail if that fails, otherwise we would report success
		//   for json that lacks the comma
		if ( ! jsonStr.insert(",\n", i) ) return false;
	} //else we are the only item, no comma
	return jsonStr.insert(keyVal, ndx);
}
// bool Json::printToString(SafeBuf& out, JsonItem* ji = NULL) {
// if(!ji) ji = getFirstItem();
// for ( ; ji ; ji = ji->m_next ) {
// switch (ji->m_type) {
// case JT_NULL:
// out.safeMemcpy("null", 4);
// break;
// case JT_NUMBER:
// int32_t vl;
// char* v = ji->getValueAsString(&vl);
// out.safeMemcpy(v, vl);
// break;
// case JT_STRING:
// int32_t vl;
// char* v = ji->getValueAsString(&vl);
// out.pushChar('"');
// out.safeMemcpy(v, vl);
// out.pushChar('"');
// break;
// case JT_ARRAY:
// // wha? really? I would've thought this would contain
// // jsonitems and not a string
// safeMemcpy(ji->m_valueArray, ji->m_valueArray);
// break;
// case JT_OBJECT:
// out.pushChar('{');
// out.safeMemcpy(v, vl);
// out.pushChar("\"");
// break;
// }
// }
// out->
// }