123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- #import "XPathQuery.h"
- #import <libxml/tree.h>
- #import <libxml/parser.h>
- #import <libxml/HTMLparser.h>
- #import <libxml/xpath.h>
- #import <libxml/xpathInternals.h>
NSDictionary *DictionaryForNode(xmlNodePtr currentNode, NSMutableDictionary *parentResult, BOOL parentContent);
NSArray *PerformXPathQuery(xmlDocPtr doc, NSString *query);

/// Converts a NUL-terminated libxml2 string into an NSString.
/// Returns nil when `cString` is NULL; callers also treat a nil result from
/// the UTF-8 decoding as "no value" and skip the corresponding key.
static NSString *XPQStringFromXMLChar(const xmlChar *cString)
{
    if (cString == NULL) {
        return nil;
    }
    return [NSString stringWithCString:(const char *)cString
                              encoding:NSUTF8StringEncoding];
}

/// Recursively converts a libxml2 node into a dictionary.
///
/// Keys written to the returned dictionary (each only when a value exists):
///   @"nodeName"            node name
///   @"nodeContent"         text content as reported by xmlNodeGetContent
///   @"nodeAttributeArray"  array of dictionaries with @"attributeName",
///                          @"nodeContent" and/or @"attributeContent"
///   @"nodeChildArray"      array of dictionaries for the child nodes
///   @"raw"                 serialized markup produced by xmlNodeDump
///
/// @param currentNode   Node to convert. NULL yields nil.
/// @param parentResult  Dictionary of the enclosing node or attribute, or nil
///                      for a top-level node.
/// @param parentContent When YES and the node is a "text" node with a parent,
///                      the trimmed text is stored in `parentResult` under
///                      @"nodeContent" and nil is returned.
/// @return A dictionary describing the node, or nil when the content was
///         folded into `parentResult` (or `currentNode` is NULL).
NSDictionary *DictionaryForNode(xmlNodePtr currentNode, NSMutableDictionary *parentResult, BOOL parentContent)
{
    if (currentNode == NULL) {
        return nil;
    }

    NSMutableDictionary *resultForNode = [NSMutableDictionary dictionary];

    NSString *nodeName = XPQStringFromXMLChar(currentNode->name);
    if (nodeName != nil) {
        resultForNode[@"nodeName"] = nodeName;
    }

    xmlChar *rawNodeContent = xmlNodeGetContent(currentNode);
    if (rawNodeContent != NULL) {
        NSString *nodeContent = XPQStringFromXMLChar(rawNodeContent);
        // The NSString owns its own copy, so release the libxml2 buffer right
        // away; this keeps the early returns below from leaking it.
        xmlFree(rawNodeContent);

        if ([nodeName isEqualToString:@"text"] && parentResult != nil) {
            if (parentContent) {
                if (nodeContent != nil) {
                    NSCharacterSet *charactersToTrim = [NSCharacterSet whitespaceAndNewlineCharacterSet];
                    parentResult[@"nodeContent"] =
                        [nodeContent stringByTrimmingCharactersInSet:charactersToTrim];
                }
                return nil;
            }
            if (nodeContent != nil) {
                resultForNode[@"nodeContent"] = nodeContent;
            }
            return resultForNode;
        }

        if (nodeContent != nil) {
            resultForNode[@"nodeContent"] = nodeContent;
        }
    }

    // `properties` is only a valid attribute list on element nodes; XPath can
    // hand us attribute or document nodes whose structs store something else
    // at that position.
    if (currentNode->type == XML_ELEMENT_NODE) {
        NSMutableArray *attributeArray = [NSMutableArray array];
        for (xmlAttr *attribute = currentNode->properties; attribute != NULL; attribute = attribute->next) {
            NSMutableDictionary *attributeDictionary = [NSMutableDictionary dictionary];

            NSString *attributeName = XPQStringFromXMLChar(attribute->name);
            if (attributeName != nil) {
                attributeDictionary[@"attributeName"] = attributeName;
            }

            if (attribute->children != NULL) {
                NSDictionary *childDictionary = DictionaryForNode(attribute->children, attributeDictionary, YES);
                if (childDictionary != nil) {
                    attributeDictionary[@"attributeContent"] = childDictionary;
                }
            }

            if ([attributeDictionary count] > 0) {
                [attributeArray addObject:attributeDictionary];
            }
        }
        if ([attributeArray count] > 0) {
            resultForNode[@"nodeAttributeArray"] = attributeArray;
        }
    }

    NSMutableArray *childContentArray = [NSMutableArray array];
    for (xmlNodePtr childNode = currentNode->children; childNode != NULL; childNode = childNode->next) {
        NSDictionary *childDictionary = DictionaryForNode(childNode, resultForNode, NO);
        if (childDictionary != nil) {
            [childContentArray addObject:childDictionary];
        }
    }
    if ([childContentArray count] > 0) {
        resultForNode[@"nodeChildArray"] = childContentArray;
    }

    xmlBufferPtr buffer = xmlBufferCreate();
    if (buffer != NULL) {
        xmlNodeDump(buffer, currentNode->doc, currentNode, 0, 0);
        NSString *rawContent = XPQStringFromXMLChar(xmlBufferContent(buffer));
        if (rawContent != nil) {
            resultForNode[@"raw"] = rawContent;
        }
        xmlBufferFree(buffer);
    }

    return resultForNode;
}
/// Evaluates an XPath expression against a parsed document and converts each
/// matching node with DictionaryForNode.
///
/// @param doc   Parsed libxml2 document. NULL yields nil.
/// @param query XPath expression. nil or a non-NSString object yields nil.
/// @return An array of node dictionaries (possibly empty), or nil when the
///         context cannot be created, the expression cannot be evaluated, or
///         the result carries no node set.
NSArray *PerformXPathQuery(xmlDocPtr doc, NSString *query)
{
    if (doc == NULL || query == nil || ![query isKindOfClass:[NSString class]]) {
        return nil;
    }

    const char *expression = [query cStringUsingEncoding:NSUTF8StringEncoding];
    if (expression == NULL) {
        return nil;
    }

    xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
    if (xpathCtx == NULL) {
        NSLog(@"Unable to create XPath context.");
        return nil;
    }

    xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression((const xmlChar *)expression, xpathCtx);
    if (xpathObj == NULL) {
        NSLog(@"Unable to evaluate XPath.");
        xmlXPathFreeContext(xpathCtx);
        return nil;
    }

    xmlNodeSetPtr nodes = xpathObj->nodesetval;
    if (nodes == NULL) {
        NSLog(@"Nodes was nil.");
        xmlXPathFreeObject(xpathObj);
        xmlXPathFreeContext(xpathCtx);
        return nil;
    }

    NSMutableArray *resultNodes = [NSMutableArray array];
    for (int i = 0; i < nodes->nodeNr; i++) {
        xmlNodePtr node = nodes->nodeTab[i];
        // Namespace results are xmlNs structs stored behind an xmlNodePtr;
        // they do not share xmlNode's layout, so they must not be walked as
        // ordinary nodes.
        if (node == NULL || node->type == XML_NAMESPACE_DECL) {
            continue;
        }
        NSDictionary *nodeDictionary = DictionaryForNode(node, nil, NO);
        if (nodeDictionary != nil) {
            [resultNodes addObject:nodeDictionary];
        }
    }

    xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx);
    return resultNodes;
}
/// Convenience wrapper: runs `query` against the HTML in `document` without
/// specifying an encoding (nil is forwarded to the encoding-aware variant).
NSArray *PerformHTMLXPathQuery(NSData *document, NSString *query)
{
    NSString *unspecifiedEncoding = nil;
    NSArray *matches = PerformHTMLXPathQueryWithEncoding(document, query, unspecifiedEncoding);
    return matches;
}
/// Parses `document` as HTML and evaluates `query` against it.
///
/// @param document Raw HTML bytes.
/// @param query    XPath expression, passed through to PerformXPathQuery.
/// @param encoding Encoding name handed to htmlReadMemory, or nil to pass NULL.
/// @return The result of PerformXPathQuery, or nil when the document has no
///         bytes, is too large for libxml2's int-sized length, or fails to
///         parse.
NSArray *PerformHTMLXPathQueryWithEncoding(NSData *document, NSString *query, NSString *encoding)
{
    const void *bytes = [document bytes];
    NSUInteger length = [document length];

    // htmlReadMemory takes the size as an int; refuse buffers whose length
    // does not survive the conversion instead of parsing a truncated prefix.
    int byteCount = (int)length;
    if (bytes == NULL || byteCount < 0 || (NSUInteger)byteCount != length) {
        NSLog(@"Unable to parse.");
        return nil;
    }

    // Messaging nil returns NULL, which is what the parser expects when no
    // encoding is supplied.
    const char *encodingName = [encoding cStringUsingEncoding:NSUTF8StringEncoding];

    xmlDocPtr doc = htmlReadMemory(bytes, byteCount, "", encodingName,
                                   HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
    if (doc == NULL) {
        NSLog(@"Unable to parse.");
        return nil;
    }

    NSArray *result = PerformXPathQuery(doc, query);
    xmlFreeDoc(doc);

    return result;
}
/// Convenience wrapper: runs `query` against the XML in `document` without
/// specifying an encoding (nil is forwarded to the encoding-aware variant).
NSArray *PerformXMLXPathQuery(NSData *document, NSString *query)
{
    NSString *unspecifiedEncoding = nil;
    NSArray *matches = PerformXMLXPathQueryWithEncoding(document, query, unspecifiedEncoding);
    return matches;
}
/// Parses `document` as XML (with XML_PARSE_RECOVER) and evaluates `query`
/// against it.
///
/// @param document Raw XML bytes.
/// @param query    XPath expression, passed through to PerformXPathQuery.
/// @param encoding Encoding name handed to xmlReadMemory, or nil to pass NULL.
/// @return The result of PerformXPathQuery, or nil when the document has no
///         bytes, is too large for libxml2's int-sized length, or fails to
///         parse.
NSArray *PerformXMLXPathQueryWithEncoding(NSData *document, NSString *query, NSString *encoding)
{
    const void *bytes = [document bytes];
    NSUInteger length = [document length];

    // xmlReadMemory takes the size as an int; refuse buffers whose length
    // does not survive the conversion instead of parsing a truncated prefix.
    int byteCount = (int)length;
    if (bytes == NULL || byteCount < 0 || (NSUInteger)byteCount != length) {
        NSLog(@"Unable to parse.");
        return nil;
    }

    // Messaging nil returns NULL, which is what the parser expects when no
    // encoding is supplied.
    const char *encodingName = [encoding cStringUsingEncoding:NSUTF8StringEncoding];

    xmlDocPtr doc = xmlReadMemory(bytes, byteCount, "", encodingName, XML_PARSE_RECOVER);
    if (doc == NULL) {
        NSLog(@"Unable to parse.");
        return nil;
    }

    NSArray *result = PerformXPathQuery(doc, query);
    xmlFreeDoc(doc);

    return result;
}
|