Line data Source code
1 : /*
2 : * contrib/xml2/xpath.c
3 : *
4 : * Parser interface for DOM-based parser (libxml) rather than
5 : * stream-based SAX-type parser
6 : */
7 : #include "postgres.h"
8 :
9 : #include "access/htup_details.h"
10 : #include "executor/spi.h"
11 : #include "fmgr.h"
12 : #include "funcapi.h"
13 : #include "lib/stringinfo.h"
14 : #include "utils/builtins.h"
15 : #include "utils/xml.h"
16 :
17 : /* libxml includes */
18 :
19 : #include <libxml/xpath.h>
20 : #include <libxml/tree.h>
21 : #include <libxml/xmlmemory.h>
22 : #include <libxml/xmlerror.h>
23 : #include <libxml/parserInternals.h>
24 :
25 0 : PG_MODULE_MAGIC_EXT(
26 : .name = "xml2",
27 : .version = PG_VERSION
28 : );
29 :
30 : /* exported for use by xslt_proc.c */
31 :
32 : PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
33 :
34 : /* workspace for pgxml_xpath() */
35 :
36 : typedef struct
37 : {
38 : xmlDocPtr doctree;
39 : xmlXPathContextPtr ctxt;
40 : xmlXPathObjectPtr res;
41 : } xpath_workspace;
42 :
43 : /* local declarations */
44 :
45 : static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
46 : xmlChar *toptagname, xmlChar *septagname,
47 : xmlChar *plainsep);
48 :
49 : static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
50 : xmlChar *septag, xmlChar *plainsep);
51 :
52 : static xmlChar *pgxml_texttoxmlchar(text *textstring);
53 :
54 : static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath,
55 : PgXmlErrorContext *xmlerrcxt);
56 :
57 : static void cleanup_workspace(xpath_workspace *workspace);
58 :
59 :
60 : /*
61 : * Initialize for xml parsing.
62 : *
63 : * As with the underlying pg_xml_init function, calls to this MUST be followed
64 : * by a PG_TRY block that guarantees that pg_xml_done is called.
65 : */
66 : PgXmlErrorContext *
67 0 : pgxml_parser_init(PgXmlStrictness strictness)
68 : {
69 0 : PgXmlErrorContext *xmlerrcxt;
70 :
71 : /* Set up error handling (we share the core's error handler) */
72 0 : xmlerrcxt = pg_xml_init(strictness);
73 :
74 : /* Note: we're assuming an elog cannot be thrown by the following calls */
75 :
76 : /* Initialize libxml */
77 0 : xmlInitParser();
78 :
79 0 : return xmlerrcxt;
80 0 : }
81 :
82 :
83 : /* Encodes special characters (<, >, &, " and \r) as XML entities */
84 :
85 0 : PG_FUNCTION_INFO_V1(xml_encode_special_chars);
86 :
87 : Datum
88 0 : xml_encode_special_chars(PG_FUNCTION_ARGS)
89 : {
90 0 : text *tin = PG_GETARG_TEXT_PP(0);
91 0 : text *volatile tout = NULL;
92 0 : xmlChar *volatile tt = NULL;
93 0 : PgXmlErrorContext *xmlerrcxt;
94 :
95 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
96 :
97 0 : PG_TRY();
98 : {
99 0 : xmlChar *ts;
100 :
101 0 : ts = pgxml_texttoxmlchar(tin);
102 :
103 0 : tt = xmlEncodeSpecialChars(NULL, ts);
104 0 : if (tt == NULL || pg_xml_error_occurred(xmlerrcxt))
105 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
106 : "could not allocate xmlChar");
107 0 : pfree(ts);
108 :
109 0 : tout = cstring_to_text((char *) tt);
110 0 : }
111 0 : PG_CATCH();
112 : {
113 0 : if (tt != NULL)
114 0 : xmlFree(tt);
115 :
116 0 : pg_xml_done(xmlerrcxt, true);
117 :
118 0 : PG_RE_THROW();
119 : }
120 0 : PG_END_TRY();
121 :
122 0 : if (tt != NULL)
123 0 : xmlFree(tt);
124 :
125 0 : pg_xml_done(xmlerrcxt, false);
126 :
127 0 : PG_RETURN_TEXT_P(tout);
128 0 : }
129 :
130 : /*
131 : * Function translates a nodeset into a text representation
132 : *
133 : * iterates over each node in the set and calls xmlNodeDump to write it to
134 : * an xmlBuffer -from which an xmlChar * string is returned.
135 : *
136 : * each representation is surrounded by <tagname> ... </tagname>
137 : *
138 : * plainsep is an ordinary (not tag) separator - if used, then nodes are
139 : * cast to string as output method
140 : */
141 : static xmlChar *
142 0 : pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
143 : xmlChar *toptagname,
144 : xmlChar *septagname,
145 : xmlChar *plainsep)
146 : {
147 0 : volatile xmlBufferPtr buf = NULL;
148 0 : xmlChar *volatile result = NULL;
149 0 : PgXmlErrorContext *xmlerrcxt;
150 :
151 : /* spin up some error handling */
152 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
153 :
154 0 : PG_TRY();
155 : {
156 0 : buf = xmlBufferCreate();
157 :
158 0 : if (buf == NULL || pg_xml_error_occurred(xmlerrcxt))
159 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
160 : "could not allocate xmlBuffer");
161 :
162 0 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
163 : {
164 0 : xmlBufferWriteChar(buf, "<");
165 0 : xmlBufferWriteCHAR(buf, toptagname);
166 0 : xmlBufferWriteChar(buf, ">");
167 0 : }
168 0 : if (nodeset != NULL)
169 : {
170 0 : for (int i = 0; i < nodeset->nodeNr; i++)
171 : {
172 0 : if (plainsep != NULL)
173 : {
174 0 : xmlBufferWriteCHAR(buf,
175 0 : xmlXPathCastNodeToString(nodeset->nodeTab[i]));
176 :
177 : /* If this isn't the last entry, write the plain sep. */
178 0 : if (i < (nodeset->nodeNr) - 1)
179 0 : xmlBufferWriteChar(buf, (char *) plainsep);
180 0 : }
181 : else
182 : {
183 0 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
184 : {
185 0 : xmlBufferWriteChar(buf, "<");
186 0 : xmlBufferWriteCHAR(buf, septagname);
187 0 : xmlBufferWriteChar(buf, ">");
188 0 : }
189 0 : xmlNodeDump(buf,
190 0 : nodeset->nodeTab[i]->doc,
191 0 : nodeset->nodeTab[i],
192 : 1, 0);
193 :
194 0 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
195 : {
196 0 : xmlBufferWriteChar(buf, "</");
197 0 : xmlBufferWriteCHAR(buf, septagname);
198 0 : xmlBufferWriteChar(buf, ">");
199 0 : }
200 : }
201 0 : }
202 0 : }
203 :
204 0 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
205 : {
206 0 : xmlBufferWriteChar(buf, "</");
207 0 : xmlBufferWriteCHAR(buf, toptagname);
208 0 : xmlBufferWriteChar(buf, ">");
209 0 : }
210 :
211 0 : result = xmlStrdup(xmlBufferContent(buf));
212 0 : if (result == NULL || pg_xml_error_occurred(xmlerrcxt))
213 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
214 : "could not allocate result");
215 : }
216 0 : PG_CATCH();
217 : {
218 0 : if (buf)
219 0 : xmlBufferFree(buf);
220 :
221 0 : pg_xml_done(xmlerrcxt, true);
222 :
223 0 : PG_RE_THROW();
224 : }
225 0 : PG_END_TRY();
226 :
227 0 : xmlBufferFree(buf);
228 0 : pg_xml_done(xmlerrcxt, false);
229 :
230 0 : return result;
231 0 : }
232 :
233 :
234 : /* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
235 : * into the libxml2 representation
236 : */
237 : static xmlChar *
238 0 : pgxml_texttoxmlchar(text *textstring)
239 : {
240 0 : return (xmlChar *) text_to_cstring(textstring);
241 : }
242 :
243 : /* Publicly visible XPath functions */
244 :
245 : /*
246 : * This is a "raw" xpath function. Check that it returns child elements
247 : * properly
248 : */
249 0 : PG_FUNCTION_INFO_V1(xpath_nodeset);
250 :
251 : Datum
252 0 : xpath_nodeset(PG_FUNCTION_ARGS)
253 : {
254 0 : text *document = PG_GETARG_TEXT_PP(0);
255 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
256 0 : xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
257 0 : xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
258 0 : xmlChar *xpath;
259 0 : text *volatile xpres = NULL;
260 0 : xpath_workspace *volatile workspace = NULL;
261 0 : PgXmlErrorContext *xmlerrcxt;
262 :
263 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
264 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
265 :
266 0 : PG_TRY();
267 : {
268 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
269 0 : xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL);
270 : }
271 0 : PG_CATCH();
272 : {
273 0 : if (workspace)
274 0 : cleanup_workspace(workspace);
275 :
276 0 : pg_xml_done(xmlerrcxt, true);
277 0 : PG_RE_THROW();
278 : }
279 0 : PG_END_TRY();
280 :
281 0 : cleanup_workspace(workspace);
282 0 : pg_xml_done(xmlerrcxt, false);
283 :
284 0 : pfree(xpath);
285 :
286 0 : if (xpres == NULL)
287 0 : PG_RETURN_NULL();
288 0 : PG_RETURN_TEXT_P(xpres);
289 0 : }
290 :
291 : /*
292 : * The following function is almost identical, but returns the elements in
293 : * a list.
294 : */
295 0 : PG_FUNCTION_INFO_V1(xpath_list);
296 :
297 : Datum
298 0 : xpath_list(PG_FUNCTION_ARGS)
299 : {
300 0 : text *document = PG_GETARG_TEXT_PP(0);
301 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
302 0 : xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
303 0 : xmlChar *xpath;
304 0 : text *volatile xpres = NULL;
305 0 : xpath_workspace *volatile workspace = NULL;
306 0 : PgXmlErrorContext *xmlerrcxt;
307 :
308 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
309 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
310 :
311 0 : PG_TRY();
312 : {
313 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
314 0 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, plainsep);
315 : }
316 0 : PG_CATCH();
317 : {
318 0 : if (workspace)
319 0 : cleanup_workspace(workspace);
320 :
321 0 : pg_xml_done(xmlerrcxt, true);
322 0 : PG_RE_THROW();
323 : }
324 0 : PG_END_TRY();
325 :
326 0 : cleanup_workspace(workspace);
327 0 : pg_xml_done(xmlerrcxt, false);
328 :
329 0 : pfree(xpath);
330 :
331 0 : if (xpres == NULL)
332 0 : PG_RETURN_NULL();
333 0 : PG_RETURN_TEXT_P(xpres);
334 0 : }
335 :
336 :
337 0 : PG_FUNCTION_INFO_V1(xpath_string);
338 :
339 : Datum
340 0 : xpath_string(PG_FUNCTION_ARGS)
341 : {
342 0 : text *document = PG_GETARG_TEXT_PP(0);
343 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
344 0 : xmlChar *xpath;
345 0 : int32 pathsize;
346 0 : text *volatile xpres = NULL;
347 0 : xpath_workspace *volatile workspace = NULL;
348 0 : PgXmlErrorContext *xmlerrcxt;
349 :
350 0 : pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
351 :
352 : /*
353 : * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
354 : * at end
355 : */
356 : /* We could try casting to string using the libxml function? */
357 :
358 0 : xpath = (xmlChar *) palloc(pathsize + 9);
359 0 : memcpy(xpath, "string(", 7);
360 0 : memcpy(xpath + 7, VARDATA_ANY(xpathsupp), pathsize);
361 0 : xpath[pathsize + 7] = ')';
362 0 : xpath[pathsize + 8] = '\0';
363 :
364 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
365 :
366 0 : PG_TRY();
367 : {
368 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
369 0 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
370 : }
371 0 : PG_CATCH();
372 : {
373 0 : if (workspace)
374 0 : cleanup_workspace(workspace);
375 :
376 0 : pg_xml_done(xmlerrcxt, true);
377 0 : PG_RE_THROW();
378 : }
379 0 : PG_END_TRY();
380 :
381 0 : cleanup_workspace(workspace);
382 0 : pg_xml_done(xmlerrcxt, false);
383 :
384 0 : pfree(xpath);
385 :
386 0 : if (xpres == NULL)
387 0 : PG_RETURN_NULL();
388 0 : PG_RETURN_TEXT_P(xpres);
389 0 : }
390 :
391 :
392 0 : PG_FUNCTION_INFO_V1(xpath_number);
393 :
394 : Datum
395 0 : xpath_number(PG_FUNCTION_ARGS)
396 : {
397 0 : text *document = PG_GETARG_TEXT_PP(0);
398 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
399 0 : xmlChar *xpath;
400 0 : volatile float4 fRes = 0.0;
401 0 : volatile bool isNull = false;
402 0 : xpath_workspace *volatile workspace = NULL;
403 0 : PgXmlErrorContext *xmlerrcxt;
404 :
405 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
406 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
407 :
408 0 : PG_TRY();
409 : {
410 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
411 0 : pfree(xpath);
412 :
413 0 : if (workspace->res == NULL)
414 0 : isNull = true;
415 : else
416 0 : fRes = xmlXPathCastToNumber(workspace->res);
417 : }
418 0 : PG_CATCH();
419 : {
420 0 : if (workspace)
421 0 : cleanup_workspace(workspace);
422 :
423 0 : pg_xml_done(xmlerrcxt, true);
424 0 : PG_RE_THROW();
425 : }
426 0 : PG_END_TRY();
427 :
428 0 : cleanup_workspace(workspace);
429 0 : pg_xml_done(xmlerrcxt, false);
430 :
431 0 : if (isNull || xmlXPathIsNaN(fRes))
432 0 : PG_RETURN_NULL();
433 :
434 0 : PG_RETURN_FLOAT4(fRes);
435 0 : }
436 :
437 :
438 0 : PG_FUNCTION_INFO_V1(xpath_bool);
439 :
440 : Datum
441 0 : xpath_bool(PG_FUNCTION_ARGS)
442 : {
443 0 : text *document = PG_GETARG_TEXT_PP(0);
444 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
445 0 : xmlChar *xpath;
446 0 : volatile int bRes = 0;
447 0 : xpath_workspace *volatile workspace = NULL;
448 0 : PgXmlErrorContext *xmlerrcxt;
449 :
450 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
451 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
452 :
453 0 : PG_TRY();
454 : {
455 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
456 0 : pfree(xpath);
457 :
458 0 : if (workspace->res == NULL)
459 0 : bRes = 0;
460 : else
461 0 : bRes = xmlXPathCastToBoolean(workspace->res);
462 : }
463 0 : PG_CATCH();
464 : {
465 0 : if (workspace)
466 0 : cleanup_workspace(workspace);
467 :
468 0 : pg_xml_done(xmlerrcxt, true);
469 0 : PG_RE_THROW();
470 : }
471 0 : PG_END_TRY();
472 :
473 0 : cleanup_workspace(workspace);
474 0 : pg_xml_done(xmlerrcxt, false);
475 :
476 0 : PG_RETURN_BOOL(bRes);
477 0 : }
478 :
479 :
480 :
481 : /* Core function to evaluate XPath query */
482 :
483 : static xpath_workspace *
484 0 : pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
485 : {
486 0 : int32 docsize = VARSIZE_ANY_EXHDR(document);
487 0 : xmlXPathCompExprPtr comppath;
488 0 : xpath_workspace *workspace = palloc0_object(xpath_workspace);
489 :
490 0 : workspace->doctree = NULL;
491 0 : workspace->ctxt = NULL;
492 0 : workspace->res = NULL;
493 :
494 0 : workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
495 0 : docsize, NULL, NULL,
496 : XML_PARSE_NOENT);
497 0 : if (workspace->doctree != NULL)
498 : {
499 0 : workspace->ctxt = xmlXPathNewContext(workspace->doctree);
500 0 : workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
501 :
502 : /* compile the path */
503 0 : comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
504 0 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
505 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
506 : "XPath Syntax Error");
507 :
508 : /* Now evaluate the path expression. */
509 0 : workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
510 :
511 0 : xmlXPathFreeCompExpr(comppath);
512 0 : }
513 :
514 0 : return workspace;
515 0 : }
516 :
517 : /* Clean up after processing the result of pgxml_xpath() */
518 : static void
519 0 : cleanup_workspace(xpath_workspace *workspace)
520 : {
521 0 : if (workspace->res)
522 0 : xmlXPathFreeObject(workspace->res);
523 0 : workspace->res = NULL;
524 0 : if (workspace->ctxt)
525 0 : xmlXPathFreeContext(workspace->ctxt);
526 0 : workspace->ctxt = NULL;
527 0 : if (workspace->doctree)
528 0 : xmlFreeDoc(workspace->doctree);
529 0 : workspace->doctree = NULL;
530 0 : }
531 :
532 : static text *
533 0 : pgxml_result_to_text(xmlXPathObjectPtr res,
534 : xmlChar *toptag,
535 : xmlChar *septag,
536 : xmlChar *plainsep)
537 : {
538 0 : xmlChar *volatile xpresstr = NULL;
539 0 : text *volatile xpres = NULL;
540 0 : PgXmlErrorContext *xmlerrcxt;
541 :
542 0 : if (res == NULL)
543 0 : return NULL;
544 :
545 : /* spin some error handling */
546 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
547 :
548 0 : PG_TRY();
549 : {
550 0 : switch (res->type)
551 : {
552 : case XPATH_NODESET:
553 0 : xpresstr = pgxmlNodeSetToText(res->nodesetval,
554 0 : toptag,
555 0 : septag, plainsep);
556 0 : break;
557 :
558 : case XPATH_STRING:
559 0 : xpresstr = xmlStrdup(res->stringval);
560 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
561 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
562 : "could not allocate result");
563 0 : break;
564 :
565 : default:
566 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
567 0 : xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
568 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
569 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
570 : "could not allocate result");
571 0 : }
572 :
573 : /* Now convert this result back to text */
574 0 : xpres = cstring_to_text((char *) xpresstr);
575 : }
576 0 : PG_CATCH();
577 : {
578 0 : if (xpresstr != NULL)
579 0 : xmlFree(xpresstr);
580 :
581 0 : pg_xml_done(xmlerrcxt, true);
582 :
583 0 : PG_RE_THROW();
584 : }
585 0 : PG_END_TRY();
586 :
587 : /* Free various storage */
588 0 : xmlFree(xpresstr);
589 :
590 0 : pg_xml_done(xmlerrcxt, false);
591 :
592 0 : return xpres;
593 0 : }
594 :
595 : /*
596 : * xpath_table is a table function. It needs some tidying (as do the
597 : * other functions here!
598 : */
599 0 : PG_FUNCTION_INFO_V1(xpath_table);
600 :
601 : Datum
602 0 : xpath_table(PG_FUNCTION_ARGS)
603 : {
604 : /* Function parameters */
605 0 : char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
606 0 : char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
607 0 : char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
608 0 : char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
609 0 : char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
610 :
611 : /* SPI (input tuple) support */
612 0 : SPITupleTable *tuptable;
613 0 : HeapTuple spi_tuple;
614 0 : TupleDesc spi_tupdesc;
615 :
616 :
617 0 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
618 0 : AttInMetadata *attinmeta;
619 :
620 0 : char **values;
621 0 : xmlChar **xpaths;
622 0 : char *pos;
623 0 : const char *pathsep = "|";
624 :
625 0 : int numpaths;
626 0 : int ret;
627 0 : uint64 proc;
628 0 : int j;
629 0 : int rownr; /* For issuing multiple rows from one original
630 : * document */
631 0 : bool had_values; /* To determine end of nodeset results */
632 0 : StringInfoData query_buf;
633 0 : PgXmlErrorContext *xmlerrcxt;
634 0 : volatile xmlDocPtr doctree = NULL;
635 :
636 0 : InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
637 :
638 : /* must have at least one output column (for the pkey) */
639 0 : if (rsinfo->setDesc->natts < 1)
640 0 : ereport(ERROR,
641 : (errcode(ERRCODE_SYNTAX_ERROR),
642 : errmsg("xpath_table must have at least one output column")));
643 :
644 : /*
645 : * At the moment we assume that the returned attributes make sense for the
646 : * XPath specified (i.e. we trust the caller). It's not fatal if they get
647 : * it wrong - the input function for the column type will raise an error
648 : * if the path result can't be converted into the correct binary
649 : * representation.
650 : */
651 :
652 0 : attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
653 :
654 0 : values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
655 0 : xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
656 :
657 : /*
658 : * Split XPaths. xpathset is a writable CString.
659 : *
660 : * Note that we stop splitting once we've done all needed for tupdesc
661 : */
662 0 : numpaths = 0;
663 0 : pos = xpathset;
664 0 : while (numpaths < (rsinfo->setDesc->natts - 1))
665 : {
666 0 : xpaths[numpaths++] = (xmlChar *) pos;
667 0 : pos = strstr(pos, pathsep);
668 0 : if (pos != NULL)
669 : {
670 0 : *pos = '\0';
671 0 : pos++;
672 0 : }
673 : else
674 0 : break;
675 : }
676 :
677 : /* Now build query */
678 0 : initStringInfo(&query_buf);
679 :
680 : /* Build initial sql statement */
681 0 : appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
682 0 : pkeyfield,
683 0 : xmlfield,
684 0 : relname,
685 0 : condition);
686 :
687 0 : SPI_connect();
688 :
689 0 : if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
690 0 : elog(ERROR, "xpath_table: SPI execution failed for query %s",
691 : query_buf.data);
692 :
693 0 : proc = SPI_processed;
694 0 : tuptable = SPI_tuptable;
695 0 : spi_tupdesc = tuptable->tupdesc;
696 :
697 : /*
698 : * Check that SPI returned correct result. If you put a comma into one of
699 : * the function parameters, this will catch it when the SPI query returns
700 : * e.g. 3 columns.
701 : */
702 0 : if (spi_tupdesc->natts != 2)
703 : {
704 0 : ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
705 : errmsg("expression returning multiple columns is not valid in parameter list"),
706 : errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
707 0 : }
708 :
709 : /*
710 : * Setup the parser. This should happen after we are done evaluating the
711 : * query, in case it calls functions that set up libxml differently.
712 : */
713 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
714 :
715 0 : PG_TRY();
716 : {
717 : /* For each row i.e. document returned from SPI */
718 0 : uint64 i;
719 :
720 0 : for (i = 0; i < proc; i++)
721 : {
722 0 : char *pkey;
723 0 : char *xmldoc;
724 0 : xmlXPathContextPtr ctxt;
725 0 : xmlXPathObjectPtr res;
726 0 : xmlChar *resstr;
727 0 : xmlXPathCompExprPtr comppath;
728 0 : HeapTuple ret_tuple;
729 :
730 : /* Extract the row data as C Strings */
731 0 : spi_tuple = tuptable->vals[i];
732 0 : pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
733 0 : xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
734 :
735 : /*
736 : * Clear the values array, so that not-well-formed documents
737 : * return NULL in all columns. Note that this also means that
738 : * spare columns will be NULL.
739 : */
740 0 : for (j = 0; j < rsinfo->setDesc->natts; j++)
741 0 : values[j] = NULL;
742 :
743 : /* Insert primary key */
744 0 : values[0] = pkey;
745 :
746 : /* Parse the document */
747 0 : if (xmldoc)
748 0 : doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
749 : NULL, NULL,
750 : XML_PARSE_NOENT);
751 : else /* treat NULL as not well-formed */
752 0 : doctree = NULL;
753 :
754 0 : if (doctree == NULL)
755 : {
756 : /* not well-formed, so output all-NULL tuple */
757 0 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
758 0 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
759 0 : heap_freetuple(ret_tuple);
760 0 : }
761 : else
762 : {
763 : /* New loop here - we have to deal with nodeset results */
764 0 : rownr = 0;
765 :
766 0 : do
767 : {
768 : /* Now evaluate the set of xpaths. */
769 0 : had_values = false;
770 0 : for (j = 0; j < numpaths; j++)
771 : {
772 0 : ctxt = xmlXPathNewContext(doctree);
773 0 : if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
774 0 : xml_ereport(xmlerrcxt,
775 : ERROR, ERRCODE_OUT_OF_MEMORY,
776 : "could not allocate XPath context");
777 :
778 0 : ctxt->node = xmlDocGetRootElement(doctree);
779 :
780 : /* compile the path */
781 0 : comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]);
782 0 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
783 0 : xml_ereport(xmlerrcxt, ERROR,
784 : ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
785 : "XPath Syntax Error");
786 :
787 : /* Now evaluate the path expression. */
788 0 : res = xmlXPathCompiledEval(comppath, ctxt);
789 0 : xmlXPathFreeCompExpr(comppath);
790 :
791 0 : if (res != NULL)
792 : {
793 0 : switch (res->type)
794 : {
795 : case XPATH_NODESET:
796 : /* We see if this nodeset has enough nodes */
797 0 : if (res->nodesetval != NULL &&
798 0 : rownr < res->nodesetval->nodeNr)
799 : {
800 0 : resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
801 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
802 0 : xml_ereport(xmlerrcxt,
803 : ERROR, ERRCODE_OUT_OF_MEMORY,
804 : "could not allocate result");
805 0 : had_values = true;
806 0 : }
807 : else
808 0 : resstr = NULL;
809 :
810 0 : break;
811 :
812 : case XPATH_STRING:
813 0 : resstr = xmlStrdup(res->stringval);
814 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
815 0 : xml_ereport(xmlerrcxt,
816 : ERROR, ERRCODE_OUT_OF_MEMORY,
817 : "could not allocate result");
818 0 : break;
819 :
820 : default:
821 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
822 0 : resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
823 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
824 0 : xml_ereport(xmlerrcxt,
825 : ERROR, ERRCODE_OUT_OF_MEMORY,
826 : "could not allocate result");
827 0 : }
828 :
829 : /*
830 : * Insert this into the appropriate column in the
831 : * result tuple.
832 : */
833 0 : values[j + 1] = (char *) resstr;
834 0 : }
835 0 : xmlXPathFreeContext(ctxt);
836 0 : }
837 :
838 : /* Now add the tuple to the output, if there is one. */
839 0 : if (had_values)
840 : {
841 0 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
842 0 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
843 0 : heap_freetuple(ret_tuple);
844 0 : }
845 :
846 0 : rownr++;
847 0 : } while (had_values);
848 : }
849 :
850 0 : if (doctree != NULL)
851 0 : xmlFreeDoc(doctree);
852 0 : doctree = NULL;
853 :
854 0 : if (pkey)
855 0 : pfree(pkey);
856 0 : if (xmldoc)
857 0 : pfree(xmldoc);
858 0 : }
859 0 : }
860 0 : PG_CATCH();
861 : {
862 0 : if (doctree != NULL)
863 0 : xmlFreeDoc(doctree);
864 :
865 0 : pg_xml_done(xmlerrcxt, true);
866 :
867 0 : PG_RE_THROW();
868 : }
869 0 : PG_END_TRY();
870 :
871 0 : if (doctree != NULL)
872 0 : xmlFreeDoc(doctree);
873 :
874 0 : pg_xml_done(xmlerrcxt, false);
875 :
876 0 : SPI_finish();
877 :
878 : /*
879 : * SFRM_Materialize mode expects us to return a NULL Datum. The actual
880 : * tuples are in our tuplestore and passed back through rsinfo->setResult.
881 : * rsinfo->setDesc is set to the tuple description that we actually used
882 : * to build our tuples with, so the caller can verify we did what it was
883 : * expecting.
884 : */
885 0 : return (Datum) 0;
886 0 : }
|