cURL

curl's project page on SourceForge.net

Sponsors:
Haxx

cURL > libcurl > API > Example Source Codes > htmltitle.cpp

htmltitle.cpp

All Examples

10-at-a-time
anyauthput
asiohiper (C++)
cacertinmem
certinfo
chkspeed
cookie_interface
curlgtk
curlx
debug
evhiperfifo
externalsocket
fileupload
fopen
ftp-wildcard
ftpget
ftpgetinfo
ftpgetresp
ftpsget
ftpupload
ftpuploadresume
getinfo
getinmemory
ghiper
hiperfifo
href_extractor
htmltidy
htmltitle (C++)
http-post
httpcustomheader
httpput
https
imap-append
imap-copy
imap-create
imap-delete
imap-examine
imap-fetch
imap-list
imap-lsub
imap-multi
imap-noop
imap-search
imap-ssl
imap-store
imap-tls
multi-app
multi-debugcallback
multi-double
multi-post
multi-single
multi-uv
multithread
opensslthreadlock
persistant
pop3-dele
pop3-list
pop3-multi
pop3-noop
pop3-retr
pop3-ssl
pop3-stat
pop3-tls
pop3-top
pop3-uidl
post-callback
postinmemory
postit2
progressfunc
resolve
rtsp
sampleconv
sendrecv
sepheaders
sessioninfo
sftpget
simple
simplepost
simplessl
smooth-gtk-thread
smtp-expn
smtp-mail
smtp-multi
smtp-ssl
smtp-tls
smtp-vrfy
synctime
threaded-ssl
url2file
usercertinmem
xmlstream

Download htmltitle.cpp

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/ 
// Get a web page, parse it with libxml and print the page's title.
//
// Written by Lars Nilsson
//
// GNU C++ compile command line suggestion (edit paths accordingly, and
// enter the command on a single line):
//
//   g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp
//       -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <string>
#include <curl/curl.h>
#include <libxml/HTMLparser.h>
 
//
//  Case-insensitive string comparison
//
//  COMPARE(a, b) is true when the two NUL-terminated C strings 'a' and 'b'
//  are equal ignoring letter case, and false otherwise.
//

#ifdef _MSC_VER
// Microsoft's runtime deprecates the POSIX-style name stricmp in favour of
// _stricmp, which is declared in <string.h>.
#define COMPARE(a, b) (!_stricmp((a), (b)))
#else
// POSIX declares strcasecmp in <strings.h>; <string.h> alone is not
// guaranteed to provide it.
#include <strings.h>
#define COMPARE(a, b) (!strcasecmp((a), (b)))
#endif
 
//
//  State handed to every libxml SAX callback
//

struct Context
{
  // True while character data should be collected into 'title'
  bool addTitle;

  // Title text collected so far
  std::string title;

  // Starts with collection switched off and an empty title
  Context()
    : addTitle(false),
      title()
  {
  }
};
 
//
//  libcurl variables for error strings and returned data
//

// Registered with libcurl through CURLOPT_ERRORBUFFER in init(); its
// contents are printed in the failure messages of init() and main().
static char errorBuffer[CURL_ERROR_SIZE];

// Receives the downloaded data: init() registers it as CURLOPT_WRITEDATA
// and main() passes it to parseHtml() once the transfer has finished.
static std::string buffer;
 
//
//  libcurl write callback function
//
//  Appends the size * nmemb bytes found at 'data' to *writerData.
//
//  Returns the number of bytes stored, or 0 when writerData is NULL and
//  nothing could be stored.
//
//  The return type is size_t because that is the type libcurl expects a
//  CURLOPT_WRITEFUNCTION callback to return; an int return would both
//  mismatch the callback signature and narrow the byte count.
//

static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if (writerData == NULL)
    return 0;

  const size_t bytes = size * nmemb;

  writerData->append(data, bytes);

  return bytes;
}
 
//
//  libcurl connection initialization
//
//  Creates an easy handle, stores it in 'conn' and configures it to:
//    - report errors into errorBuffer,
//    - fetch 'url',
//    - follow redirects,
//    - deliver received data to writer(), which appends it to 'buffer'.
//
//  Returns true when every step succeeded. On any failure a message is
//  written to stderr and false is returned; 'conn' is left as it was at
//  the point of failure (NULL if the handle could not be created), so the
//  caller decides how to clean up and terminate.
//

static bool init(CURL *&conn, char *url)
{
  CURLcode code;

  conn = curl_easy_init();

  if (conn == NULL)
  {
    fprintf(stderr, "Failed to create CURL connection\n");

    // Report failure like every other path instead of terminating the
    // process from inside a helper.
    return false;
  }

  code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
  if (code != CURLE_OK)
  {
    // The error buffer is not registered yet, so print the numeric code.
    fprintf(stderr, "Failed to set error buffer [%d]\n", code);

    return false;
  }

  code = curl_easy_setopt(conn, CURLOPT_URL, url);
  if (code != CURLE_OK)
  {
    fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);

    return false;
  }

  code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
  if (code != CURLE_OK)
  {
    fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);

    return false;
  }

  code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
  if (code != CURLE_OK)
  {
    fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);

    return false;
  }

  code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
  if (code != CURLE_OK)
  {
    fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);

    return false;
  }

  return true;
}
 
//
//  libxml start element callback function
//

static void StartElement(void *voidContext,
                         const xmlChar *name,
                         const xmlChar **attributes)
{
  Context *context = (Context *)voidContext;
 
  if (COMPARE((char *)name, "TITLE"))
  {
    context->title = "";
    context->addTitle = true;
  }
  (void) attributes;
}
 
//
//  libxml end element callback function
//

static void EndElement(void *voidContext,
                       const xmlChar *name)
{
  Context *context = (Context *)voidContext;
 
  if (COMPARE((char *)name, "TITLE"))
    context->addTitle = false;
}
 
//
//  Text handling helper function
//
//  Appends 'length' bytes starting at 'chars' to the title, but only while
//  title collection is switched on.
//

static void handleCharacters(Context *context,
                             const xmlChar *chars,
                             int length)
{
  if (!context->addTitle)
    return;

  const char *text = (char *)chars;

  context->title.append(text, length);
}
 
//
//  libxml PCDATA callback function
//

static void Characters(void *voidContext,
                       const xmlChar *chars,
                       int length)
{
  Context *context = (Context *)voidContext;
 
  handleCharacters(context, chars, length);
}
 
//
//  libxml CDATA callback function
//

static void cdata(void *voidContext,
                  const xmlChar *chars,
                  int length)
{
  Context *context = (Context *)voidContext;
 
  handleCharacters(context, chars, length);
}
 
//
//  libxml SAX callback structure
//
//  The initializer is positional, so the position of each entry decides
//  which callback slot it fills. Only four slots are used; all others are
//  NULL. The slot names in the trailing comments follow the xmlSAXHandler
//  member order in libxml2's <libxml/parser.h>.
//  NOTE(review): confirm the order against the installed libxml2 headers.
//

static htmlSAXHandler saxHandler =
{
  NULL,          // internalSubset
  NULL,          // isStandalone
  NULL,          // hasInternalSubset
  NULL,          // hasExternalSubset
  NULL,          // resolveEntity
  NULL,          // getEntity
  NULL,          // entityDecl
  NULL,          // notationDecl
  NULL,          // attributeDecl
  NULL,          // elementDecl
  NULL,          // unparsedEntityDecl
  NULL,          // setDocumentLocator
  NULL,          // startDocument
  NULL,          // endDocument
  StartElement,  // startElement: starts title collection on TITLE
  EndElement,    // endElement: stops title collection on TITLE
  NULL,          // reference
  Characters,    // characters: text data forwarded to handleCharacters()
  NULL,          // ignorableWhitespace
  NULL,          // processingInstruction
  NULL,          // comment
  NULL,          // warning
  NULL,          // error
  NULL,          // fatalError
  NULL,          // getParameterEntity
  cdata,         // cdataBlock: CDATA text forwarded to handleCharacters()
  NULL           // externalSubset
};
 
//
//  Parse given (assumed to be) HTML text and return the title
//

static void parseHtml(const std::string &html,
                      std::string &title)
{
  htmlParserCtxtPtr ctxt;
  Context context;
 
  ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
                                  XML_CHAR_ENCODING_NONE);
 
  htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
  htmlParseChunk(ctxt, "", 0, 1);
 
  htmlFreeParserCtxt(ctxt);
 
  title = context.title;
}
 
int main(int argc, char *argv[])
{
  CURL *conn = NULL;
  CURLcode code;
  std::string title;
 
  // Ensure one argument is given

  if (argc != 2)
  {
    fprintf(stderr, "Usage: %s <url>\n", argv[0]);
 
    exit(EXIT_FAILURE);
  }
 
  curl_global_init(CURL_GLOBAL_DEFAULT);
 
  // Initialize CURL connection

  if (!init(conn, argv[1]))
  {
    fprintf(stderr, "Connection initializion failed\n");
 
    exit(EXIT_FAILURE);
  }
 
  // Retrieve content for the URL

  code = curl_easy_perform(conn);
  curl_easy_cleanup(conn);
 
  if (code != CURLE_OK)
  {
    fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
 
    exit(EXIT_FAILURE);
  }
 
  // Parse the (assumed) HTML code

  parseHtml(buffer, title);
 
  // Display the extracted title

  printf("Title: %s\n", title.c_str());
 
  return EXIT_SUCCESS;
}

You'll also find all examples in the distribution archive, in the docs/examples directory.

donate! Page updated March 30, 2014.
web site info

File upload with ASP.NET