/******************************************************************************
 * Googler -- A command line interface to google.                             *
 * Copyright (C) 2001 Hal Duston                                              *
 *                                                                            *
 * This program is free software; you can redistribute it and/or modify       *
 * it under the terms of the GNU General Public License as published by       *
 * the Free Software Foundation; either version 2 of the License, or          *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *
 * GNU General Public License for more details.                               *
 *                                                                            *
 * You should have received a copy of the GNU General Public License          *
 * along with this program; if not, write to the Free Software                *
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA    *
 *                                                                            *
 ******************************************************************************/

/*
 * NOTE(review): the header names below were missing from the copy of this
 * file under review (every "<...>" span had been stripped).  They were
 * reconstructed from the functions the code calls; confirm against upstream.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <errno.h>
#include <limits.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef __GLIBC__
# include <getopt.h>
#endif

#define countof(x) (sizeof(x)/sizeof((x)[0]))

/* Everything the command line can configure. */
struct options
{
    int use_cache;              /* -c: print cache links instead of hits   */
    int num_results;            /* -n: 0 means "do not send &num="         */
    const char *exact_phrase;   /* as_epq (no option sets this yet)        */
    const char *one_word;       /* as_oq  (no option sets this yet)        */
    const char *exclude_word;   /* as_eq  (no option sets this yet)        */
    const char *filetype;       /* -f: as_filetype                         */
    const char *usage_rights;   /* -L: as_rights                           */
    const char *safe_search;    /* -S: safe                                */
    const char *language;       /* -l: lr=lang_xx and Accept-Language      */
    const char *date;           /* -d: as_qdr                              */
    const char *occurs;         /* -o: as_occt                             */
    const char *site;           /* -i / -e: as_sitesearch                  */
    const char *proxy_name;     /* -p: proxy host                          */
    int proxy_port;             /* -p: proxy port                          */
    const char *authorization;  /* -a: "id:pw", sent base64 encoded        */
    const char *user_agent;     /* -u: User-Agent header value             */
    size_t query_start;         /* index of the first query word in argv   */
    int display_url;            /* -Z: print the URL, do not query         */
    char domain;                /* 'i' or 'e', paired with site            */
    char ft;                    /* 'i' or 'e', paired with filetype        */
};

/* Growable byte buffer used to assemble every string this program builds. */
struct strbuf
{
    char *data;     /* heap storage; NULL until something is reserved */
    size_t len;     /* bytes in use, not counting the terminating NUL */
    size_t cap;     /* bytes allocated                                */
    int failed;     /* sticky: set once an allocation/format fails    */
};

static const char server_name[] = "www.google.com";
static const char program_name[] = "googler";
static const char version[] = "1.0";

static char *build_string(const struct options *opts, int argc, char * const argv[]);
static char *encode_string(const char *string);
static char *build_url(const struct options *opts, const char *encoded_string);
static char *build_request(const struct options *opts, const char *url);
static char *query_google(const struct options *opts, const char *request);
static int open_connection(const struct options *opts);
static char *base64_encode(const char *input);
static void display_result(const struct options *opts, char *result);
static const struct options *get_options(int argc, char * const argv[], int *exit_status);
static void option_help(void);
static void option_version(void);

/*
 * Make room for `extra` more bytes plus a terminating NUL.
 * Returns 1 on success; on failure marks the buffer failed and returns 0.
 */
static int strbuf_reserve(struct strbuf *sb, size_t extra)
{
    size_t needed;
    size_t cap;
    char *grown;

    if(sb->failed) {
        return 0;
    }
    if(extra >= (size_t)-1 - sb->len) {
        sb->failed = 1;
        return 0;
    }
    needed = sb->len + extra + 1;
    if(needed > sb->cap) {
        cap = sb->cap ? sb->cap : 256;
        while(cap < needed) {
            cap = (cap > (size_t)-1 / 2) ? needed : cap * 2;
        }
        /* Keep the old pointer until realloc is known to have succeeded. */
        if((grown = realloc(sb->data, cap)) == NULL) {
            sb->failed = 1;
            return 0;
        }
        sb->data = grown;
        sb->cap = cap;
    }
    return 1;
}

/* Append `n` bytes of `text`; a no-op once the buffer has failed. */
static void strbuf_append(struct strbuf *sb, const char *text, size_t n)
{
    if(strbuf_reserve(sb, n)) {
        memcpy(sb->data + sb->len, text, n);
        sb->len += n;
        sb->data[sb->len] = '\0';
    }
}

/* Append a NUL-terminated string. */
static void strbuf_puts(struct strbuf *sb, const char *text)
{
    strbuf_append(sb, text, strlen(text));
}

/* Append printf-style formatted text, growing the buffer to fit exactly. */
static void strbuf_printf(struct strbuf *sb, const char *fmt, ...)
{
    va_list ap;
    va_list ap_copy;
    int needed;

    va_start(ap, fmt);
    va_copy(ap_copy, ap);
    needed = vsnprintf(NULL, 0, fmt, ap_copy);
    va_end(ap_copy);
    if(needed < 0) {
        sb->failed = 1;
    } else if(strbuf_reserve(sb, (size_t)needed)) {
        vsnprintf(sb->data + sb->len, (size_t)needed + 1, fmt, ap);
        sb->len += (size_t)needed;
    }
    va_end(ap);
}

/* Unreserved URL characters; tested by range so the locale cannot interfere. */
static int is_unreserved(unsigned char ch)
{
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
        || (ch >= '0' && ch <= '9')
        || ch == '-' || ch == '_' || ch == '.' || ch == '~';
}

/*
 * Append `text` encoded for use as a query-string value: space becomes '+',
 * unreserved characters are copied, every other byte becomes %xx.
 */
static void strbuf_append_encoded(struct strbuf *sb, const char *text)
{
    static const char hex[] = "0123456789abcdef";
    const unsigned char *p;
    char escaped[3];
    char plain;

    for(p = (const unsigned char *)text; *p != '\0'; ++p) {
        if(*p == ' ') {
            strbuf_append(sb, "+", 1);
        } else if(is_unreserved(*p)) {
            plain = (char)*p;
            strbuf_append(sb, &plain, 1);
        } else {
            escaped[0] = '%';
            escaped[1] = hex[*p >> 4];
            escaped[2] = hex[*p & 0x0f];
            strbuf_append(sb, escaped, sizeof(escaped));
        }
    }
}

/* Append "<prefix><encoded value>" when `value` is set; otherwise nothing. */
static void strbuf_append_param(struct strbuf *sb, const char *prefix, const char *value)
{
    if(value != NULL) {
        strbuf_puts(sb, prefix);
        strbuf_append_encoded(sb, value);
    }
}

/*
 * Hand the finished, NUL-terminated string to the caller, who must free() it.
 * Returns NULL (after reporting and releasing the storage) if any earlier
 * step on this buffer failed.
 */
static char *strbuf_finish(struct strbuf *sb)
{
    if(!strbuf_reserve(sb, 0)) {
        fprintf(stderr, "%s: out of memory\n", program_name);
        free(sb->data);
        sb->data = NULL;
        return NULL;
    }
    sb->data[sb->len] = '\0';
    return sb->data;
}

/*
 * Parse a whole-string decimal integer in [min, max].
 * Returns 1 and stores the value on success; returns 0 and leaves *value
 * untouched otherwise.
 */
static int parse_int(const char *text, long min, long max, int *value)
{
    char *end;
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);
    if(end == text || *end != '\0' || errno == ERANGE || parsed < min || parsed > max) {
        return 0;
    }
    *value = (int)parsed;
    return 1;
}

/*
 * Program entry point: parse options, build the search URL, then either
 * print it (-Z) or fetch the page and print the matching links.
 *
 * Exit status is EXIT_SUCCESS on success and for -? / -V, EXIT_FAILURE when
 * an option is rejected or any later stage fails.
 */
int main(int argc, char * const argv[])
{
    const struct options *opts;
    char *string = NULL;
    char *encoded_string = NULL;
    char *url = NULL;
    char *request = NULL;
    char *result = NULL;
    int status = EXIT_SUCCESS;

    if((opts = get_options(argc, argv, &status)) == NULL) {
        return status;
    }
    status = EXIT_FAILURE;

    if((string = build_string(opts, argc, argv)) == NULL) {
        goto out;
    }
    if((encoded_string = encode_string(string)) == NULL) {
        goto out;
    }
    if((url = build_url(opts, encoded_string)) == NULL) {
        goto out;
    }
    if(opts->display_url == 1) {
        fprintf(stdout, "http://%s%s\n", server_name, url);
        status = EXIT_SUCCESS;
        goto out;
    }
    if((request = build_request(opts, url)) == NULL) {
        goto out;
    }
    if((result = query_google(opts, request)) == NULL) {
        goto out;
    }
    display_result(opts, result);
    status = EXIT_SUCCESS;

out:
    /* free(NULL) is a no-op, so every stage can share this exit path. */
    free(result);
    free(request);
    free(url);
    free(encoded_string);
    free(string);
    return status;
}

/*
 * Join argv[opts->query_start .. argc-1] with single spaces.
 * Returns a malloc'd string (empty when there are no query words) or NULL
 * on allocation failure.
 */
static char *build_string(const struct options *opts, int argc, char * const argv[])
{
    struct strbuf sb = { NULL, 0, 0, 0 };
    size_t i;

    for(i = opts->query_start; i < (size_t)argc; ++i) {
        if(i > opts->query_start) {
            strbuf_append(&sb, " ", 1);
        }
        strbuf_puts(&sb, argv[i]);
    }
    return strbuf_finish(&sb);
}

/*
 * Return a malloc'd, URL-encoded copy of `string` (see
 * strbuf_append_encoded), or NULL on allocation failure.
 */
static char *encode_string(const char *string)
{
    struct strbuf sb = { NULL, 0, 0, 0 };

    strbuf_append_encoded(&sb, string);
    return strbuf_finish(&sb);
}

/*
 * Build the path-and-query part of the search URL.
 * `encoded_string` must already be URL-encoded; option values are encoded
 * here.  Returns a malloc'd string or NULL on allocation failure.
 */
static char *build_url(const struct options *opts, const char *encoded_string)
{
    struct strbuf sb = { NULL, 0, 0, 0 };

    strbuf_puts(&sb, "/search?as_q=");
    if(encoded_string != NULL) {
        strbuf_puts(&sb, encoded_string);
    }
    if(opts->num_results != 0) {
        strbuf_printf(&sb, "&num=%d", opts->num_results);
    }
    strbuf_puts(&sb, "&hl=en");
    strbuf_puts(&sb, "&ie=ISO-8859-1");
    strbuf_puts(&sb, "&btnG=Google+Search");
    strbuf_append_param(&sb, "&as_epq=", opts->exact_phrase);
    strbuf_append_param(&sb, "&as_oq=", opts->one_word);
    strbuf_append_param(&sb, "&as_eq=", opts->exclude_word);
    if(opts->filetype != NULL) {
        /* A zero selector would put a NUL byte in the URL; default to 'i'. */
        strbuf_printf(&sb, "&as_ft=%c", opts->ft ? opts->ft : 'i');
        strbuf_append_param(&sb, "&as_filetype=", opts->filetype);
    }
    strbuf_append_param(&sb, "&as_qdr=", opts->date);
    strbuf_append_param(&sb, "&as_occt=", opts->occurs);
    if(opts->site != NULL) {
        /* Same "name=selector" shape as as_ft above. */
        strbuf_printf(&sb, "&as_dt=%c", opts->domain ? opts->domain : 'i');
        strbuf_append_param(&sb, "&as_sitesearch=", opts->site);
    }
    strbuf_append_param(&sb, "&as_rights=", opts->usage_rights);
    strbuf_append_param(&sb, "&safe=", opts->safe_search);
    strbuf_append_param(&sb, "&lr=lang_", opts->language);
    return strbuf_finish(&sb);
}

/*
 * Build the HTTP/1.0 request line and headers for `url`.  The blank line
 * that ends the header block is NOT included; query_google() sends it.
 * Returns a malloc'd string or NULL on allocation failure.
 */
static char *build_request(const struct options *opts, const char *url)
{
    struct strbuf sb = { NULL, 0, 0, 0 };
    char *authorization;

    if(opts->proxy_name) {
        /* Proxies need the absolute URI on the request line. */
        strbuf_printf(&sb, "GET http://%s%s HTTP/1.0\r\n", server_name, url);
    } else {
        strbuf_printf(&sb, "GET %s HTTP/1.0\r\n", url);
    }
    strbuf_printf(&sb, "Host: %s\r\n", server_name);
    strbuf_printf(&sb, "Accept: %s\r\n", "text/html, text/plain, text/sgml, */*");
    strbuf_printf(&sb, "Accept-Language: %s\r\n", opts->language ? opts->language : "en");
    if(opts->proxy_name) {
        strbuf_printf(&sb, "Proxy-Connection: %s\r\n", "Keep-Alive");
    }
    if(opts->authorization) {
        /* If encoding fails the header is simply omitted. */
        if((authorization = base64_encode(opts->authorization)) != NULL) {
            strbuf_printf(&sb, "Authorization: %s %s\r\n", "Basic", authorization);
            free(authorization);
        }
    }
    strbuf_printf(&sb, "Pragma: %s\r\n", "No-Cache");
    if(opts->user_agent) {
        strbuf_printf(&sb, "User-Agent: %s\r\n", opts->user_agent);
    } else {
        strbuf_printf(&sb, "User-Agent: %s/%s\r\n", program_name, version);
    }
    return strbuf_finish(&sb);
}

/* Write all `len` bytes to `sock`, retrying short and interrupted sends. */
static int send_all(int sock, const char *data, size_t len)
{
    ssize_t sent;

    while(len > 0) {
        sent = send(sock, data, len, 0);
        if(sent < 0) {
            if(errno == EINTR) {
                continue;
            }
            perror("send");
            return 0;
        }
        data += sent;
        len -= (size_t)sent;
    }
    return 1;
}

/*
 * Send `request` followed by the terminating blank line and read the reply
 * until the peer closes the connection.
 * Returns the raw reply (status line, headers and body) as a malloc'd,
 * NUL-terminated string, or NULL if connecting, sending, receiving or
 * allocating failed.
 */
static char *query_google(const struct options *opts, const char *request)
{
    struct strbuf response = { NULL, 0, 0, 0 };
    char chunk[8192];
    ssize_t got;
    int sock;
    int ok = 0;

    if((sock = open_connection(opts)) < 0) {
        return NULL;
    }
    if(send_all(sock, request, strlen(request)) && send_all(sock, "\r\n", 2)) {
        for(;;) {
            got = recv(sock, chunk, sizeof(chunk), 0);
            if(got > 0) {
                strbuf_append(&response, chunk, (size_t)got);
            } else if(got == 0) {
                ok = 1;     /* orderly end of stream */
                break;
            } else if(errno != EINTR) {
                perror("recv");
                break;
            }
        }
    }
    if(close(sock)) {
        perror("close");
    }
    if(!ok) {
        free(response.data);
        return NULL;
    }
    return strbuf_finish(&response);
}

/*
 * Open a TCP connection to the proxy (when configured) or to server_name:80.
 * Returns a connected socket descriptor the caller must close(), or -1
 * after reporting the error.
 */
static int open_connection(const struct options *opts)
{
    const struct hostent *server_host;
    struct sockaddr_in sa_in;
    const char *hostname;
    in_addr_t addr;
    unsigned short port;
    int sock;

    if(opts->proxy_name != NULL) {
        hostname = opts->proxy_name;
        port = (unsigned short)opts->proxy_port;
    } else {
        hostname = server_name;
        port = 80;
    }

    memset(&sa_in, 0, sizeof(sa_in));
    sa_in.sin_family = AF_INET;
    sa_in.sin_port = htons(port);

    if((server_host = gethostbyname(hostname)) != NULL
        && server_host->h_addrtype == AF_INET
        && server_host->h_length == (int)sizeof(sa_in.sin_addr)
        && server_host->h_addr_list[0] != NULL) {
        memcpy(&sa_in.sin_addr, server_host->h_addr_list[0], sizeof(sa_in.sin_addr));
    } else if((addr = inet_addr(hostname)) != INADDR_NONE) {
        /* Dotted-quad fallback: use the address as given. */
        sa_in.sin_addr.s_addr = addr;
    } else {
        /* gethostbyname() reports through h_errno, so perror() would lie. */
        fprintf(stderr, "%s: cannot resolve host %s\n", program_name, hostname);
        return -1;
    }

    if((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
        perror("socket");
        return -1;
    }
    if(connect(sock, (struct sockaddr *)&sa_in, sizeof(sa_in))) {
        perror("connect");
        close(sock);
        return -1;
    }
    return sock;
}

/*
 * Return the padded base64 encoding of the NUL-terminated `input` as a
 * malloc'd string, or NULL on allocation failure.
 */
static char *base64_encode(const char *input)
{
    static const char base64_alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const unsigned char *in = (const unsigned char *)input;
    size_t len = strlen(input);
    char *output;
    char *out;
    size_t i;

    /* Four output characters per (possibly partial) group of three bytes. */
    if((output = malloc(4 * ((len + 2) / 3) + 1)) == NULL) {
        perror("malloc");
        return NULL;
    }
    out = output;
    for(i = 0; i + 2 < len; i += 3) {
        *out++ = base64_alphabet[in[i] >> 2];
        *out++ = base64_alphabet[((in[i] & 0x03) << 4) | (in[i + 1] >> 4)];
        *out++ = base64_alphabet[((in[i + 1] & 0x0f) << 2) | (in[i + 2] >> 6)];
        *out++ = base64_alphabet[in[i + 2] & 0x3f];
    }
    if(i < len) {
        /* One or two bytes left over: emit them and pad with '='. */
        *out++ = base64_alphabet[in[i] >> 2];
        if(i + 1 < len) {
            *out++ = base64_alphabet[((in[i] & 0x03) << 4) | (in[i + 1] >> 4)];
            *out++ = base64_alphabet[(in[i + 1] & 0x0f) << 2];
        } else {
            *out++ = base64_alphabet[(in[i] & 0x03) << 4];
            *out++ = '=';
        }
        *out++ = '=';
    }
    *out = '\0';
    return output;
}

/* Print "<what>: <regerror text>" on stderr. */
static void report_regex_error(const char *what, int errcode, const regex_t *reg)
{
    size_t errbuf_size;
    char *errbuf;

    errbuf_size = regerror(errcode, reg, NULL, 0);
    if((errbuf = malloc(errbuf_size)) != NULL) {
        regerror(errcode, reg, errbuf, errbuf_size);
        fprintf(stderr, "%s: %s\n", what, errbuf);
        free(errbuf);
    } else {
        perror("malloc");
    }
}

/*
 * Print one link target per line from the fetched page.  `result` is
 * modified in place: nested blocks are cut out before matching.
 */
static void display_result(const struct options *opts, char *result)
{
    /*
     * NOTE(review): the two tag literals and the "<a href=\([^>" head of
     * both patterns were stripped from the copy of this file under review.
     * They are reconstructed here from what survived (the "]*\)>" tails and
     * the 12-byte skip, which is strlen("<blockquote>")); confirm against
     * the upstream source.
     */
    static const char open_tag[] = "<blockquote>";
    static const char close_tag[] = "</blockquote>";
    const char *pattern;
    regex_t reg;
    regmatch_t match[2];
    regoff_t offset;
    int errcode;
    char *start;
    char *end;
    char *tail;

    if(opts->use_cache) {
        pattern = "<a href=\\([^>]*\\)>Cached";
    } else {
        pattern = "<a href=\\([^>]*\\)>";
    }

    /* Only show one hit per site by default. */
    start = result;
    while((start = strstr(start, open_tag)) != NULL) {
        if((end = strstr(start, close_tag)) == NULL) {
            break;      /* unterminated block: leave the rest untouched */
        }
        tail = end + (sizeof(close_tag) - 1);
        memmove(start, tail, strlen(tail) + 1);     /* +1 keeps the NUL */
    }

    if((errcode = regcomp(&reg, pattern, REG_ICASE)) != 0) {
        report_regex_error("regcomp", errcode, &reg);
        return;
    }
    offset = 0;
    while(!(errcode = regexec(&reg, &result[offset], 2, match, 0))) {
        if(match[1].rm_so >= 0) {
            printf("%.*s\n", (int)(match[1].rm_eo - match[1].rm_so),
                   &result[offset + match[1].rm_so]);
        }
        offset += match[0].rm_eo;
    }
    if(errcode != REG_NOMATCH) {
        report_regex_error("regexec", errcode, &reg);
    }
    regfree(&reg);
}

/*
 * Parse the command line into a static options record.
 *
 * Returns the record, or NULL when the program should stop without
 * querying (-?, -V, unknown option, missing argument).  *exit_status is set
 * to EXIT_FAILURE when an unknown option or missing argument is reported
 * and is left alone otherwise.  Rejected option *values* are reported and
 * ignored; parsing continues.  -p overwrites the ':' in its argument.
 */
static const struct options *get_options(int argc, char * const argv[], int *exit_status)
{
    static const char *langs[] =
    {
        "ar", "bg", "ca", "zh-CN", "zh-TW", "hr", "cs", "da", "nl", "en",
        "et", "fi", "fr", "de", "el", "iw", "hu", "is", "id", "it", "ja",
        "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sr", "sk", "sl",
        "es", "sv", "tr",
    };
    static struct options opts;
    static const char short_opt[] = ":?VZn:l:f:o:d:i:e:L:Scu:p:a:";
#if defined(__GLIBC__)
    static const struct option long_options[] =
    {
        { "help",       no_argument,       NULL, '?' },
        { "version",    no_argument,       NULL, 'V' },
        { "url",        no_argument,       NULL, 'Z' },
        { "numresults", required_argument, NULL, 'n' },
        { "language",   required_argument, NULL, 'l' },
        { "filetype",   required_argument, NULL, 'f' },
        { "occurs",     required_argument, NULL, 'o' },
        { "date",       required_argument, NULL, 'd' },
        { "include",    required_argument, NULL, 'i' },
        { "exclude",    required_argument, NULL, 'e' },
        { "license",    required_argument, NULL, 'L' },
        { "safesearch", no_argument,       NULL, 'S' },
        { "cache",      no_argument,       NULL, 'c' },
        { "useragent",  required_argument, NULL, 'u' },
        { "proxy",      required_argument, NULL, 'p' },
        { "auth",       required_argument, NULL, 'a' },
        { NULL,         0,                 NULL, '\0' }
    };
    int option_index = 0;
#endif
    const struct options *opts_ptr = &opts;
    int opt_char;
    int valid;
    size_t i;

    opterr = 0;

#ifdef __GLIBC__
    while((opt_char = getopt_long(argc, argv, short_opt, long_options, &option_index)) != -1)
#else
    while((opt_char = getopt(argc, argv, short_opt)) != -1)
#endif
    {
        valid = 1;
        switch(opt_char) {
        case 0:
            break;
        case ':':
        case '?':
            /* '?' is also a real option (help), hence the optopt test. */
            if(opt_char == '?' && optopt != '?') {
                fprintf(stderr, "Invalid option %s\n", argv[optind - 1]);
                *exit_status = EXIT_FAILURE;
            } else if(opt_char == ':') {
                fprintf(stderr, "Invalid option value %s\n", argv[optind - 1]);
                *exit_status = EXIT_FAILURE;
            }
            option_help();
            opts_ptr = NULL;
            break;
        case 'V':
            option_version();
            opts_ptr = NULL;
            break;
        case 'Z':
            opts.display_url = 1;
            break;
        case 'n':
            valid = parse_int(optarg, 0, INT_MAX, &opts.num_results);
            break;
        case 'l':
            valid = 0;
            for(i = 0; i < countof(langs); ++i) {
                if(!strcmp(optarg, langs[i])) {
                    opts.language = optarg;
                    valid = 1;
                    break;
                }
            }
            break;
        case 'f':
            opts.ft = 'i';
            opts.filetype = optarg;
            break;
        case 'o':
            if(optarg[0] == 'a') {
                opts.occurs = "any";
            } else if(optarg[0] == 't') {
                opts.occurs = "title";
            } else if(optarg[0] == 'u') {
                opts.occurs = "url";
            } else if(optarg[0] == 'l') {
                opts.occurs = "links";
            } else {
                valid = 0;
            }
            break;
        case 'd':
            if(optarg[0] == '3') {
                opts.date = "3m";
            } else if(optarg[0] == '6') {
                opts.date = "6m";
            } else if(optarg[0] == 'y') {
                opts.date = "y";
            } else {
                valid = 0;
            }
            break;
        case 'i':
        case 'e':
            opts.domain = (char)opt_char;
            opts.site = optarg;
            break;
        case 'L':
            /* Only the first letter is significant. */
            if(optarg[0] == 'n') {
                opts.usage_rights = "";
            } else if(optarg[0] == 'f') {
                opts.usage_rights = "(cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial|cc_nonderived)";
            } else if(optarg[0] == 'c') {
                opts.usage_rights = "(cc_publicdomain|cc_attribute|cc_sharealike|cc_nonderived).-(cc_noncommercial)";
            } else if(optarg[0] == 'm') {
                opts.usage_rights = "(cc_publicdomain|cc_attribute|cc_sharealike|cc_noncommercial).-(cc_nonderived)";
            } else if(optarg[0] == 's') {
                opts.usage_rights = "(cc_publicdomain|cc_attribute|cc_sharealike).-(cc_noncommercial|cc_nonderived)";
            } else {
                valid = 0;
            }
            break;
        case 'S':
            opts.safe_search = "active";
            break;
        case 'p': {
            char *proxy_sep = strchr(optarg, ':');

            if(proxy_sep != NULL && proxy_sep != optarg
                && parse_int(&proxy_sep[1], 1, 65535, &opts.proxy_port)) {
                *proxy_sep = '\0';      /* split "host:port" in place */
                opts.proxy_name = optarg;
            } else {
                valid = 0;
            }
            break;
        }
        case 'a':
            if(strchr(optarg, ':') != NULL) {
                opts.authorization = optarg;
            } else {
                valid = 0;
            }
            break;
        case 'c':
            opts.use_cache = 1;
            break;
        case 'u':
            opts.user_agent = optarg;
            break;
        default:
            break;
        }
        if(!valid) {
            fprintf(stderr, "Invalid option value %s\n", argv[optind - 1]);
        }
    }
    opts.query_start = (size_t)optind;
    return opts_ptr;
}

/*
 * Print usage on stdout.
 * NOTE(review): column alignment inside the help text was lost in the copy
 * of this file under review (runs of spaces were collapsed) and has been
 * laid out afresh; the wording is unchanged apart from two typo fixes and
 * the added -Z entry.
 */
static void option_help(void)
{
    static const char option_text[] =
#ifdef __GLIBC__
        "  -n n,     --numresults n    Limit number of results to n\n"
        "  -l xx,    --language xx     Restrict query to language xx\n"
        "  -f xx,    --filetype xx     Only return results of the filetype xx\n"
        "  -d        --date            Page updated anytime\n"
        "     3,        3                in the last 3 months\n"
        "     6,        6                in the last 6 months\n"
        "     y,        y                in the last year\n"
        "  -o a,     --occurs any      Occurs anywhere in the page\n"
        "     t,        title            in the title of the page\n"
        "     u,        url              in the url of the page\n"
        "     l,        links            in links to the page\n"
        "  -i xx,    --include xx      Only return results from site xx\n"
        "  -e xx,    --exclude xx      Don't return results from site xx\n"
        "  -L n,     --license none    Return results that are not filtered by license\n"
        "     f,        free             free to use or share\n"
        "     c,        commercial       free to use or share, even commercially\n"
        "     m,        modify           free to use or share or modify\n"
        "     s,        sell             free to use or share or modify, even commercially\n"
        "  -S,       --safesearch      SafeSearch\n"
        "  -c,       --cache           Return Google's cache url.\n"
        "  -u xx,    --useragent xx    User agent to report to google\n"
        "  -p xx:nn, --proxy xx:nn     Proxy server is xx and port is nn\n"
        "  -a id:pw, --auth id:pw      Authenticate with user id and password pw\n"
        "  -Z,       --url             Print the search URL instead of querying\n"
        /* Standard options */
        "  -?,       --help            Display this help and exit\n"
        "  -V,       --version         Output version information and exit\n";
#else
        "  -n n        Limit number of results to n\n"
        "  -l xx       Restrict query to language xx\n"
        "  -f xx,      Only return results of the filetype xx\n"
        "  -d          Page updated anytime\n"
        "     3          in the last 3 months\n"
        "     6          in the last 6 months\n"
        "     y          in the last year\n"
        "  -o a        Occurs anywhere in the page\n"
        "     t          in the title of the page\n"
        "     u          in the url of the page\n"
        "     l          in links to the page\n"
        "  -i xx       Only return results from site xx\n"
        "  -e xx       Don't return results from site xx\n"
        "  -L n,       Return results that are not filtered by license\n"
        "     f,         free to use or share\n"
        "     c,         free to use or share, even commercially\n"
        "     m,         free to use or share or modify\n"
        "     s,         free to use or share or modify, even commercially\n"
        "  -S,         SafeSearch\n"
        "  -c          Return Google's cache url.\n"
        "  -u xx       User agent to report to google\n"
        "  -p xx:nn    Proxy server is xx and port is nn\n"
        "  -a id:pw    Authenticate with user id and password pw\n"
        "  -Z          Print the search URL instead of querying\n"
        /* Standard options */
        "  -?          Display this help and exit\n"
        "  -V          Output version information and exit\n";
#endif

    printf("Usage: %s [OPTION] ...\n\n", program_name);
    fputs(option_text, stdout);
    fputs("\nReport bugs to hald@sound.net\n", stdout);
}

/* Print "<program> <version>" on stdout. */
static void option_version(void)
{
    printf("%s %s\n", program_name, version);
}