使用 VC++ 获取网页源码(Unicode 版本)
要在 VC++ 中获取网页源码并正确处理 Unicode 编码,可以使用以下几种方法:

(图片来源网络,侵删)
使用 WinINet API
#include <windows.h>
#include <wininet.h>
#include <tchar.h>
#include <string>
#pragma comment(lib, "wininet.lib")
// Downloads the resource at `url` with WinINet and returns its body as a
// UTF-16 string.
//
// Parameters:
//   url - absolute URL to fetch (e.g. L"http://example.com/index.html").
//
// Returns:
//   The response body converted from UTF-8 to UTF-16, or an empty string if
//   the session/request could not be opened or nothing was received. The body
//   is assumed to be UTF-8; other encodings need a different code page.
std::wstring GetWebPageSource(const std::wstring& url)
{
    std::wstring result;

    // Initialise WinINet using the system's preconfigured proxy settings.
    HINTERNET hInternet = InternetOpenW(L"Mozilla/5.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
    if (!hInternet)
    {
        return result;
    }

    // InternetOpenUrl parses the URL, connects and sends the GET request in
    // one step, so the caller's `url` is actually honoured (scheme, host,
    // port, path and query) instead of a hard-coded host.
    HINTERNET hRequest = InternetOpenUrlW(hInternet, url.c_str(), NULL, 0, 0, 0);
    if (hRequest)
    {
        // Collect the raw bytes first. Converting chunk by chunk would break
        // any multi-byte UTF-8 sequence that straddles a chunk boundary.
        std::string bytes;
        char buffer[4096];
        DWORD bytesRead = 0;
        while (InternetReadFile(hRequest, buffer, sizeof(buffer), &bytesRead) && bytesRead > 0)
        {
            bytes.append(buffer, bytesRead);
        }
        InternetCloseHandle(hRequest);

        // MultiByteToWideChar takes an int length; skip bodies that do not fit.
        if (!bytes.empty() && bytes.size() <= 0x7FFFFFFF)
        {
            const int srcLen = static_cast<int>(bytes.size());
            // An explicit source length means no terminating NUL is required
            // or counted in the result.
            const int len = MultiByteToWideChar(CP_UTF8, 0, bytes.data(), srcLen, NULL, 0);
            if (len > 0)
            {
                result.resize(static_cast<size_t>(len));
                MultiByteToWideChar(CP_UTF8, 0, bytes.data(), srcLen, &result[0], len);
            }
        }
    }

    InternetCloseHandle(hInternet);
    return result;
}
使用 WinHTTP API(更现代)
#include <windows.h>
#include <winhttp.h>
#include <string>
#include <vector>
#pragma comment(lib, "winhttp.lib")
std::wstring GetWebPageSourceWinHTTP(const std::wstring& url)
{
std::wstring result;
HINTERNET hSession = NULL;
HINTERNET hConnect = NULL;
HINTERNET hRequest = NULL;
// 解析URL
WCHAR szHostName[256] = {0};
WCHAR szUrlPath[256] = {0};
URL_COMPONENTS urlComp = {0};
urlComp.dwStructSize = sizeof(urlComp);
urlComp.lpszHostName = szHostName;
urlComp.dwHostNameLength = _countof(szHostName);
urlComp.lpszUrlPath = szUrlPath;
urlComp.dwUrlPathLength = _countof(szUrlPath);
if (!WinHttpCrackUrl(url.c_str(), url.length(), 0, &urlComp))
{
return L"";
}
// 初始化 WinHTTP
hSession = WinHttpOpen(L"WinHTTP Example", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0);
if (hSession)
{
// 连接到服务器
hConnect = WinHttpConnect(hSession, szHostName, urlComp.nPort, 0);
if (hConnect)
{
// 打开请求
hRequest = WinHttpOpenRequest(hConnect, L"GET", szUrlPath, NULL,
WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES,
WINHTTP_FLAG_SECURE);
if (hRequest)
{
// 发送请求
if (WinHttpSendRequest(hRequest, WINHTTP_NO_ADDITIONAL_HEADERS, 0,
WINHTTP_NO_REQUEST_DATA, 0, 0, 0))
{
// 接收响应
if (WinHttpReceiveResponse(hRequest, NULL))
{
DWORD dwSize = 0;
DWORD dwDownloaded = 0;
// 获取数据大小
dwSize = WinHttpQueryDataAvailable(hRequest, &dwDownloaded);
if (dwSize)
{
std::vector<char> buffer(dwDownloaded);
// 读取数据
if (WinHttpReadData(hRequest, buffer.data(), dwDownloaded, &dwDownloaded))
{
// 转换为宽字符串(假设是UTF-8编码)
int len = MultiByteToWideChar(CP_UTF8, 0, buffer.data(), -1, NULL, 0);
if (len > 0)
{
std::vector<wchar_t> wbuffer(len);
MultiByteToWideChar(CP_UTF8, 0, buffer.data(), -1, wbuffer.data(), len);
result = wbuffer.data();
}
}
}
}
}
WinHttpCloseHandle(hRequest);
}
WinHttpCloseHandle(hConnect);
}
WinHttpCloseHandle(hSession);
}
return result;
}
使用第三方库(如 libcurl)
如果不想使用 Windows 原生 API,可以使用跨平台的 libcurl 库:
#include <curl/curl.h>
#include <string>
// Write callback in the shape libcurl expects: appends the `size * nmemb`
// bytes at `contents` to the std::string that `userp` points to and returns
// the number of bytes consumed.
static size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    const size_t chunkBytes = size * nmemb;
    std::string* sink = static_cast<std::string*>(userp);
    const char* data = static_cast<const char*>(contents);
    sink->append(data, chunkBytes);
    return chunkBytes;
}
std::string GetWebPageSourceCurl(const std::string& url)
{
CURL* curl;
CURLcode res;
std::string readBuffer;
curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
}
return readBuffer;
}
注意事项
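- 编码处理:网页可能使用不同的编码(UTF-8、GBK 等)。上面的示例在调用 MultiByteToWideChar 时固定使用 CP_UTF8,即假设响应为 UTF-8;如果页面是 GBK 编码,应改用代码页 936,实际应用中最好根据响应头 Content-Type 或页面中的 charset 声明来选择代码页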
- 错误处理:实际应用中需要添加更完善的错误处理
- 内存管理:注意释放分配的资源
- 适用场景:WinINet 面向交互式桌面客户端程序,不应在 Windows 服务或服务器端程序中使用;这类场景(以及需要在后台线程中并发发起大量请求时)使用 WinHTTP 更合适
以上代码提供了基本的获取网页源码功能,可以根据实际需求进行扩展和优化。

(图片来源网络,侵删)
