Skip to content

Commit

Permalink
feat: 从站点地图中提取链接 #3
Browse files Browse the repository at this point in the history
  • Loading branch information
renfei committed Sep 24, 2024
1 parent 71d5071 commit ee0ad6c
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 30 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ java -jar Indexing.jar
> ```
> ~~其中```d182b3f28525f2d3wgacfbs36e696dba```就是要取出的Token。~~
### 关于从站点地图中提取链接
此功能在```1.0.3```版本中加入。需求来自:[github.com/renfei/Indexing/issues/3](https://github.com/renfei/Indexing/issues/3)
需要注意:Indexing 接口的本意是:新内容产生时,及时通知搜索引擎爬取,确保新内容的及时收录。
网站地图包含了全站链接,其中包括陈旧的内容,所以会产生以下问题:
- Indexing 接口滥用:提交的并不是新产生的内容,其中包含大量陈旧内容,可能违反搜索引擎的用户使用协议,并可能导致接口权限被收回
- 全站链接数量过大,部分接口每天每月有使用限额,一口气提交会导致接口使用额度耗尽
- 搜索引擎本就会爬取站点地图,重复提交里面的内容并不会有什么特殊的处理
请合理使用您的 API 资源,避免滥用被搜索引擎封禁 API 权限。
### 代码仓库
- Gitee:[https://gitee.com/rnf/Indexing](https://gitee.com/rnf/Indexing)
- Github:[https://github.com/renfei/Indexing](https://github.com/renfei/Indexing)
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>net.renfei</groupId>
<artifactId>Indexing</artifactId>
<version>1.0.2</version>
<version>1.0.3</version>
<name>Indexing</name>
<url>https://www.renfei.net/kitbox/indexing</url>
<description>Indexing - 搜索引擎推送工具 - SEO 工具箱</description>
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/net/renfei/indexing/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
*/
public class Application {
public static final MainWindow MAIN_WINDOW = new MainWindow();
public static final String VERSION = "1.0.2";
public static final String VERSION = "1.0.3";
public static void main(String[] args) {
javax.swing.JFrame frame = new javax.swing.JFrame("Indexing - 搜索引擎推送工具 - SEO 工具箱");
frame.setContentPane(MAIN_WINDOW.mainPanel);
frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
frame.pack();
frame.setSize(1000, 618);
frame.setSize(1000, 718);
frame.setLocationRelativeTo(null);
frame.setJMenuBar(new MenuBar());
MAIN_WINDOW.init();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package net.renfei.indexing.service;

import net.renfei.indexing.ui.MainWindow;
import net.renfei.sdk.utils.BeanUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.util.HashSet;
import java.util.Set;

/**
 * Background task that downloads a site's {@code sitemap.xml} and appends every
 * {@code <loc>} found under a {@code <url>} element to the URL text area of the main window.
 *
 * <p>If the downloaded document contains {@code <sitemap>} elements it is treated as a
 * sitemap index: each referenced sitemap is fetched and processed in turn. Otherwise the
 * document itself is processed as a plain sitemap.
 *
 * <p>Progress and errors are reported through {@code MainWindow.setLog}; no exception
 * escapes {@link #run()}.
 */
public class ExtractSitemapService implements Runnable {
    private final MainWindow mainWindow;

    /**
     * @param mainWindow window whose site URL field is read and whose URL area and log
     *                   receive the results
     */
    public ExtractSitemapService(MainWindow mainWindow) {
        this.mainWindow = mainWindow;
    }

    /**
     * Builds the sitemap address from the configured site URL, fetches it and extracts
     * the links, following one level of sitemap index if present.
     */
    @Override
    public void run() {
        String site = mainWindow.siteUrl.getText();
        if (BeanUtils.isEmpty(site)) {
            mainWindow.setLog("【站点URL】不能为空。");
            return;
        }
        if (site.endsWith("/")) {
            site += "sitemap.xml";
        } else {
            site += "/sitemap.xml";
        }
        mainWindow.setLog("访问 " + site);
        try {
            Document document = newDocumentBuilder().parse(site);
            NodeList sitemapList = document.getElementsByTagName("sitemap");
            if (sitemapList.getLength() > 0) {
                mainWindow.setLog("检测到站点地图集合");
                for (int i = 0; i < sitemapList.getLength(); i++) {
                    NodeList childNodes = sitemapList.item(i).getChildNodes();
                    for (int j = 0; j < childNodes.getLength(); j++) {
                        Node child = childNodes.item(j);
                        if (!"loc".equalsIgnoreCase(child.getNodeName())) {
                            continue;
                        }
                        String location = child.getTextContent().trim();
                        if (!location.isEmpty()) {
                            getUrl(location);
                        }
                    }
                }
            } else {
                // Plain sitemap: reuse the document that was just parsed instead of
                // downloading and parsing the same URL a second time.
                mainWindow.setLog("发现网站地图:" + site);
                appendUrls(document);
            }
            mainWindow.setLog("执行结束");
        } catch (Exception e) {
            logError(e);
        }
    }

    /**
     * Fetches a single sitemap document and appends the links it lists.
     * Failures are logged and do not propagate, so remaining sitemaps are still processed.
     *
     * @param url address of the sitemap document
     */
    private void getUrl(String url) {
        mainWindow.setLog("发现网站地图:" + url);
        try {
            appendUrls(newDocumentBuilder().parse(url));
        } catch (Exception e) {
            logError(e);
        }
    }

    /**
     * Appends the text of every non-empty {@code <loc>} child of each {@code <url>}
     * element to the URL text area, one link per line.
     */
    private void appendUrls(Document document) {
        NodeList urlList = document.getElementsByTagName("url");
        for (int i = 0; i < urlList.getLength(); i++) {
            NodeList childNodes = urlList.item(i).getChildNodes();
            for (int j = 0; j < childNodes.getLength(); j++) {
                Node child = childNodes.item(j);
                if (!"loc".equalsIgnoreCase(child.getNodeName())) {
                    continue;
                }
                String location = child.getTextContent().trim();
                // Skip empty <loc/> entries so no blank lines end up in the URL list.
                if (!location.isEmpty()) {
                    mainWindow.urls.append("\r\n" + location);
                }
            }
        }
    }

    /**
     * Creates a parser that does not resolve external entities or external DTDs.
     * The sitemap is fetched from a remote server, so its content is untrusted.
     */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return factory.newDocumentBuilder();
    }

    /** Writes the standard error notice to the log; falls back to {@code toString()} when the exception has no message. */
    private void logError(Exception e) {
        String detail = e.getMessage() != null ? e.getMessage() : e.toString();
        mainWindow.setLog("\n[!] 发生错误:\r\n" + detail + "\r\n如果您认为不是您的错误,请联系开发者:[email protected]\r\n");
    }
}
28 changes: 18 additions & 10 deletions src/main/java/net/renfei/indexing/ui/MainWindow.form
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
</scrollpane>
</children>
</splitpane>
<grid id="b0205" layout-manager="GridLayoutManager" row-count="16" column-count="2" same-size-horizontally="false" same-size-vertically="false" hgap="2" vgap="4">
<grid id="b0205" layout-manager="GridLayoutManager" row-count="17" column-count="2" same-size-horizontally="false" same-size-vertically="false" hgap="2" vgap="4">
<margin top="0" left="0" bottom="0" right="0"/>
<constraints>
<splitpane position="left"/>
Expand Down Expand Up @@ -92,7 +92,7 @@
</component>
<component id="df307" class="javax.swing.JLabel">
<constraints>
<grid row="8" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="9" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="16"/>
</grid>
</constraints>
Expand All @@ -102,7 +102,7 @@
</component>
<component id="ae66c" class="javax.swing.JCheckBox" binding="chkBaiduPuTong">
<constraints>
<grid row="8" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="9" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="21"/>
</grid>
</constraints>
Expand All @@ -113,7 +113,7 @@
</component>
<component id="3df7d" class="javax.swing.JCheckBox" binding="chkBaiDuKuiSu">
<constraints>
<grid row="9" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="10" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="21"/>
</grid>
</constraints>
Expand All @@ -123,7 +123,7 @@
</component>
<component id="3631d" class="javax.swing.JCheckBox" binding="chkGoogle">
<constraints>
<grid row="11" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="12" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="21"/>
</grid>
</constraints>
Expand All @@ -133,7 +133,7 @@
</component>
<component id="3df5f" class="javax.swing.JButton" binding="execButton">
<constraints>
<grid row="13" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false">
<grid row="14" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false">
<preferred-size width="30" height="30"/>
</grid>
</constraints>
Expand All @@ -153,7 +153,7 @@
</component>
<component id="3d3d7" class="javax.swing.JLabel">
<constraints>
<grid row="14" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="15" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="16"/>
</grid>
</constraints>
Expand All @@ -163,7 +163,7 @@
</component>
<component id="7e77b" class="javax.swing.JCheckBox" binding="chkBing">
<constraints>
<grid row="10" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="11" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="21"/>
</grid>
</constraints>
Expand Down Expand Up @@ -209,7 +209,7 @@
</component>
<component id="56fab" class="javax.swing.JTextArea" binding="explain">
<constraints>
<grid row="15" column="1" row-span="1" col-span="1" vsize-policy="6" hsize-policy="6" anchor="0" fill="3" indent="0" use-parent-layout="false">
<grid row="16" column="1" row-span="1" col-span="1" vsize-policy="6" hsize-policy="6" anchor="0" fill="3" indent="0" use-parent-layout="false">
<preferred-size width="30" height="50"/>
</grid>
</constraints>
Expand Down Expand Up @@ -260,14 +260,22 @@
</component>
<component id="8db76" class="javax.swing.JCheckBox" binding="chkSo">
<constraints>
<grid row="12" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<grid row="13" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false">
<preferred-size width="30" height="21"/>
</grid>
</constraints>
<properties>
<text value="360搜索"/>
</properties>
</component>
<component id="852d4" class="javax.swing.JButton" binding="extractSitemapButtonButton">
<constraints>
<grid row="7" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="0" fill="1" indent="0" use-parent-layout="false"/>
</constraints>
<properties>
<text value="从站点地图中提取链接"/>
</properties>
</component>
</children>
</grid>
</children>
Expand Down
48 changes: 31 additions & 17 deletions src/main/java/net/renfei/indexing/ui/MainWindow.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import com.intellij.uiDesigner.core.GridConstraints;
import com.intellij.uiDesigner.core.GridLayoutManager;
import com.intellij.uiDesigner.core.Spacer;
import net.renfei.indexing.entity.ConfigVO;
import net.renfei.indexing.service.ConfigFileService;
import net.renfei.indexing.service.ExecService;
import net.renfei.indexing.service.ExtractSitemapService;
import net.renfei.sdk.utils.BeanUtils;
import net.renfei.sdk.utils.DateUtils;

Expand All @@ -17,6 +17,8 @@
import java.util.ArrayList;
import java.util.List;

import static javax.swing.JOptionPane.WARNING_MESSAGE;

/**
* 主窗体
*
Expand All @@ -42,29 +44,41 @@ public class MainWindow {
public JCheckBox saveConfig;
public JTextField soToken;
public JCheckBox chkSo;
private JButton extractSitemapButtonButton;

public void init() {
urlsScroPane.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED);
logsScroPane.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED);
MainWindow mainWindow = this;
execButton.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
setLog("开始执行");
Thread execService = new Thread(new ExecService(mainWindow), "ExecService");
execService.start();
execButton.addActionListener(e -> {
setLog("开始执行");
Thread execService = new Thread(new ExecService(mainWindow), "ExecService");
execService.start();
});
googleJson.addActionListener(e -> {
JFileChooser fc = new JFileChooser("/");
int val = fc.showOpenDialog(null);
if (val == JFileChooser.APPROVE_OPTION) {
googleJson.setText(fc.getSelectedFile().getPath());
} else {
googleJson.setText("点击选择JSON文件");
}
});
googleJson.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
JFileChooser fc = new JFileChooser("/");
int val = fc.showOpenDialog(null);
if (val == JFileChooser.APPROVE_OPTION) {
googleJson.setText(fc.getSelectedFile().getPath());
} else {
googleJson.setText("点击选择JSON文件");
}
extractSitemapButtonButton.addActionListener(e -> {
int opt = JOptionPane.showConfirmDialog(extractSitemapButtonButton,
"点击确认将从网站地图文件(sitemap.xml)中提取链接,但是请注意:\n\n" +
"Indexing 接口的本意是:新内容产生时,及时通知搜索引擎爬取,确保新内容的及时收录。\n" +
"网站地图包含了全站连接,其中包括陈旧的内容,所以会产生以下问题:\n\n" +
"1.Indexing 接口滥用,因为提交的并不是新产生的内容,包含大量陈旧可能违反搜索引擎的用户使用协议,可能导致接口权限被收回\n" +
"2.全站链接数量过大,部分接口每天每月有使用限额,一口气提交会导致接口使用额度耗尽\n\n" +
"请知晓以上说明,自行合理的使用 API。",
"从站点地图中提取链接",
JOptionPane.YES_NO_OPTION, WARNING_MESSAGE);
if (opt == JOptionPane.YES_OPTION) {
//确认继续操作
setLog("从站点地图中提取链接");
Thread execService = new Thread(new ExtractSitemapService(mainWindow), "ExtractSitemapService");
execService.start();
}
});
ConfigVO configVO = ConfigFileService.getConfig();
Expand Down

0 comments on commit ee0ad6c

Please sign in to comment.