From 5e387196451feef9fc4f0b0bebb669fa23107c00 Mon Sep 17 00:00:00 2001 From: Henry Date: Sat, 9 Feb 2019 04:28:11 +0000 Subject: [PATCH 1/3] feat: add ftchinese parser --- fixtures/www.ftchinese.com/1549682835827.html | 558 ++++++++++++++++++ src/extractors/custom/index.js | 1 + .../custom/www.ftchinese.com/index.js | 44 ++ .../custom/www.ftchinese.com/index.test.js | 107 ++++ 4 files changed, 710 insertions(+) create mode 100644 fixtures/www.ftchinese.com/1549682835827.html create mode 100644 src/extractors/custom/www.ftchinese.com/index.js create mode 100644 src/extractors/custom/www.ftchinese.com/index.test.js diff --git a/fixtures/www.ftchinese.com/1549682835827.html b/fixtures/www.ftchinese.com/1549682835827.html new file mode 100644 index 000000000..648208c18 --- /dev/null +++ b/fixtures/www.ftchinese.com/1549682835827.html @@ -0,0 +1,558 @@ + + + + + + +欧盟否决西门子-阿尔斯通铁路业务合并案 - - FT中文网 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ +
+
+
+ + +
+
+
+
+
+
+
+
+
登录×
+ +
+
+
+
+
+
+
+
+
+ + + + +
+
+
+
+
+
+ +
+
+
+
+

+为您推荐 +

+
    +
+
+
+
+
+
+
+
+
反垄断
+

欧盟否决西门子-阿尔斯通铁路业务合并案

+
欧盟反垄断专员没有接受西门子和阿尔斯通提出的合并理由,即这次合并对于欧洲企业与中国国有竞争对手展开竞争是必要的。
+
+
+
+
+ + + + + + + +
+ +
+ + + + +
+
+
+ +
+ + +

德国和法国铁路设备制造商西门子(Siemens)与阿尔斯通(Alstom)之间的合并已被欧盟竞争执法机构否决,监管者不接受这样一个观点,即这一联姻对于欧洲与由国家支持的中国竞争对手竞争是必要的。

欧盟竞争事务专员玛格丽特•维斯特格(Margrethe Vestager)周三宣布了这项否决。此前,其他欧盟专员以及各国监管机构支持她的立场,即这笔交易可能导致价格上涨并限制选择,最终可能有损欧洲乘客的利益。

“如果没有足够的补救措施,这一合并将导致保证乘客安全以及用于下一代高速列车的信号系统价格上涨,”维斯特格表示。她补充称:“两家公司不愿意应对我们严重的竞争担忧。”

这一否决将加剧一场长期辩论,焦点问题是欧盟严格的反垄断规则是否需要更新,以便让欧盟企业与由国家支持的中国企业展开竞争。在各自政府的支持下,西门子和阿尔斯通表示,合并其铁路业务对于抵御来自中国中车(CRRC)的竞争是必要的;后者是世界最大的铁路车辆制造商。

然而,竞争官员们得出结论认为,中国企业不太可能很快在欧洲销售火车,这意味着欧盟严格的反垄断规则要求这两家公司出售可观的火车和信号资产。

法国财政部长布鲁诺•勒梅尔(Bruno Le Maire)形容维斯特格的决定“是一个政治错误:欧盟委员会的职责是捍卫欧洲的经济利益……拒绝阿尔斯通与西门子合并将有利于中国的经济和产业利益”。这些情绪得到德国经济部长彼得•阿尔特迈尔(Peter Altmaier)的呼应,他呼吁修改欧盟竞争规则,使大型并购交易变得更加容易。

西门子周三表示了失望。“保护本地顾客利益绝不能意味着欧洲无法与中国、美国等领先国家同处一个公平的竞争环境,”该公司首席执行官凯飒(Joe Kaeser)表示。

阿尔斯通表示,这对于欧洲工业是一个“明显的挫折”。阿尔斯通首席执行官亨利•波帕-拉法基(Henri Poupart-Lafarge)表示:“如果说我有一个遗憾,那就是我用了‘欧洲冠军企业’这个短语。”

然而,英国、西班牙、荷兰和比利时的国家监管机构一致公开表示这两家公司提议的补救措施远远不够,并对这一否决表示欢迎。负责监管英国铁路的英国铁路监管办公室(Office of Rail Regulation)表示:“我们从一开始就明确表示,这次(合并)对英国乘客、货运客户和纳税人来说都是一笔糟糕的交易。”

智库布鲁盖尔(Bruegel)的访问研究员马修•海姆(Mathew Heim)说:“支持欧洲冠军企业不是欧洲竞争法律的职能。

“相反,欧洲需要一项连贯的计划,以培育在全球竞争环境下能够蓬勃发展、而无需诉诸保护主义的关键欧洲产业。”

西门子-阿尔斯通提议合并之际,欧洲正处于一个政治过渡期。今年5月的欧盟选举,以及今年布鲁塞尔领导层的变动,可能带来改革竞争政策的机会。

欧洲反垄断主管承认中国国有企业构成经济威胁,同时为竞争规则做了辩护——她辩称,这些规则帮助欧盟公司保持创新能力和国际竞争力。

维斯特格指出,欧盟有其他手段来制定产业政策,如外来投资筛查、反补贴与贸易防御措施,以及推动封闭的市场提供互惠准入。

阿尔斯通的波帕-拉法基周二告诉《费加罗报》(Le Figaro),不会第二次尝试合并,两家公司今后将各走各的路。

帕特里克•麦吉(Patrick McGee)法兰克福、戴维•基奥恩(David Keohane)巴黎补充报道

译者/和风

+
+
+ +
+
+
+版权声明:本文版权归FT中文网所有,未经允许任何单位或个人不得转载,复制或以任何其他方式使用本文全部或部分,侵权必究。 +
+
+
+

读者评论

+
+
+ +
+
+
+
用户名
+ +
密码
+ + +
+ +
+
+ + +
+
+
+
+
+
+
+ +
+

相关文章

+ +
+
+
+
+

相关话题

+ +
+
+
+
+
+ + + +
+
+
+
+
+
+
设置字号×
+ +
最小
+
较小
+
默认
+
较大
+
最大
+
+
+
+
+
+
+
+
+
+
+
+
分享×
+ +
+
+
+
+
+ + + + + + \ No newline at end of file diff --git a/src/extractors/custom/index.js b/src/extractors/custom/index.js index a90badd45..aee7532c9 100644 --- a/src/extractors/custom/index.js +++ b/src/extractors/custom/index.js @@ -91,3 +91,4 @@ export * from './www.slate.com'; export * from './ici.radio-canada.ca'; export * from './www.fortinet.com'; export * from './www.fastcompany.com'; +export * from './www.ftchinese.com'; diff --git a/src/extractors/custom/www.ftchinese.com/index.js b/src/extractors/custom/www.ftchinese.com/index.js new file mode 100644 index 000000000..297ab1962 --- /dev/null +++ b/src/extractors/custom/www.ftchinese.com/index.js @@ -0,0 +1,44 @@ +export const WwwFtchineseComExtractor = { + domain: 'www.ftchinese.com', + + title: { + selectors: ['h1.story-headline'], + }, + + author: { + selectors: ['span.story-author > a'], + }, + + date_published: { + selectors: ['.story-time'], + }, + + dek: { + selectors: ['.story-lead'], + }, + + lead_image_url: { + selectors: [['.story-image.image >figure', 'data-url']], + }, + + content: { + selectors: ['#story-body-container'], + + // Is there anything in the content you selected that needs transformed + // before it's consumable content? E.g., unusual lazy loaded images + transforms: {}, + + // Is there anything that is in the result that shouldn't be? + // The clean selectors will remove anything that matches from + // the result + clean: [ + 'div.story-theme', + 'h1.story-headline', + 'div.story-byline', + 'div.mpu-container-instory', + 'div#story-action-placeholder', + 'div.clearfloat', + '.o-ads.in-article-advert', + ], + }, +}; diff --git a/src/extractors/custom/www.ftchinese.com/index.test.js b/src/extractors/custom/www.ftchinese.com/index.test.js new file mode 100644 index 000000000..c70f862c9 --- /dev/null +++ b/src/extractors/custom/www.ftchinese.com/index.test.js @@ -0,0 +1,107 @@ +import assert from 'assert'; +import fs from 'fs'; +import URL from 'url'; +import cheerio from 'cheerio'; + +import Mercury from 'mercury'; +import getExtractor from 'extractors/get-extractor'; +import { excerptContent } from 'utils/text'; + +describe('WwwFtchineseComExtractor', () => { + describe('initial test case', () => { + let result; + let url; + beforeAll(() => { + url = 'http://www.ftchinese.com/story/001081367'; + const html = fs.readFileSync( + './fixtures/www.ftchinese.com/1549682835827.html' + ); + result = Mercury.parse(url, html, { fallback: false }); + }); + + it('is selected properly', () => { + // This test should be passing by default. + // It sanity checks that the correct parser + // is being selected for URLs from this domain + const extractor = getExtractor(url); + assert.equal(extractor.domain, URL.parse(url).hostname); + }); + + it('returns the title', async () => { + // To pass this test, fill out the title selector + // in ./src/extractors/custom/www.ftchinese.com/index.js. + const { title } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(title, `欧盟否决西门子-阿尔斯通铁路业务合并案`); + }); + + it('returns the author', async () => { + // To pass this test, fill out the author selector + // in ./src/extractors/custom/www.ftchinese.com/index.js. + const { author } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(author, '罗谢尔•托普兰斯基'); + }); + + it('returns the date_published', async () => { + // To pass this test, fill out the date_published selector + // in ./src/extractors/custom/www.ftchinese.com/index.js. + const { date_published } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(date_published, '2019-01-02T06:21:00.000Z'); + }); + + it('returns the dek', async () => { + // To pass this test, fill out the dek selector + // in ./src/extractors/custom/www.ftchinese.com/index.js. + const { dek } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(dek, null); + }); + + it('returns the lead_image_url', async () => { + // To pass this test, fill out the lead_image_url selector + // in ./src/extractors/custom/www.ftchinese.com/index.js. + const { lead_image_url } = await result; + + // Update these values with the expected values from + // the article. + assert.equal( + lead_image_url, + `http://i.ftimg.net/picture/7/000083507_piclink.jpg` + ); + }); + + it('returns the content', async () => { + // To pass this test, fill out the content selector + // in ./src/extractors/custom/www.ftchinese.com/index.js. + // You may also want to make use of the clean and transform + // options. + const { content } = await result; + + const $ = cheerio.load(content || ''); + + const first1 = excerptContent( + $('*') + .first() + .text(), + 1 + ); + + // Update these values with the expected values from + // the article. + assert.equal( + first1, + '德国和法国铁路设备制造商西门子(Siemens)与阿尔斯通(Alstom)之间的合并已被欧盟竞争执法机构否决,监管者不接受这样一个观点,即这一联姻对于欧洲与由国家支持的中国竞争对手竞争是必要的。欧盟竞争事务专员玛格丽特•维斯特格(Margrethe' + ); + }); + }); +}); From 64e7137f88278f0be983caab8cc097ae5687d70d Mon Sep 17 00:00:00 2001 From: Henry Date: Wed, 13 Feb 2019 00:37:10 +0000 Subject: [PATCH 2/3] fix: ci/circleci: test-web --- src/extractors/custom/www.ftchinese.com/index.test.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/extractors/custom/www.ftchinese.com/index.test.js b/src/extractors/custom/www.ftchinese.com/index.test.js index c70f862c9..f5daef7ae 100644 --- a/src/extractors/custom/www.ftchinese.com/index.test.js +++ b/src/extractors/custom/www.ftchinese.com/index.test.js @@ -1,5 +1,4 @@ import assert from 'assert'; -import fs from 'fs'; import URL from 'url'; import cheerio from 'cheerio'; @@ -7,6 +6,8 @@ import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; import { excerptContent } from 'utils/text'; +const fs = require('fs'); + describe('WwwFtchineseComExtractor', () => { describe('initial test case', () => { let result; From 2f90a2342f9ade3ba0d7b3ea0fd9c43483771aea Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 18 Feb 2019 01:56:45 +0000 Subject: [PATCH 3/3] fix: update test url as the old one is now behind the paywall --- ...{1549682835827.html => 1550454222229.html} | 203 +++++++----------- .../custom/www.ftchinese.com/index.test.js | 17 +- 2 files changed, 92 insertions(+), 128 deletions(-) rename fixtures/www.ftchinese.com/{1549682835827.html => 1550454222229.html} (50%) diff --git a/fixtures/www.ftchinese.com/1549682835827.html b/fixtures/www.ftchinese.com/1550454222229.html similarity index 50% rename from fixtures/www.ftchinese.com/1549682835827.html rename to fixtures/www.ftchinese.com/1550454222229.html index 648208c18..8ad8c0609 100644 --- a/fixtures/www.ftchinese.com/1549682835827.html +++ b/fixtures/www.ftchinese.com/1550454222229.html @@ -4,8 +4,8 @@ -欧盟否决西门子-阿尔斯通铁路业务合并案 - - FT中文网 - +英国认为华为风险可控 - - FT中文网 + @@ -21,8 +21,8 @@ - - + + @@ -42,18 +42,18 @@ - - - - - + + + + + - - - + + + @@ -147,11 +147,40 @@ - -